diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2699f0b24af192..f8dea4b3953250 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -129,6 +129,7 @@ TECH_DOCS += technical/long-running-process-protocol
TECH_DOCS += technical/multi-pack-index
TECH_DOCS += technical/packfile-uri
TECH_DOCS += technical/pack-heuristics
+TECH_DOCS += technical/paint-down-to-common
TECH_DOCS += technical/parallel-checkout
TECH_DOCS += technical/partial-clone
TECH_DOCS += technical/platform-support
diff --git a/Documentation/technical/meson.build b/Documentation/technical/meson.build
index ec07088c57617f..9ce11d5e484d9c 100644
--- a/Documentation/technical/meson.build
+++ b/Documentation/technical/meson.build
@@ -18,6 +18,7 @@ articles = [
'multi-pack-index.adoc',
'packfile-uri.adoc',
'pack-heuristics.adoc',
+ 'paint-down-to-common.adoc',
'parallel-checkout.adoc',
'partial-clone.adoc',
'platform-support.adoc',
diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc
new file mode 100644
index 00000000000000..983dfcf2332e96
--- /dev/null
+++ b/Documentation/technical/paint-down-to-common.adoc
@@ -0,0 +1,128 @@
+Merge-Base Computation and paint_down_to_common()
+==================================================
+
+The function `paint_down_to_common()` in `commit-reach.c` computes merge
+bases by walking the commit graph backwards from two sets of tips and
+finding where their ancestry meets.
+
+Use cases
+---------
+
+Computing merge bases is used in two different ways:
+
+ 1. *Finding all merge bases* (`merge-base --all`, `merge-tree`,
+ `merge`, `rebase`). A merge base is a common ancestor that is
+ not itself an ancestor of another common ancestor.
+
+ 2. *Ancestry checks* (`in_merge_bases`, used by `merge-base
+ --is-ancestor`, `branch -d`, `fetch`). These ask: "is commit A
+ an ancestor of commit B?" If a common ancestor equals one of the
+ inputs, that input is necessarily the only merge base -- no other
+ common ancestor can be both as recent and not an ancestor of it.
+
+Both use cases share the same algorithm and implementation.
+
+Algorithm
+---------
+
+Given a commit `one` and a set of commits `twos[]`, the walk paints
+commits with two colors:
+
+ - PARENT1: reachable from `one`
+ - PARENT2: reachable from any commit in `twos[]`
+
+The walk uses a priority queue ordered by generation number (falling
+back to commit date when generation numbers are unavailable). Each
+step dequeues the highest-priority commit (this is when we say a
+commit is "visited") and propagates its paint flags to its parents,
+enqueuing them if they gained new flags. When a commit receives
+both PARENT1 and PARENT2, it is a merge-base candidate. A candidate
+gains the STALE flag so its ancestors propagate staleness -- any
+deeper common ancestor is necessarily redundant.
+
+INFINITY and finite generation regions
+--------------------------------------
+
+The commit-graph stores a generation number for each commit. Commits
+not in the commit-graph have generation `GENERATION_NUMBER_INFINITY`. The
+graph is closed under reachability: if a commit is in the graph, all
+its ancestors are too. This partitions the commit graph into two regions:
+
+....
+ +---------------------------------------+
+ | INFINITY region |
+ | generation = INFINITY |
+ | queue order: heuristic (commit date) |
+ +---------------------------------------+
+ |
+ v
+ +---------------------------------------+
+ | Finite region |
+ | generation = finite |
+ | queue order: topological |
+ +---------------------------------------+
+....
+
+When the commit-graph is enabled, the INFINITY region is typically
+very small -- it only contains commits added since the last
+commit-graph refresh.
+
+All reachable INFINITY-generation commits are visited before any
+finite-generation commit, because INFINITY is larger than any finite
+value. Once the walk crosses into the finite region, it stays there.
+
+In the finite region, generation ordering guarantees topological
+traversal: children are always visited before their parents. This
+means that paint on already-visited commits is final -- no future
+traversal step can add paint to them.
+
+In the INFINITY region, commit-date ordering can violate this: a
+parent with a later date can be visited before a child with an earlier
+date. Paint flags are therefore NOT final at visit time, and a
+commit visited with only one side's paint may later gain the other.
+
+Paint flags are only added, never removed. Since each flag can be set
+at most once per commit, the number of times a commit can be
+re-enqueued is bounded by the number of flag transitions.
+
+Termination
+-----------
+
+The walk tracks the number of commits of each type in the queue
+(PARENT1-only, PARENT2-only, pending merge-base). The main loop
+ends when one of the following conditions holds:
+
+ 1. The queue is empty.
+ 2. The queue contains only stale entries.
+ 3. Side exhaustion: no pure PARENT1 or pure PARENT2 commits
+ remain in the queue, no pending merge-base candidates exist,
+ and the walk has entered the finite-generation region.
+
+Stale entry condition
+~~~~~~~~~~~~~~~~~~~~~
+Once all queued entries are stale, no new merge-base candidates can
+be discovered -- that requires at least one non-stale commit from
+each side meeting. Continuing the walk could still invalidate
+existing candidates by proving one is an ancestor of another, but
+`remove_redundant()` handles that as a post-processing step, so it
+is safe to exit early.
+
+Side-exhaustion condition
+~~~~~~~~~~~~~~~~~~~~~~~~~
+A new merge-base requires commits from both sides to meet. When one
+side's exclusive counter reaches zero and there are no pending
+merge-base candidates, no future traversal step can produce a new
+candidate.
+
+This optimization only activates in the finite-generation region
+where topological ordering holds. In that region, children are
+always visited before parents, so paint flags are final at visit
+time and an exhausted side cannot reappear. In the INFINITY region,
+commit-date ordering can violate this guarantee, so the check is
+skipped.
+
+Related documentation
+---------------------
+
+ - `Documentation/technical/commit-graph.adoc` -- generation numbers
+ and the reachability closure property.
diff --git a/commit-reach.c b/commit-reach.c
index 5df471a313cf6b..3ad20e726279e0 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -11,6 +11,7 @@
#include "tag.h"
#include "commit-reach.h"
#include "ewah/ewok.h"
+#include "trace2.h"
/* Remember to update object flag allocation in object.h */
#define PARENT1 (1u<<16)
@@ -78,25 +79,91 @@ static void clear_nonstale_queue(struct nonstale_queue *queue)
queue->max_nonstale = NULL;
}
-static void nonstale_queue_put_dedup(struct nonstale_queue *queue,
- struct commit *c)
+/*
+ * Priority queue with per-side commit counters for paint_down_to_common().
+ * Each non-stale queued commit occupies exactly one bucket: PARENT1-only,
+ * PARENT2-only, or both (a pending merge-base candidate).
+ */
+struct paint_state {
+ struct prio_queue queue;
+ int p1_count;
+ int p2_count;
+ int pending_merge_bases;
+};
+
+static void paint_count_update(struct paint_state *state,
+ unsigned flags, int delta)
{
- if (c->object.flags & ENQUEUED)
- return;
- c->object.flags |= ENQUEUED;
- nonstale_queue_put(queue, c);
+ switch (flags & (PARENT1 | PARENT2 | STALE)) {
+ case PARENT1:
+ state->p1_count += delta;
+ break;
+
+ case PARENT2:
+ state->p2_count += delta;
+ break;
+
+ case PARENT1 | PARENT2:
+ state->pending_merge_bases += delta;
+ break;
+
+ case PARENT1 | PARENT2 | STALE:
+ break;
+
+ default:
+ BUG("unexpected paint state");
+ }
+}
+
+static void paint_queue_put(struct paint_state *state,
+ struct commit *c, unsigned add_flags)
+{
+ unsigned old_flags = c->object.flags;
+ c->object.flags |= add_flags;
+
+ if (old_flags & ENQUEUED) {
+ paint_count_update(state, old_flags, -1);
+ paint_count_update(state, c->object.flags, 1);
+ } else {
+ c->object.flags |= ENQUEUED;
+ prio_queue_put(&state->queue, c);
+ paint_count_update(state, c->object.flags, 1);
+ }
}
-static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue)
+/*
+ * Dequeue the next commit for the paint walk, or return NULL when
+ * no more merge bases can be discovered.
+ */
+static struct commit *paint_queue_get(struct paint_state *state)
{
- struct commit *commit = nonstale_queue_get(queue);
+ struct commit *commit = prio_queue_get(&state->queue);
+
+ if (!commit)
+ return NULL;
+
+ commit->object.flags &= ~ENQUEUED;
- if (commit)
- commit->object.flags &= ~ENQUEUED;
+ if (!state->pending_merge_bases) {
+ /* only stale entries remain */
+ if (!state->p1_count && !state->p2_count)
+ return NULL;
+
+ /* one side is exhausted */
+ if ((!state->p1_count || !state->p2_count) &&
+ commit_graph_generation(commit) < GENERATION_NUMBER_INFINITY)
+ return NULL;
+ }
+
+ paint_count_update(state, commit->object.flags, -1);
return commit;
}
-/* all input commits in one and twos[] must have been parsed! */
+/*
+ * See Documentation/technical/paint-down-to-common.adoc
+ *
+ * All input commits in one and twos[] must have been parsed!
+ */
static int paint_down_to_common(struct repository *r,
struct commit *one, int n,
struct commit **twos,
@@ -104,33 +171,33 @@ static int paint_down_to_common(struct repository *r,
enum merge_base_flags mb_flags,
struct commit_list **result)
{
- struct nonstale_queue queue = {
- { compare_commits_by_gen_then_commit_date }
+ struct paint_state state = {
+ .queue = { compare_commits_by_gen_then_commit_date }
};
+ struct commit *commit;
int i;
+ int steps = 0;
timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
struct commit_list **tail = result;
if (!min_generation && !corrected_commit_dates_enabled(r))
- queue.pq.compare = compare_commits_by_commit_date;
+ state.queue.compare = compare_commits_by_commit_date;
one->object.flags |= PARENT1;
if (!n) {
commit_list_append(one, result);
return 0;
}
- nonstale_queue_put_dedup(&queue, one);
+ paint_queue_put(&state, one, 0);
- for (i = 0; i < n; i++) {
- twos[i]->object.flags |= PARENT2;
- nonstale_queue_put_dedup(&queue, twos[i]);
- }
+ for (i = 0; i < n; i++)
+ paint_queue_put(&state, twos[i], PARENT2);
- while (queue.max_nonstale) {
- struct commit *commit = nonstale_queue_get_dedup(&queue);
+ while ((commit = paint_queue_get(&state))) {
struct commit_list *parents;
int flags;
timestamp_t generation = commit_graph_generation(commit);
+ steps++;
if (min_generation && generation > last_gen)
BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
@@ -165,7 +232,7 @@ static int paint_down_to_common(struct repository *r,
if ((p->object.flags & flags) == flags)
continue;
if (repo_parse_commit(r, p)) {
- clear_nonstale_queue(&queue);
+ clear_prio_queue(&state.queue);
commit_list_free(*result);
*result = NULL;
/*
@@ -180,12 +247,13 @@ static int paint_down_to_common(struct repository *r,
return error(_("could not parse commit %s"),
oid_to_hex(&p->object.oid));
}
- p->object.flags |= flags;
- nonstale_queue_put_dedup(&queue, p);
+ paint_queue_put(&state, p, flags);
}
}
- clear_nonstale_queue(&queue);
+ clear_prio_queue(&state.queue);
+ trace2_data_intmax("paint_down_to_common", r,
+ "steps", steps);
commit_list_sort_by_date(result);
return 0;
}
diff --git a/t/meson.build b/t/meson.build
index 3219264fe7d497..ee6ebdffb92840 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -786,6 +786,7 @@ integration_tests = [
't6041-bisect-submodule.sh',
't6050-replace.sh',
't6060-merge-index.sh',
+ 't6099-merge-base-side-exhaustion.sh',
't6100-rev-list-in-order.sh',
't6101-rev-parse-parents.sh',
't6102-rev-list-unexpected-objects.sh',
diff --git a/t/t6099-merge-base-side-exhaustion.sh b/t/t6099-merge-base-side-exhaustion.sh
new file mode 100755
index 00000000000000..4f1e0d50efbf38
--- /dev/null
+++ b/t/t6099-merge-base-side-exhaustion.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+
+test_description='merge-base with ancestor among merge-base candidates
+
+Test that merge-base --all correctly handles cases where
+multiple merge-base candidates exist and one is an ancestor
+of another. The side-exhaustion optimization in
+paint_down_to_common may exit before STALE propagation
+removes the ancestor, but remove_redundant catches it.
+
+Graph shape (parents are below children):
+
+ A ----------- X
+ |\ /|
+ | B---------/ |
+ | | |
+ e2 \ f2
+ | | |
+ e1 d1 f1
+ \ | /
+ \ | /
+ \| /
+ C
+
+A and X are the two tips.
+B and C are both reachable from A and X.
+B reaches C through d1.
+Only B should appear in merge-base --all output.
+'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+TEST_PASSES_SANITIZE_LEAK=true
+. ./test-lib.sh
+
+test_expect_success 'setup ancestor merge-base candidate' '
+ test_commit C &&
+
+ git checkout -b d-chain HEAD &&
+ test_commit d1 &&
+ test_commit B &&
+
+ git checkout -b e-path C &&
+ test_commit e1 &&
+ test_commit e2 &&
+
+ git checkout -b f-path C &&
+ test_commit f1 &&
+ test_commit f2 &&
+
+ git checkout -b branch-A e-path &&
+ test_merge A B &&
+
+ git checkout -b branch-X f-path &&
+ test_merge X B &&
+
+ git commit-graph write --reachable
+'
+
+test_expect_success 'merge-base --all excludes ancestor candidate' '
+ git rev-parse B >expected &&
+ git merge-base --all A X >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'merge-base (single) finds shallowest' '
+ git rev-parse B >expected &&
+ git merge-base A X >actual &&
+ test_cmp expected actual
+'
+
+# Without commit-graph: generation numbers are INFINITY,
+# side-exhaustion optimization does not fire.
+test_expect_success 'merge-base --all without commit-graph' '
+ rm -f .git/objects/info/commit-graph &&
+ git rev-parse B >expected &&
+ git merge-base --all A X >actual &&
+ test_cmp expected actual
+'
+
+test_done
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index b5b314e57068f9..63650075603109 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -49,6 +49,62 @@ test_expect_success 'setup' '
git tag -a -m "$x-$i" tag-$x-$i commit-$x-$i || return 1
done
done &&
+
+ # Build a small side topology to exercise the (PARENT1|PARENT2) ->
+ # (PARENT1|PARENT2|STALE) transition in paint_down_to_common(); the
+ # 10x10 grid above does not exercise it because no merge-base candidate
+ # there is a descendant of another, so STALE never reaches a
+ # still-pending candidate.
+ #
+ # ps-X
+ # /|\
+ # / | \
+ # ps-Z ps-B ps-W
+ # | / \ |
+ # | / \ |
+ # |/ \|
+ # ps-T1 ps-T2
+ #
+ # where ps-T1=merge(ps-Z,ps-B), ps-T2=merge(ps-W,ps-B), so
+ # merge-base(ps-T1,ps-T2) = ps-B. During the walk, ps-X transitions
+ # to (PARENT1|PARENT2) via ps-Z and ps-W before ps-B is dequeued;
+ # then the STALE-walk from ps-B transitions ps-X to
+ # (PARENT1|PARENT2|STALE).
+ git checkout --orphan ps-orphan &&
+ test_commit ps-X &&
+ git checkout -b ps-B-br ps-X && test_commit ps-B &&
+ git checkout -b ps-Z-br ps-X && test_commit ps-Z &&
+ git checkout -b ps-W-br ps-X && test_commit ps-W &&
+ git checkout -b ps-T1 ps-Z &&
+ git merge --no-ff -m ps-T1 ps-B &&
+ git checkout -b ps-T2 ps-W &&
+ git merge --no-ff -m ps-T2 ps-B &&
+
+ # Build a side topology that lives entirely outside the half
+ # commit-graph and has non-monotonic commit dates, to exercise the
+ # INFINITY-gate in paint_down_to_common. With both tips outside
+ # the graph, generation is INFINITY and the queue falls back to
+ # commit-date order, which here is non-monotonic.
+ #
+ # pi-X (date 500, PARENT1 tip) --> pi-P, pi-D
+ # pi-D (date 480) --> pi-C
+ # pi-C (date 200) --> pi-B
+ # pi-B (date 100, PARENT2 tip) --> pi-P
+ # pi-P (date 450, root)
+ #
+ # merge-base(pi-X, pi-B) = pi-B (it is an ancestor of pi-X and is
+ # itself one of the queried tips).
+ git checkout --orphan pi-orphan &&
+ test_commit --date "@450 +0000" pi-P &&
+ test_commit --date "@100 +0000" pi-B &&
+ test_commit --date "@200 +0000" pi-C &&
+ test_commit --date "@480 +0000" pi-D &&
+ GIT_AUTHOR_DATE="@500 +0000" GIT_COMMITTER_DATE="@500 +0000" \
+ git commit-tree -p pi-D -p pi-P -m pi-X pi-D^{tree} >pi-X-oid &&
+ pi_x="$(cat pi-X-oid)" &&
+ git branch -f pi-X-br "$pi_x" &&
+ git tag pi-X "$pi_x" &&
+
git commit-graph write --reachable &&
mv .git/objects/info/commit-graph commit-graph-full &&
chmod u+w commit-graph-full &&
@@ -62,24 +118,34 @@ test_expect_success 'setup' '
'
run_all_modes () {
- test_when_finished rm -rf .git/objects/info/commit-graph &&
- "$@" actual &&
- test_cmp expect actual &&
- cp commit-graph-full .git/objects/info/commit-graph &&
- "$@" actual &&
- test_cmp expect actual &&
- cp commit-graph-half .git/objects/info/commit-graph &&
- "$@" actual &&
- test_cmp expect actual &&
- cp commit-graph-no-gdat .git/objects/info/commit-graph &&
- "$@" actual &&
- test_cmp expect actual
+ graph=.git/objects/info/commit-graph &&
+ test_when_finished rm -rf "$graph" "${graph}s" &&
+ rm -f trace-mode-*.txt &&
+
+ for mode in none full half no-gdat
+ do
+ rm -rf "$graph" "${graph}s" &&
+ cp "commit-graph-${mode}" "$graph" 2>/dev/null ||
+ true &&
+ GIT_TRACE2_EVENT="$(pwd)/trace-mode-${mode}.txt" \
+ "$@" actual &&
+ test_cmp expect actual || return 1
+ done
}
test_all_modes () {
run_all_modes test-tool reach "$@"
}
+test_paint_down_steps () {
+ for mode in none full half no-gdat
+ do
+ test_trace2_data paint_down_to_common steps "$1" \
+ <"trace-mode-${mode}.txt" || return 1
+ shift
+ done
+}
+
test_expect_success 'ref_newer:miss' '
cat >input <<-\EOF &&
A:commit-5-7
@@ -146,6 +212,17 @@ test_expect_success 'in_merge_bases_many:miss-heuristic' '
test_all_modes in_merge_bases_many
'
+test_expect_success 'in_merge_bases_many:self' '
+ cat >input <<-\EOF &&
+ A:commit-6-8
+ X:commit-5-9
+ X:commit-6-8
+ EOF
+ echo "in_merge_bases_many(A,X):1" >expect &&
+ test_all_modes in_merge_bases_many &&
+ test_paint_down_steps 45 1 25 1
+'
+
test_expect_success 'is_descendant_of:hit' '
cat >input <<-\EOF &&
A:commit-5-7
@@ -183,6 +260,86 @@ test_expect_success 'get_merge_bases_many' '
test_all_modes get_merge_bases_many
'
+test_expect_success 'get_merge_bases_many:duplicate-twos' '
+ cat >input <<-\EOF &&
+ A:commit-5-7
+ X:commit-4-8
+ X:commit-4-8
+ X:commit-6-6
+ X:commit-6-6
+ X:commit-8-3
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse commit-5-6 \
+ commit-4-7 | sort
+ } >expect &&
+ test_all_modes get_merge_bases_many
+'
+
+test_expect_success 'get_merge_bases_many:pending-stale' '
+ # Exercises the (PARENT1|PARENT2) -> (...|STALE) transition path in
+ # paint_down_to_common(). See the topology comment in the setup test.
+ cat >input <<-\EOF &&
+ A:ps-T1
+ X:ps-T2
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse ps-B
+ } >expect &&
+ test_all_modes get_merge_bases_many &&
+ test_paint_down_steps 5 5 5 5
+'
+
+test_expect_success 'get_merge_bases_many:infinity-both-sides' '
+ # Exercises the push-time INFINITY-gate in paint_down_to_common(). See
+ # the pi-* topology comment in the setup test.
+ cat >input <<-\EOF &&
+ A:pi-X
+ X:pi-B
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse pi-B
+ } >expect &&
+ test_all_modes get_merge_bases_many &&
+ test_paint_down_steps 5 4 5 5
+'
+
+test_expect_success 'setup mixed finite/INFINITY topology' '
+ # Create a commit outside all saved commit-graph files so it always
+ # has INFINITY generation, while its parent (ps-X) is in the graph
+ # with a finite generation. Use the ps-* orphan topology so we do
+ # not pollute the grid-based rev-list tests.
+ git checkout ps-X &&
+ test_env GIT_TEST_COMMIT_GRAPH= test_commit pm-INF
+'
+
+test_expect_success 'get_merge_bases_many:mixed-finite-infinity' '
+ # One tip (pm-INF) is outside the commit-graph with INFINITY
+ # generation; the other (ps-B) is in the graph with finite
+ # generation. The walk starts in the INFINITY region and crosses
+ # into the finite region where side-exhaustion can fire.
+ cat >input <<-\EOF &&
+ A:pm-INF
+ X:ps-B
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse ps-X
+ } >expect &&
+ test_all_modes get_merge_bases_many &&
+ test_paint_down_steps 3 3 3 3
+'
+
+test_expect_success 'merge-base --all commit-walk steps' '
+ >input &&
+ git rev-parse commit-9-1 >expect &&
+ run_all_modes git merge-base --all commit-9-9 commit-9-1 &&
+ test_paint_down_steps 81 9 57 10
+'
+
test_expect_success 'reduce_heads' '
cat >input <<-\EOF &&
X:commit-1-10