From a138931cbaf67c0f0dc3adee98dce7f5fdd80955 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 23:33:56 +0200 Subject: [PATCH 1/7] repo: split annotated tags out from total tag count in structure Mirror what git survey already reports: lightweight tags (pointing straight at a commit/tree/blob) and annotated tags (pointing at an OBJ_TAG that is itself stored as a separate object) are different things in many monorepo contexts, and one of the differences git survey users routinely care about. Add an annotated_tags counter to struct ref_stats, populate it in count_references() by peeking at the ref OID's object type, and expose it as a sub-row under Tags in the table output and as references.tags.annotated.count in the machine-readable formats. Step toward pivoting the standalone git survey command onto git repo structure; this fills the first of the four feature gaps documented in the assessment. Tests in t1901 widened to assert the new row and key. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- builtin/repo.c | 10 ++++++++++ t/t1901-repo-structure.sh | 3 +++ 2 files changed, 13 insertions(+) diff --git a/builtin/repo.c b/builtin/repo.c index 71a5c1c29c05fe..d3efda94a49159 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -269,6 +269,7 @@ struct ref_stats { size_t branches; size_t remotes; size_t tags; + size_t annotated_tags; size_t others; }; @@ -454,6 +455,8 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_count_addf(table, ref_total, " * %s", _("Count")); stats_table_count_addf(table, refs->branches, " * %s", _("Branches")); stats_table_count_addf(table, refs->tags, " * %s", _("Tags")); + stats_table_count_addf(table, refs->annotated_tags, + " * %s", _("Annotated")); stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); stats_table_count_addf(table, refs->others, " * %s", _("Others")); @@ -637,6 +640,8 @@ static void structure_keyvalue_print(struct repo_structure *stats, 
stats->refs.branches, value_delim); print_keyvalue("references.tags.count", key_delim, stats->refs.tags, value_delim); + print_keyvalue("references.tags.annotated.count", key_delim, + stats->refs.annotated_tags, value_delim); print_keyvalue("references.remotes.count", key_delim, stats->refs.remotes, value_delim); print_keyvalue("references.others.count", key_delim, @@ -689,6 +694,7 @@ static void structure_keyvalue_print(struct repo_structure *stats, struct count_references_data { struct ref_stats *stats; struct rev_info *revs; + struct repository *repo; struct progress *progress; }; @@ -707,6 +713,9 @@ static int count_references(const struct reference *ref, void *cb_data) break; case FILTER_REFS_TAGS: stats->tags++; + if (odb_read_object_info(data->repo->objects, + ref->oid, NULL) == OBJ_TAG) + stats->annotated_tags++; break; case FILTER_REFS_OTHERS: stats->others++; @@ -735,6 +744,7 @@ static void structure_count_references(struct ref_stats *stats, struct count_references_data data = { .stats = stats, .revs = revs, + .repo = repo, }; if (show_progress) diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 10050abd70fd67..f3a8e68d5c40c0 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -33,6 +33,7 @@ test_expect_success 'empty repository' ' | * Count | 0 | | * Branches | 0 | | * Tags | 0 | + | * Annotated | 0 | | * Remotes | 0 | | * Others | 0 | | | | @@ -97,6 +98,7 @@ test_expect_success SHA1 'repository with references and objects' ' | * Count | 4 | | * Branches | 1 | | * Tags | 1 | + | * Annotated | 1 | | * Remotes | 1 | | * Others | 1 | | | | @@ -155,6 +157,7 @@ test_expect_success SHA1 'lines and nul format' ' cat >expect <<-EOF && references.branches.count=1 references.tags.count=1 + references.tags.annotated.count=1 references.remotes.count=0 references.others.count=0 objects.commits.count=42 From b0ba112a0597ca0d0e06fbbb6c163d4433651699 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jun 2026 
00:47:53 +0200 Subject: [PATCH 2/7] repo: filter the structure scope via --ref-filter=<pattern> `git repo structure` walks every reference enumerated by `refs_for_each_ref()` and feeds each reference's tip into the path walk that produces the object counts. There is no way to scope the inquiry to a subset of refs, even though that is the most common need when an operator is investigating what part of the history is driving cost: only branches, only release tags, only one remote's view, etc. Add a single `--ref-filter=<pattern>` option that, when given, restricts both the reference count and the object walk to refs whose full name matches one of the patterns. The option is repeatable; multiple patterns form a union, so `--ref-filter='refs/heads/*' --ref-filter='refs/tags/v*'` includes local branches and tags whose short name starts with `v`. Patterns use `wildmatch()` with `WM_PATHNAME` semantics so a `*` does not cross `/`, matching the convention used by `git for-each-ref` positional arguments. Choosing a single flexible filter, rather than a proliferation of per-kind flags like `--branches`, `--tags`, `--remotes`, keeps the option surface small and lets the same mechanism express narrow selections the per-kind flags could not, such as "only release tags" (`'refs/tags/v*'`) or "only one remote's branches" (`'refs/remotes/origin/*'`). Without `--ref-filter`, behaviour is unchanged: every ref `refs_for_each_ref()` enumerates contributes. Both the reference counter and the path-walk seeding (via `add_pending_oid()`) sit on the same callback, so an early return when no pattern matches naturally excludes a ref from both. No separate object-walk machinery is needed. Cover the two interesting code paths with tests in t1901: a single filter narrowing to branches, and two filters unioning to include both branches and tags. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- Documentation/git-repo.adoc | 19 +++++++++++++++++-- builtin/repo.c | 28 ++++++++++++++++++++++++++-- t/t1901-repo-structure.sh | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 42262c198347e5..280122cd7b963d 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -10,7 +10,7 @@ SYNOPSIS [synopsis] git repo info [--format=(lines|nul) | -z] [--all | ...] git repo info --keys [--format=(lines|nul) | -z] -git repo structure [--format=(table|lines|nul) | -z] +git repo structure [] DESCRIPTION ----------- @@ -56,7 +56,7 @@ supported: `nul`::: Similar to `lines`, but using a _NUL_ character after each value. -`structure [--format=(table|lines|nul) | -z]`:: +`structure`:: Retrieve statistics about the current repository structure. The following kinds of information are reported: + @@ -66,6 +66,21 @@ supported: * Total disk size of reachable objects by type * Largest reachable objects in the repository by type + +By default every reference enumerated by `for-each-ref` contributes to +the counts and object walk. Use `--ref-filter` to narrow the scope. + + `--ref-filter=`;; + Only count references whose full name matches one of the + given s, and only seed the object walk from + those references. The option is repeatable; multiple + patterns form a union. Patterns use the same `wildmatch` + semantics as `git for-each-ref`'s positional arguments, + so `*` does not cross `/`. Examples: + `--ref-filter='refs/heads/*'` for local branches, + `--ref-filter='refs/tags/v*'` for release tags, + `--ref-filter='refs/remotes/origin/*'` for a single + remote's branches. ++ The output format can be chosen through the flag `--format`. 
Three formats are supported: + diff --git a/builtin/repo.c b/builtin/repo.c index d3efda94a49159..646fbdcf9188c2 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -19,13 +19,14 @@ #include "tree.h" #include "tree-walk.h" #include "utf8.h" +#include "wildmatch.h" #define REPO_INFO_USAGE \ "git repo info [--format=(lines|nul) | -z] [--all | ...]", \ "git repo info --keys [--format=(lines|nul) | -z]" #define REPO_STRUCTURE_USAGE \ - "git repo structure [--format=(table|lines|nul) | -z]" + "git repo structure []" static const char *const repo_usage[] = { REPO_INFO_USAGE, @@ -695,15 +696,30 @@ struct count_references_data { struct ref_stats *stats; struct rev_info *revs; struct repository *repo; + const struct string_list *filters; struct progress *progress; }; +static int ref_matches_any_filter(const char *refname, + const struct string_list *filters) +{ + if (!filters->nr) + return 1; + for (size_t i = 0; i < filters->nr; i++) + if (!wildmatch(filters->items[i].string, refname, WM_PATHNAME)) + return 1; + return 0; +} + static int count_references(const struct reference *ref, void *cb_data) { struct count_references_data *data = cb_data; struct ref_stats *stats = data->stats; size_t ref_count; + if (!ref_matches_any_filter(ref->name, data->filters)) + return 0; + switch (ref_kind_from_refname(ref->name)) { case FILTER_REFS_BRANCHES: stats->branches++; @@ -739,12 +755,14 @@ static int count_references(const struct reference *ref, void *cb_data) static void structure_count_references(struct ref_stats *stats, struct rev_info *revs, struct repository *repo, + const struct string_list *filters, int show_progress) { struct count_references_data data = { .stats = stats, .revs = revs, .repo = repo, + .filters = filters, }; if (show_progress) @@ -893,6 +911,7 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct repo_structure stats = { 0 }; struct rev_info revs; int show_progress = -1; + struct string_list ref_filters = 
STRING_LIST_INIT_DUP; struct option options[] = { OPT_CALLBACK_F(0, "format", &format, N_("format"), N_("output format"), @@ -902,6 +921,9 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, PARSE_OPT_NONEG | PARSE_OPT_NOARG, parse_format_cb), OPT_BOOL(0, "progress", &show_progress, N_("show progress")), + OPT_STRING_LIST(0, "ref-filter", &ref_filters, N_("pattern"), + N_("only count refs matching ; " + "repeat to union multiple patterns")), OPT_END() }; @@ -914,7 +936,8 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, if (show_progress < 0) show_progress = isatty(2); - structure_count_references(&stats.refs, &revs, repo, show_progress); + structure_count_references(&stats.refs, &revs, repo, &ref_filters, + show_progress); structure_count_objects(&stats.objects, &revs, repo, show_progress); switch (format) { @@ -933,6 +956,7 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, } stats_table_clear(&table); + string_list_clear(&ref_filters, 0); release_revisions(&revs); return 0; diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index f3a8e68d5c40c0..9552f489d75b0d 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -227,6 +227,41 @@ test_expect_success 'progress meter option' ' ) ' +test_expect_success '--ref-filter narrows the set of refs' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit foo && + git tag v1 && + git update-ref refs/remotes/origin/main HEAD && + + git repo structure --format=lines \ + --ref-filter="refs/heads/*" >out && + grep "^references.branches.count=1$" out && + grep "^references.tags.count=0$" out && + grep "^references.remotes.count=0$" out + ) +' + +test_expect_success '--ref-filter unions multiple patterns' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit foo && + git tag v1 && + git update-ref refs/remotes/origin/main HEAD && + + 
git repo structure --format=lines \ + --ref-filter="refs/heads/*" \ + --ref-filter="refs/tags/*" >out && + grep "^references.branches.count=1$" out && + grep "^references.tags.count=2$" out && + grep "^references.remotes.count=0$" out + ) +' + test_expect_success 'git repo structure -h shows only repo structure usage' ' test_must_fail git repo structure -h >actual && test_grep "git repo structure" actual && From 94a4060bb28e4a15762103d7902c4d99e5896376 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jun 2026 00:05:16 +0200 Subject: [PATCH 3/7] repo: report top-N paths by count, disk, and inflated size in structure `git survey` distinguishes itself from `git repo structure` largely by its path-level reporting: in addition to whole-repo totals it lists the paths whose object histories dominate the repository, ranked by raw count, on-disk size, and inflated size, separately for trees and blobs. That is often the most actionable output from `git survey`, since it points an operator at the directories and files that should be reviewed for cleanup, sparse-checkout exclusion, or rewriting. `git repo structure` already drives the same path-walk traversal that `git survey` uses to gather its per-path numbers; the callback simply discards the path. Aggregate per-(path, type) summaries inside that existing callback and add a bounded, descending-sorted "top-N" table keyed by each of the three axes. Gate the feature behind a new `--top=<n>` option, defaulting to 0, so unadorned invocations are unaffected and pay no extra work for the top-N tracking. Mirror the sort and eviction strategy from `builtin/survey.c`: keep an array of at most N entries sorted from largest to smallest, walk it from the bottom on each candidate, and shift entries down when a new one belongs. Compared to `builtin/survey.c`, drop the void-pointer indirection in the table data, type the comparator's arguments, and fold the trivial comparators into the `(a > b) - (a < b)` idiom. 
For the human-readable `table` output, extend the existing nested bullet layout with two new top-level sections, `* Top trees` and `* Top blobs`, each containing three sub-tables (`Top by count`, `Top by disk size`, `Top by inflated size`). The path becomes the row name and the relevant scalar becomes the value, reusing `stats_table_count_addf` and `stats_table_size_addf` so units and column alignment match the rest of the table. For the `lines`/`nul` key-value formats, emit one `objects.<type>.top.by_<axis>.<rank>.path=<path>` entry alongside an `objects.<type>.top.by_<axis>.<rank>.<axis>=<value>` entry per ranked path, so consumers can dispatch by axis without parsing the schema. The root tree's path is the empty string as produced by the path-walk machinery; preserve that as-is to stay faithful to the upstream representation rather than fabricating a placeholder. This is the first piece of folding `git survey`'s functionality into `git repo structure`. Subsequent commits will add the corresponding configuration knob and, eventually, turn `git survey` into a thin deprecated shim over `git repo structure`. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- Documentation/git-repo.adoc | 11 ++ builtin/repo.c | 245 +++++++++++++++++++++++++++++++++++- 2 files changed, 253 insertions(+), 3 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 280122cd7b963d..494fe555e92de3 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -65,6 +65,8 @@ supported: * Total inflated size of reachable objects by type * Total disk size of reachable objects by type * Largest reachable objects in the repository by type +* Optionally, the top-_N_ largest paths by count, on-disk size, and + inflated size (see `--top` below) + By default every reference enumerated by `for-each-ref` contributes to the counts and object walk. Use `--ref-filter` to narrow the scope. @@ -80,6 +82,15 @@ the counts and object walk. Use `--ref-filter` to narrow the scope. 
`--ref-filter='refs/tags/v*'` for release tags, `--ref-filter='refs/remotes/origin/*'` for a single remote's branches. + + `--top=`;; + Also report the _n_ largest paths in the repository, + separately for trees and blobs and separately ranked by + object count, on-disk size, and inflated size. Defaults + to `0`, which suppresses the detail tables. The default + can also be set via the `repo.structure.top` configuration + variable; an explicit `--top=` on the command line + overrides the configured value. + The output format can be chosen through the flag `--format`. Three formats are supported: diff --git a/builtin/repo.c b/builtin/repo.c index 646fbdcf9188c2..8b998133482015 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -266,6 +266,40 @@ struct largest_objects { struct object_data tree_entries; }; +/* + * Per-path summary of all objects that share a given (path, type) under the + * path-walk traversal: the count of objects, their on-disk size, and their + * inflated size. + */ +struct path_size_summary { + char *path; + size_t nr; + size_t disk_size; + size_t inflated_size; +}; + +typedef int (*path_summary_cmp)(const struct path_size_summary *, + const struct path_size_summary *); + +/* + * A bounded, descending-sorted list of the largest summaries seen so far, + * with a fixed comparison function defining "largest". New summaries are + * inserted with maybe_insert_into_top_paths(); smaller ones fall off the + * end of the list. 
+ */ +struct top_paths_table { + path_summary_cmp cmp_fn; + size_t nr; + size_t alloc; + struct path_size_summary *data; +}; + +struct top_paths { + struct top_paths_table by_count; + struct top_paths_table by_disk; + struct top_paths_table by_inflated; +}; + struct ref_stats { size_t branches; size_t remotes; @@ -286,6 +320,8 @@ struct object_stats { struct object_values inflated_sizes; struct object_values disk_sizes; struct largest_objects largest; + struct top_paths top_trees; + struct top_paths top_blobs; }; struct repo_structure { @@ -530,6 +566,41 @@ static void stats_table_setup_structure(struct stats_table *table, " * %s", _("Maximum size")); } +static void stats_table_add_top_paths(struct stats_table *table, + const struct top_paths *top, + const char *header) +{ + if (!top->by_count.nr && !top->by_disk.nr && !top->by_inflated.nr) + return; + + stats_table_addf(table, ""); + stats_table_addf(table, "* %s", header); + + stats_table_addf(table, " * %s", _("Top by count")); + for (size_t i = 0; i < top->by_count.nr; i++) + stats_table_count_addf(table, top->by_count.data[i].nr, + " * %s", top->by_count.data[i].path); + + stats_table_addf(table, " * %s", _("Top by disk size")); + for (size_t i = 0; i < top->by_disk.nr; i++) + stats_table_size_addf(table, top->by_disk.data[i].disk_size, + " * %s", top->by_disk.data[i].path); + + stats_table_addf(table, " * %s", _("Top by inflated size")); + for (size_t i = 0; i < top->by_inflated.nr; i++) + stats_table_size_addf(table, + top->by_inflated.data[i].inflated_size, + " * %s", + top->by_inflated.data[i].path); +} + +static void stats_table_setup_top_paths(struct stats_table *table, + struct object_stats *objects) +{ + stats_table_add_top_paths(table, &objects->top_trees, _("Top trees")); + stats_table_add_top_paths(table, &objects->top_blobs, _("Top blobs")); +} + #define INDEX_WIDTH 4 static void stats_table_print_structure(const struct stats_table *table) @@ -634,6 +705,49 @@ static void print_object_data(const 
char *key, char key_delim, value_delim); } +static void print_keyvalue_path(const char *key, char key_delim, + const char *path, char value_delim) +{ + printf("%s%c%s%c", key, key_delim, path, value_delim); +} + +static void top_paths_keyvalue_print(const char *prefix, + const struct top_paths *top, + char key_delim, char value_delim) +{ + for (size_t i = 0; i < top->by_count.nr; i++) { + printf("%s.by_count.%" PRIuMAX ".", + prefix, (uintmax_t)(i + 1)); + print_keyvalue_path("path", key_delim, + top->by_count.data[i].path, value_delim); + printf("%s.by_count.%" PRIuMAX ".", + prefix, (uintmax_t)(i + 1)); + print_keyvalue("count", key_delim, + top->by_count.data[i].nr, value_delim); + } + for (size_t i = 0; i < top->by_disk.nr; i++) { + printf("%s.by_disk_size.%" PRIuMAX ".", + prefix, (uintmax_t)(i + 1)); + print_keyvalue_path("path", key_delim, + top->by_disk.data[i].path, value_delim); + printf("%s.by_disk_size.%" PRIuMAX ".", + prefix, (uintmax_t)(i + 1)); + print_keyvalue("disk_size", key_delim, + top->by_disk.data[i].disk_size, value_delim); + } + for (size_t i = 0; i < top->by_inflated.nr; i++) { + printf("%s.by_inflated_size.%" PRIuMAX ".", + prefix, (uintmax_t)(i + 1)); + print_keyvalue_path("path", key_delim, + top->by_inflated.data[i].path, value_delim); + printf("%s.by_inflated_size.%" PRIuMAX ".", + prefix, (uintmax_t)(i + 1)); + print_keyvalue("inflated_size", key_delim, + top->by_inflated.data[i].inflated_size, + value_delim); + } +} + static void structure_keyvalue_print(struct repo_structure *stats, char key_delim, char value_delim) { @@ -689,6 +803,11 @@ static void structure_keyvalue_print(struct repo_structure *stats, print_object_data("objects.trees.max_entries", key_delim, &stats->objects.largest.tree_entries, value_delim); + top_paths_keyvalue_print("objects.trees.top", &stats->objects.top_trees, + key_delim, value_delim); + top_paths_keyvalue_print("objects.blobs.top", &stats->objects.top_blobs, + key_delim, value_delim); + fflush(stdout); } 
@@ -777,8 +896,88 @@ struct count_objects_data { struct object_database *odb; struct object_stats *stats; struct progress *progress; + size_t top_nr; }; +static int cmp_by_nr(const struct path_size_summary *s1, + const struct path_size_summary *s2) +{ + return (s1->nr > s2->nr) - (s1->nr < s2->nr); +} + +static int cmp_by_disk_size(const struct path_size_summary *s1, + const struct path_size_summary *s2) +{ + return (s1->disk_size > s2->disk_size) - + (s1->disk_size < s2->disk_size); +} + +static int cmp_by_inflated_size(const struct path_size_summary *s1, + const struct path_size_summary *s2) +{ + return (s1->inflated_size > s2->inflated_size) - + (s1->inflated_size < s2->inflated_size); +} + +static void init_top_paths_table(struct top_paths_table *top, size_t limit, + path_summary_cmp cmp) +{ + top->cmp_fn = cmp; + top->alloc = limit; + top->nr = 0; + CALLOC_ARRAY(top->data, limit); +} + +static void init_top_paths(struct top_paths *top, size_t limit) +{ + init_top_paths_table(&top->by_count, limit, cmp_by_nr); + init_top_paths_table(&top->by_disk, limit, cmp_by_disk_size); + init_top_paths_table(&top->by_inflated, limit, cmp_by_inflated_size); +} + +static void clear_top_paths_table(struct top_paths_table *top) +{ + for (size_t i = 0; i < top->nr; i++) + free(top->data[i].path); + free(top->data); +} + +static void clear_top_paths(struct top_paths *top) +{ + clear_top_paths_table(&top->by_count); + clear_top_paths_table(&top->by_disk); + clear_top_paths_table(&top->by_inflated); +} + +/* + * Insert 'summary' into 'top' if it ranks among the top alloc entries by the + * table's comparator. The list is kept sorted from largest (index 0) to + * smallest. If the table is already full, the smallest entry is evicted to + * make room. 
+ */ +static void maybe_insert_into_top_paths(struct top_paths_table *top, + const struct path_size_summary *summary) +{ + size_t pos = top->nr; + + while (pos > 0 && top->cmp_fn(&top->data[pos - 1], summary) < 0) + pos--; + + if (pos >= top->alloc) + return; + + if (top->nr == top->alloc) + free(top->data[top->nr - 1].path); + else + top->nr++; + + for (size_t i = top->nr - 1; i > pos; i--) + top->data[i] = top->data[i - 1]; + + top->data[pos] = *summary; + top->data[pos].path = xstrdup(summary->path); +} + static void check_largest(struct object_data *data, struct object_id *oid, size_t value) { @@ -802,11 +1001,12 @@ static size_t count_tree_entries(struct object *obj) return count; } -static int count_objects(const char *path UNUSED, struct oid_array *oids, +static int count_objects(const char *path, struct oid_array *oids, enum object_type type, void *cb_data) { struct count_objects_data *data = cb_data; struct object_stats *stats = data->stats; + struct path_size_summary summary = { .path = (char *)path }; size_t object_count; for (size_t i = 0; i < oids->nr; i++) { @@ -830,6 +1030,10 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, obj = parse_object_buffer(the_repository, &oids->oid[i], type, inflated, content, &eaten); + summary.nr++; + summary.disk_size += disk; + summary.inflated_size += inflated; + switch (type) { case OBJ_TAG: stats->type_counts.tags++; @@ -872,6 +1076,22 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, free(content); } + if (data->top_nr) { + struct top_paths *top = NULL; + + if (type == OBJ_TREE) + top = &stats->top_trees; + else if (type == OBJ_BLOB) + top = &stats->top_blobs; + + if (top) { + maybe_insert_into_top_paths(&top->by_count, &summary); + maybe_insert_into_top_paths(&top->by_disk, &summary); + maybe_insert_into_top_paths(&top->by_inflated, + &summary); + } + } + object_count = get_total_object_values(&stats->type_counts); display_progress(data->progress, 
object_count); @@ -880,12 +1100,14 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, static void structure_count_objects(struct object_stats *stats, struct rev_info *revs, - struct repository *repo, int show_progress) + struct repository *repo, size_t top_nr, + int show_progress) { struct path_walk_info info = PATH_WALK_INFO_INIT; struct count_objects_data data = { .odb = repo->objects, .stats = stats, + .top_nr = top_nr, }; info.revs = revs; @@ -911,6 +1133,7 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct repo_structure stats = { 0 }; struct rev_info revs; int show_progress = -1; + int top_nr = 0; struct string_list ref_filters = STRING_LIST_INIT_DUP; struct option options[] = { OPT_CALLBACK_F(0, "format", &format, N_("format"), @@ -924,25 +1147,37 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, OPT_STRING_LIST(0, "ref-filter", &ref_filters, N_("pattern"), N_("only count refs matching ; " "repeat to union multiple patterns")), + OPT_INTEGER(0, "top", &top_nr, + N_("report the top largest paths " + "per category")), OPT_END() }; argc = parse_options(argc, argv, prefix, options, repo_structure_usage, 0); if (argc) usage(_("too many arguments")); + if (top_nr < 0) + die(_("--top= must be non-negative")); repo_init_revisions(repo, &revs, prefix); if (show_progress < 0) show_progress = isatty(2); + if (top_nr) { + init_top_paths(&stats.objects.top_trees, top_nr); + init_top_paths(&stats.objects.top_blobs, top_nr); + } + structure_count_references(&stats.refs, &revs, repo, &ref_filters, show_progress); - structure_count_objects(&stats.objects, &revs, repo, show_progress); + structure_count_objects(&stats.objects, &revs, repo, top_nr, + show_progress); switch (format) { case FORMAT_TABLE: stats_table_setup_structure(&table, &stats); + stats_table_setup_top_paths(&table, &stats.objects); stats_table_print_structure(&table); break; case FORMAT_NEWLINE_TERMINATED: @@ 
-957,6 +1192,10 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, stats_table_clear(&table); string_list_clear(&ref_filters, 0); + if (top_nr) { + clear_top_paths(&stats.objects.top_trees); + clear_top_paths(&stats.objects.top_blobs); + } release_revisions(&revs); return 0; From 7ac330ffc47a0005a93ec7addcb53856f42297bf Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jun 2026 00:13:46 +0200 Subject: [PATCH 4/7] t1901: cover the --top option of `git repo structure` The preceding commit added `--top=<n>` to `git repo structure`, reporting the top-N paths per type ranked by count, on-disk size, and inflated size. Cover the three behaviors that matter for that option: * Without `--top`, the key-value output emits no `top.*` keys, so existing parsers stay unaffected. * `--top=N` produces exactly N ranked entries on each of the six `objects.<type>.top.by_<axis>` axes (count/disk_size/inflated_size crossed with trees/blobs), and a constructed input where one blob is several orders of magnitude bigger than the other lets us assert the ordering on the disk-size and inflated-size axes. * A negative `--top` is rejected with a non-zero exit and a message naming the constraint, so a typo cannot silently degrade into the default zero. Avoid grep patterns starting with `--`; grep would parse such a pattern as a command-line option rather than as a regular expression. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- t/t1901-repo-structure.sh | 50 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 9552f489d75b0d..1137a19a041d06 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -262,6 +262,56 @@ test_expect_success '--ref-filter unions multiple patterns' ' ) ' +test_expect_success '--top omitted: no top.* keys' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit foo && + + git repo structure --format=lines >out && + ! grep "\.top\." out + ) +' + +test_expect_success '--top=N reports the N largest paths per axis' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + mkdir -p dir1 dir2 && + echo small >dir1/small.txt && + printf "%010000d" 0 >dir2/big.txt && + git add . && + test_tick && + git commit -m commit && + + git repo structure --format=lines --top=2 >out && + + # Two ranked entries on each axis for both types. + for axis in by_count by_disk_size by_inflated_size + do + for type in trees blobs + do + key=objects.${type}.top.${axis} && + grep -E "^${key}\.1\.path=" out && + grep -E "^${key}\.2\.path=" out && + ! grep -E "^${key}\.3\." out || return 1 + done + done && + + # The big blob outranks the small one on disk and inflated. 
+ key=objects.blobs.top && + grep "^${key}.by_disk_size.1.path=dir2/big.txt$" out && + grep "^${key}.by_inflated_size.1.path=dir2/big.txt$" out + ) +' + +test_expect_success '--top rejects negative values' ' + test_must_fail git repo structure --top=-1 2>err && + test_grep "must be non-negative" err +' + test_expect_success 'git repo structure -h shows only repo structure usage' ' test_must_fail git repo structure -h >actual && test_grep "git repo structure" actual && From 42da23870b421dd182667d2b517afdd59d7357eb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jun 2026 00:23:37 +0200 Subject: [PATCH 5/7] repo: read the `--top` default from `repo.structure.top` `git survey` exposes its `--top` default via `survey.top` so that a site or per-repository operator can switch the detail tables on once and have every subsequent invocation include them. Mirror those ergonomics for `git repo structure` so that, as `git survey`'s functionality is folded into `git repo structure`, the configuration side of the migration story stays equivalent. Add a small `git_config_int` callback bound to `repo.structure.top` and invoke it before `parse_options()`, so a `--top=<n>` on the command line cleanly overrides the configured default (including `--top=0` to opt out of the detail tables when configuration enables them). Reject negative configured values with the same wording as the command-line guard, since `git_config_int()` happily returns negative integers. Document the new variable in a fresh `Documentation/config/repo.adoc` and wire it into the alphabetical includes in `Documentation/config.adoc` between `repack.adoc` and `rerere.adoc`. Cover the precedence behaviour with a t1901 test: a configured value enables the tables by default, and a command-line `--top=0` suppresses them again. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- Documentation/config.adoc | 2 ++ Documentation/config/repo.adoc | 11 +++++++++++ builtin/repo.c | 19 +++++++++++++++++++ t/t1901-repo-structure.sh | 17 +++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 Documentation/config/repo.adoc diff --git a/Documentation/config.adoc b/Documentation/config.adoc index bd7187c7b48e4b..2f6724a7af4fb4 100644 --- a/Documentation/config.adoc +++ b/Documentation/config.adoc @@ -511,6 +511,8 @@ include::config/remotes.adoc[] include::config/repack.adoc[] +include::config/repo.adoc[] + include::config/rerere.adoc[] include::config/revert.adoc[] diff --git a/Documentation/config/repo.adoc b/Documentation/config/repo.adoc new file mode 100644 index 00000000000000..7f8cd632965aab --- /dev/null +++ b/Documentation/config/repo.adoc @@ -0,0 +1,11 @@ +repo.structure.*:: + These variables adjust the default behavior of the + `git repo structure` command. ++ +-- + top:: + This integer value implies `--top=`, specifying the + number of largest paths to report in each detail table. + Must be non-negative; defaults to `0`, which disables the + detail tables. 
+-- diff --git a/builtin/repo.c b/builtin/repo.c index 8b998133482015..c094f33c778eee 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "commit.h" +#include "config.h" #include "environment.h" #include "hash.h" #include "hex.h" @@ -1122,6 +1123,22 @@ static void structure_count_objects(struct object_stats *stats, stop_progress(&data.progress); } +static int repo_structure_config_cb(const char *var, const char *value, + const struct config_context *cctx, + void *cb) +{ + int *top_nr = cb; + + if (!strcmp(var, "repo.structure.top")) { + *top_nr = git_config_int(var, value, cctx->kvi); + if (*top_nr < 0) + die(_("repo.structure.top must be non-negative")); + return 0; + } + + return 0; +} + static int cmd_repo_structure(int argc, const char **argv, const char *prefix, struct repository *repo) { @@ -1153,6 +1170,8 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, OPT_END() }; + repo_config(repo, repo_structure_config_cb, &top_nr); + argc = parse_options(argc, argv, prefix, options, repo_structure_usage, 0); if (argc) usage(_("too many arguments")); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 1137a19a041d06..ff1ea7ea3045b7 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -312,6 +312,23 @@ test_expect_success '--top rejects negative values' ' test_grep "must be non-negative" err ' +test_expect_success 'repo.structure.top supplies the default for --top' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit foo && + + git -c repo.structure.top=2 \ + repo structure --format=lines >with-config && + grep "^objects.blobs.top.by_count.1.path=" with-config && + + git -c repo.structure.top=2 \ + repo structure --format=lines --top=0 >cli-override && + ! grep "\.top\." 
cli-override + ) +' + test_expect_success 'git repo structure -h shows only repo structure usage' ' test_must_fail git repo structure -h >actual && test_grep "git repo structure" actual && From 75e707673af7e12a704d12e3f94b954a831d83f2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jun 2026 01:31:22 +0200 Subject: [PATCH 6/7] git-survey: announce the upcoming pivot into `git repo structure` `git survey` started life as an experimental scale-measurement tool; the preceding commits give `git repo structure` the path-level detail tables and ref-scoping mechanism that were `git survey`'s main draw, so the two now overlap substantially. Plan the migration explicitly: add a short notice at the top of the description making clear which of `git survey`'s knobs map to which `git repo structure` option, and state that a future release will turn `git survey` into a thin shim over `git repo structure`. Putting the notice in the description (rather than only the synopsis) ensures it shows up in `git help survey` rendering before the reader sees any option specifics, so an operator skimming the page learns about the replacement before adopting any survey-specific flags. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- Documentation/git-survey.adoc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/git-survey.adoc b/Documentation/git-survey.adoc index 44f3a0568b7697..cbd13c5551a8ac 100644 --- a/Documentation/git-survey.adoc +++ b/Documentation/git-survey.adoc @@ -8,11 +8,18 @@ git-survey - EXPERIMENTAL: Measure various repository dimensions of scale SYNOPSIS -------- [verse] -(EXPERIMENTAL!) 'git survey' +(DEPRECATED!) 'git survey' DESCRIPTION ----------- +NOTE: `git survey` is being superseded by `git repo structure`. 
New +deployments and new features should use `git repo structure`; its +`--ref-filter=<pattern>` option subsumes the various `--branches`, +`--tags`, and `--remotes` flags here, and `--top=<n>` provides the +same detail tables. A future release will turn `git survey` into a +thin shim over `git repo structure`. See linkgit:git-repo[1]. + Survey the repository and measure various dimensions of scale. As repositories grow to "monorepo" size, certain data shapes can cause From 46e1492ad5c97d49aa5aad31892f337658445086 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jun 2026 01:46:57 +0200 Subject: [PATCH 7/7] survey: turn into a thin shim over `git repo structure` `git survey` was an experimental scale-measurement tool whose distinctive features (ref-kind filters, top-N path tables) are now all available in `git repo structure`. With the path-level reporting in place (commits "repo: filter the structure scope via --ref-filter=" and "repo: report top-N paths by count, disk, and inflated size in structure"), there is no functionality `git survey` provides that `git repo structure` does not. Replace the 764-line `git survey` implementation with a roughly hundred-line shim that: * Accepts the existing `git survey` command line so that existing scripted invocations still parse without changes. * Emits a deprecation warning naming the replacement command, so interactive users learn about the migration target. * Translates the survey-specific knobs into the equivalent `git repo structure` invocation and re-execs the canonical command via `execv_git_cmd()`. Per-kind ref selectors fan out into the corresponding `refs/heads/*`, `refs/tags/*`, etc. `--ref-filter` patterns; `--top=<n>` is forwarded directly; `--all-refs` becomes the absence of any `--ref-filter`.
Two survey options have no `git repo structure` counterpart: `--verbose` controlled per-step trace output that the new command does not emit, and `--[no-]detached` selected the detached HEAD, which `git repo structure` does not enumerate separately. Both are still accepted but ignored, and each produces a single warning, so old invocations keep working while the absence of these knobs in `git repo structure` is made visible. Rewrite t8100 to assert the shim's contract: the deprecation warning is printed, the output is byte-identical to a corresponding `git repo structure` invocation, and the per-kind selector translation produces the right `--ref-filter` pattern. The previous survey-specific output assertions (the multi-column plaintext tables) no longer apply, since `git repo structure`'s output format is now the canonical one and is covered by t1901. The `survey.*` configuration keys (`survey.top`, `survey.progress`, `survey.verbose`) are no longer honored by the shim. Only the most useful knob, `survey.top`, has a replacement, added by the preceding `repo.structure.top` work; users with `survey.top` set in config should migrate to `repo.structure.top`. This is a backward-incompatible removal documented by the deprecation notice in `git-survey.adoc`.
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- builtin/survey.c | 1007 ++++------------------------------------- t/t8100-git-survey.sh | 98 +--- 2 files changed, 114 insertions(+), 991 deletions(-) diff --git a/builtin/survey.c b/builtin/survey.c index f40905fb2fd57a..b979fcb7b7a722 100644 --- a/builtin/survey.c +++ b/builtin/survey.c @@ -1,934 +1,113 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" -#include "config.h" -#include "environment.h" -#include "hex.h" -#include "object.h" -#include "odb.h" -#include "object-name.h" +#include "color.h" +#include "exec-cmd.h" +#include "gettext.h" #include "parse-options.h" -#include "path-walk.h" -#include "progress.h" -#include "ref-filter.h" -#include "refs.h" -#include "revision.h" -#include "strbuf.h" #include "strvec.h" -#include "tag.h" -#include "trace2.h" -#include "color.h" static const char * const survey_usage[] = { - N_("(EXPERIMENTAL!) git survey "), + N_("(DEPRECATED!) git survey "), NULL, }; -struct survey_refs_wanted { - int want_all_refs; /* special override */ - - int want_branches; - int want_tags; - int want_remotes; - int want_detached; - int want_other; /* see FILTER_REFS_OTHERS -- refs/notes/, refs/stash/ */ -}; - -static struct survey_refs_wanted default_ref_options = { - .want_all_refs = 1, -}; - -struct survey_opts { - int verbose; - int show_progress; - int top_nr; - struct survey_refs_wanted refs; -}; - -struct survey_report_ref_summary { - size_t refs_nr; - size_t branches_nr; - size_t remote_refs_nr; - size_t tags_nr; - size_t tags_annotated_nr; - size_t others_nr; - size_t unknown_nr; -}; - -struct survey_report_object_summary { - size_t commits_nr; - size_t tags_nr; - size_t trees_nr; - size_t blobs_nr; -}; - -/** - * For some category given by 'label', count the number of objects - * that match that label along with the on-disk size and the size - * after decompressing (both with delta bases and zlib). 
- */ -struct survey_report_object_size_summary { - char *label; - size_t nr; - size_t disk_size; - size_t inflated_size; - size_t num_missing; -}; - -typedef int (*survey_top_cmp)(void *v1, void *v2); - -static int cmp_by_nr(void *v1, void *v2) -{ - struct survey_report_object_size_summary *s1 = v1; - struct survey_report_object_size_summary *s2 = v2; - - if (s1->nr < s2->nr) - return -1; - if (s1->nr > s2->nr) - return 1; - return 0; -} - -static int cmp_by_disk_size(void *v1, void *v2) -{ - struct survey_report_object_size_summary *s1 = v1; - struct survey_report_object_size_summary *s2 = v2; - - if (s1->disk_size < s2->disk_size) - return -1; - if (s1->disk_size > s2->disk_size) - return 1; - return 0; -} - -static int cmp_by_inflated_size(void *v1, void *v2) -{ - struct survey_report_object_size_summary *s1 = v1; - struct survey_report_object_size_summary *s2 = v2; - - if (s1->inflated_size < s2->inflated_size) - return -1; - if (s1->inflated_size > s2->inflated_size) - return 1; - return 0; -} - -/** - * Store a list of "top" categories by some sorting function. When - * inserting a new category, reorder the list and free the one that - * got ejected (if any). - */ -struct survey_report_top_table { - const char *name; - survey_top_cmp cmp_fn; - size_t nr; - size_t alloc; - - /** - * 'data' stores an array of structs and must be cast into - * the proper array type before evaluating an index. 
- */ - void *data; -}; - -static void init_top_sizes(struct survey_report_top_table *top, - size_t limit, const char *name, - survey_top_cmp cmp) -{ - struct survey_report_object_size_summary *sz_array; - - top->name = name; - top->cmp_fn = cmp; - top->alloc = limit; - top->nr = 0; - - CALLOC_ARRAY(sz_array, limit); - top->data = sz_array; -} - -MAYBE_UNUSED -static void clear_top_sizes(struct survey_report_top_table *top) -{ - struct survey_report_object_size_summary *sz_array = top->data; - - for (size_t i = 0; i < top->nr; i++) - free(sz_array[i].label); - free(sz_array); -} - -static void maybe_insert_into_top_size(struct survey_report_top_table *top, - struct survey_report_object_size_summary *summary) -{ - struct survey_report_object_size_summary *sz_array = top->data; - size_t pos = top->nr; - - /* Compare against list from the bottom. */ - while (pos > 0 && top->cmp_fn(&sz_array[pos - 1], summary) < 0) - pos--; - - /* Not big enough! */ - if (pos >= top->alloc) - return; - - /* We need to shift the data. */ - if (top->nr == top->alloc) - free(sz_array[top->nr - 1].label); - else - top->nr++; - - for (size_t i = top->nr - 1; i > pos; i--) - memcpy(&sz_array[i], &sz_array[i - 1], sizeof(*sz_array)); - - memcpy(&sz_array[pos], summary, sizeof(*summary)); - sz_array[pos].label = xstrdup(summary->label); -} - -/** - * This struct contains all of the information that needs to be printed - * at the end of the exploration of the repository and its references. 
- */ -struct survey_report { - struct survey_report_ref_summary refs; - struct survey_report_object_summary reachable_objects; - - struct survey_report_object_size_summary *by_type; - - struct survey_report_top_table *top_paths_by_count; - struct survey_report_top_table *top_paths_by_disk; - struct survey_report_top_table *top_paths_by_inflate; -}; - -#define REPORT_TYPE_COMMIT 0 -#define REPORT_TYPE_TREE 1 -#define REPORT_TYPE_BLOB 2 -#define REPORT_TYPE_TAG 3 -#define REPORT_TYPE_COUNT 4 - -struct survey_context { - struct repository *repo; - - /* Options that control what is done. */ - struct survey_opts opts; - - /* Info for output only. */ - struct survey_report report; - - /* - * The rest of the members are about enabling the activity - * of the 'git survey' command, including ref listings, object - * pointers, and progress. - */ - - struct progress *progress; - size_t progress_nr; - size_t progress_total; - - struct strvec refs; - struct ref_array ref_array; -}; - -static void clear_survey_context(struct survey_context *ctx) -{ - ref_array_clear(&ctx->ref_array); - strvec_clear(&ctx->refs); -} - -struct survey_table { - const char *table_name; - struct strvec header; - struct strvec *rows; - size_t rows_nr; - size_t rows_alloc; -}; - -#define SURVEY_TABLE_INIT { \ - .header = STRVEC_INIT, \ -} - -static void clear_table(struct survey_table *table) -{ - strvec_clear(&table->header); - for (size_t i = 0; i < table->rows_nr; i++) - strvec_clear(&table->rows[i]); - free(table->rows); -} - -static void insert_table_rowv(struct survey_table *table, ...) 
-{ - va_list ap; - char *arg; - ALLOC_GROW(table->rows, table->rows_nr + 1, table->rows_alloc); - - memset(&table->rows[table->rows_nr], 0, sizeof(struct strvec)); - - va_start(ap, table); - while ((arg = va_arg(ap, char *))) - strvec_push(&table->rows[table->rows_nr], arg); - va_end(ap); - - table->rows_nr++; -} - -#define SECTION_SEGMENT "========================================" -#define SECTION_SEGMENT_LEN 40 -static const char *section_line = SECTION_SEGMENT - SECTION_SEGMENT - SECTION_SEGMENT - SECTION_SEGMENT; -static const size_t section_len = 4 * SECTION_SEGMENT_LEN; - -static void print_table_title(const char *name, size_t *widths, size_t nr) -{ - size_t width = 3 * (nr - 1); - size_t min_width = strlen(name); - - for (size_t i = 0; i < nr; i++) - width += widths[i]; - - if (width < min_width) - width = min_width; - - if (width > section_len) - width = section_len; - - printf("\n%s\n%.*s\n", name, (int)width, section_line); -} - -static void print_row_plaintext(struct strvec *row, size_t *widths) -{ - static struct strbuf line = STRBUF_INIT; - strbuf_setlen(&line, 0); - - for (size_t i = 0; i < row->nr; i++) { - const char *str = row->v[i]; - size_t len = strlen(str); - if (i) - strbuf_add(&line, " | ", 3); - strbuf_addchars(&line, ' ', widths[i] - len); - strbuf_add(&line, str, len); - } - printf("%s\n", line.buf); -} - -static void print_divider_plaintext(size_t *widths, size_t nr) -{ - static struct strbuf line = STRBUF_INIT; - strbuf_setlen(&line, 0); - - for (size_t i = 0; i < nr; i++) { - if (i) - strbuf_add(&line, "-+-", 3); - strbuf_addchars(&line, '-', widths[i]); - } - printf("%s\n", line.buf); -} - -static void print_table_plaintext(struct survey_table *table) -{ - size_t *column_widths; - size_t columns_nr = table->header.nr; - CALLOC_ARRAY(column_widths, columns_nr); - - for (size_t i = 0; i < columns_nr; i++) { - column_widths[i] = strlen(table->header.v[i]); - - for (size_t j = 0; j < table->rows_nr; j++) { - size_t rowlen = 
strlen(table->rows[j].v[i]); - if (column_widths[i] < rowlen) - column_widths[i] = rowlen; - } - } - - print_table_title(table->table_name, column_widths, columns_nr); - print_row_plaintext(&table->header, column_widths); - print_divider_plaintext(column_widths, columns_nr); - - for (size_t j = 0; j < table->rows_nr; j++) - print_row_plaintext(&table->rows[j], column_widths); - - free(column_widths); -} - -static void survey_report_plaintext_refs(struct survey_context *ctx) -{ - struct survey_report_ref_summary *refs = &ctx->report.refs; - struct survey_table table = SURVEY_TABLE_INIT; - - table.table_name = _("REFERENCES SUMMARY"); - - strvec_push(&table.header, _("Ref Type")); - strvec_push(&table.header, _("Count")); - - if (ctx->opts.refs.want_all_refs || ctx->opts.refs.want_branches) { - char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->branches_nr); - insert_table_rowv(&table, _("Branches"), fmt, NULL); - free(fmt); - } - - if (ctx->opts.refs.want_all_refs || ctx->opts.refs.want_remotes) { - char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->remote_refs_nr); - insert_table_rowv(&table, _("Remote refs"), fmt, NULL); - free(fmt); - } - - if (ctx->opts.refs.want_all_refs || ctx->opts.refs.want_tags) { - char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->tags_nr); - insert_table_rowv(&table, _("Tags (all)"), fmt, NULL); - free(fmt); - fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->tags_annotated_nr); - insert_table_rowv(&table, _("Tags (annotated)"), fmt, NULL); - free(fmt); - } - - print_table_plaintext(&table); - clear_table(&table); -} - -static void survey_report_plaintext_reachable_object_summary(struct survey_context *ctx) -{ - struct survey_report_object_summary *objs = &ctx->report.reachable_objects; - struct survey_table table = SURVEY_TABLE_INIT; - char *fmt; - - table.table_name = _("REACHABLE OBJECT SUMMARY"); - - strvec_push(&table.header, _("Object Type")); - strvec_push(&table.header, _("Count")); - - fmt = xstrfmt("%"PRIuMAX"", 
(uintmax_t)objs->tags_nr); - insert_table_rowv(&table, _("Tags"), fmt, NULL); - free(fmt); - - fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)objs->commits_nr); - insert_table_rowv(&table, _("Commits"), fmt, NULL); - free(fmt); - - fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)objs->trees_nr); - insert_table_rowv(&table, _("Trees"), fmt, NULL); - free(fmt); - - fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)objs->blobs_nr); - insert_table_rowv(&table, _("Blobs"), fmt, NULL); - free(fmt); - - print_table_plaintext(&table); - clear_table(&table); -} - -static void survey_report_object_sizes(const char *title, - const char *categories, - struct survey_report_object_size_summary *summary, - size_t summary_nr) -{ - struct survey_table table = SURVEY_TABLE_INIT; - table.table_name = title; - - strvec_push(&table.header, categories); - strvec_push(&table.header, _("Count")); - strvec_push(&table.header, _("Disk Size")); - strvec_push(&table.header, _("Inflated Size")); - - for (size_t i = 0; i < summary_nr; i++) { - char *label_str = xstrdup(summary[i].label); - char *nr_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].nr); - char *disk_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].disk_size); - char *inflate_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].inflated_size); - - insert_table_rowv(&table, label_str, nr_str, - disk_str, inflate_str, NULL); - - free(label_str); - free(nr_str); - free(disk_str); - free(inflate_str); - } - - print_table_plaintext(&table); - clear_table(&table); -} - -static void survey_report_plaintext_sorted_size( - struct survey_report_top_table *top) -{ - survey_report_object_sizes(top->name, _("Path"), - top->data, top->nr); -} - -static void survey_report_plaintext(struct survey_context *ctx) -{ - printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree); - printf("-----------------------------------------------------\n"); - survey_report_plaintext_refs(ctx); - survey_report_plaintext_reachable_object_summary(ctx); - survey_report_object_sizes(_("TOTAL OBJECT SIZES 
BY TYPE"), - _("Object Type"), - ctx->report.by_type, - REPORT_TYPE_COUNT); - - survey_report_plaintext_sorted_size( - &ctx->report.top_paths_by_count[REPORT_TYPE_TREE]); - survey_report_plaintext_sorted_size( - &ctx->report.top_paths_by_count[REPORT_TYPE_BLOB]); - - survey_report_plaintext_sorted_size( - &ctx->report.top_paths_by_disk[REPORT_TYPE_TREE]); - survey_report_plaintext_sorted_size( - &ctx->report.top_paths_by_disk[REPORT_TYPE_BLOB]); - - survey_report_plaintext_sorted_size( - &ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE]); - survey_report_plaintext_sorted_size( - &ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB]); -} - /* - * After parsing the command line arguments, figure out which refs we - * should scan. - * - * If ANY were given in positive sense, then we ONLY include them and - * do not use the builtin values. + * `git survey` has been superseded by `git repo structure`. To keep + * older callers working while the migration completes, accept the + * `git survey` command line, translate the options into the + * equivalent `git repo structure` invocation, and re-exec. */ -static void fixup_refs_wanted(struct survey_context *ctx) -{ - struct survey_refs_wanted *rw = &ctx->opts.refs; - - /* - * `--all-refs` overrides and enables everything. 
- */ - if (rw->want_all_refs == 1) { - rw->want_branches = 1; - rw->want_tags = 1; - rw->want_remotes = 1; - rw->want_detached = 1; - rw->want_other = 1; - return; - } +int cmd_survey(int argc, const char **argv, const char *prefix, + struct repository *repo UNUSED) +{ + int verbose = 0; + int show_progress = -1; + int top_nr = 10; + int want_all_refs = -1; + int want_branches = -1; + int want_tags = -1; + int want_remotes = -1; + int want_detached = -1; + int want_other = -1; + struct strvec child_argv = STRVEC_INIT; + struct option options[] = { + OPT__VERBOSE(&verbose, N_("verbose output (ignored)")), + OPT_BOOL(0, "progress", &show_progress, N_("show progress")), + OPT_INTEGER('n', "top", &top_nr, + N_("number of entries to include in " + "detail tables")), + OPT_BOOL_F(0, "all-refs", &want_all_refs, + N_("include all refs"), PARSE_OPT_NONEG), + OPT_BOOL_F(0, "branches", &want_branches, + N_("include branches"), PARSE_OPT_NONEG), + OPT_BOOL_F(0, "tags", &want_tags, + N_("include tags"), PARSE_OPT_NONEG), + OPT_BOOL_F(0, "remotes", &want_remotes, + N_("include remote refs"), PARSE_OPT_NONEG), + OPT_BOOL_F(0, "detached", &want_detached, + N_("include detached HEAD (ignored)"), + PARSE_OPT_NONEG), + OPT_BOOL_F(0, "other", &want_other, + N_("include notes and stashes"), PARSE_OPT_NONEG), + OPT_END(), + }; - /* - * If none of the `--` were given, we assume all - * of the builtin unspecified values. 
- */ - if (rw->want_branches == -1 && - rw->want_tags == -1 && - rw->want_remotes == -1 && - rw->want_detached == -1 && - rw->want_other == -1) { - *rw = default_ref_options; - return; - } + argc = parse_options(argc, argv, prefix, options, survey_usage, 0); + if (argc) + usage(_("'git survey' takes no positional arguments")); + + warning(_("'git survey' is deprecated; " + "use 'git repo structure' instead")); + if (verbose) + warning(_("--verbose is ignored by 'git repo structure'")); + if (want_detached != -1) + warning(_("--[no-]detached is ignored by " + "'git repo structure'")); + + strvec_pushl(&child_argv, "repo", "structure", NULL); + if (show_progress == 1) + strvec_push(&child_argv, "--progress"); + else if (show_progress == 0) + strvec_push(&child_argv, "--no-progress"); + if (top_nr > 0) + strvec_pushf(&child_argv, "--top=%d", top_nr); /* - * Since we only allow positive boolean values on the command - * line, we will only have true values where they specified - * a `--`. - * - * So anything that still has an unspecified value should be - * set to false. + * Survey's default ref scope is branches+tags+remotes (not "other"). + * `--all-refs` widens to literally everything; the per-kind flags + * select specific subsets. `git repo structure` defaults to all + * refs and accepts a repeatable --ref-filter=, so the + * translation is straightforward. 
*/ - if (rw->want_branches == -1) - rw->want_branches = 0; - if (rw->want_tags == -1) - rw->want_tags = 0; - if (rw->want_remotes == -1) - rw->want_remotes = 0; - if (rw->want_detached == -1) - rw->want_detached = 0; - if (rw->want_other == -1) - rw->want_other = 0; -} - -static int survey_load_config_cb(const char *var, const char *value, - const struct config_context *cctx, void *pvoid) -{ - struct survey_context *ctx = pvoid; - - if (!strcmp(var, "survey.verbose")) { - ctx->opts.verbose = git_config_bool(var, value); - return 0; - } - if (!strcmp(var, "survey.progress")) { - ctx->opts.show_progress = git_config_bool(var, value); - return 0; - } - if (!strcmp(var, "survey.top")) { - ctx->opts.top_nr = git_config_bool(var, value); - return 0; - } - - return git_default_config(var, value, cctx, pvoid); -} - -static void survey_load_config(struct survey_context *ctx) -{ - repo_config(the_repository, survey_load_config_cb, ctx); -} - -static void do_load_refs(struct survey_context *ctx, - struct ref_array *ref_array) -{ - struct ref_filter filter = REF_FILTER_INIT; - struct ref_sorting *sorting; - struct string_list sorting_options = STRING_LIST_INIT_DUP; - - string_list_append(&sorting_options, "objectname"); - sorting = ref_sorting_options(&sorting_options); - - if (ctx->opts.refs.want_detached) - strvec_push(&ctx->refs, "HEAD"); - - if (ctx->opts.refs.want_all_refs) { - strvec_push(&ctx->refs, "refs/"); - } else { - if (ctx->opts.refs.want_branches) - strvec_push(&ctx->refs, "refs/heads/"); - if (ctx->opts.refs.want_tags) - strvec_push(&ctx->refs, "refs/tags/"); - if (ctx->opts.refs.want_remotes) - strvec_push(&ctx->refs, "refs/remotes/"); - if (ctx->opts.refs.want_other) { - strvec_push(&ctx->refs, "refs/notes/"); - strvec_push(&ctx->refs, "refs/stash/"); - } - } - - filter.name_patterns = ctx->refs.v; - filter.ignore_case = 0; - filter.match_as_path = 1; - - if (ctx->opts.show_progress) { - ctx->progress_total = 0; - ctx->progress = start_progress(ctx->repo, - 
_("Scanning refs..."), 0); - } - - filter_refs(ref_array, &filter, FILTER_REFS_KIND_MASK); - - if (ctx->opts.show_progress) { - ctx->progress_total = ref_array->nr; - display_progress(ctx->progress, ctx->progress_total); - } - - ref_array_sort(sorting, ref_array); - - stop_progress(&ctx->progress); - ref_filter_clear(&filter); - ref_sorting_release(sorting); -} - -/* - * The REFS phase: - * - * Load the set of requested refs and assess them for scalablity problems. - * Use that set to start a treewalk to all reachable objects and assess - * them. - * - * This data will give us insights into the repository itself (the number - * of refs, the size and shape of the DAG, the number and size of the - * objects). - * - * Theoretically, this data is independent of the on-disk representation - * (e.g. independent of packing concerns). - */ -static void survey_phase_refs(struct survey_context *ctx) -{ - trace2_region_enter("survey", "phase/refs", ctx->repo); - do_load_refs(ctx, &ctx->ref_array); - - ctx->report.refs.refs_nr = ctx->ref_array.nr; - for (int i = 0; i < ctx->ref_array.nr; i++) { - unsigned long size; - struct ref_array_item *item = ctx->ref_array.items[i]; - - switch (item->kind) { - case FILTER_REFS_TAGS: - ctx->report.refs.tags_nr++; - if (odb_read_object_info(ctx->repo->objects, - &item->objectname, - &size) == OBJ_TAG) - ctx->report.refs.tags_annotated_nr++; - break; - - case FILTER_REFS_BRANCHES: - ctx->report.refs.branches_nr++; - break; - - case FILTER_REFS_REMOTES: - ctx->report.refs.remote_refs_nr++; - break; - - case FILTER_REFS_OTHERS: - ctx->report.refs.others_nr++; - break; - - default: - ctx->report.refs.unknown_nr++; - break; - } - } - - trace2_region_leave("survey", "phase/refs", ctx->repo); -} - -static void increment_object_counts( - struct survey_report_object_summary *summary, - enum object_type type, - size_t nr) -{ - switch (type) { - case OBJ_COMMIT: - summary->commits_nr += nr; - break; - - case OBJ_TREE: - summary->trees_nr += nr; - 
break; - - case OBJ_BLOB: - summary->blobs_nr += nr; - break; - - case OBJ_TAG: - summary->tags_nr += nr; - break; - - default: - break; - } -} - -static void increment_totals(struct survey_context *ctx, - struct oid_array *oids, - struct survey_report_object_size_summary *summary) -{ - for (size_t i = 0; i < oids->nr; i++) { - struct object_info oi = OBJECT_INFO_INIT; - unsigned oi_flags = OBJECT_INFO_FOR_PREFETCH; - unsigned long object_length = 0; - off_t disk_sizep = 0; - enum object_type type; - - oi.typep = &type; - oi.sizep = &object_length; - oi.disk_sizep = &disk_sizep; - - if (odb_read_object_info_extended(ctx->repo->objects, - &oids->oid[i], - &oi, oi_flags) < 0) { - summary->num_missing++; - } else { - summary->nr++; - summary->disk_size += disk_sizep; - summary->inflated_size += object_length; + if (want_all_refs != 1) { + int branches = want_branches == 1; + int tags = want_tags == 1; + int remotes = want_remotes == 1; + int other = want_other == 1; + + if (!branches && !tags && !remotes && !other) + branches = tags = remotes = 1; + + if (branches) + strvec_push(&child_argv, + "--ref-filter=refs/heads/*"); + if (tags) + strvec_push(&child_argv, + "--ref-filter=refs/tags/*"); + if (remotes) + strvec_push(&child_argv, + "--ref-filter=refs/remotes/*"); + if (other) { + strvec_push(&child_argv, + "--ref-filter=refs/notes/*"); + strvec_push(&child_argv, + "--ref-filter=refs/stash"); } } -} - -static void increment_object_totals(struct survey_context *ctx, - struct oid_array *oids, - enum object_type type, - const char *path) -{ - struct survey_report_object_size_summary *total; - struct survey_report_object_size_summary summary = { 0 }; - - increment_totals(ctx, oids, &summary); - - switch (type) { - case OBJ_COMMIT: - total = &ctx->report.by_type[REPORT_TYPE_COMMIT]; - break; - - case OBJ_TREE: - total = &ctx->report.by_type[REPORT_TYPE_TREE]; - break; - - case OBJ_BLOB: - total = &ctx->report.by_type[REPORT_TYPE_BLOB]; - break; - - case OBJ_TAG: - total 
= &ctx->report.by_type[REPORT_TYPE_TAG]; - break; - - default: - BUG("No other type allowed"); - } - - total->nr += summary.nr; - total->disk_size += summary.disk_size; - total->inflated_size += summary.inflated_size; - total->num_missing += summary.num_missing; - - if (type == OBJ_TREE || type == OBJ_BLOB) { - int index = type == OBJ_TREE ? - REPORT_TYPE_TREE : REPORT_TYPE_BLOB; - struct survey_report_top_table *top; - - /* - * Temporarily store (const char *) here, but it will - * be duped if inserted and will not be freed. - */ - summary.label = (char *)path; - - top = ctx->report.top_paths_by_count; - maybe_insert_into_top_size(&top[index], &summary); - - top = ctx->report.top_paths_by_disk; - maybe_insert_into_top_size(&top[index], &summary); - - top = ctx->report.top_paths_by_inflate; - maybe_insert_into_top_size(&top[index], &summary); - } -} - -static int survey_objects_path_walk_fn(const char *path, - struct oid_array *oids, - enum object_type type, - void *data) -{ - struct survey_context *ctx = data; - - increment_object_counts(&ctx->report.reachable_objects, - type, oids->nr); - increment_object_totals(ctx, oids, type, path); - - ctx->progress_nr += oids->nr; - display_progress(ctx->progress, ctx->progress_nr); - - return 0; -} - -static void initialize_report(struct survey_context *ctx) -{ - CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT); - ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits")); - ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees")); - ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs")); - ctx->report.by_type[REPORT_TYPE_TAG].label = xstrdup(_("Tags")); - - CALLOC_ARRAY(ctx->report.top_paths_by_count, REPORT_TYPE_COUNT); - init_top_sizes(&ctx->report.top_paths_by_count[REPORT_TYPE_TREE], - ctx->opts.top_nr, _("TOP DIRECTORIES BY COUNT"), cmp_by_nr); - init_top_sizes(&ctx->report.top_paths_by_count[REPORT_TYPE_BLOB], - ctx->opts.top_nr, _("TOP FILES BY COUNT"), cmp_by_nr); - - 
CALLOC_ARRAY(ctx->report.top_paths_by_disk, REPORT_TYPE_COUNT); - init_top_sizes(&ctx->report.top_paths_by_disk[REPORT_TYPE_TREE], - ctx->opts.top_nr, _("TOP DIRECTORIES BY DISK SIZE"), cmp_by_disk_size); - init_top_sizes(&ctx->report.top_paths_by_disk[REPORT_TYPE_BLOB], - ctx->opts.top_nr, _("TOP FILES BY DISK SIZE"), cmp_by_disk_size); - - CALLOC_ARRAY(ctx->report.top_paths_by_inflate, REPORT_TYPE_COUNT); - init_top_sizes(&ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE], - ctx->opts.top_nr, _("TOP DIRECTORIES BY INFLATED SIZE"), cmp_by_inflated_size); - init_top_sizes(&ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB], - ctx->opts.top_nr, _("TOP FILES BY INFLATED SIZE"), cmp_by_inflated_size); -} - -static void survey_phase_objects(struct survey_context *ctx) -{ - struct rev_info revs = REV_INFO_INIT; - struct path_walk_info info = PATH_WALK_INFO_INIT; - unsigned int add_flags = 0; - - trace2_region_enter("survey", "phase/objects", ctx->repo); - - info.revs = &revs; - info.path_fn = survey_objects_path_walk_fn; - info.path_fn_data = ctx; - - initialize_report(ctx); - - repo_init_revisions(ctx->repo, &revs, ""); - revs.tag_objects = 1; - - ctx->progress_nr = 0; - ctx->progress_total = ctx->ref_array.nr; - if (ctx->opts.show_progress) - ctx->progress = start_progress(ctx->repo, - _("Preparing object walk"), - ctx->progress_total); - for (int i = 0; i < ctx->ref_array.nr; i++) { - struct ref_array_item *item = ctx->ref_array.items[i]; - add_pending_oid(&revs, NULL, &item->objectname, add_flags); - display_progress(ctx->progress, ++(ctx->progress_nr)); - } - stop_progress(&ctx->progress); - - ctx->progress_nr = 0; - ctx->progress_total = 0; - if (ctx->opts.show_progress) - ctx->progress = start_progress(ctx->repo, - _("Walking objects"), 0); - walk_objects_by_path(&info); - stop_progress(&ctx->progress); - - release_revisions(&revs); - trace2_region_leave("survey", "phase/objects", ctx->repo); -} - -int cmd_survey(int argc, const char **argv, const char *prefix, 
struct repository *repo) -{ - static struct survey_context ctx = { - .opts = { - .verbose = 0, - .show_progress = -1, /* defaults to isatty(2) */ - .top_nr = 10, - - .refs.want_all_refs = -1, - - .refs.want_branches = -1, /* default these to undefined */ - .refs.want_tags = -1, - .refs.want_remotes = -1, - .refs.want_detached = -1, - .refs.want_other = -1, - }, - .refs = STRVEC_INIT, - }; - - static struct option survey_options[] = { - OPT__VERBOSE(&ctx.opts.verbose, N_("verbose output")), - OPT_BOOL(0, "progress", &ctx.opts.show_progress, N_("show progress")), - OPT_INTEGER('n', "top", &ctx.opts.top_nr, - N_("number of entries to include in detail tables")), - - OPT_BOOL_F(0, "all-refs", &ctx.opts.refs.want_all_refs, N_("include all refs"), PARSE_OPT_NONEG), - - OPT_BOOL_F(0, "branches", &ctx.opts.refs.want_branches, N_("include branches"), PARSE_OPT_NONEG), - OPT_BOOL_F(0, "tags", &ctx.opts.refs.want_tags, N_("include tags"), PARSE_OPT_NONEG), - OPT_BOOL_F(0, "remotes", &ctx.opts.refs.want_remotes, N_("include all remotes refs"), PARSE_OPT_NONEG), - OPT_BOOL_F(0, "detached", &ctx.opts.refs.want_detached, N_("include detached HEAD"), PARSE_OPT_NONEG), - OPT_BOOL_F(0, "other", &ctx.opts.refs.want_other, N_("include notes and stashes"), PARSE_OPT_NONEG), - - OPT_END(), - }; - - show_usage_with_options_if_asked(argc, argv, - survey_usage, survey_options); - - if (isatty(2)) - color_fprintf_ln(stderr, - want_color_fd(2, GIT_COLOR_AUTO) ? 
GIT_COLOR_YELLOW : "", - "(THIS IS EXPERIMENTAL, EXPECT THE OUTPUT FORMAT TO CHANGE!)"); - - ctx.repo = repo; - - prepare_repo_settings(ctx.repo); - survey_load_config(&ctx); - - argc = parse_options(argc, argv, prefix, survey_options, survey_usage, 0); - - if (ctx.opts.show_progress < 0) - ctx.opts.show_progress = isatty(2); - - fixup_refs_wanted(&ctx); - - survey_phase_refs(&ctx); - - survey_phase_objects(&ctx); - - survey_report_plaintext(&ctx); - clear_survey_context(&ctx); - return 0; + execv_git_cmd(child_argv.v); + /* unreachable: execv_git_cmd dies on failure */ + strvec_clear(&child_argv); + return 1; } diff --git a/t/t8100-git-survey.sh b/t/t8100-git-survey.sh index 1ba48cc47e1b35..dec9d81ec65c69 100755 --- a/t/t8100-git-survey.sh +++ b/t/t8100-git-survey.sh @@ -1,6 +1,6 @@ #!/bin/sh -test_description='git survey' +test_description='git survey (deprecated shim over `git repo structure`)' GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME @@ -10,9 +10,9 @@ export TEST_PASSES_SANITIZE_LEAK . ./test-lib.sh -test_expect_success 'git survey -h shows experimental warning' ' +test_expect_success 'git survey -h shows the deprecated banner' ' test_expect_code 129 git survey -h >usage && - grep "EXPERIMENTAL!" usage + grep "DEPRECATED!" 
usage ' test_expect_success 'create a semi-interesting repo' ' @@ -25,84 +25,28 @@ test_expect_success 'create a semi-interesting repo' ' git update-ref -d refs/tags/two ' -test_expect_success 'git survey --progress' ' - GIT_PROGRESS_DELAY=0 git survey --all-refs --progress >out 2>err && - grep "Preparing object walk" err -' - -approximate_sizes() { - # very simplistic approximate rounding - sed -Ee "s/ *(1[0-9][0-9])( |$)/ ~0.1kB\2/g" \ - -e "s/ *(4[6-9][0-9]|5[0-6][0-9])( |$)/ ~0.5kB\2/g" \ - -e "s/ *(5[6-9][0-9]|6[0-6][0-9])( |$)/ ~0.6kB\2/g" \ - -e "s/ *1(4[89][0-9]|5[0-8][0-9])( |$)/ ~1.5kB\2/g" \ - -e "s/ *1(69[0-9]|7[0-9][0-9])( |$)/ ~1.7kB\2/g" \ - -e "s/ *1(79[0-9]|8[0-9][0-9])( |$)/ ~1.8kB\2/g" \ - -e "s/ *2(1[0-9][0-9]|20[0-1])( |$)/ ~2.1kB\2/g" \ - -e "s/ *2(3[0-9][0-9]|4[0-1][0-9])( |$)/ ~2.3kB\2/g" \ - -e "s/ *2(5[0-9][0-9]|6[0-1][0-9])( |$)/ ~2.5kB\2/g" \ - "$@" -} - -test_expect_success 'git survey (default)' ' +test_expect_success 'survey prints a deprecation warning' ' git survey --all-refs >out 2>err && - test_line_count = 0 err && - - test_oid_cache <<-EOF && - commits_sizes sha1:~1.5kB | ~2.1kB - commits_sizes sha256:~1.8kB | ~2.5kB - trees_sizes sha1:~0.5kB | ~1.7kB - trees_sizes sha256:~0.6kB | ~2.3kB - blobs_sizes sha1:~0.1kB | ~0.1kB - blobs_sizes sha256:~0.1kB | ~0.1kB - tags_sizes sha1:~0.5kB | ~0.5kB - tags_sizes sha256:~0.5kB | ~0.6kB - EOF - - tr , " " >expect <<-EOF && - GIT SURVEY for "$(pwd)" - ----------------------------------------------------- - - REFERENCES SUMMARY - ======================== - , Ref Type | Count - -----------------+------ - , Branches | 1 - Remote refs | 0 - Tags (all) | 2 - Tags (annotated) | 2 - - REACHABLE OBJECT SUMMARY - ======================== - Object Type | Count - ------------+------ - Tags | 4 - Commits | 10 - Trees | 10 - Blobs | 10 + grep "is deprecated" err +' - TOTAL OBJECT SIZES BY TYPE - =============================================== - Object Type | Count | Disk Size | Inflated Size - 
------------+-------+-----------+-------------- - Commits | 10 | $(test_oid commits_sizes) - Trees | 10 | $(test_oid trees_sizes) - Blobs | 10 | $(test_oid blobs_sizes) - Tags | 4 | $(test_oid tags_sizes) - EOF +test_expect_success 'survey forwards to git repo structure' ' + git survey --all-refs >survey-out 2>survey-err && + git repo structure --top=10 >structure-out 2>structure-err && + test_cmp structure-out survey-out +' - approximate_sizes out >out-edited && - lines=$(wc -l <expect) && - head -n $lines out-edited >out-trimmed && - test_cmp expect out-trimmed && +test_expect_success 'survey --top is translated' ' + git survey --top=3 --all-refs >out && + git repo structure --top=3 >expected && + test_cmp expected out +' - for type in "DIRECTORIES" "FILES" - do - for metric in "COUNT" "DISK SIZE" "INFLATED SIZE" - do - grep "TOP $type BY $metric" out || return 1 - done || return 1 - done +test_expect_success 'survey --branches translates to a refs/heads/* filter' ' + git survey --branches >out && + git repo structure --top=10 \ + --ref-filter="refs/heads/*" >expected && + test_cmp expected out ' test_done