git-mono split tests ===================== Setup: configure git and disable colors $ export NO_COLOR=1 $ export GIT_AUTHOR_NAME="Test User" $ export GIT_AUTHOR_EMAIL="test@example.com" $ export GIT_AUTHOR_DATE="2025-01-01T00:00:00+00:00" $ export GIT_COMMITTER_NAME="Test User" $ export GIT_COMMITTER_EMAIL="test@example.com" $ export GIT_COMMITTER_DATE="2025-01-01T00:00:00+00:00" Help and version ----------------- $ git-mono --version 4.0.0 Error: missing PREFIX argument ------------------------------- $ git-mono split 2>&1 Usage: git-mono split [--help] [OPTION]… PREFIX git-mono: required argument PREFIX is missing [124] Error: not a git repository ---------------------------- $ mkdir empty && cd empty $ git-mono split lib 2>&1 error: Could not resolve revision 'HEAD'. git-mono: could not resolve revision 'HEAD' [124] $ cd .. Single commit with prefix -------------------------- $ mkdir repo1 && cd repo1 $ git init -q $ mkdir lib $ echo "hello" > lib/foo.ml $ git add . && git commit -q -m "add lib/foo.ml" $ HASH=$(git-mono split lib) $ test -n "$HASH" && echo "got hash" got hash Verify the split commit has lib/ contents at root $ git ls-tree "$HASH" --name-only foo.ml $ git show "$HASH:foo.ml" hello $ cd .. Linear history -------------- $ mkdir repo2 && cd repo2 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "first" $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "second" $ echo "v3" > lib/a.ml $ git add . && git commit -q -m "third" $ HASH=$(git-mono split lib) The split commit chain should have 3 commits $ git rev-list "$HASH" | wc -l | tr -d ' ' 3 Each split commit message matches the original $ git log --format="%s" "$HASH" third second first The latest tree should have a.ml with content "v3" $ git show "$HASH:a.ml" v3 $ cd .. No commits touch prefix ------------------------ $ mkdir repo3 && cd repo3 $ git init -q $ echo "root" > README.md $ git add . && git commit -q -m "root only" $ git-mono split nonexistent 2>&1 git-mono: [WARNING] No commits touch prefix 'nonexistent'. $ cd .. Cache: second run returns same hash ------------------------------------- $ mkdir repo4 && cd repo4 $ git init -q $ mkdir lib $ echo "cached" > lib/x.ml $ git add . && git commit -q -m "initial" $ HASH1=$(git-mono split lib) $ HASH2=$(git-mono split lib) $ test "$HASH1" = "$HASH2" && echo "cache hit: same hash" cache hit: same hash Verify cache file exists $ test -f .git/subtree-cache/lib && echo "cache file exists" cache file exists $ cd .. Incremental split ----------------- $ mkdir repo5 && cd repo5 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "first" $ HASH1=$(git-mono split lib) Add another commit $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "second" $ HASH2=$(git-mono split lib) Hashes differ $ test "$HASH1" != "$HASH2" && echo "hashes differ" hashes differ New split has 2 commits, not 1 $ git rev-list "$HASH2" | wc -l | tr -d ' ' 2 The parent of the new split head is the old split head $ PARENT=$(git rev-parse "$HASH2^") $ test "$PARENT" = "$HASH1" && echo "parent matches" parent matches $ cd .. Merge commits ------------- $ mkdir repo6 && cd repo6 $ git init -q $ mkdir lib $ echo "base" > lib/a.ml $ git add . && git commit -q -m "base" Create a branch and make changes $ git checkout -q -b feature $ echo "feature" > lib/b.ml $ git add . && git commit -q -m "add b.ml on feature" Go back to main and make a different change $ git checkout -q main 2>/dev/null || git checkout -q master $ echo "main" > lib/c.ml $ git add . && git commit -q -m "add c.ml on main" Merge $ git merge -q --no-edit feature Split should produce commits with merge parents $ HASH=$(git-mono split lib) $ git rev-list "$HASH" | wc -l | tr -d ' ' 4 The head commit should be a merge (2 parents) $ git cat-file -p "$HASH" | grep "^parent" | wc -l | tr -d ' ' 2 $ cd .. Nested prefix -------------- $ mkdir repo7 && cd repo7 $ git init -q $ mkdir -p src/lib $ echo "deep" > src/lib/deep.ml $ git add . && git commit -q -m "nested" $ HASH=$(git-mono split src/lib) $ git ls-tree "$HASH" --name-only deep.ml $ cd .. Commits that don't touch the prefix are skipped ------------------------------------------------- $ mkdir repo8 && cd repo8 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "touch lib" $ echo "unrelated" > README.md $ git add . && git commit -q -m "touch root only" $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "touch lib again" $ HASH=$(git-mono split lib) Only 2 split commits (the one touching root only is squashed since subtree didn't change) $ git rev-list "$HASH" | wc -l | tr -d ' ' 2 Messages are preserved from the commits that touch lib $ git log --format="%s" "$HASH" touch lib again touch lib $ cd .. Split from a specific revision ------------------------------- $ mkdir repo9 && cd repo9 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "first" $ FIRST=$(git rev-parse HEAD) $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "second" $ HASH_HEAD=$(git-mono split lib) $ HASH_FIRST=$(git-mono split --rev "$FIRST" lib) The rev-specific split should have fewer commits $ git rev-list "$HASH_FIRST" | wc -l | tr -d ' ' 1 $ git rev-list "$HASH_HEAD" | wc -l | tr -d ' ' 2 $ cd .. Split from a branch name ------------------------- $ mkdir repo10 && cd repo10 $ git init -q $ mkdir lib $ echo "main" > lib/a.ml $ git add . && git commit -q -m "on main" $ git checkout -q -b other $ echo "other" > lib/b.ml $ git add . && git commit -q -m "on other" $ git checkout -q main 2>/dev/null || git checkout -q master $ HASH=$(git-mono split --rev other lib) $ git show "$HASH:b.ml" other $ cd .. Bad revision ------------ $ mkdir repo11 && cd repo11 $ git init -q $ git-mono split --rev nonexistent lib 2>&1 error: Could not resolve revision 'nonexistent'. git-mono: could not resolve revision 'nonexistent' [124] $ cd .. Quiet mode (-q) suppresses warnings ------------------------------------- $ mkdir repo12 && cd repo12 $ git init -q $ echo "root" > README.md $ git add . && git commit -q -m "root only" $ git-mono split -q nonexistent 2>&1 $ cd .. Verbose mode shows info messages --------------------------------- $ mkdir repo13 && cd repo13 $ git init -q $ mkdir lib $ echo "hello" > lib/a.ml $ git add . && git commit -q -m "initial" $ git-mono split -v lib 2>&1 | grep -c "Splitting prefix" 1 $ cd .. Multiple prefixes use separate caches --------------------------------------- $ mkdir repo14 && cd repo14 $ git init -q $ mkdir lib bin $ echo "lib" > lib/a.ml $ echo "bin" > bin/main.ml $ git add . && git commit -q -m "initial" $ HASH_LIB=$(git-mono split lib) $ HASH_BIN=$(git-mono split bin) Different hashes (different content) $ test "$HASH_LIB" != "$HASH_BIN" && echo "different hashes" different hashes Separate cache files $ test -f .git/subtree-cache/lib && echo "lib cache exists" lib cache exists $ test -f .git/subtree-cache/bin && echo "bin cache exists" bin cache exists $ cd .. Author and committer are preserved ------------------------------------ $ mkdir repo15 && cd repo15 $ git init -q $ mkdir lib $ echo "hello" > lib/a.ml $ git add . $ GIT_AUTHOR_NAME="Alice Author" GIT_AUTHOR_EMAIL="alice@example.com" \ > GIT_COMMITTER_NAME="Bob Committer" GIT_COMMITTER_EMAIL="bob@example.com" \ > git commit -q -m "authored commit" $ HASH=$(git-mono split lib) $ git cat-file -p "$HASH" | grep "^author " | sed 's/ [0-9]* [+-][0-9]*//' author Alice Author $ git cat-file -p "$HASH" | grep "^committer " | sed 's/ [0-9]* [+-][0-9]*//' committer Bob Committer $ cd .. Dirty working tree does not affect split ------------------------------------------ $ mkdir repo16 && cd repo16 $ git init -q $ mkdir lib $ echo "committed" > lib/a.ml $ git add . && git commit -q -m "initial" $ HASH1=$(git-mono split lib) Make dirty changes (not committed) $ echo "dirty" > lib/a.ml $ echo "new file" > lib/b.ml Split should still work and return same hash (reads object store, not working tree) $ HASH2=$(git-mono split lib) $ test "$HASH1" = "$HASH2" && echo "dirty tree ignored" dirty tree ignored $ cd .. Using -C to specify repository path -------------------------------------- $ mkdir repo17 && cd repo17 $ git init -q $ mkdir lib $ echo "remote" > lib/a.ml $ git add . && git commit -q -m "initial" $ cd .. $ HASH=$(git-mono split -C repo17 lib) $ test -n "$HASH" && echo "got hash via -C" got hash via -C Large tree: split only extracts the right subtree --------------------------------------------------- $ mkdir repo19 && cd repo19 $ git init -q $ mkdir -p lib bin doc test $ echo "lib" > lib/a.ml $ echo "bin" > bin/main.ml $ echo "doc" > doc/README.md $ echo "test" > test/test.ml $ git add . && git commit -q -m "initial" $ HASH=$(git-mono split lib) $ git ls-tree "$HASH" --name-only a.ml $ cd .. Commit message is preserved ----------------------------- $ mkdir repo20 && cd repo20 $ git init -q $ mkdir lib $ echo "hello" > lib/a.ml $ git add . $ git commit -q -m "Detailed commit message" $ HASH=$(git-mono split lib) $ git log -1 --format="%s" "$HASH" Detailed commit message $ cd .. Prefix appearing partway through history ------------------------------------------ $ mkdir repo21 && cd repo21 $ git init -q $ echo "root" > README.md $ git add . && git commit -q -m "no lib yet" $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "add lib" $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "update lib" $ HASH=$(git-mono split lib) Only 2 commits in split (the "no lib yet" commit is skipped) $ git rev-list "$HASH" | wc -l | tr -d ' ' 2 $ git log --format="%s" "$HASH" update lib add lib $ cd .. Multiple files in prefix ------------------------- $ mkdir repo22 && cd repo22 $ git init -q $ mkdir lib $ echo "a" > lib/a.ml $ echo "b" > lib/b.ml $ echo "c" > lib/c.ml $ git add . && git commit -q -m "initial" $ HASH=$(git-mono split lib) $ git ls-tree "$HASH" --name-only | sort a.ml b.ml c.ml $ cd .. Nested directories in prefix ------------------------------- $ mkdir repo23 && cd repo23 $ git init -q $ mkdir -p lib/src lib/test $ echo "code" > lib/src/main.ml $ echo "test" > lib/test/test.ml $ git add . && git commit -q -m "initial" $ HASH=$(git-mono split lib) $ git ls-tree "$HASH" --name-only src test $ git ls-tree "$HASH:src" --name-only main.ml $ cd .. Cache file format: pairs of hex hashes ---------------------------------------- $ mkdir repo24 && cd repo24 $ git init -q $ mkdir lib $ echo "hello" > lib/a.ml $ git add . && git commit -q -m "initial" $ git-mono split lib > /dev/null $ wc -l < .git/subtree-cache/lib | tr -d ' ' 1 $ awk '{ print length($1), length($2) }' .git/subtree-cache/lib 40 40 $ cd .. Second split with same tree but different message creates new commit --------------------------------------------------------------------- $ mkdir repo25 && cd repo25 $ git init -q $ mkdir lib $ echo "same" > lib/a.ml $ git add . && git commit -q -m "commit A" $ echo "different" > lib/a.ml $ git add . && git commit -q -m "commit B" $ echo "same" > lib/a.ml $ git add . && git commit -q -m "commit C (same tree as A)" $ HASH=$(git-mono split lib) All 3 commits are present because the tree changes between adjacent parents $ git rev-list "$HASH" | wc -l | tr -d ' ' 3 $ cd .. Idempotency: running split multiple times produces identical results ---------------------------------------------------------------------- $ mkdir repo26 && cd repo26 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "first" $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "second" $ echo "v3" > lib/a.ml $ git add . && git commit -q -m "third" First split (cold cache) $ HASH1=$(git-mono split lib) Second split (warm cache) $ HASH2=$(git-mono split lib) Third split (still warm cache) $ HASH3=$(git-mono split lib) All three runs produce the same hash $ test "$HASH1" = "$HASH2" && test "$HASH2" = "$HASH3" && echo "idempotent" idempotent $ cd .. Incremental: add commits one at a time ---------------------------------------- $ mkdir repo27 && cd repo27 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "first" $ HASH_A=$(git-mono split lib) $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "second" $ HASH_B=$(git-mono split lib) $ echo "v3" > lib/a.ml $ git add . && git commit -q -m "third" $ HASH_C=$(git-mono split lib) Each incremental split extends the chain $ git rev-list "$HASH_A" | wc -l | tr -d ' ' 1 $ git rev-list "$HASH_B" | wc -l | tr -d ' ' 2 $ git rev-list "$HASH_C" | wc -l | tr -d ' ' 3 The chain is consistent: each split head's parent is the previous split head $ test "$(git rev-parse "$HASH_B^")" = "$HASH_A" && echo "B parent = A" B parent = A $ test "$(git rev-parse "$HASH_C^")" = "$HASH_B" && echo "C parent = B" C parent = B All split commits are reachable from the latest $ git merge-base --is-ancestor "$HASH_A" "$HASH_C" && echo "A is ancestor of C" A is ancestor of C $ cd .. Bidirectional incremental: split two branches independently ------------------------------------------------------------ $ mkdir repo28 && cd repo28 $ git init -q $ mkdir lib $ echo "base" > lib/a.ml $ git add . && git commit -q -m "base commit" $ HASH_BASE=$(git-mono split lib) Create two branches from the same base $ git checkout -q -b branch-x $ echo "x" > lib/x.ml $ git add . && git commit -q -m "add x" $ HASH_X=$(git-mono split --rev branch-x lib) $ git checkout -q main 2>/dev/null || git checkout -q master $ echo "y" > lib/y.ml $ git add . && git commit -q -m "add y" $ HASH_Y=$(git-mono split lib) Both branches share the same split base $ test "$(git rev-parse "$HASH_X^")" = "$HASH_BASE" && echo "X parent = base" X parent = base $ test "$(git rev-parse "$HASH_Y^")" = "$HASH_BASE" && echo "Y parent = base" Y parent = base Now merge and split again $ git merge -q --no-edit branch-x $ HASH_MERGE=$(git-mono split lib) The merge split has both branch splits as parents $ git cat-file -p "$HASH_MERGE" | grep "^parent" | wc -l | tr -d ' ' 2 The merge split is a descendant of both branch splits $ git merge-base --is-ancestor "$HASH_X" "$HASH_MERGE" && echo "X ancestor of merge" X ancestor of merge $ git merge-base --is-ancestor "$HASH_Y" "$HASH_MERGE" && echo "Y ancestor of merge" Y ancestor of merge $ cd .. Incremental after cache: only new commits are processed --------------------------------------------------------- $ mkdir repo29 && cd repo29 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "first" $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "second" $ git-mono split lib > /dev/null Cache should have 2 entries $ wc -l < .git/subtree-cache/lib | tr -d ' ' 2 Add a new commit and re-split $ echo "v3" > lib/a.ml $ git add . && git commit -q -m "third" $ git-mono split lib > /dev/null Cache should now have 3 entries (only 1 new entry added) $ wc -l < .git/subtree-cache/lib | tr -d ' ' 3 $ cd .. Gap commits: split bridges over commits that don't contain the prefix ----------------------------------------------------------------------- Some workflows (e.g. irmin auto-commits) can insert commits whose tree only contains a single subtree or is even empty. The split must bridge over these "gap" commits and maintain parent chain continuity. $ mkdir repo30 && cd repo30 $ git init -q $ mkdir -p lib bin $ echo "v1" > lib/a.ml $ echo "v1" > bin/main.ml $ git add . && git commit -q -m "initial: lib + bin" $ HASH1=$(git-mono split lib) Verify initial split has 1 commit $ git rev-list "$HASH1" | wc -l | tr -d ' ' 1 Now simulate a gap: create a commit with an empty tree (like an irmin "Initial commit") followed by a commit with only bin/ content. These represent commits that got into the main branch but don't have lib/. $ EMPTY_TREE=$(git hash-object -t tree /dev/null) $ GAP1=$(git commit-tree "$EMPTY_TREE" -p HEAD -m "gap: empty tree") $ git reset -q "$GAP1" Create another gap commit with only bin/ content (no lib/) $ mkdir -p /tmp/gap-work $ echo "bin-only" > /tmp/gap-work/main.ml $ BIN_BLOB=$(git hash-object -w /tmp/gap-work/main.ml) $ BIN_TREE=$(printf "100644 blob %s\tmain.ml\n" "$BIN_BLOB" | git mktree) $ TOP_TREE=$(printf "040000 tree %s\tbin\n" "$BIN_TREE" | git mktree) $ GAP2=$(git commit-tree "$TOP_TREE" -p HEAD -m "gap: only bin") $ git reset -q "$GAP2" Now add a normal commit that has lib/ again $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "restore lib with v2" $ HASH2=$(git-mono split lib) The split should have 2 commits (bridging over the 2 gap commits) $ git rev-list "$HASH2" | wc -l | tr -d ' ' 2 The parent chain should be connected: HASH1 is an ancestor of HASH2 $ git merge-base --is-ancestor "$HASH1" "$HASH2" && echo "parent chain preserved" parent chain preserved $ git log --format="%s" "$HASH2" restore lib with v2 initial: lib + bin Verify the cache correctly handles the gap commits $ HASH3=$(git-mono split lib) $ test "$HASH2" = "$HASH3" && echo "cache hit after gap" cache hit after gap $ cd .. Multiple gap commits in sequence ---------------------------------- $ mkdir repo31 && cd repo31 $ git init -q $ mkdir -p lib bin $ echo "v1" > lib/a.ml $ echo "v1" > bin/main.ml $ git add . && git commit -q -m "first with lib" $ HASH1=$(git-mono split lib) Create 3 gap commits in a row (none have lib/) $ EMPTY_TREE=$(git hash-object -t tree /dev/null) $ GAP1=$(git commit-tree "$EMPTY_TREE" -p HEAD -m "gap 1") $ GAP2=$(git commit-tree "$EMPTY_TREE" -p "$GAP1" -m "gap 2") $ GAP3=$(git commit-tree "$EMPTY_TREE" -p "$GAP2" -m "gap 3") $ git reset -q "$GAP3" Restore lib/ after 3 gaps $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "restore lib after 3 gaps" $ HASH2=$(git-mono split lib) Split should bridge over all 3 gap commits $ git rev-list "$HASH2" | wc -l | tr -d ' ' 2 $ git merge-base --is-ancestor "$HASH1" "$HASH2" && echo "bridged 3 gaps" bridged 3 gaps $ cd .. Verify repairs orphaned cache entries --------------------------------------- $ mkdir repo32 && cd repo32 $ git init -q $ mkdir lib $ echo "v1" > lib/a.ml $ git add . && git commit -q -m "first" $ HASH1=$(git-mono split lib) $ echo "v2" > lib/a.ml $ git add . && git commit -q -m "second" $ MONO_HEAD=$(git rev-parse HEAD) Corrupt the cache: add an orphaned split commit (no parents) for HEAD $ ORPHAN_TREE=$(git rev-parse "$HASH1^{tree}") $ ORPHAN=$(git commit-tree "$ORPHAN_TREE" -m "orphaned split") $ printf "%s %s\n" "$MONO_HEAD" "$ORPHAN" >> .git/subtree-cache/lib Split should detect the orphan, clear cache, and rebuild with proper parents $ HASH2=$(git-mono split lib) $ git rev-list "$HASH2" | wc -l | tr -d ' ' 2 $ git merge-base --is-ancestor "$HASH1" "$HASH2" && echo "ancestry restored" ancestry restored $ cd ..