1package git
2
3import (
4 "io"
5 "sort"
6
7 "github.com/go-git/go-git/v5/plumbing"
8 "github.com/go-git/go-git/v5/plumbing/object"
9 "github.com/go-git/go-git/v5/utils/diff"
10
11 "github.com/sergi/go-diff/diffmatchpatch"
12)
13
14// References returns a slice of Commits for the file at "path", starting from
15// the commit provided that contains the file from the provided path. The last
16// commit into the returned slice is the commit where the file was created.
17// If the provided commit does not contains the specified path, a nil slice is
18// returned. The commits are sorted in commit order, newer to older.
19//
20// Caveats:
21//
22// - Moves and copies are not currently supported.
23//
24// - Cherry-picks are not detected unless there are no commits between them and
25// therefore can appear repeated in the list. (see git path-id for hints on how
26// to fix this).
27func references(c *object.Commit, path string) ([]*object.Commit, error) {
28 var result []*object.Commit
29 seen := make(map[plumbing.Hash]struct{})
30 if err := walkGraph(&result, &seen, c, path); err != nil {
31 return nil, err
32 }
33
34 // TODO result should be returned without ordering
35 sortCommits(result)
36
37 // for merges of identical cherry-picks
38 return removeComp(path, result, equivalent)
39}
40
41type commitSorterer struct {
42 l []*object.Commit
43}
44
45func (s commitSorterer) Len() int {
46 return len(s.l)
47}
48
49func (s commitSorterer) Less(i, j int) bool {
50 return s.l[i].Committer.When.Before(s.l[j].Committer.When) ||
51 s.l[i].Committer.When.Equal(s.l[j].Committer.When) &&
52 s.l[i].Author.When.Before(s.l[j].Author.When)
53}
54
55func (s commitSorterer) Swap(i, j int) {
56 s.l[i], s.l[j] = s.l[j], s.l[i]
57}
58
59// SortCommits sorts a commit list by commit date, from older to newer.
60func sortCommits(l []*object.Commit) {
61 s := &commitSorterer{l}
62 sort.Sort(s)
63}
64
65// Recursive traversal of the commit graph, generating a linear history of the
66// path.
67func walkGraph(result *[]*object.Commit, seen *map[plumbing.Hash]struct{}, current *object.Commit, path string) error {
68 // check and update seen
69 if _, ok := (*seen)[current.Hash]; ok {
70 return nil
71 }
72 (*seen)[current.Hash] = struct{}{}
73
74 // if the path is not in the current commit, stop searching.
75 if _, err := current.File(path); err != nil {
76 return nil
77 }
78
79 // optimization: don't traverse branches that does not
80 // contain the path.
81 parents, err := parentsContainingPath(path, current)
82 if err != nil {
83 return err
84 }
85 switch len(parents) {
86 // if the path is not found in any of its parents, the path was
87 // created by this commit; we must add it to the revisions list and
88 // stop searching. This includes the case when current is the
89 // initial commit.
90 case 0:
91 *result = append(*result, current)
92 return nil
93 case 1: // only one parent contains the path
94 // if the file contents has change, add the current commit
95 different, err := differentContents(path, current, parents)
96 if err != nil {
97 return err
98 }
99 if len(different) == 1 {
100 *result = append(*result, current)
101 }
102 // in any case, walk the parent
103 return walkGraph(result, seen, parents[0], path)
104 default: // more than one parent contains the path
105 // TODO: detect merges that had a conflict, because they must be
106 // included in the result here.
107 for _, p := range parents {
108 err := walkGraph(result, seen, p, path)
109 if err != nil {
110 return err
111 }
112 }
113 }
114 return nil
115}
116
117func parentsContainingPath(path string, c *object.Commit) ([]*object.Commit, error) {
118 // TODO: benchmark this method making git.object.Commit.parent public instead of using
119 // an iterator
120 var result []*object.Commit
121 iter := c.Parents()
122 for {
123 parent, err := iter.Next()
124 if err == io.EOF {
125 return result, nil
126 }
127 if err != nil {
128 return nil, err
129 }
130 if _, err := parent.File(path); err == nil {
131 result = append(result, parent)
132 }
133 }
134}
135
136// Returns an slice of the commits in "cs" that has the file "path", but with different
137// contents than what can be found in "c".
138func differentContents(path string, c *object.Commit, cs []*object.Commit) ([]*object.Commit, error) {
139 result := make([]*object.Commit, 0, len(cs))
140 h, found := blobHash(path, c)
141 if !found {
142 return nil, object.ErrFileNotFound
143 }
144 for _, cx := range cs {
145 if hx, found := blobHash(path, cx); found && h != hx {
146 result = append(result, cx)
147 }
148 }
149 return result, nil
150}
151
152// blobHash returns the hash of a path in a commit
153func blobHash(path string, commit *object.Commit) (hash plumbing.Hash, found bool) {
154 file, err := commit.File(path)
155 if err != nil {
156 var empty plumbing.Hash
157 return empty, found
158 }
159 return file.Hash, true
160}
161
162type contentsComparatorFn func(path string, a, b *object.Commit) (bool, error)
163
164// Returns a new slice of commits, with duplicates removed. Expects a
165// sorted commit list. Duplication is defined according to "comp". It
166// will always keep the first commit of a series of duplicated commits.
167func removeComp(path string, cs []*object.Commit, comp contentsComparatorFn) ([]*object.Commit, error) {
168 result := make([]*object.Commit, 0, len(cs))
169 if len(cs) == 0 {
170 return result, nil
171 }
172 result = append(result, cs[0])
173 for i := 1; i < len(cs); i++ {
174 equals, err := comp(path, cs[i], cs[i-1])
175 if err != nil {
176 return nil, err
177 }
178 if !equals {
179 result = append(result, cs[i])
180 }
181 }
182 return result, nil
183}
184
185// Equivalent commits are commits whose patch is the same.
186func equivalent(path string, a, b *object.Commit) (bool, error) {
187 numParentsA := a.NumParents()
188 numParentsB := b.NumParents()
189
190 // the first commit is not equivalent to anyone
191 // and "I think" merges can not be equivalent to anything
192 if numParentsA != 1 || numParentsB != 1 {
193 return false, nil
194 }
195
196 diffsA, err := patch(a, path)
197 if err != nil {
198 return false, err
199 }
200 diffsB, err := patch(b, path)
201 if err != nil {
202 return false, err
203 }
204
205 return sameDiffs(diffsA, diffsB), nil
206}
207
208func patch(c *object.Commit, path string) ([]diffmatchpatch.Diff, error) {
209 // get contents of the file in the commit
210 file, err := c.File(path)
211 if err != nil {
212 return nil, err
213 }
214 content, err := file.Contents()
215 if err != nil {
216 return nil, err
217 }
218
219 // get contents of the file in the first parent of the commit
220 var contentParent string
221 iter := c.Parents()
222 parent, err := iter.Next()
223 if err != nil {
224 return nil, err
225 }
226 file, err = parent.File(path)
227 if err != nil {
228 contentParent = ""
229 } else {
230 contentParent, err = file.Contents()
231 if err != nil {
232 return nil, err
233 }
234 }
235
236 // compare the contents of parent and child
237 return diff.Do(content, contentParent), nil
238}
239
240func sameDiffs(a, b []diffmatchpatch.Diff) bool {
241 if len(a) != len(b) {
242 return false
243 }
244 for i := range a {
245 if !sameDiff(a[i], b[i]) {
246 return false
247 }
248 }
249 return true
250}
251
252func sameDiff(a, b diffmatchpatch.Diff) bool {
253 if a.Type != b.Type {
254 return false
255 }
256 switch a.Type {
257 case 0:
258 return countLines(a.Text) == countLines(b.Text)
259 case 1, -1:
260 return a.Text == b.Text
261 default:
262 panic("unreachable")
263 }
264}