1// Copyright 2019 CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package diff
16
17import (
18 "cuelang.org/go/cue"
19)
20
21// Profile configures a diff operation.
22type Profile struct {
23 Concrete bool
24
25 // Hidden fields are only useful to compare when a values are from the same
26 // package.
27 SkipHidden bool
28
29 // TODO: Use this method instead of SkipHidden. To do this, we need to have
30 // access the package associated with a hidden field, which is only
31 // accessible through the Iterator API. And we should probably get rid of
32 // the cue.Struct API.
33 //
34 // HiddenPkg compares hidden fields for the package if this is not the empty
35 // string. Use "_" for the anonymous package.
36 // HiddenPkg string
37}
38
39var (
40 // Schema is the standard profile used for comparing schema.
41 Schema = &Profile{}
42
43 // Final is the standard profile for comparing data.
44 Final = &Profile{
45 Concrete: true,
46 }
47)
48
49// TODO: don't return Kind, which is always Modified or not.
50
51// Diff is a shorthand for Schema.Diff.
52func Diff(x, y cue.Value) (Kind, *EditScript) {
53 return Schema.Diff(x, y)
54}
55
56// Diff returns an edit script representing the difference between x and y.
57func (p *Profile) Diff(x, y cue.Value) (Kind, *EditScript) {
58 d := differ{cfg: *p}
59 k, es := d.diffValue(x, y)
60 if k == Modified && es == nil {
61 es = &EditScript{X: x, Y: y}
62 }
63 return k, es
64}
65
66// Kind identifies the kind of operation of an edit script.
67type Kind uint8
68
69const (
70 // Identity indicates that a value pair is identical in both list X and Y.
71 Identity Kind = iota
72 // UniqueX indicates that a value only exists in X and not Y.
73 UniqueX
74 // UniqueY indicates that a value only exists in Y and not X.
75 UniqueY
76 // Modified indicates that a value pair is a modification of each other.
77 Modified
78)
79
80// EditScript represents the series of differences between two CUE values.
81// x and y must be either both list or struct.
82type EditScript struct {
83 X, Y cue.Value
84 Edits []Edit
85}
86
87// Edit represents a single operation within an edit-script.
88type Edit struct {
89 Kind Kind
90 XSel cue.Selector // valid if UniqueY
91 YSel cue.Selector // valid if UniqueX
92 Sub *EditScript // non-nil if Modified
93}
94
95type differ struct {
96 cfg Profile
97}
98
99func (d *differ) diffValue(x, y cue.Value) (Kind, *EditScript) {
100 if d.cfg.Concrete {
101 x, _ = x.Default()
102 y, _ = y.Default()
103 }
104 if x.IncompleteKind() != y.IncompleteKind() {
105 return Modified, nil
106 }
107
108 switch xc, yc := x.IsConcrete(), y.IsConcrete(); {
109 case xc != yc:
110 return Modified, nil
111
112 case xc && yc:
113 switch k := x.Kind(); k {
114 case cue.StructKind:
115 return d.diffStruct(x, y)
116
117 case cue.ListKind:
118 return d.diffList(x, y)
119 }
120 fallthrough
121
122 default:
123 // In concrete mode we do not care about non-concrete values.
124 if d.cfg.Concrete {
125 return Identity, nil
126 }
127
128 if !x.Equals(y) {
129 return Modified, nil
130 }
131 }
132
133 return Identity, nil
134}
135
136type field struct {
137 sel cue.Selector
138 val cue.Value
139}
140
141// TODO(mvdan): use slices.Collect once we swap cue.Iterator for a Go iterator
142func (d *differ) collectFields(v cue.Value) []field {
143 iter, _ := v.Fields(cue.Hidden(!d.cfg.SkipHidden), cue.Definitions(true), cue.Optional(true))
144 var fields []field
145 for iter.Next() {
146 fields = append(fields, field{iter.Selector(), iter.Value()})
147 }
148 return fields
149}
150
151func (d *differ) diffStruct(x, y cue.Value) (Kind, *EditScript) {
152 xFields := d.collectFields(x)
153 yFields := d.collectFields(y)
154
155 // Best-effort topological sort, prioritizing x over y, using a variant of
156 // Kahn's algorithm (see, for instance
157 // https://www.geeksforgeeks.org/topological-sorting-indegree-based-solution/).
158 // We assume that the order of the elements of each value indicate an edge
159 // in the graph. This means that only the next unprocessed nodes can be
160 // those with no incoming edges.
161 xMap := make(map[cue.Selector]struct{}, len(xFields))
162 yMap := make(map[cue.Selector]int, len(yFields))
163 for _, f := range xFields {
164 xMap[f.sel] = struct{}{}
165 }
166 for i, f := range yFields {
167 yMap[f.sel] = i + 1
168 }
169
170 edits := []Edit{}
171 differs := false
172
173 for xi, yi := 0, 0; xi < len(xFields) || yi < len(yFields); {
174 // Process zero nodes
175 for ; xi < len(xFields); xi++ {
176 xf := xFields[xi]
177 yp := yMap[xf.sel]
178 if yp > 0 {
179 break
180 }
181 edits = append(edits, Edit{UniqueX, xf.sel, cue.Selector{}, nil})
182 differs = true
183 }
184 for ; yi < len(yFields); yi++ {
185 yf := yFields[yi]
186 if yMap[yf.sel] == 0 {
187 // already done
188 continue
189 }
190 if _, ok := xMap[yf.sel]; ok {
191 break
192 }
193 yMap[yf.sel] = 0
194 edits = append(edits, Edit{UniqueY, cue.Selector{}, yf.sel, nil})
195 differs = true
196 }
197
198 // Compare nodes
199 for ; xi < len(xFields); xi++ {
200 xf := xFields[xi]
201 yp := yMap[xf.sel]
202 if yp == 0 {
203 break
204 }
205 // If yp != xi+1, the topological sort was not possible.
206 yMap[xf.sel] = 0
207
208 yf := yFields[yp-1]
209
210 var kind Kind
211 var script *EditScript
212 switch {
213 case xf.sel.IsDefinition() != yf.sel.IsDefinition(), xf.sel.ConstraintType() != yf.sel.ConstraintType():
214 kind = Modified
215 default:
216 // TODO(perf): consider evaluating lazily.
217 kind, script = d.diffValue(xf.val, yf.val)
218 }
219
220 edits = append(edits, Edit{kind, xf.sel, yf.sel, script})
221 differs = differs || kind != Identity
222 }
223 }
224 if !differs {
225 return Identity, nil
226 }
227 return Modified, &EditScript{X: x, Y: y, Edits: edits}
228}
229
230// TODO: right now we do a simple element-by-element comparison. Instead,
231// use an algorithm that approximates a minimal Levenshtein distance, like the
232// one in github.com/google/go-cmp/internal/diff.
233func (d *differ) diffList(x, y cue.Value) (Kind, *EditScript) {
234 ix, _ := x.List()
235 iy, _ := y.List()
236
237 edits := []Edit{}
238 differs := false
239 i := 0
240
241 for {
242 // TODO: This would be much easier with a Next/Done API.
243 hasX := ix.Next()
244 hasY := iy.Next()
245 if !hasX {
246 for hasY {
247 differs = true
248 edits = append(edits, Edit{UniqueY, cue.Selector{}, cue.Index(i), nil})
249 hasY = iy.Next()
250 i++
251 }
252 break
253 }
254 if !hasY {
255 for hasX {
256 differs = true
257 edits = append(edits, Edit{UniqueX, cue.Index(i), cue.Selector{}, nil})
258 hasX = ix.Next()
259 i++
260 }
261 break
262 }
263
264 // Both x and y have a value.
265 kind, script := d.diffValue(ix.Value(), iy.Value())
266 if kind != Identity {
267 differs = true
268 }
269 edits = append(edits, Edit{kind, cue.Index(i), cue.Index(i), script})
270 i++
271 }
272 if !differs {
273 return Identity, nil
274 }
275 return Modified, &EditScript{X: x, Y: y, Edits: edits}
276}