Line data Source code
1 : // Copyright (C) 2009 The Android Open Source Project
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 :
16 : package com.google.gerrit.server.patch;
17 :
18 : import com.google.common.base.Throwables;
19 : import com.google.common.collect.ImmutableList;
20 : import com.google.common.collect.ImmutableSet;
21 : import com.google.common.flogger.FluentLogger;
22 : import com.google.gerrit.jgit.diff.ReplaceEdit;
23 : import com.google.gerrit.server.config.ConfigUtil;
24 : import com.google.gerrit.server.config.GerritServerConfig;
25 : import com.google.inject.Inject;
26 : import com.google.inject.assistedinject.Assisted;
27 : import java.util.ArrayList;
28 : import java.util.Arrays;
29 : import java.util.List;
30 : import java.util.Optional;
31 : import java.util.concurrent.Callable;
32 : import java.util.concurrent.ExecutionException;
33 : import java.util.concurrent.ExecutorService;
34 : import java.util.concurrent.Future;
35 : import java.util.concurrent.TimeUnit;
36 : import java.util.concurrent.TimeoutException;
37 : import java.util.regex.Pattern;
38 : import java.util.stream.Collectors;
39 : import org.eclipse.jgit.diff.Edit;
40 : import org.eclipse.jgit.diff.MyersDiff;
41 : import org.eclipse.jgit.lib.Config;
42 :
43 : class IntraLineLoader implements Callable<IntraLineDiff> {
44 6 : static final FluentLogger logger = FluentLogger.forEnclosingClass();
45 :
46 : interface Factory {
47 : IntraLineLoader create(IntraLineDiffKey key, IntraLineDiffArgs args);
48 : }
49 :
50 6 : private static final Pattern BLANK_LINE_RE =
51 6 : Pattern.compile("^[ \\t]*(|[{}]|/\\*\\*?|\\*)[ \\t]*$");
52 :
53 6 : private static final Pattern CONTROL_BLOCK_START_RE = Pattern.compile("[{:][ \\t]*$");
54 :
55 : private final ExecutorService diffExecutor;
56 : private final long timeoutMillis;
57 : private final IntraLineDiffKey key;
58 : private final IntraLineDiffArgs args;
59 :
60 : @Inject
61 : IntraLineLoader(
62 : @DiffExecutor ExecutorService diffExecutor,
63 : @GerritServerConfig Config cfg,
64 : @Assisted IntraLineDiffKey key,
65 5 : @Assisted IntraLineDiffArgs args) {
66 5 : this.diffExecutor = diffExecutor;
67 5 : timeoutMillis =
68 5 : ConfigUtil.getTimeUnit(
69 : cfg,
70 : "cache",
71 : PatchListCacheImpl.INTRA_NAME,
72 : "timeout",
73 5 : TimeUnit.MILLISECONDS.convert(5, TimeUnit.SECONDS),
74 : TimeUnit.MILLISECONDS);
75 5 : this.key = key;
76 5 : this.args = args;
77 5 : }
78 :
79 : @Override
80 : public IntraLineDiff call() throws Exception {
81 5 : Future<IntraLineDiff> result =
82 5 : diffExecutor.submit(
83 : () ->
84 5 : IntraLineLoader.compute(
85 5 : args.aText(), args.bText(), args.edits(), args.editsDueToRebase()));
86 : try {
87 5 : return result.get(timeoutMillis, TimeUnit.MILLISECONDS);
88 0 : } catch (InterruptedException | TimeoutException e) {
89 0 : logger.atWarning().log(
90 : "%s ms timeout reached for IntraLineDiff"
91 : + " in project %s on commit %s for path %s comparing %s..%s",
92 0 : timeoutMillis,
93 0 : args.project(),
94 0 : args.commit().name(),
95 0 : args.path(),
96 0 : key.getBlobA().name(),
97 0 : key.getBlobB().name());
98 0 : result.cancel(true);
99 0 : return new IntraLineDiff(IntraLineDiff.Status.TIMEOUT);
100 0 : } catch (ExecutionException e) {
101 : // If there was an error computing the result, carry it
102 : // up to the caller so the cache knows this key is invalid.
103 0 : Throwables.throwIfInstanceOf(e.getCause(), Exception.class);
104 0 : throw new Exception(e.getMessage(), e.getCause());
105 : }
106 : }
107 :
108 : static IntraLineDiff compute(
109 : Text aText,
110 : Text bText,
111 : ImmutableList<Edit> immutableEdits,
112 : ImmutableSet<Edit> immutableEditsDueToRebase) {
113 6 : List<Edit> edits = new ArrayList<>(immutableEdits);
114 6 : combineLineEdits(edits, immutableEditsDueToRebase, aText, bText);
115 :
116 6 : for (int i = 0; i < edits.size(); i++) {
117 6 : Edit e = edits.get(i);
118 :
119 6 : if (e.getType() == Edit.Type.REPLACE) {
120 6 : CharText a = new CharText(aText, e.getBeginA(), e.getEndA());
121 6 : CharText b = new CharText(bText, e.getBeginB(), e.getEndB());
122 6 : CharTextComparator cmp = new CharTextComparator();
123 :
124 6 : List<Edit> wordEdits = MyersDiff.INSTANCE.diff(cmp, a, b);
125 :
126 : // Combine edits that are really close together. If they are
127 : // just a few characters apart we tend to get better results
128 : // by joining them together and taking the whole span.
129 : //
130 6 : for (int j = 0; j < wordEdits.size() - 1; ) {
131 6 : Edit c = wordEdits.get(j);
132 6 : Edit n = wordEdits.get(j + 1);
133 :
134 6 : if (n.getBeginA() - c.getEndA() <= 5 || n.getBeginB() - c.getEndB() <= 5) {
135 6 : int ab = c.getBeginA();
136 6 : int ae = n.getEndA();
137 :
138 6 : int bb = c.getBeginB();
139 6 : int be = n.getEndB();
140 :
141 6 : if (canCoalesce(a, c.getEndA(), n.getBeginA())
142 6 : && canCoalesce(b, c.getEndB(), n.getBeginB())) {
143 6 : wordEdits.set(j, new Edit(ab, ae, bb, be));
144 6 : wordEdits.remove(j + 1);
145 6 : continue;
146 : }
147 : }
148 :
149 2 : j++;
150 2 : }
151 :
152 : // Apply some simple rules to fix up some of the edits. Our
153 : // logic above, along with our per-character difference tends
154 : // to produce some crazy stuff.
155 : //
156 6 : for (int j = 0; j < wordEdits.size(); j++) {
157 6 : Edit c = wordEdits.get(j);
158 6 : int ab = c.getBeginA();
159 6 : int ae = c.getEndA();
160 :
161 6 : int bb = c.getBeginB();
162 6 : int be = c.getEndB();
163 :
164 : // Sometimes the diff generator produces an INSERT or DELETE
165 : // right up against a REPLACE, but we only find this after
166 : // we've also played some shifting games on the prior edit.
167 : // If that happened to us, coalesce them together so we can
168 : // correct this mess for the user. If we don't we wind up
169 : // with silly stuff like "es" -> "es = Addresses".
170 : //
171 6 : if (1 < j) {
172 1 : Edit p = wordEdits.get(j - 1);
173 1 : if (p.getEndA() == ab || p.getEndB() == bb) {
174 0 : if (p.getEndA() == ab && p.getBeginA() < p.getEndA()) {
175 0 : ab = p.getBeginA();
176 : }
177 0 : if (p.getEndB() == bb && p.getBeginB() < p.getEndB()) {
178 0 : bb = p.getBeginB();
179 : }
180 0 : wordEdits.remove(--j);
181 : }
182 : }
183 :
184 : // We sometimes collapsed an edit together in a strange way,
185 : // such that the edges of each text is identical. Fix by
186 : // by dropping out that incorrectly replaced region.
187 : //
188 6 : while (ab < ae && bb < be && cmp.equals(a, ab, b, bb)) {
189 0 : ab++;
190 0 : bb++;
191 : }
192 6 : while (ab < ae && bb < be && cmp.equals(a, ae - 1, b, be - 1)) {
193 1 : ae--;
194 1 : be--;
195 : }
196 :
197 : // The leading part of an edit and its trailing part in the same
198 : // text might be identical. Slide down that edit and use the tail
199 : // rather than the leading bit.
200 : //
201 6 : while (0 < ab
202 : && ab < ae
203 3 : && a.charAt(ab - 1) != '\n'
204 3 : && cmp.equals(a, ab - 1, a, ae - 1)) {
205 2 : ab--;
206 2 : ae--;
207 : }
208 6 : if (!a.isLineStart(ab) || !a.contains(ab, ae, '\n')) {
209 6 : while (ab < ae && ae < a.size() && cmp.equals(a, ab, a, ae)) {
210 2 : ab++;
211 2 : ae++;
212 2 : if (a.charAt(ae - 1) == '\n') {
213 0 : break;
214 : }
215 : }
216 : }
217 :
218 6 : while (0 < bb
219 : && bb < be
220 3 : && b.charAt(bb - 1) != '\n'
221 3 : && cmp.equals(b, bb - 1, b, be - 1)) {
222 1 : bb--;
223 1 : be--;
224 : }
225 6 : if (!b.isLineStart(bb) || !b.contains(bb, be, '\n')) {
226 6 : while (bb < be && be < b.size() && cmp.equals(b, bb, b, be)) {
227 1 : bb++;
228 1 : be++;
229 1 : if (b.charAt(be - 1) == '\n') {
230 0 : break;
231 : }
232 : }
233 : }
234 :
235 : // If most of a line was modified except the LF was common, make
236 : // the LF part of the modification region. This is easier to read.
237 : //
238 6 : if (ab < ae //
239 6 : && (ab == 0 || a.charAt(ab - 1) == '\n') //
240 6 : && ae < a.size()
241 5 : && a.charAt(ae - 1) != '\n'
242 5 : && a.charAt(ae) == '\n') {
243 3 : ae++;
244 : }
245 6 : if (bb < be //
246 6 : && (bb == 0 || b.charAt(bb - 1) == '\n') //
247 6 : && be < b.size()
248 5 : && b.charAt(be - 1) != '\n'
249 5 : && b.charAt(be) == '\n') {
250 3 : be++;
251 : }
252 :
253 6 : wordEdits.set(j, new Edit(ab, ae, bb, be));
254 : }
255 :
256 : // Validate that the intra-line edits applied to the "a" text produces the "b" text. If this
257 : // check fails, fallback to a single replace edit that covers the whole area.
258 6 : if (isValidTransformation(a, b, wordEdits)) {
259 6 : edits.set(i, new ReplaceEdit(e, wordEdits));
260 : } else {
261 1 : edits.set(i, new ReplaceEdit(e, Arrays.asList(new Edit(0, a.size(), 0, b.size()))));
262 : }
263 : }
264 : }
265 :
266 6 : return new IntraLineDiff(edits);
267 : }
268 :
269 : /**
270 : * Validate that the application of the list of {@code edits} to the {@code lText} text produces
271 : * the {@code rText} text.
272 : *
273 : * @return true if {@code lText} + {@code edits} results in the {@code rText} text, and false
274 : * otherwise.
275 : */
276 : private static boolean isValidTransformation(CharText lText, CharText rText, List<Edit> edits) {
277 : // Apply replace and delete edits to the left text
278 6 : Optional<String> left =
279 6 : applyEditsToString(
280 6 : toStringBuilder(lText),
281 6 : toStringBuilder(rText).toString(),
282 6 : edits.stream()
283 6 : .filter(e -> e.getType() == Edit.Type.REPLACE || e.getType() == Edit.Type.DELETE)
284 6 : .collect(Collectors.toList()));
285 : // Remove insert edits from the right text
286 6 : Optional<String> right =
287 6 : applyEditsToString(
288 6 : toStringBuilder(rText),
289 : null,
290 6 : edits.stream()
291 6 : .filter(e -> e.getType() == Edit.Type.INSERT)
292 6 : .collect(Collectors.toList()));
293 :
294 6 : return left.isPresent() && right.isPresent() && left.get().contentEquals(right.get());
295 : }
296 :
297 : /**
298 : * Apply edits to the {@code target} string. Replace edits are applied to target and replaced with
299 : * a substring from {@code from}. Delete edits are applied to target. Insert edits are removed
300 : * from target.
301 : *
302 : * @return Optional containing the transformed string, or empty if the transformation fails (due
303 : * to index out of bounds).
304 : */
305 : private static Optional<String> applyEditsToString(
306 : StringBuilder target, String from, List<Edit> edits) {
307 : // Process edits right to left to avoid re-computation of indices when characters are removed.
308 : try {
309 6 : for (int i = edits.size() - 1; i >= 0; i--) {
310 6 : Edit edit = edits.get(i);
311 6 : if (edit.getType() == Edit.Type.REPLACE) {
312 6 : boundaryCheck(target, edit.getBeginA(), edit.getEndA() - 1);
313 6 : boundaryCheck(from, edit.getBeginB(), edit.getEndB() - 1);
314 6 : target.replace(
315 6 : edit.getBeginA(), edit.getEndA(), from.substring(edit.getBeginB(), edit.getEndB()));
316 3 : } else if (edit.getType() == Edit.Type.DELETE) {
317 3 : boundaryCheck(target, edit.getBeginA(), edit.getEndA() - 1);
318 3 : target.delete(edit.getBeginA(), edit.getEndA());
319 2 : } else if (edit.getType() == Edit.Type.INSERT) {
320 2 : boundaryCheck(target, edit.getBeginB(), edit.getEndB() - 1);
321 2 : target.delete(edit.getBeginB(), edit.getEndB());
322 : }
323 : }
324 6 : return Optional.of(target.toString());
325 0 : } catch (StringIndexOutOfBoundsException unused) {
326 0 : return Optional.empty();
327 : }
328 : }
329 :
330 : private static void boundaryCheck(StringBuilder s, int i1, int i2) {
331 6 : if (i1 >= 0 && i2 >= 0 && i1 < s.length() && i2 < s.length()) {
332 6 : return;
333 : }
334 0 : throw new StringIndexOutOfBoundsException();
335 : }
336 :
337 : private static void boundaryCheck(String s, int i1, int i2) {
338 6 : if (i1 >= 0 && i2 >= 0 && i1 < s.length() && i2 < s.length()) {
339 6 : return;
340 : }
341 0 : throw new StringIndexOutOfBoundsException();
342 : }
343 :
344 : private static StringBuilder toStringBuilder(CharText text) {
345 6 : StringBuilder result = new StringBuilder();
346 6 : for (int i = 0; i < text.size(); i++) {
347 6 : result.append(text.charAt(i));
348 : }
349 6 : return result;
350 : }
351 :
352 : private static void combineLineEdits(
353 : List<Edit> edits, ImmutableSet<Edit> editsDueToRebase, Text a, Text b) {
354 6 : for (int j = 0; j < edits.size() - 1; ) {
355 2 : Edit c = edits.get(j);
356 2 : Edit n = edits.get(j + 1);
357 :
358 2 : if (editsDueToRebase.contains(c) || editsDueToRebase.contains(n)) {
359 : // Don't combine any edits which were identified as being introduced by a rebase as we would
360 : // lose that information because of the combination.
361 1 : j++;
362 1 : continue;
363 : }
364 :
365 : // Combine edits that are really close together. Right now our rule
366 : // is, coalesce two line edits which are only one line apart if that
367 : // common context line is either a "pointless line", or is identical
368 : // on both sides and starts a new block of code. These are mostly
369 : // block reindents to add or remove control flow operators.
370 : //
371 2 : final int ad = n.getBeginA() - c.getEndA();
372 2 : final int bd = n.getBeginB() - c.getEndB();
373 2 : if ((1 <= ad && isBlankLineGap(a, c.getEndA(), n.getBeginA()))
374 2 : || (1 <= bd && isBlankLineGap(b, c.getEndB(), n.getBeginB()))
375 2 : || (ad == 1 && bd == 1 && isControlBlockStart(a, c.getEndA()))) {
376 1 : int ab = c.getBeginA();
377 1 : int ae = n.getEndA();
378 :
379 1 : int bb = c.getBeginB();
380 1 : int be = n.getEndB();
381 :
382 1 : edits.set(j, new Edit(ab, ae, bb, be));
383 1 : edits.remove(j + 1);
384 1 : continue;
385 : }
386 :
387 2 : j++;
388 2 : }
389 6 : }
390 :
391 : private static boolean isBlankLineGap(Text a, int b, int e) {
392 2 : for (; b < e; b++) {
393 2 : if (!BLANK_LINE_RE.matcher(a.getString(b)).matches()) {
394 2 : return false;
395 : }
396 : }
397 1 : return true;
398 : }
399 :
400 : private static boolean isControlBlockStart(Text a, int idx) {
401 2 : return CONTROL_BLOCK_START_RE.matcher(a.getString(idx)).find();
402 : }
403 :
404 : private static boolean canCoalesce(CharText a, int b, int e) {
405 6 : while (b < e) {
406 6 : if (a.charAt(b++) == '\n') {
407 1 : return false;
408 : }
409 : }
410 6 : return true;
411 : }
412 : }
|