Line data Source code
1 : // Copyright (C) 2020 The Android Open Source Project
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : package com.google.gerrit.server.patch.gitfilediff;
16 :
17 : import static java.util.function.Function.identity;
18 :
19 : import com.google.auto.value.AutoValue;
20 : import com.google.common.base.Throwables;
21 : import com.google.common.cache.CacheLoader;
22 : import com.google.common.cache.LoadingCache;
23 : import com.google.common.collect.ImmutableList;
24 : import com.google.common.collect.ImmutableMap;
25 : import com.google.common.collect.ImmutableSet;
26 : import com.google.common.collect.Iterables;
27 : import com.google.common.collect.ListMultimap;
28 : import com.google.common.collect.MultimapBuilder;
29 : import com.google.common.collect.Multimaps;
30 : import com.google.common.collect.Streams;
31 : import com.google.gerrit.entities.Patch;
32 : import com.google.gerrit.entities.Project;
33 : import com.google.gerrit.extensions.client.DiffPreferencesInfo.Whitespace;
34 : import com.google.gerrit.metrics.Counter0;
35 : import com.google.gerrit.metrics.Description;
36 : import com.google.gerrit.metrics.MetricMaker;
37 : import com.google.gerrit.server.cache.CacheModule;
38 : import com.google.gerrit.server.config.ConfigUtil;
39 : import com.google.gerrit.server.config.GerritServerConfig;
40 : import com.google.gerrit.server.git.GitRepositoryManager;
41 : import com.google.gerrit.server.logging.Metadata;
42 : import com.google.gerrit.server.logging.TraceContext;
43 : import com.google.gerrit.server.logging.TraceContext.TraceTimer;
44 : import com.google.gerrit.server.patch.DiffExecutor;
45 : import com.google.gerrit.server.patch.DiffNotAvailableException;
46 : import com.google.gerrit.server.util.git.CloseablePool;
47 : import com.google.inject.Inject;
48 : import com.google.inject.Module;
49 : import com.google.inject.Singleton;
50 : import com.google.inject.name.Named;
51 : import java.io.IOException;
52 : import java.util.ArrayList;
53 : import java.util.Collection;
54 : import java.util.Comparator;
55 : import java.util.List;
56 : import java.util.Map;
57 : import java.util.Set;
58 : import java.util.concurrent.ExecutionException;
59 : import java.util.concurrent.ExecutorService;
60 : import java.util.concurrent.Future;
61 : import java.util.concurrent.TimeUnit;
62 : import java.util.concurrent.TimeoutException;
63 : import java.util.stream.Collectors;
64 : import org.eclipse.jgit.diff.DiffEntry;
65 : import org.eclipse.jgit.diff.DiffEntry.ChangeType;
66 : import org.eclipse.jgit.diff.DiffFormatter;
67 : import org.eclipse.jgit.diff.HistogramDiff;
68 : import org.eclipse.jgit.diff.RawTextComparator;
69 : import org.eclipse.jgit.lib.AbbreviatedObjectId;
70 : import org.eclipse.jgit.lib.Config;
71 : import org.eclipse.jgit.lib.ObjectId;
72 : import org.eclipse.jgit.lib.Repository;
73 : import org.eclipse.jgit.patch.FileHeader;
74 : import org.eclipse.jgit.util.io.DisabledOutputStream;
75 :
76 : /** Implementation of the {@link GitFileDiffCache} */
77 : @Singleton
78 : public class GitFileDiffCacheImpl implements GitFileDiffCache {
79 : private static final String GIT_DIFF = "git_file_diff";
80 :
81 : public static Module module() {
82 152 : return new CacheModule() {
83 : @Override
84 : protected void configure() {
85 152 : bind(GitFileDiffCache.class).to(GitFileDiffCacheImpl.class);
86 152 : persist(GIT_DIFF, GitFileDiffCacheKey.class, GitFileDiff.class)
87 152 : .maximumWeight(10 << 20)
88 152 : .weigher(GitFileDiffWeigher.class)
89 152 : .keySerializer(GitFileDiffCacheKey.Serializer.INSTANCE)
90 152 : .valueSerializer(GitFileDiff.Serializer.INSTANCE)
91 152 : .version(3)
92 152 : .loader(GitFileDiffCacheImpl.Loader.class);
93 152 : }
94 : };
95 : }
96 :
97 : @Singleton
98 : static class Metrics {
99 : final Counter0 timeouts;
100 :
101 : @Inject
102 152 : Metrics(MetricMaker metricMaker) {
103 152 : timeouts =
104 152 : metricMaker.newCounter(
105 : "caches/diff/timeouts",
106 : new Description(
107 : "Total number of git file diff computations that resulted in timeouts.")
108 152 : .setRate()
109 152 : .setUnit("count"));
110 152 : }
111 : }
112 :
113 : /** Enum for the supported diff algorithms for the file diff computation. */
114 153 : public enum DiffAlgorithm {
115 153 : HISTOGRAM_WITH_FALLBACK_MYERS,
116 153 : HISTOGRAM_NO_FALLBACK
117 : }
118 :
119 : /** Creates a new JGit diff algorithm instance using the Gerrit's {@link DiffAlgorithm} enum. */
120 0 : public static class DiffAlgorithmFactory {
121 : public static org.eclipse.jgit.diff.DiffAlgorithm create(DiffAlgorithm diffAlgorithm) {
122 104 : HistogramDiff result = new HistogramDiff();
123 104 : if (diffAlgorithm.equals(DiffAlgorithm.HISTOGRAM_NO_FALLBACK)) {
124 0 : result.setFallbackAlgorithm(null);
125 : }
126 104 : return result;
127 : }
128 : }
129 :
130 : private final LoadingCache<GitFileDiffCacheKey, GitFileDiff> cache;
131 :
132 : @Inject
133 : public GitFileDiffCacheImpl(
134 146 : @Named(GIT_DIFF) LoadingCache<GitFileDiffCacheKey, GitFileDiff> cache) {
135 146 : this.cache = cache;
136 146 : }
137 :
138 : @Override
139 : public GitFileDiff get(GitFileDiffCacheKey key) throws DiffNotAvailableException {
140 : try {
141 0 : return cache.get(key);
142 0 : } catch (ExecutionException e) {
143 0 : throw new DiffNotAvailableException(e);
144 : }
145 : }
146 :
147 : @Override
148 : public ImmutableMap<GitFileDiffCacheKey, GitFileDiff> getAll(Iterable<GitFileDiffCacheKey> keys)
149 : throws DiffNotAvailableException {
150 : try {
151 104 : return cache.getAll(keys);
152 0 : } catch (ExecutionException e) {
153 0 : throw new DiffNotAvailableException(e);
154 : }
155 : }
156 :
157 : static class Loader extends CacheLoader<GitFileDiffCacheKey, GitFileDiff> {
158 : private final GitRepositoryManager repoManager;
159 : private final ExecutorService diffExecutor;
160 : private final long timeoutMillis;
161 : private final Metrics metrics;
162 :
163 : @Inject
164 : public Loader(
165 : @GerritServerConfig Config cfg,
166 : GitRepositoryManager repoManager,
167 : @DiffExecutor ExecutorService de,
168 152 : Metrics metrics) {
169 152 : this.repoManager = repoManager;
170 152 : this.diffExecutor = de;
171 152 : this.timeoutMillis =
172 152 : ConfigUtil.getTimeUnit(
173 : cfg,
174 : "cache",
175 : GIT_DIFF,
176 : "timeout",
177 152 : TimeUnit.MILLISECONDS.convert(5, TimeUnit.SECONDS),
178 : TimeUnit.MILLISECONDS);
179 152 : this.metrics = metrics;
180 152 : }
181 :
182 : @Override
183 : public GitFileDiff load(GitFileDiffCacheKey key) throws IOException, DiffNotAvailableException {
184 0 : try (TraceTimer timer =
185 0 : TraceContext.newTimer(
186 : "Loading a single key from git file diff cache",
187 0 : Metadata.builder()
188 0 : .diffAlgorithm(key.diffAlgorithm().name())
189 0 : .filePath(key.newFilePath())
190 0 : .build())) {
191 0 : return loadAll(ImmutableList.of(key)).get(key);
192 : }
193 : }
194 :
195 : @Override
196 : public Map<GitFileDiffCacheKey, GitFileDiff> loadAll(
197 : Iterable<? extends GitFileDiffCacheKey> keys)
198 : throws IOException, DiffNotAvailableException {
199 93 : try (TraceTimer timer =
200 93 : TraceContext.newTimer("Loading multiple keys from git file diff cache")) {
201 93 : ImmutableMap.Builder<GitFileDiffCacheKey, GitFileDiff> result =
202 93 : ImmutableMap.builderWithExpectedSize(Iterables.size(keys));
203 :
204 93 : Map<Project.NameKey, List<GitFileDiffCacheKey>> byProject =
205 93 : Streams.stream(keys)
206 93 : .distinct()
207 93 : .collect(Collectors.groupingBy(GitFileDiffCacheKey::project));
208 :
209 93 : for (Map.Entry<Project.NameKey, List<GitFileDiffCacheKey>> entry : byProject.entrySet()) {
210 93 : try (Repository repo = repoManager.openRepository(entry.getKey())) {
211 :
212 : // Grouping keys by diff options because each group of keys will be processed with a
213 : // separate call to JGit using the DiffFormatter object.
214 93 : Map<DiffOptions, List<GitFileDiffCacheKey>> optionsGroups =
215 93 : entry.getValue().stream().collect(Collectors.groupingBy(DiffOptions::fromKey));
216 :
217 : for (Map.Entry<DiffOptions, List<GitFileDiffCacheKey>> group :
218 93 : optionsGroups.entrySet()) {
219 93 : result.putAll(loadAllImpl(repo, group.getKey(), group.getValue()));
220 93 : }
221 : }
222 93 : }
223 93 : return result.build();
224 : }
225 : }
226 :
227 : /**
228 : * Loads the git file diffs for all keys of the same repository, and having the same diff {@code
229 : * options}.
230 : *
231 : * @return The git file diffs for all input keys.
232 : */
233 : private Map<GitFileDiffCacheKey, GitFileDiff> loadAllImpl(
234 : Repository repo, DiffOptions options, List<GitFileDiffCacheKey> keys)
235 : throws IOException, DiffNotAvailableException {
236 93 : ImmutableMap.Builder<GitFileDiffCacheKey, GitFileDiff> result =
237 93 : ImmutableMap.builderWithExpectedSize(keys.size());
238 93 : Map<GitFileDiffCacheKey, String> filePaths =
239 93 : keys.stream().collect(Collectors.toMap(identity(), GitFileDiffCacheKey::newFilePath));
240 93 : try (CloseablePool<DiffFormatter> diffPool =
241 93 : new CloseablePool<>(() -> createDiffFormatter(options, repo))) {
242 : ListMultimap<String, DiffEntry> diffEntries;
243 93 : try (CloseablePool<DiffFormatter>.Handle formatter = diffPool.get()) {
244 93 : diffEntries = loadDiffEntries(formatter.get(), options, filePaths.values());
245 : }
246 93 : for (GitFileDiffCacheKey key : filePaths.keySet()) {
247 93 : String newFilePath = filePaths.get(key);
248 93 : if (!diffEntries.containsKey(newFilePath)) {
249 3 : result.put(
250 : key,
251 3 : GitFileDiff.empty(
252 3 : AbbreviatedObjectId.fromObjectId(key.oldTree()),
253 3 : AbbreviatedObjectId.fromObjectId(key.newTree()),
254 : newFilePath));
255 3 : continue;
256 : }
257 93 : List<DiffEntry> entries = diffEntries.get(newFilePath);
258 93 : if (entries.size() == 1) {
259 93 : result.put(key, createGitFileDiff(entries.get(0), key, diffPool));
260 : } else {
261 : // Handle when JGit returns two {Added, Deleted} entries for the same file. This
262 : // happens, for example, when a file's mode is changed between patchsets (e.g.
263 : // converting a symlink to a regular file). We combine both diff entries into a single
264 : // entry with {changeType = Rewrite}.
265 2 : List<GitFileDiff> gitDiffs = new ArrayList<>();
266 2 : for (DiffEntry entry : diffEntries.get(newFilePath)) {
267 2 : gitDiffs.add(createGitFileDiff(entry, key, diffPool));
268 2 : }
269 2 : result.put(key, createRewriteEntry(gitDiffs));
270 : }
271 93 : }
272 93 : return result.build();
273 : }
274 : }
275 :
276 : private static ListMultimap<String, DiffEntry> loadDiffEntries(
277 : DiffFormatter diffFormatter, DiffOptions diffOptions, Collection<String> filePaths)
278 : throws IOException {
279 93 : Set<String> filePathsSet = ImmutableSet.copyOf(filePaths);
280 93 : List<DiffEntry> diffEntries =
281 93 : diffFormatter.scan(
282 93 : diffOptions.oldTree().equals(ObjectId.zeroId()) ? null : diffOptions.oldTree(),
283 93 : diffOptions.newTree());
284 :
285 93 : return diffEntries.stream()
286 93 : .filter(d -> filePathsSet.contains(extractPath(d)))
287 93 : .collect(
288 93 : Multimaps.toMultimap(
289 : Loader::extractPath,
290 93 : identity(),
291 93 : MultimapBuilder.treeKeys().arrayListValues()::build));
292 : }
293 :
294 : private static DiffFormatter createDiffFormatter(DiffOptions diffOptions, Repository repo) {
295 93 : try (DiffFormatter diffFormatter = new DiffFormatter(DisabledOutputStream.INSTANCE)) {
296 93 : diffFormatter.setRepository(repo);
297 93 : RawTextComparator cmp = comparatorFor(diffOptions.whitespace());
298 93 : diffFormatter.setDiffComparator(cmp);
299 93 : if (diffOptions.renameScore() != -1) {
300 93 : diffFormatter.setDetectRenames(true);
301 93 : diffFormatter.getRenameDetector().setRenameScore(diffOptions.renameScore());
302 : }
303 93 : diffFormatter.setDiffAlgorithm(DiffAlgorithmFactory.create(diffOptions.diffAlgorithm()));
304 93 : diffFormatter.getRenameDetector().setSkipContentRenamesForBinaryFiles(true);
305 93 : return diffFormatter;
306 : }
307 : }
308 :
309 : private static RawTextComparator comparatorFor(Whitespace ws) {
310 93 : switch (ws) {
311 : case IGNORE_ALL:
312 2 : return RawTextComparator.WS_IGNORE_ALL;
313 :
314 : case IGNORE_TRAILING:
315 0 : return RawTextComparator.WS_IGNORE_TRAILING;
316 :
317 : case IGNORE_LEADING_AND_TRAILING:
318 7 : return RawTextComparator.WS_IGNORE_CHANGE;
319 :
320 : case IGNORE_NONE:
321 : default:
322 93 : return RawTextComparator.DEFAULT;
323 : }
324 : }
325 :
326 : /**
327 : * Create a {@link GitFileDiff}. The result depends on the value of the {@code useTimeout} field
328 : * of the {@code key} parameter.
329 : *
330 : * <ul>
331 : * <li>If {@code useTimeout} is true, the computation is performed with timeout enforcement
332 : * (identified by {@link #timeoutMillis}). If the timeout is exceeded, this method returns
333 : * a negative result using {@link GitFileDiff#createNegative(AbbreviatedObjectId,
334 : * AbbreviatedObjectId, String)}.
335 : * <li>If {@code useTimeouts} is false, the computation is performed synchronously without
336 : * timeout enforcement.
337 : */
338 : private GitFileDiff createGitFileDiff(
339 : DiffEntry diffEntry, GitFileDiffCacheKey key, CloseablePool<DiffFormatter> diffPool)
340 : throws IOException {
341 93 : if (!key.useTimeout()) {
342 0 : try (CloseablePool<DiffFormatter>.Handle formatter = diffPool.get()) {
343 0 : FileHeader fileHeader = formatter.get().toFileHeader(diffEntry);
344 0 : return GitFileDiff.create(diffEntry, fileHeader);
345 : }
346 : }
347 : // This submits the DiffFormatter to a different thread. The CloseablePool and our usage of it
348 : // ensures that any DiffFormatter instance and the ObjectReader it references internally is
349 : // only used by a single thread concurrently. However, ObjectReaders have a reference to
350 : // Repository which might not be thread safe (FileRepository is, DfsRepository might not).
351 : // This could lead to a race condition.
352 93 : Future<GitFileDiff> fileDiffFuture =
353 93 : diffExecutor.submit(
354 : () -> {
355 93 : try (CloseablePool<DiffFormatter>.Handle formatter = diffPool.get()) {
356 93 : return GitFileDiff.create(diffEntry, formatter.get().toFileHeader(diffEntry));
357 : }
358 : });
359 : try {
360 : // We employ the timeout because of a bug in Myers diff in JGit. See
361 : // bugs.chromium.org/p/gerrit/issues/detail?id=487 for more details. The bug may happen
362 : // if the algorithm used in diffs is HISTOGRAM_WITH_FALLBACK_MYERS.
363 93 : return fileDiffFuture.get(timeoutMillis, TimeUnit.MILLISECONDS);
364 0 : } catch (InterruptedException | TimeoutException e) {
365 : // If timeout happens, create a negative result
366 0 : metrics.timeouts.increment();
367 0 : return GitFileDiff.createNegative(
368 0 : AbbreviatedObjectId.fromObjectId(key.oldTree()),
369 0 : AbbreviatedObjectId.fromObjectId(key.newTree()),
370 0 : key.newFilePath());
371 0 : } catch (ExecutionException e) {
372 : // If there was an error computing the result, carry it
373 : // up to the caller so the cache knows this key is invalid.
374 0 : Throwables.throwIfInstanceOf(e.getCause(), IOException.class);
375 0 : throw new IOException(e.getMessage(), e.getCause());
376 : }
377 : }
378 :
379 : /**
380 : * Extract the file path from a {@link DiffEntry}. Returns the old file path if the entry
381 : * corresponds to a deleted file, otherwise it returns the new file path.
382 : */
383 : private static String extractPath(DiffEntry diffEntry) {
384 93 : return diffEntry.getChangeType().equals(ChangeType.DELETE)
385 23 : ? diffEntry.getOldPath()
386 93 : : diffEntry.getNewPath();
387 : }
388 : }
389 :
390 : /**
391 : * Create a single {@link GitFileDiff} with {@link com.google.gerrit.entities.Patch.ChangeType}
392 : * equals {@link com.google.gerrit.entities.Patch.ChangeType#REWRITE}, assuming the input list
393 : * contains two entries.
394 : *
395 : * @param gitDiffs input list of exactly two {@link GitFileDiff} for same file path.
396 : * @return a single {@link GitFileDiff} with change type equals {@link
397 : * com.google.gerrit.entities.Patch.ChangeType#REWRITE}.
398 : * @throws DiffNotAvailableException if input list contains git diffs with change types other than
399 : * {ADDED, DELETED}. This is a JGit error.
400 : */
401 : private static GitFileDiff createRewriteEntry(List<GitFileDiff> gitDiffs)
402 : throws DiffNotAvailableException {
403 2 : if (gitDiffs.size() != 2) {
404 0 : throw new DiffNotAvailableException(
405 0 : String.format(
406 : "JGit error: found %d dff entries for same file path %s",
407 0 : gitDiffs.size(), gitDiffs.get(0).getDefaultPath()));
408 : }
409 : // Convert the first entry (prioritized according to change type enum order) to REWRITE
410 2 : gitDiffs.sort(Comparator.comparingInt(o -> o.changeType().ordinal()));
411 2 : return gitDiffs.get(0).toBuilder().changeType(Patch.ChangeType.REWRITE).build();
412 : }
413 :
414 : /** An entity representing the options affecting the diff computation. */
415 : @AutoValue
416 93 : abstract static class DiffOptions {
417 : /** Convert a {@link GitFileDiffCacheKey} input to a {@link DiffOptions}. */
418 : static DiffOptions fromKey(GitFileDiffCacheKey key) {
419 93 : return create(
420 93 : key.oldTree(), key.newTree(), key.renameScore(), key.whitespace(), key.diffAlgorithm());
421 : }
422 :
423 : private static DiffOptions create(
424 : ObjectId oldTree,
425 : ObjectId newTree,
426 : Integer renameScore,
427 : Whitespace whitespace,
428 : DiffAlgorithm diffAlgorithm) {
429 93 : return new AutoValue_GitFileDiffCacheImpl_DiffOptions(
430 : oldTree, newTree, renameScore, whitespace, diffAlgorithm);
431 : }
432 :
433 : abstract ObjectId oldTree();
434 :
435 : abstract ObjectId newTree();
436 :
437 : abstract Integer renameScore();
438 :
439 : abstract Whitespace whitespace();
440 :
441 : abstract DiffAlgorithm diffAlgorithm();
442 : }
443 : }
|