LCOV - code coverage report
Current view: top level - server/patch/gitfilediff - FileHeaderUtil.java (source / functions) Hit Total Coverage
Test: _coverage_report.dat Lines: 47 58 81.0 %
Date: 2022-11-19 15:00:39 Functions: 11 12 91.7 %

          Line data    Source code
       1             : // Copyright (C) 2020 The Android Open Source Project
       2             : //
       3             : // Licensed under the Apache License, Version 2.0 (the "License");
       4             : // you may not use this file except in compliance with the License.
       5             : // You may obtain a copy of the License at
       6             : //
       7             : // http://www.apache.org/licenses/LICENSE-2.0
       8             : //
       9             : // Unless required by applicable law or agreed to in writing, software
      10             : // distributed under the License is distributed on an "AS IS" BASIS,
      11             : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             : // See the License for the specific language governing permissions and
      13             : // limitations under the License.
      14             : 
      15             : package com.google.gerrit.server.patch.gitfilediff;
      16             : 
      17             : import static java.nio.charset.StandardCharsets.UTF_8;
      18             : 
      19             : import com.google.common.collect.ImmutableList;
      20             : import com.google.gerrit.entities.Patch;
      21             : import com.google.gerrit.entities.Patch.PatchType;
      22             : import java.util.Optional;
      23             : import org.eclipse.jgit.patch.CombinedFileHeader;
      24             : import org.eclipse.jgit.patch.FileHeader;
      25             : import org.eclipse.jgit.util.IntList;
      26             : import org.eclipse.jgit.util.RawParseUtils;
      27             : 
      28             : /** A utility class for the {@link FileHeader} JGit object */
      29           0 : public class FileHeaderUtil {
      30         104 :   private static final Byte NUL = '\0';
      31             : 
      32             :   /**
      33             :    * The maximum number of characters to look up in the binary file {@link FileHeader}. This is
      34             :    * used to scan the file header for the occurrence of the {@link #NUL} character.
      35             :    *
      36             :    * <p>This limit assumes a uniform distribution of all characters, hence the probability of the
      37             :    * occurrence of each character = (1 / 256). We want to find the limit that makes the
      38             :    * probability of finding {@link #NUL} > 0.999. 1 - (255 / 256) ^ N > 0.999 yields N = 1766. We
      39             :    * set the limit to roughly 10 times this value, rounded up to 20000, for more confidence.
      40             :    */
      41             :   private static final int BIN_FILE_MAX_SCAN_LIMIT = 20000;
      42             : 
      43             :   /** Converts the {@link FileHeader} parameter to a String representation. */
      44             :   static String toString(FileHeader header) {
      45         104 :     return new String(FileHeaderUtil.toByteArray(header), UTF_8);
      46             :   }
      47             : 
      48             :   /** Converts the {@link FileHeader} parameter to a byte array. */
      49             :   static byte[] toByteArray(FileHeader header) {
      50         104 :     int end = getEndOffset(header);
      51         104 :     if (header.getStartOffset() == 0 && end == header.getBuffer().length) {
      52         104 :       return header.getBuffer();
      53             :     }
      54             : 
      55           0 :     final byte[] buf = new byte[end - header.getStartOffset()];
      56           0 :     System.arraycopy(header.getBuffer(), header.getStartOffset(), buf, 0, buf.length);
      57           0 :     return buf;
      58             :   }
      59             : 
      60             :   /** Splits the {@code FileHeader} string into a list of strings, one string per header line. */
      61             :   public static ImmutableList<String> getHeaderLines(FileHeader fileHeader) {
      62         104 :     String fileHeaderString = toString(fileHeader);
      63         104 :     return getHeaderLines(fileHeaderString);
      64             :   }
      65             : 
      66             :   public static ImmutableList<String> getHeaderLines(String header) {
      67         104 :     return getHeaderLines(header.getBytes(UTF_8));
      68             :   }
      69             : 
      70             :   static ImmutableList<String> getHeaderLines(byte[] header) {
      71         104 :     final IntList lineStartOffsets = RawParseUtils.lineMap(header, 0, header.length);
      72         104 :     final ImmutableList.Builder<String> headerLines =
      73         104 :         ImmutableList.builderWithExpectedSize(lineStartOffsets.size() - 1);
      74         104 :     for (int i = 1; i < lineStartOffsets.size() - 1; i++) {
      75         104 :       final int b = lineStartOffsets.get(i);
      76         104 :       int e = lineStartOffsets.get(i + 1);
      77         104 :       if (header[e - 1] == '\n') {
      78         104 :         e--;
      79             :       }
      80         104 :       headerLines.add(RawParseUtils.decode(UTF_8, header, b, e));
      81             :     }
      82         104 :     return headerLines.build();
      83             :   }
      84             : 
      85             :   /**
      86             :    * Returns the old file path associated with the {@link FileHeader}, or empty if the file is
      87             :    * {@link com.google.gerrit.entities.Patch.ChangeType#ADDED} or {@link
      88             :    * com.google.gerrit.entities.Patch.ChangeType#REWRITE}.
      89             :    */
      90             :   public static Optional<String> getOldPath(FileHeader header) {
      91         104 :     Patch.ChangeType changeType = getChangeType(header);
      92         104 :     switch (changeType) {
      93             :       case DELETED:
      94             :       case COPIED:
      95             :       case RENAMED:
      96             :       case MODIFIED:
      97          54 :         return Optional.of(header.getOldPath());
      98             : 
      99             :       case ADDED:
     100             :       case REWRITE:
     101         104 :         return Optional.empty();
     102             :     }
     103           0 :     return Optional.empty();
     104             :   }
     105             : 
     106             :   /**
     107             :    * Returns the new file path associated with the {@link FileHeader}, or empty if the file is
     108             :    * {@link com.google.gerrit.entities.Patch.ChangeType#DELETED}.
     109             :    */
     110             :   public static Optional<String> getNewPath(FileHeader header) {
     111         104 :     Patch.ChangeType changeType = getChangeType(header);
     112         104 :     switch (changeType) {
     113             :       case DELETED:
     114          23 :         return Optional.empty();
     115             : 
     116             :       case ADDED:
     117             :       case MODIFIED:
     118             :       case REWRITE:
     119             :       case COPIED:
     120             :       case RENAMED:
     121         104 :         return Optional.of(header.getNewPath());
     122             :     }
     123           0 :     return Optional.empty();
     124             :   }
     125             : 
     126             :   /** Returns the change type associated with the file header. */
     127             :   public static Patch.ChangeType getChangeType(FileHeader header) {
     128             :     // In Gerrit, we define our own entities instead of the JGit entities, so that we have full
     129             :     // control over their behaviors (e.g. making sure that these entities are immutable so that we
     130             :     // can add them as fields of keys / values of persisted caches).
     131             : 
     132             :     // TODO(ghareeb): remove the dead code of the value REWRITE and all its handling
     133         104 :     switch (header.getChangeType()) {
     134             :       case ADD:
     135         104 :         return Patch.ChangeType.ADDED;
     136             :       case MODIFY:
     137          53 :         return Patch.ChangeType.MODIFIED;
     138             :       case DELETE:
     139          23 :         return Patch.ChangeType.DELETED;
     140             :       case RENAME:
     141          12 :         return Patch.ChangeType.RENAMED;
     142             :       case COPY:
     143           4 :         return Patch.ChangeType.COPIED;
     144             :       default:
     145           0 :         throw new IllegalArgumentException("Unsupported type " + header.getChangeType());
     146             :     }
     147             :   }
     148             : 
     149             :   public static PatchType getPatchType(FileHeader header) {
     150             :     PatchType patchType;
     151             : 
     152         104 :     switch (header.getPatchType()) {
     153             :       case UNIFIED:
     154         104 :         patchType = Patch.PatchType.UNIFIED;
     155         104 :         break;
     156             :       case GIT_BINARY:
     157             :       case BINARY:
     158           2 :         patchType = Patch.PatchType.BINARY;
     159           2 :         break;
     160             :       default:
     161           0 :         throw new IllegalArgumentException("Unsupported type " + header.getPatchType());
     162             :     }
     163             : 
     164         104 :     if (patchType != PatchType.BINARY) {
     165         104 :       byte[] buf = header.getBuffer();
     166             :       // TODO(ghareeb): should we adjust the max limit threshold?
     167             :       // JGit sometimes misses the detection of binary files. In this case we look into the file
     168             :       // header for the occurrence of NUL characters, which is a definite signal that the file is
     169             :       // binary. We limit the number of characters to look up to avoid performance bottlenecks.
     170         104 :       for (int ptr = header.getStartOffset();
     171         104 :           ptr < Math.min(header.getEndOffset(), BIN_FILE_MAX_SCAN_LIMIT);
     172         104 :           ptr++) {
     173         104 :         if (buf[ptr] == NUL) {
     174             :           // It's really binary, but Git couldn't see the nul early enough to realize it's binary,
     175             :           // and instead produced the diff.
     176             :           //
     177             :           // Force it to be a binary; it really should have been that.
     178           0 :           return PatchType.BINARY;
     179             :         }
     180             :       }
     181             :     }
     182         104 :     return patchType;
     183             :   }
     184             : 
     185             :   /**
     186             :    * Returns the end offset of the diff header line of the {@code FileHeader} parameter before the
     187             :    * appearance of any file edits (diff hunks).
     188             :    */
     189             :   private static int getEndOffset(FileHeader fileHeader) {
     190         104 :     if (fileHeader instanceof CombinedFileHeader) {
     191           0 :       return fileHeader.getEndOffset();
     192             :     }
     193         104 :     if (!fileHeader.getHunks().isEmpty()) {
     194         104 :       return fileHeader.getHunks().get(0).getStartOffset();
     195             :     }
     196           0 :     return fileHeader.getEndOffset();
     197             :   }
     198             : }

Generated by: LCOV version 1.16+git.20220603.dfeb750