Line data Source code
1 : // Copyright (C) 2018 The Android Open Source Project 2 : // 3 : // Licensed under the Apache License, Version 2.0 (the "License"); 4 : // you may not use this file except in compliance with the License. 5 : // You may obtain a copy of the License at 6 : // 7 : // http://www.apache.org/licenses/LICENSE-2.0 8 : // 9 : // Unless required by applicable law or agreed to in writing, software 10 : // distributed under the License is distributed on an "AS IS" BASIS, 11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 : // See the License for the specific language governing permissions and 13 : // limitations under the License. 14 : 15 : package com.google.gerrit.server.ioutil; 16 : 17 : import static java.util.Objects.requireNonNull; 18 : 19 : import com.google.common.collect.Lists; 20 : import com.google.common.primitives.Chars; 21 : import dk.brics.automaton.Automaton; 22 : import dk.brics.automaton.RegExp; 23 : import dk.brics.automaton.RunAutomaton; 24 : import java.util.Collections; 25 : import java.util.List; 26 : import java.util.function.Function; 27 : import java.util.stream.Stream; 28 : 29 : /** Helper to search sorted lists for elements matching a {@link RegExp}. */ 30 : public final class RegexListSearcher<T> { 31 : public static RegexListSearcher<String> ofStrings(String re) { 32 4 : return new RegexListSearcher<>(re, Function.identity()); 33 : } 34 : 35 : private final RunAutomaton pattern; 36 : private final Function<T, String> toStringFunc; 37 : 38 : private final String prefixBegin; 39 : private final String prefixEnd; 40 : private final int prefixLen; 41 : private final boolean prefixOnly; 42 : 43 5 : public RegexListSearcher(String re, Function<T, String> toStringFunc) { 44 5 : this.toStringFunc = requireNonNull(toStringFunc); 45 : 46 5 : if (re.startsWith("^")) { 47 4 : re = re.substring(1); 48 : } 49 : 50 5 : if (re.endsWith("$") && !re.endsWith("\\$")) { 51 3 : re = re.substring(0, re.length() - 1); 52 : } 53 : 54 5 : Automaton automaton = new RegExp(re).toAutomaton(); 55 5 : prefixBegin = automaton.getCommonPrefix(); 56 5 : prefixLen = prefixBegin.length(); 57 : 58 5 : if (0 < prefixLen) { 59 5 : char max = Chars.checkedCast(prefixBegin.charAt(prefixLen - 1) + 1); 60 5 : prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max; 61 5 : prefixOnly = re.equals(prefixBegin + ".*"); 62 5 : } else { 63 3 : prefixEnd = ""; 64 3 : prefixOnly = false; 65 : } 66 : 67 5 : pattern = prefixOnly ? null : new RunAutomaton(automaton); 68 5 : } 69 : 70 : public Stream<T> search(List<T> list) { 71 5 : requireNonNull(list); 72 : int begin; 73 : int end; 74 : 75 5 : if (0 < prefixLen) { 76 : // Assumes many consecutive elements may have the same prefix, so the cost of two binary 77 : // searches is less than iterating linearly and running the regexp find the endpoints. 78 5 : List<String> strings = Lists.transform(list, toStringFunc::apply); 79 5 : begin = find(strings, prefixBegin); 80 5 : end = find(strings, prefixEnd); 81 5 : } else { 82 3 : begin = 0; 83 3 : end = list.size(); 84 : } 85 5 : if (begin >= end) { 86 4 : return Stream.empty(); 87 : } 88 : 89 5 : Stream<T> result = list.subList(begin, end).stream(); 90 5 : if (!prefixOnly) { 91 5 : result = result.filter(x -> pattern.run(toStringFunc.apply(x))); 92 : } 93 5 : return result; 94 : } 95 : 96 : private static int find(List<String> list, String p) { 97 5 : int r = Collections.binarySearch(list, p); 98 5 : return r < 0 ? -(r + 1) : r; 99 : } 100 : }