Line data Source code
1 : // Copyright (C) 2008 The Android Open Source Project 2 : // 3 : // Licensed under the Apache License, Version 2.0 (the "License"); 4 : // you may not use this file except in compliance with the License. 5 : // You may obtain a copy of the License at 6 : // 7 : // http://www.apache.org/licenses/LICENSE-2.0 8 : // 9 : // Unless required by applicable law or agreed to in writing, software 10 : // distributed under the License is distributed on an "AS IS" BASIS, 11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 : // See the License for the specific language governing permissions and 13 : // limitations under the License. 14 : 15 : package com.google.gerrit.httpd; 16 : 17 : import static java.nio.charset.StandardCharsets.UTF_8; 18 : 19 : import com.google.common.flogger.FluentLogger; 20 : import com.google.common.io.ByteStreams; 21 : import com.google.gerrit.common.Nullable; 22 : import java.io.ByteArrayOutputStream; 23 : import java.io.IOException; 24 : import java.io.InputStream; 25 : import java.io.StringWriter; 26 : import java.nio.charset.Charset; 27 : import java.nio.file.Files; 28 : import java.nio.file.NoSuchFileException; 29 : import java.nio.file.Path; 30 : import java.util.Optional; 31 : import java.util.concurrent.TimeUnit; 32 : import java.util.zip.GZIPOutputStream; 33 : import javax.xml.parsers.DocumentBuilder; 34 : import javax.xml.parsers.DocumentBuilderFactory; 35 : import javax.xml.parsers.ParserConfigurationException; 36 : import javax.xml.transform.OutputKeys; 37 : import javax.xml.transform.Transformer; 38 : import javax.xml.transform.TransformerException; 39 : import javax.xml.transform.TransformerFactory; 40 : import javax.xml.transform.dom.DOMSource; 41 : import javax.xml.transform.stream.StreamResult; 42 : import javax.xml.xpath.XPathConstants; 43 : import javax.xml.xpath.XPathExpression; 44 : import javax.xml.xpath.XPathExpressionException; 45 : import javax.xml.xpath.XPathFactory; 46 : import org.jsoup.parser.Parser; 47 : import org.w3c.dom.Document; 48 : import org.w3c.dom.Element; 49 : import org.w3c.dom.Node; 50 : import org.w3c.dom.NodeList; 51 : import org.xml.sax.SAXException; 52 : 53 : /** Utility functions to deal with HTML using W3C DOM operations. */ 54 0 : public class HtmlDomUtil { 55 100 : private static final FluentLogger logger = FluentLogger.forEnclosingClass(); 56 : 57 : /** Standard character encoding we prefer (UTF-8). */ 58 100 : public static final Charset ENC = UTF_8; 59 : 60 : /** DOCTYPE for a standards mode HTML document. */ 61 : public static final String HTML_STRICT = 62 : "-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd"; 63 : 64 : /** Convert a document to a UTF-8 byte sequence. */ 65 : public static byte[] toUTF8(Document hostDoc) throws IOException { 66 0 : return toString(hostDoc).getBytes(ENC); 67 : } 68 : 69 : /** Compress the document. */ 70 : public static byte[] compress(byte[] raw) throws IOException { 71 1 : ByteArrayOutputStream out = new ByteArrayOutputStream(); 72 1 : GZIPOutputStream gz = new GZIPOutputStream(out); 73 1 : gz.write(raw); 74 1 : gz.finish(); 75 1 : gz.flush(); 76 1 : return out.toByteArray(); 77 : } 78 : 79 : /** Convert a document to a String, assuming later encoding to UTF-8. */ 80 : public static String toString(Document hostDoc) throws IOException { 81 : try { 82 0 : StringWriter out = new StringWriter(); 83 0 : DOMSource domSource = new DOMSource(hostDoc); 84 0 : StreamResult streamResult = new StreamResult(out); 85 0 : TransformerFactory tf = TransformerFactory.newInstance(); 86 0 : Transformer serializer = tf.newTransformer(); 87 0 : serializer.setOutputProperty(OutputKeys.ENCODING, ENC.name()); 88 0 : serializer.setOutputProperty(OutputKeys.METHOD, "html"); 89 0 : serializer.setOutputProperty(OutputKeys.INDENT, "no"); 90 0 : serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, HtmlDomUtil.HTML_STRICT); 91 0 : serializer.transform(domSource, streamResult); 92 0 : return out.toString(); 93 0 : } catch (TransformerException e) { 94 0 : throw new IOException("Error transforming page", e); 95 : } 96 : } 97 : 98 : /** Find an element by its "id" attribute; null if no element is found. */ 99 : @Nullable 100 : public static Element find(Node parent, String name) { 101 0 : NodeList list = parent.getChildNodes(); 102 0 : for (int i = 0; i < list.getLength(); i++) { 103 0 : Node n = list.item(i); 104 0 : if (n instanceof Element) { 105 0 : Element e = (Element) n; 106 0 : if (name.equals(e.getAttribute("id"))) { 107 0 : return e; 108 : } 109 : } 110 0 : Element r = find(n, name); 111 0 : if (r != null) { 112 0 : return r; 113 : } 114 : } 115 0 : return null; 116 : } 117 : 118 : /** Append an HTML <input type="hidden"> to the form. */ 119 : public static void addHidden(Element form, String name, String value) { 120 0 : Element in = form.getOwnerDocument().createElement("input"); 121 0 : in.setAttribute("type", "hidden"); 122 0 : in.setAttribute("name", name); 123 0 : in.setAttribute("value", value); 124 0 : form.appendChild(in); 125 0 : } 126 : 127 : /** Construct a new empty document. */ 128 : public static Document newDocument() { 129 : try { 130 0 : return newBuilder().newDocument(); 131 0 : } catch (ParserConfigurationException e) { 132 0 : throw new RuntimeException("Cannot create new document", e); 133 : } 134 : } 135 : 136 : /** Clone a document so it can be safely modified on a per-request basis. */ 137 : public static Document clone(Document doc) throws IOException { 138 : Document d; 139 : try { 140 0 : d = newBuilder().newDocument(); 141 0 : } catch (ParserConfigurationException e) { 142 0 : throw new IOException("Cannot clone document", e); 143 0 : } 144 0 : Node n = d.importNode(doc.getDocumentElement(), true); 145 0 : d.appendChild(n); 146 0 : return d; 147 : } 148 : 149 : /** Parse an XHTML file from our CLASSPATH and return the instance. */ 150 : @Nullable 151 : public static Document parseFile(Class<?> context, String name) throws IOException { 152 0 : try (InputStream in = context.getResourceAsStream(name)) { 153 0 : if (in == null) { 154 0 : return null; 155 : } 156 0 : Document doc = newBuilder().parse(in); 157 0 : compact(doc); 158 0 : return doc; 159 0 : } catch (SAXException | ParserConfigurationException | IOException e) { 160 0 : throw new IOException("Error reading " + name, e); 161 : } 162 : } 163 : 164 : private static void compact(Document doc) { 165 : try { 166 0 : String expr = "//text()[normalize-space(.) = '']"; 167 0 : XPathFactory xp = XPathFactory.newInstance(); 168 0 : XPathExpression e = xp.newXPath().compile(expr); 169 0 : NodeList empty = (NodeList) e.evaluate(doc, XPathConstants.NODESET); 170 0 : for (int i = 0; i < empty.getLength(); i++) { 171 0 : Node node = empty.item(i); 172 0 : node.getParentNode().removeChild(node); 173 : } 174 0 : } catch (XPathExpressionException e) { 175 : // Don't do the whitespace removal. 176 0 : } 177 0 : } 178 : 179 : /** Read a Read a UTF-8 text file from our CLASSPATH and return it. */ 180 : @Nullable 181 : public static String readFile(Class<?> context, String name) throws IOException { 182 0 : try (InputStream in = context.getResourceAsStream(name)) { 183 0 : if (in == null) { 184 0 : return null; 185 : } 186 0 : return new String(ByteStreams.toByteArray(in), ENC); 187 0 : } catch (IOException e) { 188 0 : throw new IOException("Error reading " + name, e); 189 : } 190 : } 191 : 192 : /** Parse an XHTML file from the local drive and return the instance. */ 193 : @Nullable 194 : public static Document parseFile(Path path) throws IOException { 195 0 : try (InputStream in = Files.newInputStream(path)) { 196 0 : Document doc = newBuilder().parse(in); 197 0 : compact(doc); 198 0 : return doc; 199 99 : } catch (NoSuchFileException e) { 200 99 : return null; 201 0 : } catch (SAXException | ParserConfigurationException | IOException e) { 202 0 : throw new IOException("Error reading " + path, e); 203 : } 204 : } 205 : 206 : /** Read a UTF-8 text file from the local drive. */ 207 : @Nullable 208 : public static String readFile(Path parentDir, String name) throws IOException { 209 99 : if (parentDir == null) { 210 0 : return null; 211 : } 212 99 : Path path = parentDir.resolve(name); 213 0 : try (InputStream in = Files.newInputStream(path)) { 214 0 : return new String(ByteStreams.toByteArray(in), ENC); 215 99 : } catch (NoSuchFileException e) { 216 99 : return null; 217 0 : } catch (IOException e) { 218 0 : throw new IOException("Error reading " + path, e); 219 : } 220 : } 221 : 222 : private static DocumentBuilder newBuilder() throws ParserConfigurationException { 223 0 : DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 224 0 : factory.setValidating(false); 225 0 : factory.setExpandEntityReferences(false); 226 0 : factory.setIgnoringComments(true); 227 0 : factory.setCoalescing(true); 228 0 : return factory.newDocumentBuilder(); 229 : } 230 : 231 : /** 232 : * Attaches nonce to all script elements in html. 233 : * 234 : * <p>The returned html is not guaranteed to have the same formatting as the input. 235 : * 236 : * @return Updated html or {#link Optional.empty()} if parsing failed. 237 : */ 238 : public static Optional<String> attachNonce(String html, String nonce) { 239 1 : Parser parser = Parser.htmlParser(); 240 1 : org.jsoup.nodes.Document document = parser.parseInput(html, ""); 241 1 : if (!parser.getErrors().isEmpty()) { 242 0 : logger.atSevere().atMostEvery(5, TimeUnit.MINUTES).log( 243 0 : "Html couldn't be parsed to attach nonce. Errors: %s", parser.getErrors()); 244 0 : return Optional.empty(); 245 : } 246 1 : document.getElementsByTag("script").attr("nonce", nonce); 247 1 : return Optional.of( 248 : document 249 1 : .outputSettings( 250 1 : new org.jsoup.nodes.Document.OutputSettings().prettyPrint(false).indentAmount(0)) 251 1 : .outerHtml()); 252 : } 253 : }