LCOV - code coverage report
Current view: top level - httpd - HtmlDomUtil.java (source / functions) Hit Total Coverage
Test: _coverage_report.dat Lines: 22 108 20.4 %
Date: 2022-11-19 15:00:39 Functions: 5 16 31.2 %

          Line data    Source code
       1             : // Copyright (C) 2008 The Android Open Source Project
       2             : //
       3             : // Licensed under the Apache License, Version 2.0 (the "License");
       4             : // you may not use this file except in compliance with the License.
       5             : // You may obtain a copy of the License at
       6             : //
       7             : // http://www.apache.org/licenses/LICENSE-2.0
       8             : //
       9             : // Unless required by applicable law or agreed to in writing, software
      10             : // distributed under the License is distributed on an "AS IS" BASIS,
      11             : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             : // See the License for the specific language governing permissions and
      13             : // limitations under the License.
      14             : 
      15             : package com.google.gerrit.httpd;
      16             : 
      17             : import static java.nio.charset.StandardCharsets.UTF_8;
      18             : 
      19             : import com.google.common.flogger.FluentLogger;
      20             : import com.google.common.io.ByteStreams;
      21             : import com.google.gerrit.common.Nullable;
      22             : import java.io.ByteArrayOutputStream;
      23             : import java.io.IOException;
      24             : import java.io.InputStream;
      25             : import java.io.StringWriter;
      26             : import java.nio.charset.Charset;
      27             : import java.nio.file.Files;
      28             : import java.nio.file.NoSuchFileException;
      29             : import java.nio.file.Path;
      30             : import java.util.Optional;
      31             : import java.util.concurrent.TimeUnit;
      32             : import java.util.zip.GZIPOutputStream;
      33             : import javax.xml.parsers.DocumentBuilder;
      34             : import javax.xml.parsers.DocumentBuilderFactory;
      35             : import javax.xml.parsers.ParserConfigurationException;
      36             : import javax.xml.transform.OutputKeys;
      37             : import javax.xml.transform.Transformer;
      38             : import javax.xml.transform.TransformerException;
      39             : import javax.xml.transform.TransformerFactory;
      40             : import javax.xml.transform.dom.DOMSource;
      41             : import javax.xml.transform.stream.StreamResult;
      42             : import javax.xml.xpath.XPathConstants;
      43             : import javax.xml.xpath.XPathExpression;
      44             : import javax.xml.xpath.XPathExpressionException;
      45             : import javax.xml.xpath.XPathFactory;
      46             : import org.jsoup.parser.Parser;
      47             : import org.w3c.dom.Document;
      48             : import org.w3c.dom.Element;
      49             : import org.w3c.dom.Node;
      50             : import org.w3c.dom.NodeList;
      51             : import org.xml.sax.SAXException;
      52             : 
      53             : /** Utility functions to deal with HTML using W3C DOM operations. */
      54           0 : public class HtmlDomUtil {
      55         100 :   private static final FluentLogger logger = FluentLogger.forEnclosingClass();
      56             : 
      57             :   /** Standard character encoding we prefer (UTF-8). */
      58         100 :   public static final Charset ENC = UTF_8;
      59             : 
      60             :   /** DOCTYPE for a standards mode HTML document. */
      61             :   public static final String HTML_STRICT =
      62             :       "-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd";
      63             : 
      64             :   /** Convert a document to a UTF-8 byte sequence. */
      65             :   public static byte[] toUTF8(Document hostDoc) throws IOException {
      66           0 :     return toString(hostDoc).getBytes(ENC);
      67             :   }
      68             : 
      69             :   /** Compress the document's raw bytes using GZIP. */
      70             :   public static byte[] compress(byte[] raw) throws IOException {
      71           1 :     ByteArrayOutputStream out = new ByteArrayOutputStream();
      72           1 :     GZIPOutputStream gz = new GZIPOutputStream(out);
      73           1 :     gz.write(raw);
      74           1 :     gz.finish();
      75           1 :     gz.flush();
      76           1 :     return out.toByteArray();
      77             :   }
      78             : 
      79             :   /** Convert a document to a String, assuming later encoding to UTF-8. */
      80             :   public static String toString(Document hostDoc) throws IOException {
      81             :     try {
      82           0 :       StringWriter out = new StringWriter();
      83           0 :       DOMSource domSource = new DOMSource(hostDoc);
      84           0 :       StreamResult streamResult = new StreamResult(out);
      85           0 :       TransformerFactory tf = TransformerFactory.newInstance();
      86           0 :       Transformer serializer = tf.newTransformer();
      87           0 :       serializer.setOutputProperty(OutputKeys.ENCODING, ENC.name());
      88           0 :       serializer.setOutputProperty(OutputKeys.METHOD, "html");
      89           0 :       serializer.setOutputProperty(OutputKeys.INDENT, "no");
      90           0 :       serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, HtmlDomUtil.HTML_STRICT);
      91           0 :       serializer.transform(domSource, streamResult);
      92           0 :       return out.toString();
      93           0 :     } catch (TransformerException e) {
      94           0 :       throw new IOException("Error transforming page", e);
      95             :     }
      96             :   }
      97             : 
      98             :   /** Find an element by its "id" attribute; null if no element is found. */
      99             :   @Nullable
     100             :   public static Element find(Node parent, String name) {
     101           0 :     NodeList list = parent.getChildNodes();
     102           0 :     for (int i = 0; i < list.getLength(); i++) {
     103           0 :       Node n = list.item(i);
     104           0 :       if (n instanceof Element) {
     105           0 :         Element e = (Element) n;
     106           0 :         if (name.equals(e.getAttribute("id"))) {
     107           0 :           return e;
     108             :         }
     109             :       }
     110           0 :       Element r = find(n, name);
     111           0 :       if (r != null) {
     112           0 :         return r;
     113             :       }
     114             :     }
     115           0 :     return null;
     116             :   }
     117             : 
     118             :   /** Append an HTML &lt;input type="hidden"&gt; to the form. */
     119             :   public static void addHidden(Element form, String name, String value) {
     120           0 :     Element in = form.getOwnerDocument().createElement("input");
     121           0 :     in.setAttribute("type", "hidden");
     122           0 :     in.setAttribute("name", name);
     123           0 :     in.setAttribute("value", value);
     124           0 :     form.appendChild(in);
     125           0 :   }
     126             : 
     127             :   /** Construct a new empty document. */
     128             :   public static Document newDocument() {
     129             :     try {
     130           0 :       return newBuilder().newDocument();
     131           0 :     } catch (ParserConfigurationException e) {
     132           0 :       throw new RuntimeException("Cannot create new document", e);
     133             :     }
     134             :   }
     135             : 
     136             :   /** Clone a document so it can be safely modified on a per-request basis. */
     137             :   public static Document clone(Document doc) throws IOException {
     138             :     Document d;
     139             :     try {
     140           0 :       d = newBuilder().newDocument();
     141           0 :     } catch (ParserConfigurationException e) {
     142           0 :       throw new IOException("Cannot clone document", e);
     143           0 :     }
     144           0 :     Node n = d.importNode(doc.getDocumentElement(), true);
     145           0 :     d.appendChild(n);
     146           0 :     return d;
     147             :   }
     148             : 
     149             :   /** Parse an XHTML file from our CLASSPATH and return the instance. */
     150             :   @Nullable
     151             :   public static Document parseFile(Class<?> context, String name) throws IOException {
     152           0 :     try (InputStream in = context.getResourceAsStream(name)) {
     153           0 :       if (in == null) {
     154           0 :         return null;
     155             :       }
     156           0 :       Document doc = newBuilder().parse(in);
     157           0 :       compact(doc);
     158           0 :       return doc;
     159           0 :     } catch (SAXException | ParserConfigurationException | IOException e) {
     160           0 :       throw new IOException("Error reading " + name, e);
     161             :     }
     162             :   }
     163             : 
     164             :   private static void compact(Document doc) {
     165             :     try {
     166           0 :       String expr = "//text()[normalize-space(.) = '']";
     167           0 :       XPathFactory xp = XPathFactory.newInstance();
     168           0 :       XPathExpression e = xp.newXPath().compile(expr);
     169           0 :       NodeList empty = (NodeList) e.evaluate(doc, XPathConstants.NODESET);
     170           0 :       for (int i = 0; i < empty.getLength(); i++) {
     171           0 :         Node node = empty.item(i);
     172           0 :         node.getParentNode().removeChild(node);
     173             :       }
     174           0 :     } catch (XPathExpressionException e) {
     175             :       // Don't do the whitespace removal.
     176           0 :     }
     177           0 :   }
     178             : 
     179             :   /** Read a UTF-8 text file from our CLASSPATH and return it. */
     180             :   @Nullable
     181             :   public static String readFile(Class<?> context, String name) throws IOException {
     182           0 :     try (InputStream in = context.getResourceAsStream(name)) {
     183           0 :       if (in == null) {
     184           0 :         return null;
     185             :       }
     186           0 :       return new String(ByteStreams.toByteArray(in), ENC);
     187           0 :     } catch (IOException e) {
     188           0 :       throw new IOException("Error reading " + name, e);
     189             :     }
     190             :   }
     191             : 
     192             :   /** Parse an XHTML file from the local drive and return the instance. */
     193             :   @Nullable
     194             :   public static Document parseFile(Path path) throws IOException {
     195           0 :     try (InputStream in = Files.newInputStream(path)) {
     196           0 :       Document doc = newBuilder().parse(in);
     197           0 :       compact(doc);
     198           0 :       return doc;
     199          99 :     } catch (NoSuchFileException e) {
     200          99 :       return null;
     201           0 :     } catch (SAXException | ParserConfigurationException | IOException e) {
     202           0 :       throw new IOException("Error reading " + path, e);
     203             :     }
     204             :   }
     205             : 
     206             :   /** Read a UTF-8 text file from the local drive. */
     207             :   @Nullable
     208             :   public static String readFile(Path parentDir, String name) throws IOException {
     209          99 :     if (parentDir == null) {
     210           0 :       return null;
     211             :     }
     212          99 :     Path path = parentDir.resolve(name);
     213           0 :     try (InputStream in = Files.newInputStream(path)) {
     214           0 :       return new String(ByteStreams.toByteArray(in), ENC);
     215          99 :     } catch (NoSuchFileException e) {
     216          99 :       return null;
     217           0 :     } catch (IOException e) {
     218           0 :       throw new IOException("Error reading " + path, e);
     219             :     }
     220             :   }
     221             : 
     222             :   private static DocumentBuilder newBuilder() throws ParserConfigurationException {
     223           0 :     DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
     224           0 :     factory.setValidating(false);
     225           0 :     factory.setExpandEntityReferences(false);
     226           0 :     factory.setIgnoringComments(true);
     227           0 :     factory.setCoalescing(true);
     228           0 :     return factory.newDocumentBuilder();
     229             :   }
     230             : 
     231             :   /**
     232             :    * Attaches nonce to all script elements in html.
     233             :    *
     234             :    * <p>The returned html is not guaranteed to have the same formatting as the input.
     235             :    *
     236             :    * @return Updated html or {@link Optional#empty()} if parsing failed.
     237             :    */
     238             :   public static Optional<String> attachNonce(String html, String nonce) {
     239           1 :     Parser parser = Parser.htmlParser();
     240           1 :     org.jsoup.nodes.Document document = parser.parseInput(html, "");
     241           1 :     if (!parser.getErrors().isEmpty()) {
     242           0 :       logger.atSevere().atMostEvery(5, TimeUnit.MINUTES).log(
     243           0 :           "Html couldn't be parsed to attach nonce. Errors: %s", parser.getErrors());
     244           0 :       return Optional.empty();
     245             :     }
     246           1 :     document.getElementsByTag("script").attr("nonce", nonce);
     247           1 :     return Optional.of(
     248             :         document
     249           1 :             .outputSettings(
     250           1 :                 new org.jsoup.nodes.Document.OutputSettings().prettyPrint(false).indentAmount(0))
     251           1 :             .outerHtml());
     252             :   }
     253             : }

Generated by: LCOV version 1.16+git.20220603.dfeb750