refactor(core): prevent infinite loops in clobbered elements check

This commit updates HTML sanitization logic to avoid infinite loops in case clobbered elements contain fields like `nextSibling` or `parentNode`. Those fields are used for DOM traversal and this update makes sure that those calls return valid results. Also this commit fixes an issue when clobbering `nodeName` causes JS exceptions.
AndrewKushnir · Feb 14, 2024 · c484002 · c484002
1 parent 79001a7
commit c484002
Show file tree

Hide file tree

Showing 2 changed files with 51 additions and 10 deletions.
diff --git a/packages/core/src/sanitization/html_sanitizer.ts b/packages/core/src/sanitization/html_sanitizer.ts
@@ -116,6 +116,7 @@ class SanitizingHtmlSerializer {
     // again, so it shouldn't be vulnerable to DOM clobbering.
     let current: Node = el.firstChild!;
     let traverseContent = true;
+    let parentNodes = [];
     while (current) {
       if (current.nodeType === Node.ELEMENT_NODE) {
         traverseContent = this.startElement(current as Element);
@@ -126,23 +127,27 @@ class SanitizingHtmlSerializer {
         this.sanitizedSomething = true;
       }
       if (traverseContent && current.firstChild) {
+        // Push current node to the parent stack before entering its content.
+        parentNodes.push(current);
         current = current.firstChild!;
         continue;
       }
       while (current) {
-        // Leaving the element. Walk up and to the right, closing tags as we go.
+        // Leaving the element.
+        // Walk up and to the right, closing tags as we go.
         if (current.nodeType === Node.ELEMENT_NODE) {
           this.endElement(current as Element);
         }
 
-        let next = this.checkClobberedElement(current, current.nextSibling!);
+        let next = this.checkClobberedElement(current, getNextSibling(current)!);
 
         if (next) {
           current = next;
           break;
         }
 
-        current = this.checkClobberedElement(current, current.parentNode!);
+        // There was no next sibling, walk up to the parent node (extract it from the stack).
+        current = parentNodes.pop()!;
       }
     }
     return this.buf.join('');
@@ -157,7 +162,7 @@ class SanitizingHtmlSerializer {
    * @return True if the element's contents should be traversed.
    */
   private startElement(element: Element): boolean {
-    const tagName = element.nodeName.toLowerCase();
+    const tagName = getNodeName(element).toLowerCase();
     if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
       this.sanitizedSomething = true;
       return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
@@ -183,7 +188,7 @@ class SanitizingHtmlSerializer {
   }
 
   private endElement(current: Element) {
-    const tagName = current.nodeName.toLowerCase();
+    const tagName = getNodeName(current).toLowerCase();
     if (VALID_ELEMENTS.hasOwnProperty(tagName) && !VOID_ELEMENTS.hasOwnProperty(tagName)) {
       this.buf.push('</');
       this.buf.push(tagName);
@@ -199,13 +204,39 @@ class SanitizingHtmlSerializer {
     if (nextNode &&
         (node.compareDocumentPosition(nextNode) &
          Node.DOCUMENT_POSITION_CONTAINED_BY) === Node.DOCUMENT_POSITION_CONTAINED_BY) {
-      throw new Error(`Failed to sanitize html because the element is clobbered: ${
-          (node as Element).outerHTML}`);
+      throw clobberedElementError(node);
     }
     return nextNode;
   }
 }
 
+/**
+ * Retrieves next sibling node and makes sure that there is no
+ * clobbering of the `nextSibling` property happening.
+ */
+function getNextSibling(node: Node): Node|null {
+  const nextSibling = node.nextSibling;
+  // Make sure there is no `nextSibling` clobbering: navigating to
+  // the next sibling and going back to the previous one should result
+  // in the original node.
+  if (nextSibling && node !== nextSibling.previousSibling) {
+    throw clobberedElementError(node);
+  }
+  return nextSibling;
+}
+
+/** Gets a reasonable nodeName, even for clobbered nodes. */
+export function getNodeName(node: Node): string {
+  const nodeName = node.nodeName;
+  // If the property is clobbered, assume it is an `HTMLFormElement`.
+  return (typeof nodeName === 'string') ? nodeName : 'FORM';
+}
+
+function clobberedElementError(node: Node) {
+  return new Error(
+      `Failed to sanitize html because the element is clobbered: ${(node as Element).outerHTML}`);
+}
+
 // Regular Expressions for parsing tags and attributes
 const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
 // ! to ~ is the ASCII range.

diff --git a/packages/core/test/sanitization/html_sanitizer_spec.ts b/packages/core/test/sanitization/html_sanitizer_spec.ts
@@ -206,18 +206,28 @@ describe('HTML sanitizer', () => {
     try {
       sanitizeHtml(defaultDoc, '<form><input name="parentNode" /></form>');
     } catch (e) {
-      // depending on the browser, we might ge an exception
+      // depending on the browser, we might get an exception
     }
     try {
       sanitizeHtml(defaultDoc, '<form><input name="nextSibling" /></form>');
     } catch (e) {
-      // depending on the browser, we might ge an exception
+      // depending on the browser, we might get an exception
     }
     try {
       sanitizeHtml(defaultDoc, '<form><div><div><input name="nextSibling" /></div></div></form>');
     } catch (e) {
-      // depending on the browser, we might ge an exception
+      // depending on the browser, we might get an exception
     }
+    try {
+      sanitizeHtml(defaultDoc, '<input name="nextSibling" form="a"><form id="a"></form>');
+    } catch (e) {
+      // depending on the browser, we might get an exception
+    }
+  });
+
+  it('should properly sanitize the content when `nodeName` is clobbered', () => {
+    const output = sanitizeHtml(defaultDoc, '<form><input name=nodeName></form>');
+    expect(output).toBe('');
   });
 
   // See