Skip to content

Commit

Permalink
refactor(core): prevent infinite loops in clobbered elements check
Browse files Browse the repository at this point in the history
This commit updates HTML sanitization logic to avoid infinite loops in case clobbered elements contain fields like `nextSibling` or `parentNode`. Those fields are used for DOM traversal and this update makes sure that those calls return valid results.

Also this commit fixes an issue when clobbering `nodeName` causes JS exceptions.
  • Loading branch information
AndrewKushnir committed Feb 14, 2024
1 parent 79001a7 commit c484002
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 10 deletions.
45 changes: 38 additions & 7 deletions packages/core/src/sanitization/html_sanitizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class SanitizingHtmlSerializer {
// again, so it shouldn't be vulnerable to DOM clobbering.
let current: Node = el.firstChild!;
let traverseContent = true;
let parentNodes = [];
while (current) {
if (current.nodeType === Node.ELEMENT_NODE) {
traverseContent = this.startElement(current as Element);
Expand All @@ -126,23 +127,27 @@ class SanitizingHtmlSerializer {
this.sanitizedSomething = true;
}
if (traverseContent && current.firstChild) {
// Push current node to the parent stack before entering its content.
parentNodes.push(current);
current = current.firstChild!;
continue;
}
while (current) {
// Leaving the element. Walk up and to the right, closing tags as we go.
// Leaving the element.
// Walk up and to the right, closing tags as we go.
if (current.nodeType === Node.ELEMENT_NODE) {
this.endElement(current as Element);
}

let next = this.checkClobberedElement(current, current.nextSibling!);
let next = this.checkClobberedElement(current, getNextSibling(current)!);

if (next) {
current = next;
break;
}

current = this.checkClobberedElement(current, current.parentNode!);
// There was no next sibling, walk up to the parent node (extract it from the stack).
current = parentNodes.pop()!;
}
}
return this.buf.join('');
Expand All @@ -157,7 +162,7 @@ class SanitizingHtmlSerializer {
* @return True if the element's contents should be traversed.
*/
private startElement(element: Element): boolean {
const tagName = element.nodeName.toLowerCase();
const tagName = getNodeName(element).toLowerCase();
if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
this.sanitizedSomething = true;
return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
Expand All @@ -183,7 +188,7 @@ class SanitizingHtmlSerializer {
}

private endElement(current: Element) {
const tagName = current.nodeName.toLowerCase();
const tagName = getNodeName(current).toLowerCase();
if (VALID_ELEMENTS.hasOwnProperty(tagName) && !VOID_ELEMENTS.hasOwnProperty(tagName)) {
this.buf.push('</');
this.buf.push(tagName);
Expand All @@ -199,13 +204,39 @@ class SanitizingHtmlSerializer {
if (nextNode &&
(node.compareDocumentPosition(nextNode) &
Node.DOCUMENT_POSITION_CONTAINED_BY) === Node.DOCUMENT_POSITION_CONTAINED_BY) {
throw new Error(`Failed to sanitize html because the element is clobbered: ${
(node as Element).outerHTML}`);
throw clobberedElementError(node);
}
return nextNode;
}
}

/**
* Retrieves next sibling node and makes sure that there is no
* clobbering of the `nextSibling` property happening.
*/
function getNextSibling(node: Node): Node|null {
const nextSibling = node.nextSibling;
// Make sure there is no `nextSibling` clobbering: navigating to
// the next sibling and going back to the previous one should result
// in the original node.
if (nextSibling && node !== nextSibling.previousSibling) {
throw clobberedElementError(node);
}
return nextSibling;
}

/** Gets a reasonable nodeName, even for clobbered nodes. */
export function getNodeName(node: Node): string {
const nodeName = node.nodeName;
// If the property is clobbered, assume it is an `HTMLFormElement`.
return (typeof nodeName === 'string') ? nodeName : 'FORM';
}

function clobberedElementError(node: Node) {
return new Error(
`Failed to sanitize html because the element is clobbered: ${(node as Element).outerHTML}`);
}

// Regular Expressions for parsing tags and attributes
const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
// ! to ~ is the ASCII range.
Expand Down
16 changes: 13 additions & 3 deletions packages/core/test/sanitization/html_sanitizer_spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,18 +206,28 @@ describe('HTML sanitizer', () => {
try {
sanitizeHtml(defaultDoc, '<form><input name="parentNode" /></form>');
} catch (e) {
// depending on the browser, we might ge an exception
// depending on the browser, we might get an exception
}
try {
sanitizeHtml(defaultDoc, '<form><input name="nextSibling" /></form>');
} catch (e) {
// depending on the browser, we might ge an exception
// depending on the browser, we might get an exception
}
try {
sanitizeHtml(defaultDoc, '<form><div><div><input name="nextSibling" /></div></div></form>');
} catch (e) {
// depending on the browser, we might ge an exception
// depending on the browser, we might get an exception
}
try {
sanitizeHtml(defaultDoc, '<input name="nextSibling" form="a"><form id="a"></form>');
} catch (e) {
// depending on the browser, we might get an exception
}
});

it('should properly sanitize the content when `nodeName` is clobbered', () => {
const output = sanitizeHtml(defaultDoc, '<form><input name=nodeName></form>');
expect(output).toBe('');
});

// See
Expand Down

0 comments on commit c484002

Please sign in to comment.