From 03e5cd09fe1e3f4d0b73dcdee47797e42e430e05 Mon Sep 17 00:00:00 2001 From: Andy C Date: Mon, 13 Jan 2025 14:13:46 -0500 Subject: [PATCH] [lazylex/html] Start conversion to XML Can correct > to &gt; This will help refine the TagLexer and AttrValueLexer APIs as well. Although it's looking like significant work too. --- lazylex/html.py | 56 +++++++++++++++++++++++++++++++++++++++++--- lazylex/html_test.py | 16 +++++++------ 2 files changed, 62 insertions(+), 10 deletions(-) diff --git a/lazylex/html.py b/lazylex/html.py index 09251037c..12e7ed597 100755 --- a/lazylex/html.py +++ b/lazylex/html.py @@ -919,10 +919,10 @@ def Validate(contents, flags, counters): counters.num_tokens += len(tokens) -def ToXml(h): +def ToXml(htm8_str): # type: (str) -> str - # TODO: + # TODO: # 1. Lex it # 2. < & > must be escaped # a. in raw data @@ -930,7 +930,57 @@ def ToXml(h): # 3.