Skip to content

Commit

Permalink
publisher: Skip script, pre and textarea content when looking for HTM…
Browse files Browse the repository at this point in the history
…L elements

Updates #7567
  • Loading branch information
bep committed Apr 6, 2021
1 parent 7b4ade5 commit 8a30894
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 33 deletions.
87 changes: 55 additions & 32 deletions publisher/htmlElementsCollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ type cssClassCollectorWriter struct {
buff bytes.Buffer

isCollecting bool
dropValue bool
inPreTag string

inQuote bool
quoteValue byte
Expand All @@ -90,56 +90,70 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
b := p[i]
w.toggleIfQuote(b)
if !w.inQuote && b == '>' {
w.endCollecting(false)
w.endCollecting()
break
}
w.buff.WriteByte(b)
}

if !w.isCollecting {
if w.dropValue {
w.buff.Reset()
} else {
// First check if we have processed this element before.
w.collector.mu.RLock()

// See https://github.com/dominikh/go-tools/issues/723
//lint:ignore S1030 This construct avoids memory allocation for the string.
seen := w.collector.elementSet[string(w.buff.Bytes())]
w.collector.mu.RUnlock()
if seen {
w.buff.Reset()
continue
if w.inPreTag != "" {
s := w.buff.String()
if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName {
w.inPreTag = ""
}
w.buff.Reset()
continue
}

s := w.buff.String()
// First check if we have processed this element before.
w.collector.mu.RLock()

// See https://github.com/dominikh/go-tools/issues/723
//lint:ignore S1030 This construct avoids memory allocation for the string.
seen := w.collector.elementSet[string(w.buff.Bytes())]
w.collector.mu.RUnlock()
if seen {
w.buff.Reset()
continue
}

if strings.HasPrefix(s, "</") {
continue
}
s := w.buff.String()

key := s
w.buff.Reset()

s, tagName := w.insertStandinHTMLElement(s)
el := parseHTMLElement(s)
el.Tag = tagName
if strings.HasPrefix(s, "</") {
continue
}

w.collector.mu.Lock()
w.collector.elementSet[key] = true
if el.Tag != "" {
w.collector.elements = append(w.collector.elements, el)
}
w.collector.mu.Unlock()
key := s

s, tagName := w.insertStandinHTMLElement(s)
el := parseHTMLElement(s)
el.Tag = tagName
if w.isPreFormatted(tagName) {
w.inPreTag = tagName
}

w.collector.mu.Lock()
w.collector.elementSet[key] = true
if el.Tag != "" {
w.collector.elements = append(w.collector.elements, el)
}
w.collector.mu.Unlock()

}
}
}

return
}

// isPreFormatted reports whether s is one of the tag names (pre, textarea,
// script) whose inner content is not scanned for HTML elements.
func (c *cssClassCollectorWriter) isPreFormatted(s string) bool {
	switch s {
	case "pre", "textarea", "script":
		return true
	default:
		return false
	}
}

// The net/html parser does not handle single table elements as input, e.g. tbody.
// We only care about the element/class/ids, so just store away the original tag name
// and pretend it's a <div>.
Expand All @@ -154,15 +168,24 @@ func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, s
return newv, strings.ToLower(tag)
}

func (c *cssClassCollectorWriter) endCollecting(drop bool) {
// parseEndTag checks whether s starts with the closing-tag marker "</".
// If it does, it returns the remainder with one trailing ">" removed,
// surrounding whitespace trimmed and the result lower-cased, together with true.
// Otherwise it returns "" and false.
func (c *cssClassCollectorWriter) parseEndTag(s string) (string, bool) {
	const closePrefix = "</"
	if !strings.HasPrefix(s, closePrefix) {
		return "", false
	}
	// Drop the prefix and an optional single trailing ">" before trimming,
	// so the trim order matches: suffix first, whitespace second.
	name := strings.TrimSuffix(s[len(closePrefix):], ">")
	name = strings.TrimSpace(name)
	return strings.ToLower(name), true
}

// endCollecting stops the current collection run: it clears the isCollecting
// flag and resets the inQuote state.
func (c *cssClassCollectorWriter) endCollecting() {
c.isCollecting = false
c.inQuote = false
// NOTE(review): `drop` is not declared by the signature above; this line
// presumably belongs to the earlier endCollecting(drop bool) variant — confirm.
c.dropValue = drop

}

// startCollecting begins a collection run by setting the isCollecting flag.
func (c *cssClassCollectorWriter) startCollecting() {
c.isCollecting = true
// NOTE(review): presumably a leftover from the variant that still had the
// dropValue field — confirm whether this assignment is part of the current code.
c.dropValue = false

}

func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {
Expand Down
6 changes: 5 additions & 1 deletion publisher/htmlElementsCollector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,12 @@ func TestClassCollector(t *testing.T) {

{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
// https://github.com/gohugoio/hugo/issues/7746
// Issue #7746
{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
// Issue #7567
{"Script tags content should be skipped", `<script><span>foo</span><span>bar</span></script><div class="foo"></div>`, f("div script", "foo", "")},
{"Pre tags content should be skipped", `<pre class="preclass"><span>foo</span><span>bar</span></pre><div class="foo"></div>`, f("div pre", "foo preclass", "")},
{"Textare tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
} {
c.Run(test.name, func(c *qt.C) {
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
Expand Down

0 comments on commit 8a30894

Please sign in to comment.