diff --git a/htmldoc/document.go b/htmldoc/document.go
index 5b77f35..98de6d5 100644
--- a/htmldoc/document.go
+++ b/htmldoc/document.go
@@ -39,16 +39,18 @@ func (doc *Document) Init() {
}
// Parse : Ask Document to parse its HTML file. Returns quickly if this has
-// already been done. Thread safe.
+// already been done. Thread safe. Either called when the document is tested
+// or when another document needs data from this one.
func (doc *Document) Parse() {
- // Parse the document
- // Either called when the document is tested or when another document needs
- // data from this one.
- doc.htmlMutex.Lock() // MUTEX
+ // Only one routine may parse the doc
+ doc.htmlMutex.Lock()
+ defer doc.htmlMutex.Unlock()
+
+ // If document has already been parsed, return early.
if doc.htmlNode != nil {
- doc.htmlMutex.Unlock() // MUTEX
return
}
+
// Open, parse, and close document
f, err := os.Open(doc.FilePath)
output.CheckErrorPanic(err)
@@ -59,7 +61,6 @@ func (doc *Document) Parse() {
doc.htmlNode = htmlNode
doc.parseNode(htmlNode)
- doc.htmlMutex.Unlock() // MUTEX
}
// Internal recursive function that delves into the node tree and captures
diff --git a/htmldoc/document_test.go b/htmldoc/document_test.go
index 1aed8b6..bc9ebb4 100644
--- a/htmldoc/document_test.go
+++ b/htmldoc/document_test.go
@@ -3,6 +3,7 @@ package htmldoc
import (
"github.com/daviddengcn/go-assert"
"testing"
+ "sync"
)
func TestDocumentParse(t *testing.T) {
@@ -16,6 +17,43 @@ func TestDocumentParse(t *testing.T) {
assert.Equals(t, "document first body node", nodeElem.Data, "h1")
}
+func TestDocumentParseOnce(t *testing.T) {
+ // Document.Parse should only parse once, subsequent calls should return quickly
+ doc := Document{
+ FilePath: "fixtures/documents/index.html",
+ }
+ doc.Init()
+ doc.Parse()
+ // Store copy of htmlNode
+ hN := doc.htmlNode
+ doc.Parse()
+ // and assert it's the same one
+ assert.Equals(t, "htmlNode", doc.htmlNode, hN)
+}
+
+func TestDocumentParseOnceConcurrent(t *testing.T) {
+ // Document.Parse should be thread safe
+ doc := Document{
+ FilePath: "fixtures/documents/index.html",
+ }
+ doc.Init()
+ // Parse many times
+ wg := sync.WaitGroup{}
+ for i := 0; i < 320; i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ doc.Parse()
+ }()
+ }
+ // Wait until all jobs done
+ wg.Wait()
+ // Assert we have something sensible by the end of this
+ nodeElem := doc.htmlNode.FirstChild.FirstChild.NextSibling.FirstChild
+ assert.Equals(t, "document first body node", nodeElem.Data, "h1")
+}
+
+
func TestDocumentNodesOfInterest(t *testing.T) {
doc := Document{
FilePath: "fixtures/documents/nodes.htm",