diff --git a/htmldoc/document.go b/htmldoc/document.go index 5b77f35..98de6d5 100644 --- a/htmldoc/document.go +++ b/htmldoc/document.go @@ -39,16 +39,18 @@ func (doc *Document) Init() { } // Parse : Ask Document to parse its HTML file. Returns quickly if this has -// already been done. Thread safe. +// already been done. Thread safe. Either called when the document is tested +// or when another document needs data from this one. func (doc *Document) Parse() { - // Parse the document - // Either called when the document is tested or when another document needs - // data from this one. - doc.htmlMutex.Lock() // MUTEX + // Only one routine may parse the doc + doc.htmlMutex.Lock() + defer doc.htmlMutex.Unlock() + + // If document has already been parsed, return early. if doc.htmlNode != nil { - doc.htmlMutex.Unlock() // MUTEX return } + // Open, parse, and close document f, err := os.Open(doc.FilePath) output.CheckErrorPanic(err) @@ -59,7 +61,6 @@ func (doc *Document) Parse() { doc.htmlNode = htmlNode doc.parseNode(htmlNode) - doc.htmlMutex.Unlock() // MUTEX } // Internal recursive function that delves into the node tree and captures diff --git a/htmldoc/document_test.go b/htmldoc/document_test.go index 1aed8b6..bc9ebb4 100644 --- a/htmldoc/document_test.go +++ b/htmldoc/document_test.go @@ -3,6 +3,7 @@ package htmldoc import ( "github.com/daviddengcn/go-assert" "testing" + "sync" ) func TestDocumentParse(t *testing.T) { @@ -16,6 +17,43 @@ func TestDocumentParse(t *testing.T) { assert.Equals(t, "document first body node", nodeElem.Data, "h1") } +func TestDocumentParseOnce(t *testing.T) { + // Document.Parse should only parse once, subsequent calls should return quickly + doc := Document{ + FilePath: "fixtures/documents/index.html", + } + doc.Init() + doc.Parse() + // Store copy of htmlNode + hN := doc.htmlNode + doc.Parse() + // and assert it's the same one + assert.Equals(t, "htmlNode", doc.htmlNode, hN) +} + +func TestDocumentParseOnceConcurrent(t *testing.T) { + // Document.Parse should be thread safe + doc := Document{ + FilePath: "fixtures/documents/index.html", + } + doc.Init() + // Parse many times + wg := sync.WaitGroup{} + for i := 0; i < 320; i++ { + wg.Add(1) + go func() { + defer wg.Done() + doc.Parse() + }() + } + // Wait until all jobs done + wg.Wait() + // Assert we have something sensible by the end of this + nodeElem := doc.htmlNode.FirstChild.FirstChild.NextSibling.FirstChild + assert.Equals(t, "document first body node", nodeElem.Data, "h1") +} + + func TestDocumentNodesOfInterest(t *testing.T) { doc := Document{ FilePath: "fixtures/documents/nodes.htm",