Skip to content

Commit

Permalink
htmldoc: Add tests for concurrency issues with Document.Parse
Browse files Browse the repository at this point in the history
  • Loading branch information
wjdp committed Aug 28, 2017
1 parent 9522220 commit 6131581
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 7 deletions.
15 changes: 8 additions & 7 deletions htmldoc/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,18 @@ func (doc *Document) Init() {
}

// Parse : Ask Document to parse its HTML file. Returns quickly if this has
// already been done. Thread safe.
// already been done. Thread safe. Either called when the document is tested
// or when another document needs data from this one.
func (doc *Document) Parse() {
// Parse the document
// Either called when the document is tested or when another document needs
// data from this one.
doc.htmlMutex.Lock() // MUTEX
// Only one routine may parse the doc
doc.htmlMutex.Lock()
defer doc.htmlMutex.Unlock()

// If document has already been parsed, return early.
if doc.htmlNode != nil {
doc.htmlMutex.Unlock() // MUTEX
return
}

// Open, parse, and close document
f, err := os.Open(doc.FilePath)
output.CheckErrorPanic(err)
Expand All @@ -59,7 +61,6 @@ func (doc *Document) Parse() {

doc.htmlNode = htmlNode
doc.parseNode(htmlNode)
doc.htmlMutex.Unlock() // MUTEX
}

// Internal recursive function that delves into the node tree and captures
Expand Down
38 changes: 38 additions & 0 deletions htmldoc/document_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package htmldoc
import (
"github.com/daviddengcn/go-assert"
"testing"
"sync"
)

func TestDocumentParse(t *testing.T) {
Expand All @@ -16,6 +17,43 @@ func TestDocumentParse(t *testing.T) {
assert.Equals(t, "document first body node", nodeElem.Data, "h1")
}

func TestDocumentParseOnce(t *testing.T) {
// Document.Parse should only parse once, subsequent calls should return quickly
doc := Document{
FilePath: "fixtures/documents/index.html",
}
doc.Init()
doc.Parse()
// Store copy of htmlNode
hN := doc.htmlNode
doc.Parse()
// and assert it's the same one
assert.Equals(t, "htmlNode", doc.htmlNode, hN)
}

func TestDocumentParseOnceConcurrent(t *testing.T) {
// Document.Parse should be thread safe
doc := Document{
FilePath: "fixtures/documents/index.html",
}
doc.Init()
// Parse many times
wg := sync.WaitGroup{}
for i := 0; i < 320; i++ {
wg.Add(1)
go func() {
defer wg.Done()
doc.Parse()
}()
}
// Wait until all jobs done
wg.Wait()
// Assert we have something sensible by the end of this
nodeElem := doc.htmlNode.FirstChild.FirstChild.NextSibling.FirstChild
assert.Equals(t, "document first body node", nodeElem.Data, "h1")
}


func TestDocumentNodesOfInterest(t *testing.T) {
doc := Document{
FilePath: "fixtures/documents/nodes.htm",
Expand Down

1 comment on commit 6131581

@wjdp
Copy link
Owner Author

@wjdp wjdp commented on 6131581 Aug 28, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Re #59

Please sign in to comment.