diff --git a/README.md b/README.md index 5e217b3..1b62e16 100644 --- a/README.md +++ b/README.md @@ -44,13 +44,12 @@ Options: Many options of the following tests can customised. Items marked :soon: are not checked yet, but will be *soon*. - `a` `link` `img` `script`: Whether internal links work / are valid. -- `a`: :soon: Whether internal hashes work. +- `a`: Whether internal hashes work. - `a` `link` `img` `script`: Whether external links work. - `a`: :soon: Whether external hashes work. - `a` `link`: Whether external links use HTTPS. -- `a` `link`: Whether external links use HTTPS. - `img`: Whether your images have valid alt attributes. -- `meta`: :soon: Whether favicons are valid. +- `link`: Whether pages have a valid favicon. - `meta`: :soon: Whether images and URLs in the OpenGraph metadata are valid. - `meta` `title`: :soon: Whether you've got the [recommended tags](https://support.google.com/webmasters/answer/79812?hl=en) in your head. @@ -90,12 +89,13 @@ htmltest uses a YAML configuration file. Put `.htmltest.yml` in the same directo | `CheckScripts` | Enables checking `" + nodeDoc, _ := html.Parse(strings.NewReader(snip)) + nodeImg := nodeDoc.FirstChild.FirstChild.NextSibling.FirstChild + attrs := ExtractAttrs(nodeImg.Attr, []string{"src", "alt"}) + + assert.Equals(t, "src", attrs["src"], "x") + assert.Equals(t, "alt", attrs["alt"], "y") + assert.NotEquals(t, "foo", attrs["foo"], "bar") +} + +func TestAttrPresent(t *testing.T) { + snip := "\"y\"" + nodeDoc, _ := html.Parse(strings.NewReader(snip)) + nodeImg := nodeDoc.FirstChild.FirstChild.NextSibling.FirstChild + + assert.Equals(t, "src in attr", AttrPresent(nodeImg.Attr, "src"), true) + assert.Equals(t, "alt in attr", AttrPresent(nodeImg.Attr, "src"), true) + assert.NotEquals(t, "foo in attr", AttrPresent(nodeImg.Attr, "src"), false) +} + +func TestAttrValIdId(t *testing.T) { + snip := "

" + nodeDoc, _ := html.Parse(strings.NewReader(snip)) + nodeH1 := nodeDoc.FirstChild.FirstChild.NextSibling.FirstChild + + assert.Equals(t, "h1 id", GetId(nodeH1.Attr), "x") +} + +func TestAttrValIdName(t *testing.T) { + snip := "

" + nodeDoc, _ := html.Parse(strings.NewReader(snip)) + nodeH1 := nodeDoc.FirstChild.FirstChild.NextSibling.FirstChild + + assert.Equals(t, "h1 name", GetId(nodeH1.Attr), "x") +} diff --git a/htmldoc/document.go b/htmldoc/document.go index 57cbff4..37862a9 100644 --- a/htmldoc/document.go +++ b/htmldoc/document.go @@ -3,16 +3,18 @@ package htmldoc import ( "golang.org/x/net/html" "os" - "path" - "regexp" + "sync" ) type Document struct { - FilePath string // Relative to the shell session - SitePath string // Relative to the site root - Directory string - HTMLNode *html.Node - State DocumentState + FilePath string // Relative to the shell session + SitePath string // Relative to the site root + Directory string + htmlMutex *sync.Mutex + htmlNode *html.Node + hashMap map[string]*html.Node + NodesOfInterest []*html.Node + State DocumentState } // Used by checks that depend on the document being parsed @@ -20,7 +22,23 @@ type DocumentState struct { FaviconPresent bool } +func (doc *Document) Init() { + // Setup the document, doesn't mesh nice with the NewXYZ() convention but + // many optional parameters for Document and no parameter overloading in Go + doc.htmlMutex = &sync.Mutex{} + doc.NodesOfInterest = make([]*html.Node, 0) + doc.hashMap = make(map[string]*html.Node) +} + func (doc *Document) Parse() { + // Parse the document + // Either called when the document is tested or when another document needs + // data from this one. + doc.htmlMutex.Lock() // MUTEX + if doc.htmlNode != nil { + doc.htmlMutex.Unlock() // MUTEX + return + } // Open, parse, and close document f, err := os.Open(doc.FilePath) checkErr(err) @@ -29,73 +47,34 @@ func (doc *Document) Parse() { htmlNode, err := html.Parse(f) checkErr(err) - doc.HTMLNode = htmlNode -} - -func DocumentsFromDir(path string, ignorePatterns []interface{}) []Document { - // Nice proxy for recurseDir - return recurseDir(path, ignorePatterns, "") + doc.htmlNode = htmlNode + doc.parseNode(htmlNode) + doc.htmlMutex.Unlock() // MUTEX } -func recurseDir(basePath string, ignorePatterns []interface{}, dPath string) []Document { - // Recursive function that returns all Document struts in a given - // os directory. - // basePath: the directory to scan - // dPath: the subdirectory within basePath we're scanning - // ignorePatterns: string slice of dPaths to ignore - - documents := make([]Document, 0) - - if isDirIgnored(ignorePatterns, dPath) { - return documents - } - - // Open directory to scan - f, err := os.Open(path.Join(basePath, dPath)) - checkErr(err) - defer f.Close() - - // Get FileInfo of directory (scan it) - fi, err := f.Stat() - checkErr(err) - - if fi.IsDir() { // Double check we're dealing with a directory - // Read all FileInfo-s from directory, Readdir(count int) - fis, err := f.Readdir(-1) - checkErr(err) - - // Iterate over contents of directory - for _, fileinfo := range fis { - fPath := path.Join(dPath, fileinfo.Name()) - if fileinfo.IsDir() { - // If item is a dir, we need to iterate further, save returned documents - documents = append(documents, recurseDir(basePath, ignorePatterns, fPath)...) - } else if path.Ext(fileinfo.Name()) == ".html" || path.Ext(fileinfo.Name()) == ".htm" { - // If a file, save to filename list - documents = append(documents, Document{ - FilePath: path.Join(basePath, fPath), - SitePath: fPath, - Directory: dPath, - }) - } +func (doc *Document) parseNode(n *html.Node) { + if n.Type == html.ElementNode { + // If present save fragment identifier to the hashMap + nodeId := GetId(n.Attr) + if nodeId != "" { + doc.hashMap[nodeId] = n + } + // Identify and store tags of interest + switch n.Data { + case "a", "link", "img", "script": + doc.NodesOfInterest = append(doc.NodesOfInterest, n) + case "pre", "code": + return // Everything within these elements is not to be interpreted } - } else { // It's a file, return single file - filePath := path.Join(basePath, dPath) - documents = append(documents, Document{ - FilePath: filePath, - SitePath: path.Base(filePath), - Directory: dPath, - }) } - - return documents + // Iterate over children + for c := n.FirstChild; c != nil; c = c.NextSibling { + doc.parseNode(c) + } } -func isDirIgnored(ignorePatterns []interface{}, dir string) bool { - for _, item := range ignorePatterns { - if ok, _ := regexp.MatchString(item.(string), dir+"/"); ok { - return true - } - } - return false +func (doc *Document) IsHashValid(hash string) bool { + doc.Parse() // Ensure doc has been parsed + _, ok := doc.hashMap[hash] + return ok } diff --git a/htmldoc/document_store.go b/htmldoc/document_store.go new file mode 100644 index 0000000..441de67 --- /dev/null +++ b/htmldoc/document_store.go @@ -0,0 +1,117 @@ +package htmldoc + +import ( + "os" + "path" + "regexp" +) + +type DocumentStore struct { + BasePath string // Path, relative to cwd, the site is located in + IgnorePatterns []interface{} // Regexes of directories to ignore + Documents []*Document // All of the documents, used to iterate over + DocumentPathMap map[string]*Document // Maps slash separated paths to documents + DocumentExtension string // File extension to look for + DirectoryIndex string // What file is the index of the directory +} + +func NewDocumentStore() DocumentStore { + return DocumentStore{ + Documents: make([]*Document, 0), + DocumentPathMap: make(map[string]*Document), + } +} + +func (dS *DocumentStore) AddDocument(doc *Document) { + // Save reference to document to various data stores + dS.Documents = append(dS.Documents, doc) + dS.DocumentPathMap[doc.SitePath] = doc +} + +func (dS *DocumentStore) Discover() { + // Find all documents in BasePath + dS.discoverRecurse(".") +} + +func (dS *DocumentStore) isDirIgnored(dir string) bool { + // Does path dir match IgnorePatterns? + for _, item := range dS.IgnorePatterns { + if ok, _ := regexp.MatchString(item.(string), dir+"/"); ok { + return true + } + } + return false +} + +func (dS *DocumentStore) discoverRecurse(dPath string) { + // Recurse over relative path dPath, saves found documents to dS + if dS.isDirIgnored(dPath) { + return + } + + // Open directory to scan + f, err := os.Open(path.Join(dS.BasePath, dPath)) + checkErr(err) + defer f.Close() + + // Get FileInfo of directory (scan it) + fi, err := f.Stat() + checkErr(err) + + if fi.IsDir() { // Double check we're dealing with a directory + // Read all FileInfo-s from directory, Readdir(count int) + fis, err := f.Readdir(-1) + checkErr(err) + + // Iterate over contents of directory + for _, fileinfo := range fis { + fPath := path.Join(dPath, fileinfo.Name()) + if fileinfo.IsDir() { + // If item is a dir, we delve deeper + dS.discoverRecurse(fPath) + } else if path.Ext(fileinfo.Name()) == dS.DocumentExtension { + // If a file, create and save document + newDoc := &Document{ + FilePath: path.Join(dS.BasePath, fPath), + SitePath: fPath, + Directory: dPath, + } + newDoc.Init() + dS.AddDocument(newDoc) + } + } + } else { // It's a file, return single file + panic("discoverRecurse encountered a file: " + dPath) + } + +} + +func (dS *DocumentStore) ResolvePath(refPath string) (*Document, bool) { + // Resolves internal absolute paths to documents + + // Match root document + if refPath == "/" { + d0, b0 := dS.DocumentPathMap[dS.DirectoryIndex] + return d0, b0 + } + + if refPath[0] == '/' && len(refPath) > 1 { + // Is an absolute link, remove the leading slash for map lookup + refPath = refPath[1:len(refPath)] + } + + // Try path as-is, path.ext + d1, b1 := dS.DocumentPathMap[refPath] + if b1 { + // as-is worked, return that + return d1, b1 + } + + // Try as a directory, path.ext/index.html + d2, b2 := dS.DocumentPathMap[path.Join(refPath, dS.DirectoryIndex)] + return d2, b2 +} + +func (dS *DocumentStore) ResolveRef(ref *Reference) (*Document, bool) { + return dS.ResolvePath(ref.RefSitePath()) +} diff --git a/htmldoc/document_store_test.go b/htmldoc/document_store_test.go new file mode 100644 index 0000000..c96e080 --- /dev/null +++ b/htmldoc/document_store_test.go @@ -0,0 +1,77 @@ +package htmldoc + +import ( + "github.com/daviddengcn/go-assert" + "testing" +) + +func TestDocumentStoreDiscover(t *testing.T) { + // documentstore can scan an os directory + dS := NewDocumentStore() + dS.BasePath = "fixtures/documents" + dS.DocumentExtension = ".html" // Ignores .htm + dS.DirectoryIndex = "index.html" + dS.Discover() + // Fixtures dir has eight documents in various folders + assert.Equals(t, "document count", len(dS.Documents), 6) +} + +func TestDocumentStoreIgnorePatterns(t *testing.T) { + // documentstore can scan an os directory + dS := NewDocumentStore() + dS.BasePath = "fixtures/documents" + dS.DocumentExtension = ".html" // Ignores .htm + dS.DirectoryIndex = "index.html" + dS.IgnorePatterns = []interface{}{"^lib/"} + dS.Discover() + // Fixtures dir has seven documents in various folders, (one ignored in lib) + assert.Equals(t, "document count", len(dS.Documents), 5) +} + +func TestDocumentStoreDocumentExists(t *testing.T) { + // documentstore knows if documents exist or not + dS := NewDocumentStore() + dS.BasePath = "fixtures/documents" + dS.DocumentExtension = ".html" + dS.DirectoryIndex = "index.html" + dS.Discover() + _, b1 := dS.DocumentPathMap["index.html"] + assert.IsTrue(t, "index.html exists", b1) + _, b2 := dS.DocumentPathMap["dir2/index.html"] + assert.IsTrue(t, "dir2/index.html exists", b2) + _, b3 := dS.DocumentPathMap["foo.html"] + assert.IsFalse(t, "foo.html does not exist", b3) + _, b4 := dS.DocumentPathMap["dir3/index.html"] + assert.IsFalse(t, "dir3/index.html does not exist", b4) +} + +func TestDocumentStoreDocumentResolve(t *testing.T) { + // documentstore correctly resolves documents + dS := NewDocumentStore() + dS.BasePath = "fixtures/documents" + dS.DocumentExtension = ".html" + dS.DirectoryIndex = "index.html" + dS.Discover() + d0, b0 := dS.ResolvePath("/") + assert.IsTrue(t, "root document exists", b0) + assert.Equals(t, "/ resolves to index.html", + d0.FilePath, "fixtures/documents/index.html") + d1, b1 := dS.ResolvePath("/contact.html") + assert.IsTrue(t, "/contact.html exists", b1) + assert.Equals(t, "/contact.html resolves to correct document", + d1.FilePath, "fixtures/documents/contact.html") + d2, b2 := dS.ResolvePath("dir2/index.html") + assert.IsTrue(t, "dir2/index.html exists", b2) + assert.Equals(t, "dir2/index.html resolves to correct document", + d2.FilePath, "fixtures/documents/dir2/index.html") + d3, b3 := dS.ResolvePath("dir2/") + assert.IsTrue(t, "dir2/index.html exists", b3) + assert.Equals(t, "dir2/index.html resolves to correct document", + d3.FilePath, "fixtures/documents/dir2/index.html") + d4, b4 := dS.ResolvePath("dir2") + assert.IsTrue(t, "dir2/index.html exists", b4) + assert.Equals(t, "dir2/index.html resolves to correct document", + d4.FilePath, "fixtures/documents/dir2/index.html") + _, b5 := dS.ResolvePath("does-not-exist") + assert.IsFalse(t, "does not return doc for invalid path", b5) +} diff --git a/htmldoc/document_test.go b/htmldoc/document_test.go index c0fd9d8..0e0420d 100644 --- a/htmldoc/document_test.go +++ b/htmldoc/document_test.go @@ -10,14 +10,30 @@ func TestDocumentParse(t *testing.T) { doc := Document{ FilePath: "fixtures/documents/index.html", } + doc.Init() doc.Parse() - nodeElem := doc.HTMLNode.FirstChild.FirstChild.NextSibling.FirstChild + nodeElem := doc.htmlNode.FirstChild.FirstChild.NextSibling.FirstChild assert.Equals(t, "document first body node", nodeElem.Data, "h1") } -func TestDocumentsFromDir(t *testing.T) { - // it creates Document struts from an os directory - docs := DocumentsFromDir("fixtures/documents", []interface{}{"^lib/"}) - // Fixtures dir has seven documents in various folders - assert.Equals(t, "document count", len(docs), 7) +func TestDocumentNodesOfInterest(t *testing.T) { + doc := Document{ + FilePath: "fixtures/documents/nodes.htm", + } + doc.Init() + doc.Parse() + assert.Equals(t, "nodes of interest", len(doc.NodesOfInterest), 4) +} + +func TestDocumentIsHashValid(t *testing.T) { + // parse a document and check we have valid nodes + doc := Document{ + FilePath: "fixtures/documents/index.html", + } + doc.Init() + doc.Parse() + + assert.IsTrue(t, "#xyz present", doc.IsHashValid("xyz")) + assert.IsTrue(t, "#prq present", doc.IsHashValid("prq")) + assert.IsFalse(t, "#abc present", doc.IsHashValid("abc")) } diff --git a/htmldoc/fixtures/conf.json b/htmldoc/fixtures/conf.json deleted file mode 100644 index 3f17ad0..0000000 --- a/htmldoc/fixtures/conf.json +++ /dev/null @@ -1 +0,0 @@ -DirectoryPath: "htmldoc/fixtures/documents" diff --git a/htmldoc/fixtures/documents/index.html b/htmldoc/fixtures/documents/index.html index afc2780..2fc8836 100644 --- a/htmldoc/fixtures/documents/index.html +++ b/htmldoc/fixtures/documents/index.html @@ -1 +1,9 @@

INDEX

+ +

Lorem ipsum

+ +

Another para

+ +

Hello

+ +
diff --git a/htmldoc/fixtures/documents/page.htm b/htmldoc/fixtures/documents/lib/lib.js similarity index 100% rename from htmldoc/fixtures/documents/page.htm rename to htmldoc/fixtures/documents/lib/lib.js diff --git a/htmldoc/fixtures/documents/nodes.htm b/htmldoc/fixtures/documents/nodes.htm new file mode 100644 index 0000000..9dcc7a1 --- /dev/null +++ b/htmldoc/fixtures/documents/nodes.htm @@ -0,0 +1,16 @@ + + + +foobar +
+    
+    
+    
+    foobar
+
+ + + + + foobar + diff --git a/htmldoc/reference.go b/htmldoc/reference.go index 7b74911..196866e 100644 --- a/htmldoc/reference.go +++ b/htmldoc/reference.go @@ -73,11 +73,7 @@ func (ref *Reference) IsInternalAbsolute() bool { return !strings.HasPrefix(ref.Path, "//") && strings.HasPrefix(ref.Path, "/") } -func (ref *Reference) AbsolutePath() string { - // If external return unchanged - if ref.Scheme() != "file" { - return ref.URL.Path - } +func (ref *Reference) RefSitePath() string { // If internal, return a path to the referenced file relative to the 'site root' // Strip shit off the end? if ref.IsInternalAbsolute() { diff --git a/htmldoc/reference_test.go b/htmldoc/reference_test.go index 3fad37d..3e527b6 100644 --- a/htmldoc/reference_test.go +++ b/htmldoc/reference_test.go @@ -19,7 +19,7 @@ func TestReferenceScheme(t *testing.T) { doc := Document{ SitePath: "doc.html", - HTMLNode: nodeDoc, + htmlNode: nodeDoc, } var ref *Reference @@ -28,6 +28,8 @@ func TestReferenceScheme(t *testing.T) { assert.Equals(t, "http reference", ref.Scheme(), "http") ref = NewReference(&doc, nodeElem, "https://test.com") assert.Equals(t, "https reference", ref.Scheme(), "https") + ref = NewReference(&doc, nodeElem, "//test.com") + assert.Equals(t, "https reference", ref.Scheme(), "https") ref = NewReference(&doc, nodeElem, "https://photos.smugmug.com/photos/i-CNHsHLM/0/440x622/i-CNHsHLM-440x622.jpg") assert.Equals(t, "http reference", ref.Scheme(), "https") @@ -53,13 +55,19 @@ func TestReferenceURLString(t *testing.T) { doc := Document{ SitePath: "doc.html", - HTMLNode: nodeDoc, + htmlNode: nodeDoc, } var ref *Reference - ref = NewReference(&doc, nodeElem, "google.com") - assert.Equals(t, "URLString", ref.URLString(), "google.com") + ref = NewReference(&doc, nodeElem, "http://example.com") + assert.Equals(t, "URLString", ref.URLString(), "http://example.com") + ref = NewReference(&doc, nodeElem, "http://example.com/") + assert.Equals(t, "URLString", ref.URLString(), "http://example.com/") + ref = NewReference(&doc, nodeElem, "https://example.com") + assert.Equals(t, "URLString", ref.URLString(), "https://example.com") + ref = NewReference(&doc, nodeElem, "//example.com") + assert.Equals(t, "URLString", ref.URLString(), "https://example.com") } @@ -69,7 +77,7 @@ func TestReferenceIsInternalAbsolute(t *testing.T) { doc := Document{ SitePath: "doc.html", - HTMLNode: nodeDoc, + htmlNode: nodeDoc, } var ref *Reference @@ -93,21 +101,21 @@ func TestReferenceAbsolutePath(t *testing.T) { doc := Document{ SitePath: "doc.html", Directory: "directory/subdir", - HTMLNode: nodeDoc, + htmlNode: nodeDoc, } var ref *Reference ref = NewReference(&doc, nodeElem, "/abc/page.html") - assert.Equals(t, "internal absolute reference", ref.AbsolutePath(), "/abc/page.html") + assert.Equals(t, "internal absolute reference", ref.RefSitePath(), "/abc/page.html") ref = NewReference(&doc, nodeElem, "/yyz") - assert.Equals(t, "internal absolute reference", ref.AbsolutePath(), "/yyz") + assert.Equals(t, "internal absolute reference", ref.RefSitePath(), "/yyz") ref = NewReference(&doc, nodeElem, "zzy") - assert.Equals(t, "internal relative reference", ref.AbsolutePath(), "directory/subdir/zzy") + assert.Equals(t, "internal relative reference", ref.RefSitePath(), "directory/subdir/zzy") ref = NewReference(&doc, nodeElem, "zzy/uup.jjr") - assert.Equals(t, "internal relative reference", ref.AbsolutePath(), "directory/subdir/zzy/uup.jjr") + assert.Equals(t, "internal relative reference", ref.RefSitePath(), "directory/subdir/zzy/uup.jjr") ref = NewReference(&doc, nodeElem, "./zzy/uup.jjr") - assert.Equals(t, "internal relative reference", ref.AbsolutePath(), "directory/subdir/zzy/uup.jjr") + assert.Equals(t, "internal relative reference", ref.RefSitePath(), "directory/subdir/zzy/uup.jjr") } func TestURLStripQueryString(t *testing.T) { diff --git a/htmltest/attr_test.go b/htmltest/attr_test.go deleted file mode 100644 index a61a193..0000000 --- a/htmltest/attr_test.go +++ /dev/null @@ -1,29 +0,0 @@ -package htmltest - -import ( - "github.com/daviddengcn/go-assert" - "golang.org/x/net/html" - "strings" - "testing" -) - -func TestExtractAttrs(t *testing.T) { - snip := "\"y\"" - nodeDoc, _ := html.Parse(strings.NewReader(snip)) - nodeImg := nodeDoc.FirstChild.FirstChild.NextSibling.FirstChild - attrs := extractAttrs(nodeImg.Attr, []string{"src", "alt"}) - - assert.Equals(t, "src", attrs["src"], "x") - assert.Equals(t, "alt", attrs["alt"], "y") - assert.NotEquals(t, "foo", attrs["foo"], "bar") -} - -func TestAttrPresent(t *testing.T) { - snip := "\"y\"" - nodeDoc, _ := html.Parse(strings.NewReader(snip)) - nodeImg := nodeDoc.FirstChild.FirstChild.NextSibling.FirstChild - - assert.Equals(t, "src in attr", attrPresent(nodeImg.Attr, "src"), true) - assert.Equals(t, "alt in attr", attrPresent(nodeImg.Attr, "src"), true) - assert.NotEquals(t, "foo in attr", attrPresent(nodeImg.Attr, "src"), false) -} diff --git a/htmltest/check-img.go b/htmltest/check-img.go index d81338e..02e7281 100644 --- a/htmltest/check-img.go +++ b/htmltest/check-img.go @@ -8,11 +8,11 @@ import ( ) func (hT *HtmlTest) checkImg(document *htmldoc.Document, node *html.Node) { - attrs := extractAttrs(node.Attr, + attrs := htmldoc.ExtractAttrs(node.Attr, []string{"src", "alt", hT.opts.IgnoreTagAttribute}) // Ignore if data-proofer-ignore set - if attrPresent(node.Attr, hT.opts.IgnoreTagAttribute) { + if htmldoc.AttrPresent(node.Attr, hT.opts.IgnoreTagAttribute) { return } @@ -20,13 +20,13 @@ func (hT *HtmlTest) checkImg(document *htmldoc.Document, node *html.Node) { ref := htmldoc.NewReference(document, node, attrs["src"]) // Check alt present, fail if absent unless asked to ignore - if !attrPresent(node.Attr, "alt") && !hT.opts.IgnoreAltMissing { + if !htmldoc.AttrPresent(node.Attr, "alt") && !hT.opts.IgnoreAltMissing { hT.issueStore.AddIssue(issues.Issue{ Level: issues.ERROR, Message: "alt attribute missing", Reference: ref, }) - } else if attrPresent(node.Attr, "alt") { + } else if htmldoc.AttrPresent(node.Attr, "alt") { // Following checks require alt attr is present if len(attrs["alt"]) == 0 { // Check alt has length, fail if empty @@ -47,7 +47,7 @@ func (hT *HtmlTest) checkImg(document *htmldoc.Document, node *html.Node) { } // Check src present, fail if absent - if !attrPresent(node.Attr, "src") { + if !htmldoc.AttrPresent(node.Attr, "src") { hT.issueStore.AddIssue(issues.Issue{ Level: issues.ERROR, Message: "src attribute missing", diff --git a/htmltest/check-img_test.go b/htmltest/check-img_test.go index 3133847..5e44fb9 100644 --- a/htmltest/check-img_test.go +++ b/htmltest/check-img_test.go @@ -7,12 +7,14 @@ import ( func TestImageExternalWorking(t *testing.T) { // passes for existing external images + t_SkipShortExternal(t) hT := t_testFile("fixtures/images/existingImageExternal.html") t_expectIssueCount(t, hT, 0) } func TestImageExternalMissing(t *testing.T) { // fails for missing external images + t_SkipShortExternal(t) hT := t_testFile("fixtures/images/missingImageExternal.html") t_expectIssueCount(t, hT, 1) // Issue contains "no such host" @@ -21,12 +23,14 @@ func TestImageExternalMissing(t *testing.T) { func TestImageExternalMissingProtocolValid(t *testing.T) { // works for valid images missing the protocol + t_SkipShortExternal(t) hT := t_testFile("fixtures/images/image_missing_protocol_valid.html") t_expectIssueCount(t, hT, 0) } func TestImageExternalMissingProtocolInvalid(t *testing.T) { // fails for invalid images missing the protocol + t_SkipShortExternal(t) hT := t_testFile("fixtures/images/image_missing_protocol_invalid.html") t_expectIssueCount(t, hT, 1) // t_expectIssue(t, hT, message, 1) @@ -34,12 +38,14 @@ func TestImageExternalMissingProtocolInvalid(t *testing.T) { func TestImageExternalInsecureDefault(t *testing.T) { // passes for HTTP images by default + t_SkipShortExternal(t) hT := t_testFile("fixtures/images/src_http.html") t_expectIssueCount(t, hT, 0) } func TestImageExternalInsecureOption(t *testing.T) { // fails for HTTP images when asked + t_SkipShortExternal(t) hT := t_testFileOpts("fixtures/images/src_http.html", map[string]interface{}{"EnforceHTTPS": true}) t_expectIssueCount(t, hT, 1) @@ -107,6 +113,7 @@ func TestImageSrcEmpty(t *testing.T) { func TestImageSrcLineBreaks(t *testing.T) { // deals with linebreaks in src + t_SkipShortExternal(t) // TODO use internal images hT := t_testFile("fixtures/images/lineBreaks.html") t_expectIssueCount(t, hT, 0) } diff --git a/htmltest/check-link.go b/htmltest/check-link.go index a600a8a..f37841d 100644 --- a/htmltest/check-link.go +++ b/htmltest/check-link.go @@ -13,16 +13,16 @@ import ( ) func (hT *HtmlTest) checkLink(document *htmldoc.Document, node *html.Node) { - attrs := extractAttrs(node.Attr, + attrs := htmldoc.ExtractAttrs(node.Attr, []string{"href", "rel", hT.opts.IgnoreTagAttribute}) // Ignore if data-proofer-ignore set - if attrPresent(node.Attr, hT.opts.IgnoreTagAttribute) { + if htmldoc.AttrPresent(node.Attr, hT.opts.IgnoreTagAttribute) { return } // Check if favicon - if attrPresent(node.Attr, "rel") && + if htmldoc.AttrPresent(node.Attr, "rel") && (attrs["rel"] == "icon" || attrs["rel"] == "shortcut icon") && node.Parent.Data == "head" { document.State.FaviconPresent = true @@ -32,7 +32,7 @@ func (hT *HtmlTest) checkLink(document *htmldoc.Document, node *html.Node) { ref := htmldoc.NewReference(document, node, attrs["href"]) // Check for missing href, fail for link nodes - if !attrPresent(node.Attr, "href") { + if !htmldoc.AttrPresent(node.Attr, "href") { switch node.Data { case "a": hT.issueStore.AddIssue(issues.Issue{ @@ -86,6 +86,8 @@ func (hT *HtmlTest) checkLink(document *htmldoc.Document, node *html.Node) { hT.checkExternal(ref) case "file": hT.checkInternal(ref) + case "self": + hT.checkInternalHash(ref) case "mailto": hT.checkMailto(ref) case "tel": @@ -103,7 +105,7 @@ func (hT *HtmlTest) checkExternal(ref *htmldoc.Reference) { if !hT.opts.CheckExternal { hT.issueStore.AddIssue(issues.Issue{ Level: issues.DEBUG, - Message: "skipping", + Message: "skipping external check", Reference: ref, }) return @@ -208,7 +210,7 @@ func (hT *HtmlTest) checkExternal(ref *htmldoc.Reference) { Reference: ref, }) default: - attrs := extractAttrs(ref.Node.Attr, []string{"rel"}) + attrs := htmldoc.ExtractAttrs(ref.Node.Attr, []string{"rel"}) if attrs["rel"] == "canonical" && hT.opts.IgnoreCanonicalBrokenLinks { hT.issueStore.AddIssue(issues.Issue{ Level: issues.WARNING, @@ -231,41 +233,96 @@ func (hT *HtmlTest) checkInternal(ref *htmldoc.Reference) { if !hT.opts.CheckInternal { hT.issueStore.AddIssue(issues.Issue{ Level: issues.DEBUG, - Message: "skipping", + Message: "skipping internal check", Reference: ref, }) return } - // Resolve a filesystem path for reference - refOsPath := path.Join(hT.opts.DirectoryPath, ref.AbsolutePath()) - hT.checkFile(ref, refOsPath) + + // First lookup in document store, + refDoc, refExists := hT.documentStore.ResolveRef(ref) + + if refExists { + // If path doesn't end in slash and the resolved ref is an index.html, complain + if ref.URL.Path[len(ref.URL.Path)-1] != '/' && path.Base(refDoc.SitePath) == hT.opts.DirectoryIndex { + hT.issueStore.AddIssue(issues.Issue{ + Level: issues.ERROR, + Message: "target is a directory, href lacks trailing slash", + Reference: ref, + }) + } + } else { + // If that fails attempt to lookup with filesystem, resolve a path and check + refOsPath := path.Join(hT.opts.DirectoryPath, ref.RefSitePath()) + hT.checkFile(ref, refOsPath) + } + + if len(ref.URL.Fragment) > 0 { + // Is also a hash link + hT.checkInternalHash(ref) + } } -func (hT *HtmlTest) checkFile(ref *htmldoc.Reference, absPath string) { - f, err := os.Stat(absPath) - if os.IsNotExist(err) { +func (hT *HtmlTest) checkInternalHash(ref *htmldoc.Reference) { + if !hT.opts.CheckInternalHash { hT.issueStore.AddIssue(issues.Issue{ - Level: issues.ERROR, - Message: "target does not exist", + Level: issues.DEBUG, + Message: "skipping hash check", Reference: ref, }) return } - checkErr(err) // Crash on other errors - if f.IsDir() { - if !strings.HasSuffix(ref.URL.Path, "/") && !hT.opts.IgnoreDirectoryMissingTrailingSlash { + // var refDoc *htmldoc.Document + if len(ref.URL.Fragment) == 0 { + hT.issueStore.AddIssue(issues.Issue{ + Level: issues.ERROR, + Message: "missing hash", + Reference: ref, + }) + } + + if len(ref.URL.Path) > 0 { + // internal + refDoc, _ := hT.documentStore.ResolveRef(ref) + if !refDoc.IsHashValid(ref.URL.Fragment) { hT.issueStore.AddIssue(issues.Issue{ Level: issues.ERROR, - Message: "target is a directory, href lacks trailing slash", + Message: "hash does not exist", Reference: ref, }) - return } + } else { + // self + if !ref.Document.IsHashValid(ref.URL.Fragment) { + hT.issueStore.AddIssue(issues.Issue{ + Level: issues.ERROR, + Message: "hash does not exist", + Reference: ref, + }) + } + } +} - hT.checkFile(ref, path.Join(absPath, hT.opts.DirectoryIndex)) +func (hT *HtmlTest) checkFile(ref *htmldoc.Reference, absPath string) { + f, err := os.Stat(absPath) + if os.IsNotExist(err) { + hT.issueStore.AddIssue(issues.Issue{ + Level: issues.ERROR, + Message: "target does not exist", + Reference: ref, + }) return } + checkErr(err) // Crash on other errors + + if f.IsDir() { + hT.issueStore.AddIssue(issues.Issue{ + Level: issues.ERROR, + Message: "target is a directory, no index", + Reference: ref, + }) + } } func (hT *HtmlTest) checkMailto(ref *htmldoc.Reference) { diff --git a/htmltest/check-link_test.go b/htmltest/check-link_test.go index 2432acc..82bd474 100644 --- a/htmltest/check-link_test.go +++ b/htmltest/check-link_test.go @@ -22,6 +22,7 @@ func TestAnchorIgnorable(t *testing.T) { func TestAnchorExternalBroken(t *testing.T) { // fails for broken external links + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/brokenLinkExternal.html") t_expectIssueCount(t, hT, 1) } @@ -35,6 +36,7 @@ func TestAnchorExternalIgnore(t *testing.T) { func TestAnchorExternalHashBrokenDefault(t *testing.T) { // passes for broken external hashes by default + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/brokenHashOnTheWeb.html") t_expectIssueCount(t, hT, 0) } @@ -42,6 +44,7 @@ func TestAnchorExternalHashBrokenDefault(t *testing.T) { func TestAnchorExternalHashBrokenOption(t *testing.T) { // fails for broken external hashes when asked t.Skip("Not yet implemented") + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/brokenHashOnTheWeb.html") t_expectIssueCount(t, hT, 1) t_expectIssue(t, hT, "no such hash", 1) @@ -51,24 +54,28 @@ func TestAnchorExternalCache(t *testing.T) { // does not check links with parameters multiple times // TODO check cache is being checked t.Skip("Not yet implemented") + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/check_just_once.html") t_expectIssueCount(t, hT, 0) } func TestAnchorExternalHrefMalformed(t *testing.T) { // does not explode on bad external links in files + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/bad_external_links.html") t_expectIssueCount(t, hT, 2) } func TestAnchorExternalInsecureDefault(t *testing.T) { // passes for non-HTTPS links when not asked + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/non_https.html") t_expectIssueCount(t, hT, 0) } func TestAnchorExternalInsecureOption(t *testing.T) { // fails for non-HTTPS links when asked + t_SkipShortExternal(t) hT := t_testFileOpts("fixtures/links/non_https.html", map[string]interface{}{"EnforceHTTPS": true}) t_expectIssueCount(t, hT, 1) @@ -77,12 +84,14 @@ func TestAnchorExternalInsecureOption(t *testing.T) { func TestAnchorExternalHrefIP(t *testing.T) { // fails for broken IP address links + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/ip_href.html") t_expectIssueCount(t, hT, 2) } func TestAnchorExternalHrefIPTimeout(t *testing.T) { // fails for broken IP address links + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/ip_timeout.html") t_expectIssueCount(t, hT, 1) t_expectIssue(t, hT, "request exceeded our ExternalTimeout", 1) @@ -91,6 +100,7 @@ func TestAnchorExternalHrefIPTimeout(t *testing.T) { func TestAnchorExternalFollowRedirects(t *testing.T) { // should follow redirects t.Skip("Need new link, times out") + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/linkWithRedirect.html") t_expectIssueCount(t, hT, 0) } @@ -98,6 +108,7 @@ func TestAnchorExternalFollowRedirects(t *testing.T) { func TestAnchorExternalFollowRedirectsDisabled(t *testing.T) { // fails on redirects if not following t.Skip("Not yet implemented, need new link, times out") + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/linkWithRedirect.html") t_expectIssueCount(t, hT, 99) t_expectIssue(t, hT, "PLACEHOLDER", 99) @@ -105,18 +116,21 @@ func TestAnchorExternalFollowRedirectsDisabled(t *testing.T) { func TestAnchorExternalHTTPS(t *testing.T) { // should understand https + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/linkWithHttps.html") t_expectIssueCount(t, hT, 0) } func TestAnchorExternalMissingProtocolValid(t *testing.T) { // works for valid links missing the protocol + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/link_missing_protocol_valid.html") t_expectIssueCount(t, hT, 0) } func TestAnchorExternalMissingProtocolInvalid(t *testing.T) { // fails for invalid links missing the protocol + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/link_missing_protocol_invalid.html") t_expectIssueCount(t, hT, 1) // t_expectIssue(t, hT, "no such host", 1) @@ -124,18 +138,21 @@ func TestAnchorExternalMissingProtocolInvalid(t *testing.T) { func TestLinkExternalHrefPipes(t *testing.T) { // works for pipes in the URL + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/escape_pipes.html") t_expectIssueCount(t, hT, 0) } func TestAnchorExternalHrefNonstandardChars(t *testing.T) { // passes non-standard characters + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/non_standard_characters.html") t_expectIssueCount(t, hT, 0) } func TestAnchorExternalHrefUTF8(t *testing.T) { // passes for external UTF-8 links + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/utf8Link.html") t_expectIssueCount(t, hT, 0) } @@ -161,12 +178,40 @@ func TestAnchorInternalRelativeLinksBase(t *testing.T) { t_expectIssueCount(t, hT, 0) } -func TestAnchorInternalHashBroken(t *testing.T) { +func TestAnchorHashInternalValid(t *testing.T) { + // passes for valid internal hash + hT := t_testFile("fixtures/links/hashInternalOk.html") + t_expectIssueCount(t, hT, 0) +} + +func TestAnchorHashInternalBroken(t *testing.T) { // fails for broken internal hash - t.Skip("Not yet implemented") - hT := t_testFile("fixtures/links/brokenHashInternal.html") - t_expectIssueCount(t, hT, 99) - t_expectIssue(t, hT, "PLACEHOLDER", 99) + hT := t_testFile("fixtures/links/hashInternalBroken.html") + t_expectIssueCount(t, hT, 1) + t_expectIssue(t, hT, "hash does not exist", 1) +} + +func TestAnchorHashSelfValid(t *testing.T) { + // passes for valid self hash + hT := t_testFile("fixtures/links/hashSelfOk.html") + t_expectIssueCount(t, hT, 0) +} + +func TestAnchorHashSelfBroken(t *testing.T) { + // fails for broken self hash + hT := t_testFile("fixtures/links/hashSelfBroken.html") + t_expectIssueCount(t, hT, 1) + t_expectIssue(t, hT, "hash does not exist", 1) +} + +func TestAnchorHashBrokenIgnore(t *testing.T) { + // fails for broken internal hash + hT1 := t_testFileOpts("fixtures/links/hashInternalBroken.html", + map[string]interface{}{"CheckInternalHash": false}) + hT2 := t_testFileOpts("fixtures/links/hashSelfBroken.html", + map[string]interface{}{"CheckInternalHash": false}) + t_expectIssueCount(t, hT1, 0) + t_expectIssueCount(t, hT2, 0) } func TestAnchorDirectoryRootResolve(t *testing.T) { @@ -186,7 +231,7 @@ func TestAnchorDirectoryCustomRootBroken(t *testing.T) { // fails if custom directory index file doesn't exist hT := t_testFile("fixtures/links/link_pointing_to_directory.html") t_expectIssueCount(t, hT, 1) - t_expectIssue(t, hT, "target does not exist", 1) + t_expectIssue(t, hT, "target is a directory, no index", 1) } func TestAnchorDirectoryNoTrailingSlash(t *testing.T) { @@ -196,7 +241,7 @@ func TestAnchorDirectoryNoTrailingSlash(t *testing.T) { t_expectIssue(t, hT, "target is a directory, href lacks trailing slash", 1) } -func TestAnchorDirectoryNoTrailingSlashQueryHash(t *testing.T) { +func TestAnchorDirectoryQueryHash(t *testing.T) { // fails for internal linking to a directory without trailing slash hT := t_testFile("fixtures/links/link_directory_with_slash_query_hash.html") t_expectIssueCount(t, hT, 0) @@ -208,6 +253,12 @@ func TestAnchorDirectoryHtmlExtension(t *testing.T) { t_expectIssueCount(t, hT, 0) } +func TestAnchorDirectoryWithEncodedCharacters(t *testing.T) { + // passes for folder with encoded characters + hT := t_testFile("fixtures/links/linkToFolderWithSpace.html") + t_expectIssueCount(t, hT, 0) +} + func TestAnchorInternalRootLink(t *testing.T) { // properly checks links to root hT := t_testFile("fixtures/links/rootLink/rootLink.html") @@ -239,7 +290,8 @@ func TestAnchorInternalDashedAttrs(t *testing.T) { } func TestAnchorInternalCaseMismatch(t *testing.T) { - // does not complain for internal links with mismatched cases + // does not complain for internal hash links with mismatched cases + t.Skip("Unsure on whether we should ignore case, pretty sure we shouldn't") hT := t_testFile("fixtures/links/ignores_cases.html") t_expectIssueCount(t, hT, 0) } @@ -348,12 +400,14 @@ func TestLinkHrefAbsent(t *testing.T) { func TestLinkHrefBrokenCanonicalDefault(t *testing.T) { // works for valid href within link elements + t_SkipShortExternal(t) hT := t_testFile("fixtures/links/brokenCanonicalLink.html") t_expectIssueCount(t, hT, 0) } func TestLinkHrefBrokenCanonicalOption(t *testing.T) { // works for valid href within link elements + t_SkipShortExternal(t) hT := t_testFileOpts("fixtures/links/brokenCanonicalLink.html", map[string]interface{}{"IgnoreCanonicalBrokenLinks": false}) t_expectIssueCount(t, hT, 1) diff --git a/htmltest/check-script.go b/htmltest/check-script.go index af26c52..d14000d 100644 --- a/htmltest/check-script.go +++ b/htmltest/check-script.go @@ -7,11 +7,11 @@ import ( ) func (hT *HtmlTest) checkScript(document *htmldoc.Document, node *html.Node) { - attrs := extractAttrs(node.Attr, + attrs := htmldoc.ExtractAttrs(node.Attr, []string{"src", hT.opts.IgnoreTagAttribute}) // Ignore if data-proofer-ignore set - if attrPresent(node.Attr, hT.opts.IgnoreTagAttribute) { + if htmldoc.AttrPresent(node.Attr, hT.opts.IgnoreTagAttribute) { return } @@ -19,7 +19,7 @@ func (hT *HtmlTest) checkScript(document *htmldoc.Document, node *html.Node) { ref := htmldoc.NewReference(document, node, attrs["src"]) // Check src problems - if attrPresent(node.Attr, "src") && len(attrs["src"]) == 0 { + if htmldoc.AttrPresent(node.Attr, "src") && len(attrs["src"]) == 0 { // Check src has length, fail if empty hT.issueStore.AddIssue(issues.Issue{ Level: issues.ERROR, @@ -30,7 +30,7 @@ func (hT *HtmlTest) checkScript(document *htmldoc.Document, node *html.Node) { } // Check invalid content - if !attrPresent(node.Attr, "src") && node.FirstChild == nil { + if !htmldoc.AttrPresent(node.Attr, "src") && node.FirstChild == nil { hT.issueStore.AddIssue(issues.Issue{ Level: issues.ERROR, Message: "script content missing / no src attribute", diff --git a/htmltest/check-script_test.go b/htmltest/check-script_test.go index 1382439..91400f6 100644 --- a/htmltest/check-script_test.go +++ b/htmltest/check-script_test.go @@ -8,12 +8,14 @@ import ( func TestScriptExternalSrcValid(t *testing.T) { // passes for valid external src + t_SkipShortExternal(t) hT := t_testFile("fixtures/scripts/script_valid_external.html") t_expectIssueCount(t, hT, 0) } func TestScriptExternalSrcBroken(t *testing.T) { // fails for broken external src + t_SkipShortExternal(t) hT := t_testFile("fixtures/scripts/script_broken_external.html") t_expectIssueCount(t, hT, 1) // t_expectIssue(t, hT, "no such host", 1) @@ -21,12 +23,14 @@ func TestScriptExternalSrcBroken(t *testing.T) { func TestScriptExternalInsecureDefault(t *testing.T) { // passes for HTTP scripts by default + t_SkipShortExternal(t) hT := t_testFile("fixtures/scripts/scriptInsecure.html") t_expectIssueCount(t, hT, 0) } func TestScriptExternalInsecureOption(t *testing.T) { // fails for HTTP scripts when asked + t_SkipShortExternal(t) hT := t_testFileOpts("fixtures/scripts/scriptInsecure.html", map[string]interface{}{"EnforceHTTPS": true}) t_expectIssueCount(t, hT, 1) diff --git a/htmltest/external_benchmark_test.go b/htmltest/external_benchmark_test.go index d770405..7879357 100644 --- a/htmltest/external_benchmark_test.go +++ b/htmltest/external_benchmark_test.go @@ -8,6 +8,6 @@ import ( func BenchmarkExternal(b *testing.B) { for i := 0; i < b.N; i++ { t_testDirectoryOpts("/home/will/local/history-project/_site/", - map[string]interface{}{"LogLevel": issues.NONE}) + map[string]interface{}{"LogLevel": issues.INFO, "CheckExternal": false}) } } diff --git a/htmltest/fixtures/images/ignorableAltViaOptions.html b/htmltest/fixtures/images/ignorableAltViaOptions.html index 35473d3..7b5a19a 100644 --- a/htmltest/fixtures/images/ignorableAltViaOptions.html +++ b/htmltest/fixtures/images/ignorableAltViaOptions.html @@ -5,9 +5,6 @@ Relative to self - -

Blah blah blah.

- diff --git a/htmltest/fixtures/links/anchors_in_pre.html b/htmltest/fixtures/links/anchors_in_pre.html index 69b8bd8..cdc9e98 100644 --- a/htmltest/fixtures/links/anchors_in_pre.html +++ b/htmltest/fixtures/links/anchors_in_pre.html @@ -1,4 +1,4 @@ -
+
 
   broken link!
 
diff --git a/htmltest/fixtures/links/attributeWithDash.html b/htmltest/fixtures/links/attributeWithDash.html index 40af45b..958fb2e 100644 --- a/htmltest/fixtures/links/attributeWithDash.html +++ b/htmltest/fixtures/links/attributeWithDash.html @@ -3,6 +3,6 @@ data-columns="3" data-index-number="12314" data-parent="cars" - href="https://www.github.com"> + href="links_in_pre.html"> Hullo diff --git a/htmltest/fixtures/links/brokenHashInternal.html b/htmltest/fixtures/links/brokenHashInternal.html index 3cbbc20..a08b124 100644 --- a/htmltest/fixtures/links/brokenHashInternal.html +++ b/htmltest/fixtures/links/brokenHashInternal.html @@ -3,7 +3,7 @@

Blah blah blah. Not a real hash!

-

Blah blah blah. A real link!

+

Blah blah blah. A real link!

Let's go to the header, though. @@ -11,4 +11,4 @@

Fake header.

- \ No newline at end of file + diff --git a/htmltest/fixtures/links/folder _withspace/index.html b/htmltest/fixtures/links/folder _withspace/index.html new file mode 100644 index 0000000..568e875 --- /dev/null +++ b/htmltest/fixtures/links/folder _withspace/index.html @@ -0,0 +1 @@ + diff --git a/htmltest/fixtures/links/folder/index.html b/htmltest/fixtures/links/folder/index.html index e69de29..568e875 100644 --- a/htmltest/fixtures/links/folder/index.html +++ b/htmltest/fixtures/links/folder/index.html @@ -0,0 +1 @@ + diff --git a/htmltest/fixtures/links/hashInternalBroken.html b/htmltest/fixtures/links/hashInternalBroken.html new file mode 100644 index 0000000..3ef6c2c --- /dev/null +++ b/htmltest/fixtures/links/hashInternalBroken.html @@ -0,0 +1 @@ +hashl diff --git a/htmltest/fixtures/links/hashInternalOk.html b/htmltest/fixtures/links/hashInternalOk.html new file mode 100644 index 0000000..e6004ef --- /dev/null +++ b/htmltest/fixtures/links/hashInternalOk.html @@ -0,0 +1 @@ +hashl diff --git a/htmltest/fixtures/links/hashSelfBroken.html b/htmltest/fixtures/links/hashSelfBroken.html new file mode 100644 index 0000000..e7001b6 --- /dev/null +++ b/htmltest/fixtures/links/hashSelfBroken.html @@ -0,0 +1,3 @@ +hashl + +

diff --git a/htmltest/fixtures/links/hashSelfOk.html b/htmltest/fixtures/links/hashSelfOk.html new file mode 100644 index 0000000..ba759e5 --- /dev/null +++ b/htmltest/fixtures/links/hashSelfOk.html @@ -0,0 +1,3 @@ +hashl + +

diff --git a/htmltest/fixtures/links/linkToFolderWithSpace.html b/htmltest/fixtures/links/linkToFolderWithSpace.html new file mode 100644 index 0000000..6d3a6f5 --- /dev/null +++ b/htmltest/fixtures/links/linkToFolderWithSpace.html @@ -0,0 +1,2 @@ + + diff --git a/htmltest/fixtures/links/nodupe.html b/htmltest/fixtures/links/nodupe.html index c336bcd..708d9b2 100644 --- a/htmltest/fixtures/links/nodupe.html +++ b/htmltest/fixtures/links/nodupe.html @@ -1 +1 @@ -add ssh +add ssh diff --git a/htmltest/htmltest.go b/htmltest/htmltest.go index 4bd8cfe..9c14799 100644 --- a/htmltest/htmltest.go +++ b/htmltest/htmltest.go @@ -1,23 +1,24 @@ package htmltest import ( + "fmt" "github.com/wjdp/htmltest/htmldoc" "github.com/wjdp/htmltest/issues" "github.com/wjdp/htmltest/refcache" - "golang.org/x/net/html" "net/http" + "os" "path" "sync" "time" ) type HtmlTest struct { - opts Options - httpClient *http.Client - httpChannel chan bool - documents []htmldoc.Document - issueStore issues.IssueStore - refCache *refcache.RefCache + opts Options + httpClient *http.Client + httpChannel chan bool + documentStore htmldoc.DocumentStore + issueStore issues.IssueStore + refCache *refcache.RefCache } func Test(optsUser map[string]interface{}) *HtmlTest { @@ -52,23 +53,31 @@ func Test(optsUser map[string]interface{}) *HtmlTest { return &hT } + // Init our document store + hT.documentStore = htmldoc.NewDocumentStore() + // Setup document store + hT.documentStore.BasePath = hT.opts.DirectoryPath + hT.documentStore.DocumentExtension = ".html" // TODO add option + hT.documentStore.DirectoryIndex = hT.opts.DirectoryIndex + hT.documentStore.IgnorePatterns = hT.opts.IgnoreDirs + // Discover documents + hT.documentStore.Discover() + if hT.opts.FilePath != "" { // Single document mode - doc := htmldoc.Document{ - FilePath: path.Join(hT.opts.DirectoryPath, hT.opts.FilePath), - SitePath: hT.opts.FilePath, + doc, ok := hT.documentStore.ResolvePath(hT.opts.FilePath) + if !ok { + fmt.Println("Could not find document", hT.opts.FilePath, "in", hT.opts.DirectoryPath) + os.Exit(1) } - hT.documents = []htmldoc.Document{doc} + hT.testDocument(doc) } else if hT.opts.DirectoryPath != "" { - // Directory mode - hT.documents = htmldoc.DocumentsFromDir( - hT.opts.DirectoryPath, hT.opts.IgnoreDirs) + // Test documents + hT.testDocuments() } else { panic("Neither file or directory path provided") } - hT.testDocuments() - if hT.opts.EnableCache { hT.refCache.WriteStore(cachePath) } @@ -88,31 +97,26 @@ func (hT *HtmlTest) testDocuments() { var wg sync.WaitGroup // Make buffered channel to act as concurrency limiter var concChannel = make(chan bool, hT.opts.DocumentConcurrencyLimit) - for _, document := range hT.documents { + for _, document := range hT.documentStore.Documents { wg.Add(1) concChannel <- true // Add to concurrency limiter - go func(document htmldoc.Document) { + go func(document *htmldoc.Document) { defer wg.Done() - hT.testDocument(&document) + hT.testDocument(document) <-concChannel // Bump off concurrency limiter }(document) } wg.Wait() } else { - for _, document := range hT.documents { - hT.testDocument(&document) + for _, document := range hT.documentStore.Documents { + hT.testDocument(document) } } } func (hT *HtmlTest) testDocument(document *htmldoc.Document) { document.Parse() - hT.parseNode(document, document.HTMLNode) - hT.postChecks(document) -} - -func (hT *HtmlTest) parseNode(document *htmldoc.Document, n *html.Node) { - if n.Type == html.ElementNode { + for _, n := range document.NodesOfInterest { switch n.Data { case "a": if hT.opts.CheckAnchors { @@ -130,16 +134,9 @@ func (hT *HtmlTest) parseNode(document *htmldoc.Document, n *html.Node) { if hT.opts.CheckScripts { hT.checkScript(document, n) } - case "pre": - return // Everything within a pre is not to be interpreted - case "code": - return // Everything within a code is not to be interpreted } } - // Iterate over children - for c := n.FirstChild; c != nil; c = c.NextSibling { - hT.parseNode(document, c) - } + hT.postChecks(document) } func (hT *HtmlTest) postChecks(document *htmldoc.Document) { diff --git a/htmltest/htmltest_test.go b/htmltest/htmltest_test.go index 119e017..2e79373 100644 --- a/htmltest/htmltest_test.go +++ b/htmltest/htmltest_test.go @@ -37,6 +37,7 @@ func TestHTML5Page(t *testing.T) { func TestNormalLookingPage(t *testing.T) { // Page containing HTML5 tags + t_SkipShortExternal(t) hT := t_testFile("fixtures/html/normal_looking_page.html") t_expectIssueCount(t, hT, 0) } @@ -53,7 +54,8 @@ func TestCacheIntegration(t *testing.T) { } func TestConcurrencyDirExternals(t *testing.T) { + t_SkipShortExternal(t) hT := t_testDirectoryOpts("fixtures/concurrency/manyBrokenExt", - map[string]interface{}{"TestFilesConcurrently": true, "LogLevel": 1}) + map[string]interface{}{"TestFilesConcurrently": true}) // "LogLevel": 1 t_expectIssueCount(t, hT, 26) } diff --git a/htmltest/options.go b/htmltest/options.go index 6af59f3..1ddbbeb 100644 --- a/htmltest/options.go +++ b/htmltest/options.go @@ -17,12 +17,13 @@ type Options struct { CheckImages bool CheckScripts bool - CheckExternal bool - CheckInternal bool - CheckMailto bool - CheckTel bool - CheckFavicon bool - EnforceHTTPS bool + CheckExternal bool + CheckInternal bool + CheckInternalHash bool + CheckMailto bool + CheckTel bool + CheckFavicon bool + EnforceHTTPS bool IgnoreURLs []interface{} IgnoreDirs []interface{} @@ -63,12 +64,13 @@ func DefaultOptions() map[string]interface{} { "CheckImages": true, "CheckScripts": true, - "CheckExternal": true, - "CheckInternal": true, - "CheckMailto": true, - "CheckTel": true, - "CheckFavicon": false, - "EnforceHTTPS": false, + "CheckExternal": true, + "CheckInternal": true, + "CheckInternalHash": true, + "CheckMailto": true, + "CheckTel": true, + "CheckFavicon": false, + "EnforceHTTPS": false, "IgnoreURLs": []interface{}{}, "IgnoreDirs": []interface{}{}, diff --git a/htmltest/test_helpers_test.go b/htmltest/test_helpers_test.go index 3fa12b3..992a48c 100644 --- a/htmltest/test_helpers_test.go +++ b/htmltest/test_helpers_test.go @@ -17,6 +17,7 @@ const t_ExternalTimeout int = 3 func t_expectIssue(t *testing.T, hT *HtmlTest, message string, expected int) { c := hT.issueStore.MessageMatchCount(message) if c != expected { + hT.issueStore.DumpIssues(true) t.Error("expected issue", message, "count", expected, "!=", c) } } @@ -24,6 +25,7 @@ func t_expectIssue(t *testing.T, hT *HtmlTest, message string, expected int) { func t_expectIssueCount(t *testing.T, hT *HtmlTest, expected int) { c := hT.issueStore.Count(issues.ERROR) if c != expected { + hT.issueStore.DumpIssues(true) t.Error("expected", expected, "issues,", c, "found") } } @@ -75,3 +77,9 @@ func t_testDirectoryOpts(filename string, t_opts map[string]interface{}) *HtmlTe mergo.MergeWithOverwrite(&opts, t_opts) return Test(opts) } + +func t_SkipShortExternal(t *testing.T) { + if testing.Short() { + t.Skip("skipping test requiring network calls in short mode") + } +} diff --git a/issues/issue.go b/issues/issue.go index 73b5105..de16ea3 100644 --- a/issues/issue.go +++ b/issues/issue.go @@ -47,8 +47,8 @@ func (issue *Issue) text() string { issue.secondary()) } -func (issue *Issue) print() { - if issue.Level < issue.store.LogLevel { +func (issue *Issue) print(force bool) { + if (issue.Level < issue.store.LogLevel) && !force { return } diff --git a/issues/issuestore.go b/issues/issue_store.go similarity index 84% rename from issues/issuestore.go rename to issues/issue_store.go index 2b1c67b..0c5b05f 100644 --- a/issues/issuestore.go +++ b/issues/issue_store.go @@ -1,6 +1,7 @@ package issues import ( + "fmt" "io/ioutil" "strings" "sync" @@ -25,7 +26,7 @@ func (iS *IssueStore) AddIssue(issue Issue) { issue.store = iS // Set ref to issue store in issue iS.writeMutex.Lock() iS.issues = append(iS.issues, issue) - issue.print() + issue.print(false) if issue.Level >= iS.LogLevel { // Build byte slice to write out at the end iS.byteLog = append(iS.byteLog, []byte(issue.text()+"\n")...) @@ -60,3 +61,11 @@ func (iS *IssueStore) WriteLog(path string) { panic(err) } } + +func (iS *IssueStore) DumpIssues(force bool) { + fmt.Println("<<<<<<<<<<<<<<<<<<<<<<<<") + for _, issue := range iS.issues { + issue.print(force) + } + fmt.Println(">>>>>>>>>>>>>>>>>>>>>>>>") +} diff --git a/issues/issuestore_test.go b/issues/issue_store_test.go similarity index 55% rename from issues/issuestore_test.go rename to issues/issue_store_test.go index 6d65d04..335ba8c 100644 --- a/issues/issuestore_test.go +++ b/issues/issue_store_test.go @@ -2,6 +2,9 @@ package issues import ( "github.com/daviddengcn/go-assert" + "io/ioutil" + "os" + "strings" "testing" ) @@ -41,3 +44,48 @@ func TestIssueStoreMessageMatchCount(t *testing.T) { assert.Equals(t, "issue message match count", iS.MessageMatchCount("notice"), 1) } + +func TestIssueStoreWriteLog(t *testing.T) { + // passes for log written using LogLevel + LOGFILE := "issue-store-test.log" + iS := NewIssueStore(ERROR) + issue1 := Issue{ + Level: ERROR, + Message: "test1", + } + iS.AddIssue(issue1) + issue2 := Issue{ + Level: WARNING, + Message: "test2", + } + iS.AddIssue(issue2) + + iS.WriteLog(LOGFILE) + logBytes, err := ioutil.ReadFile(LOGFILE) + assert.Equals(t, "file error", err, nil) + logString := string(logBytes) + + assert.IsTrue(t, "log contents", strings.Contains( + logString, "test1 --- --> ")) + assert.IsFalse(t, "log contents", strings.Contains( + logString, "test2 --- --> ")) + + removeErr := os.Remove(LOGFILE) + assert.Equals(t, "file error", removeErr, nil) + +} + +func ExampleIssueStoreDumpIssues() { + // Passes for dumping all issues, ignoring LogLevel + iS := NewIssueStore(NONE) + issue := Issue{ + Level: ERROR, + Message: "test1", + } + iS.AddIssue(issue) + iS.DumpIssues(true) + // Output: + // <<<<<<<<<<<<<<<<<<<<<<<< + // test1 --- --> + // >>>>>>>>>>>>>>>>>>>>>>>> +} diff --git a/issues/issue_test.go b/issues/issue_test.go index 742ffbd..f68ea2c 100644 --- a/issues/issue_test.go +++ b/issues/issue_test.go @@ -40,7 +40,7 @@ func TestIssueSecondary(t *testing.T) { assert.Equals(t, "issue1 secondary", issue1.secondary(), "http://example.com") } -func ExampleIssuePrint() { +func ExampleIssuePrintLogLevel() { doc := htmldoc.Document{ SitePath: "dir/doc.html", } @@ -59,7 +59,7 @@ func ExampleIssuePrint() { store: &issueStore, Message: "test1", } - issue1.print() + issue1.print(false) issue2 := Issue{ Level: WARNING, @@ -67,7 +67,7 @@ func ExampleIssuePrint() { store: &issueStore, Message: "test2", } - issue2.print() + issue2.print(false) issue3 := Issue{ Level: INFO, @@ -75,10 +75,63 @@ func ExampleIssuePrint() { store: &issueStore, Message: "test3", } - issue3.print() + issue3.print(false) // Output: // test1 --- dir/doc.html --> // test2 --- dir/doc.html --> http://example.com } + +func ExampleIssuePrintLogAll() { + doc := htmldoc.Document{ + SitePath: "dir/doc.html", + } + ref := htmldoc.Reference{ + Document: &doc, + Path: "http://example.com", + } + + issueStore := IssueStore{ + LogLevel: DEBUG, + } + + issue1 := Issue{ + Level: ERROR, + Document: &doc, + store: &issueStore, + Message: "test1", + } + issue1.print(false) + + issue2 := Issue{ + Level: WARNING, + Reference: &ref, + store: &issueStore, + Message: "test2", + } + issue2.print(false) + + issue3 := Issue{ + Level: INFO, + Document: &doc, + store: &issueStore, + Message: "test3", + } + issue3.print(false) + + issue4 := Issue{ + Level: DEBUG, + Document: &doc, + store: &issueStore, + Message: "test4", + } + issue4.print(false) + + // Output: + // test1 --- dir/doc.html --> + // test2 --- dir/doc.html --> http://example.com + // test3 --- dir/doc.html --> + // test4 --- dir/doc.html --> + +}