Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Limit per-host concurrency and add retries #217

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 51 additions & 49 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/fatih/color v1.10.0
github.com/golangplus/bytes v1.0.0 // indirect
github.com/golangplus/sort v1.0.0 // indirect
github.com/hashicorp/go-retryablehttp v0.7.4 // indirect
github.com/imdario/mergo v0.3.11
github.com/seborama/govcr v4.5.0+incompatible // indirect
golang.org/x/net v0.0.0-20201224014010-6772e930b67b
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
github.com/badoux/checkmail v1.2.1 h1:TzwYx5pnsV6anJweMx2auXdekBwGr/yt1GgalIx9nBQ=
github.com/badoux/checkmail v1.2.1/go.mod h1:XroCOBU5zzZJcLvgwU15I+2xXyCdTWXyR9MGfRhBYy0=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/daviddengcn/go-algs v0.0.0-20180330170136-fe23fabd9d06 h1:jEHTltplMBbYsHlSnvZD1J4CsfweUSJIqS8uP56q1Ng=
github.com/daviddengcn/go-algs v0.0.0-20180330170136-fe23fabd9d06/go.mod h1:CpyLopUWBmqupyWU6OlSfrzgIuzNq0R6DXzM74O9RMs=
github.com/daviddengcn/go-assert v0.0.0-20150305222929-ba7e68aeeff6 h1:OPIYL/VhQiSpoaxIcmeYdghLswBylfk6JDVCUqadXxg=
Expand All @@ -19,14 +20,21 @@ github.com/golangplus/sort v1.0.0 h1:nvzfdNN8GNWv7iZ6PENJBmXE+r7OGv4tqbdjjNAZKXE
github.com/golangplus/sort v1.0.0/go.mod h1:ixQX/WLtGd0UQxcO9cWrtAv0daVnQpo4KfqBUIassNM=
github.com/golangplus/testing v1.0.0 h1:+ZeeiKZENNOMkTTELoSySazi+XaEhVO0mb+eanrSEUQ=
github.com/golangplus/testing v1.0.0/go.mod h1:ZDreixUV3YzhoVraIDyOzHrr76p6NUh6k/pPg/Q3gYA=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-retryablehttp v0.7.4 h1:ZQgVdpTdAL7WpMIwLzCfbalOcSUdkDZnpUv3/+BxzFA=
github.com/hashicorp/go-retryablehttp v0.7.4/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8=
github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA=
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/seborama/govcr v4.5.0+incompatible h1:XvdHtXi0d4cUAn+0aWolvwfS3nmhNC8Z+yMQwn/M64I=
github.com/seborama/govcr v4.5.0+incompatible/go.mod h1:EgcISudCCYDLzbiAImJ8i7kk4+wTA44Kp+j4S0LhASI=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b h1:iFwSg7t5GZmB/Q5TjiEAsdoLDrdJRC1RiF2WhuV29Qw=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
71 changes: 50 additions & 21 deletions htmltest/check-link.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@ package htmltest

import (
"crypto/x509"
"errors"
"fmt"
"net/http"
"net/url"
"os"
"path"
"strings"
"sync"

"github.com/badoux/checkmail"
"github.com/hashicorp/go-retryablehttp"
"github.com/wjdp/htmltest/htmldoc"
"github.com/wjdp/htmltest/issues"
"github.com/wjdp/htmltest/output"
Expand Down Expand Up @@ -169,7 +172,7 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) {
})

// Build the request
req, err := http.NewRequest("GET", urlStr, nil)
req, err := retryablehttp.NewRequest("GET", urlStr, nil)
// Only error NewRequest raises is if the url isn't valid, we have already checked it by this point so OK just
// to panic if err != nil.
output.CheckErrorPanic(err)
Expand All @@ -184,17 +187,7 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) {
req.Header.Set(fmt.Sprintf("%v", key), fmt.Sprintf("%v", value))
}

hT.httpChannel <- true // Add to http concurrency limiter

hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelInfo,
Message: "hitting",
Reference: ref,
})

resp, err := hT.httpClient.Do(req)

<-hT.httpChannel // Bump off http concurrency limiter
resp, err := hT.getReq(req, ref)

if err != nil {
if strings.Contains(err.Error(), "Client.Timeout") {
Expand All @@ -206,15 +199,18 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) {
return
}

if certErr, ok := err.(*url.Error).Err.(x509.UnknownAuthorityError); ok {
err = validateCertChain(certErr.Cert)
if err == nil {
hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelWarning,
Reference: ref,
Message: "incomplete certificate chain",
})
return
var urlError *url.Error
if errors.As(err, &urlError) {
if certErr, ok := urlError.Err.(x509.UnknownAuthorityError); ok {
err = validateCertChain(certErr.Cert)
if err == nil {
hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelWarning,
Reference: ref,
Message: "incomplete certificate chain",
})
return
}
}
}

Expand Down Expand Up @@ -280,6 +276,39 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) {
// TODO check a hash id exists in external page if present in reference (URL.Fragment)
}

var hostChannelsLock sync.Mutex // Lock used to prevent concurrent updates to hostChannels
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially added this as a member of HTMLTest but that in turn required setRedirectLimitCheck() to be modified by passing a reference instead of a value; this broke redirection tests by increasing the number of issues found from 1 to 2.

Making this a global is slightly suboptimal, but seemed to be a reasonable compromise. Happy to move it back into HTMLTest if that's a better spot for it to live.


func (hT *HTMLTest) getReq(req *retryablehttp.Request, ref *htmldoc.Reference) (*http.Response, error) {
// Limit the number of concurrent requests to a single host
hostChannelsLock.Lock()
ch, ok := hT.hostChannels[req.URL.Host]
if !ok {
// Haven't seen this host before, need to create a limiter
ch = make(chan bool, hT.opts.HTTPHostConcurrencyLimit)
hT.hostChannels[req.URL.Host] = ch
}
hostChannelsLock.Unlock()

ch <- true // Add to host concurrency limiter
defer func() {
<-ch // Bump off host concurrency limiter
}()

// Limit the total number of concurrent requests too
hT.httpChannel <- true // Add to http concurrency limiter
defer func() {
<-hT.httpChannel // Bump off http concurrency limiter
}()

hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelInfo,
Message: "hitting",
Reference: ref,
})

return hT.httpClient.Do(req)
}

func (hT *HTMLTest) checkInternal(ref *htmldoc.Reference) {
if !hT.opts.CheckInternal {
hT.issueStore.AddIssue(issues.Issue{
Expand Down
Loading