diff --git a/csv.go b/csv.go index 0581b50..f63846e 100644 --- a/csv.go +++ b/csv.go @@ -63,7 +63,7 @@ func csvMain(_ *cobra.Command, args []string) error { } else { glog.Errorf("Error identifying license in %q: %v", lib.LicensePath, err) } - url, err := lib.FileURL(context.Background(), lib.LicensePath) + url, err := lib.LicenseURL(context.Background()) if err == nil { licenseURL = url } else { diff --git a/internal/third_party/pkgsite/README.md b/internal/third_party/pkgsite/README.md index 473d33a..597c479 100644 --- a/internal/third_party/pkgsite/README.md +++ b/internal/third_party/pkgsite/README.md @@ -17,3 +17,5 @@ Local modifications: - For pkgsite/internal/source, switched to use go log package, because glog conflicts with a test dependency that also defines the "v" flag. - Add a SetCommit method to type ModuleInfo in ./source/source_patch.go, more rationale explained in the method's comments. +- Added RepoFileURL and RepoRawURL methods to source.Info struct in file ./source/source_patch.go. +They are needed when accessing files outside of the module dir, but in the same repo. diff --git a/internal/third_party/pkgsite/source/source_patch.go b/internal/third_party/pkgsite/source/source_patch.go index 3bce5e6..3df6830 100644 --- a/internal/third_party/pkgsite/source/source_patch.go +++ b/internal/third_party/pkgsite/source/source_patch.go @@ -14,6 +14,11 @@ package source +import ( + "path" + "strings" +) + // This file includes all local additions to source package for google/go-licenses use-cases. // SetCommit overrides commit to a specified commit. Usually, you should pass your version to @@ -31,3 +36,36 @@ func (i *Info) SetCommit(commit string) { } i.commit = commit } + +// RepoFileURL returns a URL for a file whose pathname is relative to the repo's home directory instead of the module's. 
+func (i *Info) RepoFileURL(pathname string) string {
+	if i == nil {
+		return ""
+	}
+	dir, base := path.Split(pathname)
+	return expand(i.templates.File, map[string]string{
+		"repo":       i.repoURL,
+		"importPath": path.Join(strings.TrimPrefix(i.repoURL, "https://"), dir),
+		"commit":     i.commit,
+		"dir":        dir,
+		"file":       pathname,
+		"base":       base,
+	})
+}
+
+// RepoRawURL returns a URL for the raw contents of a file whose pathname is relative to the repo's
+// home directory instead of the module's. It returns "" if i is nil or has no raw URL template.
+func (i *Info) RepoRawURL(pathname string) string {
+	if i == nil {
+		return ""
+	}
+	// Some templates don't support raw content serving.
+	if i.templates.Raw == "" {
+		return ""
+	}
+	return expand(i.templates.Raw, map[string]string{
+		"repo":   i.repoURL,
+		"commit": i.commit,
+		"file":   pathname,
+	})
+}
diff --git a/licenses/library.go b/licenses/library.go
index bbfd290..8a90c06 100644
--- a/licenses/library.go
+++ b/licenses/library.go
@@ -18,6 +18,8 @@ import (
 	"context"
 	"fmt"
 	"go/build"
+	"io/ioutil"
+	"net/http"
 	"path/filepath"
 	"sort"
 	"strings"
@@ -209,12 +211,17 @@ func (l *Library) String() string {
 	return l.Name()
 }
 
-// FileURL attempts to determine the URL for a file in this library using
-// go module name and version.
-func (l *Library) FileURL(ctx context.Context, filePath string) (string, error) {
+// testOnlySkipValidation is an internal flag to skip validation during testing,
+// because we cannot easily set up actual license files on disk.
+var testOnlySkipValidation = false
+
+// LicenseURL attempts to determine the URL for the license file (l.LicensePath) in this
+// library using go module name and version.
+func (l *Library) LicenseURL(ctx context.Context) (string, error) {
 	if l == nil {
 		return "", fmt.Errorf("library is nil")
 	}
+	filePath := l.LicensePath
 	wrap := func(err error) error {
 		return fmt.Errorf("getting file URL in library %s: %w", l.Name(), err)
 	}
@@ -256,9 +263,104 @@ func (l *Library) FileURL(ctx context.Context, filePath string) (string, error)
 	if err != nil {
 		return "", wrap(err)
 	}
-	// TODO: there are still rare cases this may result in an incorrect URL.
-	// https://github.com/google/go-licenses/issues/73#issuecomment-1005587408
-	return remote.FileURL(relativePath), nil
+	url := remote.FileURL(relativePath)
+	if testOnlySkipValidation {
+		return url, nil
+	}
+	// validationError wraps an error hit while validating url; the URL itself may still be valid.
+	validationError := func(err error) error {
+		return fmt.Errorf("failed to validate %s: %w", url, err)
+	}
+	localContentBytes, err := ioutil.ReadFile(l.LicensePath)
+	if err != nil {
+		return "", validationError(err)
+	}
+	localContent := string(localContentBytes)
+	// Attempt 1: the license at its path relative to the module dir.
+	rawURL := remote.RawURL(relativePath)
+	if rawURL == "" {
+		glog.Warningf(
+			"Skipping license URL validation, because %s. Please verify whether %s matches content of %s manually!",
+			validationError(fmt.Errorf("remote repo %s does not support raw URL", remote)),
+			url,
+			l.LicensePath,
+		)
+		return url, nil
+	}
+	validationError1 := validate(ctx, rawURL, localContent)
+	if validationError1 == nil {
+		// The found URL is valid!
+		return url, nil
+	}
+	if relativePath != "LICENSE" {
+		return "", validationError1
+	}
+	// Attempt 2 when relativePath == "LICENSE"
+	// When module is at a subdir, the LICENSE file we find at root
+	// of the module may actually lie in the root of the repo, due
+	// to special go module behavior.
+	// Reference: https://github.com/google/go-licenses/issues/73#issuecomment-1005587408.
+	url2 := remote.RepoFileURL("LICENSE")
+	rawURL2 := remote.RepoRawURL("LICENSE")
+	if url2 == url {
+		// Return early, because the second attempt resolved to the same file.
+		return "", validationError1
+	}
+	// Same remote as attempt 1, whose raw URL was non-empty, so rawURL2 needs no emptiness check.
+	validationError2 := validate(ctx, rawURL2, localContent)
+	if validationError2 == nil {
+		return url2, nil
+	}
+	return "", fmt.Errorf("cannot infer remote URL for %s, failed attempts:\n\tattempt 1: %s\n\tattempt 2: %s", l.LicensePath, validationError1, validationError2)
+}
+
+// downloadTimeout bounds a single download so that an unresponsive host cannot hang the tool.
+const downloadTimeout = 30 * time.Second
+
+// validate checks that the content served at rawURL matches localContent.
+// A failed download is retried once after one second, unless ctx ends first.
+func validate(ctx context.Context, rawURL string, localContent string) error {
+	remoteContent, err := download(ctx, rawURL)
+	if err != nil {
+		// Retry once after 1 sec; give up with the first error if ctx ends while waiting.
+		select {
+		case <-ctx.Done():
+			return err
+		case <-time.After(time.Second):
+		}
+		remoteContent, err = download(ctx, rawURL)
+		if err != nil {
+			return err
+		}
+	}
+	if remoteContent != localContent {
+		return fmt.Errorf("local license file content does not match remote license URL %s", rawURL)
+	}
+	return nil
+}
+
+// download fetches url with an HTTP GET and returns the response body as a string.
+// The request is bound to ctx and to downloadTimeout; a status code >= 400 is an error.
+func download(ctx context.Context, url string) (string, error) {
+	ctx, cancel := context.WithTimeout(ctx, downloadTimeout)
+	defer cancel()
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return "", fmt.Errorf("download(%q): %w", url, err)
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("download(%q): %w", url, err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode >= 400 {
+		return "", fmt.Errorf("download(%q): response status code %v not OK", url, resp.StatusCode)
+	}
+	bodyBytes, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("download(%q): failed to read from response body: %w", url, err)
+	}
+	return string(bodyBytes), nil
 }
 
 // isStdLib returns true if this package is part of the Go standard library.
diff --git a/licenses/library_test.go b/licenses/library_test.go
index 4f86c06..3e486d5 100644
--- a/licenses/library_test.go
+++ b/licenses/library_test.go
@@ -153,14 +153,13 @@ func TestLibraryFileURL(t *testing.T) {
 					"github.com/google/trillian",
 					"github.com/google/trillian/crypto",
 				},
-				LicensePath: "/go/src/github.com/google/trillian/LICENSE",
+				LicensePath: "/go/src/github.com/google/trillian/foo/README.md",
 				module: &Module{
 					Path:    "github.com/google/trillian",
 					Dir:     "/go/src/github.com/google/trillian",
 					Version: "v1.2.3",
 				},
 			},
-			path:    "/go/src/github.com/google/trillian/foo/README.md",
 			wantURL: "https://github.com/google/trillian/blob/v1.2.3/foo/README.md",
 		},
 		{
@@ -170,14 +169,13 @@ func TestLibraryFileURL(t *testing.T) {
 					"bitbucket.org/user/project/pkg",
 					"bitbucket.org/user/project/pkg2",
 				},
-				LicensePath: "/foo/bar/bitbucket.org/user/project/LICENSE",
+				LicensePath: "/foo/bar/bitbucket.org/user/project/foo/README.md",
 				module: &Module{
 					Path:    "bitbucket.org/user/project",
 					Dir:     "/foo/bar/bitbucket.org/user/project",
 					Version: "v1.2.3",
 				},
 			},
-			path:    "/foo/bar/bitbucket.org/user/project/foo/README.md",
 			wantURL: "https://bitbucket.org/user/project/src/v1.2.3/foo/README.md",
 		},
 		{
@@ -187,14 +185,13 @@ func TestLibraryFileURL(t *testing.T) {
 					"example.com/user/project/pkg",
 					"example.com/user/project/pkg2",
 				},
-				LicensePath: "/foo/bar/example.com/user/project/LICENSE",
+				LicensePath: "/foo/bar/example.com/user/project/foo/README.md",
 				module: &Module{
 					Path:    "example.com/user/project",
 					Dir:     "/foo/bar/example.com/user/project",
 					Version: "v1.2.3",
 				},
 			},
-			path:    "/foo/bar/example.com/user/project/foo/README.md",
 			wantURL: "https://example.com/user/project/blob/v1.2.3/foo/README.md",
 		},
 		{
@@ -204,13 +201,12 @@ func TestLibraryFileURL(t *testing.T) {
 					"github.com/google/trillian",
 					"github.com/google/trillian/crypto",
 				},
-				LicensePath: "/go/src/github.com/google/trillian/LICENSE",
+				LicensePath: "/go/src/github.com/google/trillian/foo/README.md",
 				module: &Module{
 					Path: "github.com/google/trillian",
 					Dir:  "/go/src/github.com/google/trillian",
 				},
 			},
-			path:    "/go/src/github.com/google/trillian/foo/README.md",
 			wantURL: "https://github.com/google/trillian/blob/HEAD/foo/README.md",
 		},
 		{
@@ -226,19 +222,20 @@ func TestLibraryFileURL(t *testing.T) {
 					Version: "v0.23.1",
 				},
 			},
-			path:    "/go/modcache/k8s.io/api/LICENSE",
 			wantURL: "https://github.com/kubernetes/api/blob/v0.23.1/LICENSE",
 		},
 	} {
 		t.Run(test.desc, func(t *testing.T) {
-			fileURL, err := test.lib.FileURL(context.Background(), test.path)
+			testOnlySkipValidation = true
+			defer func() { testOnlySkipValidation = false }()
+			fileURL, err := test.lib.LicenseURL(context.Background())
 			if gotErr := err != nil; gotErr != test.wantErr {
-				t.Fatalf("FileURL(%q) = (_, %q), want err? %t", test.path, err, test.wantErr)
+				t.Fatalf("LicenseURL(%q) = (_, %q), want err? %t", test.lib.LicensePath, err, test.wantErr)
 			} else if gotErr {
 				return
 			}
 			if got, want := fileURL, test.wantURL; got != want {
-				t.Fatalf("FileURL(%q) = %q, want %q", test.path, got, want)
+				t.Fatalf("LicenseURL(%q) = %q, want %q", test.lib.LicensePath, got, want)
 			}
 		})
 	}
diff --git a/testdata/modules/modinsubdir05/licenses.csv b/testdata/modules/modinsubdir05/licenses.csv
index 6d014fa..d3ca1d5 100644
--- a/testdata/modules/modinsubdir05/licenses.csv
+++ b/testdata/modules/modinsubdir05/licenses.csv
@@ -1,12 +1,12 @@
-cloud.google.com/go/compute/metadata,https://github.com/googleapis/google-cloud-go/blob/compute/v0.1.0/compute/LICENSE,Apache-2.0
-cloud.google.com/go/iam,https://github.com/googleapis/google-cloud-go/blob/iam/v0.1.1/iam/LICENSE,Apache-2.0
+cloud.google.com/go/compute/metadata,https://github.com/googleapis/google-cloud-go/blob/compute/v0.1.0/LICENSE,Apache-2.0
+cloud.google.com/go/iam,https://github.com/googleapis/google-cloud-go/blob/iam/v0.1.1/LICENSE,Apache-2.0
 cloud.google.com/go/internal,https://github.com/googleapis/google-cloud-go/blob/v0.100.2/LICENSE,Apache-2.0
-cloud.google.com/go/storage,https://github.com/googleapis/google-cloud-go/blob/storage/v1.19.0/storage/LICENSE,Apache-2.0
+cloud.google.com/go/storage,https://github.com/googleapis/google-cloud-go/blob/storage/v1.19.0/LICENSE,Apache-2.0 github.com/golang/groupcache/lru,https://github.com/golang/groupcache/blob/8c9f03a8e57e/LICENSE,Apache-2.0 github.com/golang/protobuf,https://github.com/golang/protobuf/blob/v1.5.2/LICENSE,BSD-3-Clause github.com/google/go-cmp/cmp,https://github.com/google/go-cmp/blob/v0.5.7/LICENSE,BSD-3-Clause github.com/google/go-licenses/testdata/modules/modinsubdir05,Unknown,Unknown -github.com/googleapis/gax-go/v2,https://github.com/googleapis/gax-go/blob/v2.1.1/v2/LICENSE,BSD-3-Clause +github.com/googleapis/gax-go/v2,https://github.com/googleapis/gax-go/blob/v2.1.1/LICENSE,BSD-3-Clause go.opencensus.io,https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/LICENSE,Apache-2.0 golang.org/x/net,https://cs.opensource.google/go/x/net/+/7fd8e65b:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause