Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Access additional license data #23

Merged
merged 5 commits into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
- name: Run GolangCI-Lint
uses: golangci/golangci-lint-action@v2
with:
version: v1.29
version: v1.47.0

- name: Test
run: go test ./...
2 changes: 1 addition & 1 deletion .github/workflows/race.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ jobs:
- name: Checkout code
uses: actions/checkout@v2
- name: Test race
run: go test -v -race ./...
run: go test -v -race -timeout 60m ./...
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
fail-fast: false
matrix:
goos: [linux, darwin, windows]
goos: [linux, darwin]
runs-on: ubuntu-latest
steps:
- name: Checkout code
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
fail-fast: false
matrix:
go-version: [1.16.x, 1.17.x]
platform: [ubuntu-latest, macos-latest, windows-latest]
platform: [ubuntu-latest, macos-latest]
runs-on: ${{ matrix.platform }}
steps:
- name: Install Go
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
GOPATH ?= $(shell go env GOPATH)
SPDX_DATA_VERSION ?= 3.8
SPDX_DATA_VERSION ?= 3.17

licensedb/internal/assets/bindata.go: licenses.tar urls.csv names.csv $(GOPATH)/bin/go-bindata
rm -rf license-list-data-$(SPDX_DATA_VERSION)
Expand Down
8 changes: 4 additions & 4 deletions cmd/license-detector/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,16 @@ func TestCmdMain(t *testing.T) {
assert.Equal(t, "", r[0].ErrStr)
assert.Equal(t, "no license file was found", r[1].ErrStr)
assert.Equal(t, "Apache-2.0", r[0].Matches[0].License)
assert.InDelta(t, 0.9877, r[0].Matches[0].Confidence, 0.001)
assert.InDelta(t, 0.9877, r[0].Matches[0].Confidence, 0.002)
assert.Equal(t, "ECL-2.0", r[0].Matches[1].License)
assert.InDelta(t, 0.9047, r[0].Matches[1].Confidence, 0.001)
assert.InDelta(t, 0.9047, r[0].Matches[1].Confidence, 0.002)
buffer.Reset()
detect([]string{"../..", "."}, "text", buffer)
assert.Equal(t, `../..
99% Apache-2.0
90% ECL-2.0
85% SHL-0.51
85% SHL-0.5
81% SHL-0.51
81% SHL-0.5
.
no license file was found
`, buffer.String())
Expand Down
16 changes: 8 additions & 8 deletions licensedb/internal/assets/bindata.go

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions licensedb/internal/assets/extract_urls.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ func main() {
for _, url := range seeAlso.([]interface{}) {
id := data["licenseId"].(string)
strUrl := strings.TrimSpace(url.(string))
strUrl = strUrl[strings.Index(strUrl, "://"):] // ignore http/https
cutIndex := strings.Index(strUrl, "://")
schema := strUrl[:cutIndex]
strUrl = strUrl[cutIndex:] // ignore http/https
if strings.HasSuffix(strUrl, "/legalcode") && strings.HasPrefix(id, "CC") {
strUrl = strUrl[:len(strUrl)-10]
}
writer.Write([]string{id, strUrl})
writer.Write([]string{id, strUrl, schema})
}
}
}
writer.Write([]string{"MIT", ".mit-license.org"})
writer.Write([]string{"MIT", ".mit-license.org", "https"})
}
46 changes: 41 additions & 5 deletions licensedb/internal/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"archive/tar"
"bytes"
"encoding/csv"
"errors"
"fmt"
"index/suffixarray"
"io"
Expand All @@ -24,6 +25,10 @@ import (
"github.com/go-enry/go-license-detector/v4/licensedb/internal/wmh"
)

// ErrUnknownLicenseID is returned when a license identifier is not known.
// If the identifier is a valid SPDX one, the SPDX data version probably
// needs to be upgraded.
var ErrUnknownLicenseID = errors.New("license id is not known")

var (
licenseReadmeMentionRe = regexp.MustCompile(
fmt.Sprintf("(?i)[^\\s]+/[^/\\s]*(%s)[^\\s]*",
Expand All @@ -39,8 +44,12 @@ type database struct {
licenseTexts map[string]string
// minimum license text length
minLicenseLength int
// official license URLs
urls map[string]string
// official license URL -> id
idByURL map[string]string
// id -> license URLs
urlsByID map[string][]string
// id -> license name
nameByID map[string]string
// all URLs joined
urlRe *regexp.Regexp
// first line of each license OR-ed - used to split
Expand Down Expand Up @@ -93,10 +102,12 @@ func loadUrls(db *database) {
if err != nil || len(records) == 0 {
log.Fatalf("failed to parse urls.csv from the assets: %v", err)
}
db.urls = map[string]string{}
db.idByURL = map[string]string{}
db.urlsByID = map[string][]string{}
urlReWriter := &bytes.Buffer{}
for i, record := range records {
db.urls[record[1]] = record[0]
db.idByURL[record[1]] = record[0]
db.urlsByID[record[0]] = append(db.urlsByID[record[0]], record[2]+record[1]) // schema+url
urlReWriter.Write([]byte(regexp.QuoteMeta(record[1])))
if i < len(records)-1 {
urlReWriter.WriteRune('|')
Expand All @@ -115,9 +126,11 @@ func loadNames(db *database) {
if err != nil || len(records) == 0 {
log.Fatalf("failed to parse names.csv from the assets: %v", err)
}
db.nameByID = map[string]string{}
db.nameSubstringSizes = map[string]int{}
db.nameSubstrings = map[string][]substring{}
for _, record := range records {
db.nameByID[record[0]] = record[1]
registerNameSubstrings(record[1], record[0], db.nameSubstringSizes, db.nameSubstrings)
}
}
Expand Down Expand Up @@ -270,6 +283,9 @@ func (db *database) queryLicenseAbstract(text string) map[string]float32 {
for i, titlePos := range titlePositions {
begPos := titlePos[0]
match := normalizedModerate[titlePos[0]:titlePos[1]]
if len(match) == 0 {
continue
}
if match[0] == '\n' {
match = match[1:]
}
Expand Down Expand Up @@ -414,7 +430,7 @@ func (db *database) scanForURLs(text string) map[string]bool {
licenses := map[string]bool{}
for _, match := range urlMatches {
url := byteText[match[0]:match[1]]
licenses[db.urls[string(url)]] = true
licenses[db.idByURL[string(url)]] = true
}
return licenses
}
Expand Down Expand Up @@ -452,6 +468,26 @@ func (db *database) QueryReadmeText(text string, fs filer.Filer) map[string]floa
return candidates
}

// URLs returns the list of the URLs for the given license identifier.
//
// The returned slice is a copy of the stored one, so callers may modify it
// without affecting the database. ErrUnknownLicenseID is returned when id has
// no entry in urlsByID.
func (db *database) URLs(id string) ([]string, error) {
	urls, found := db.urlsByID[id]
	if !found {
		return nil, ErrUnknownLicenseID
	}
	// Hand out the copy, not the stored slice: returning urls directly would
	// let callers mutate the database's internal state through the result.
	res := make([]string, len(urls))
	copy(res, urls)
	return res, nil
}

// Name looks up the SPDX name stored for the license identifier id.
// It returns ErrUnknownLicenseID when id has no entry in nameByID.
func (db *database) Name(id string) (string, error) {
	if licenseName, ok := db.nameByID[id]; ok {
		return licenseName, nil
	}
	return "", ErrUnknownLicenseID
}

func tfidf(freq int, docfreq int, ndocs int) float32 {
weight := fastlog.Log(1+float32(freq)) * fastlog.Log(float32(ndocs)/float32(docfreq))
if weight < 0 {
Expand Down
10 changes: 10 additions & 0 deletions licensedb/internal/investigation.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,13 @@ func IsLicenseDirectory(fileName string) bool {
func Preload() {
// The returned database is deliberately discarded.
// NOTE(review): presumably the call is made to trigger construction of the
// global license database ahead of first use — confirm in globalLicenseDatabase.
_ = globalLicenseDatabase()
}

// LookupURLs asks the global license database for the URLs recorded under
// the given license identifier and returns its answer unchanged.
func LookupURLs(id string) ([]string, error) {
	db := globalLicenseDatabase()
	return db.URLs(id)
}

// LookupName asks the global license database for the SPDX name recorded
// under the given license identifier and returns its answer unchanged.
func LookupName(id string) (string, error) {
	db := globalLicenseDatabase()
	return db.Name(id)
}
16 changes: 8 additions & 8 deletions licensedb/internal/wmh/wmh.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ func (wmh *WeightedMinHasher) MarshalBinary() (data []byte, err error) {
binary.LittleEndian.PutUint32(data[5:9], uint32(wmh.sampleSize))
offset := 9
writeFloat32Slice := func(arr []float32) {
header := *(*reflect.SliceHeader)(unsafe.Pointer(&arr))
header := (*reflect.SliceHeader)(unsafe.Pointer(&arr))
header.Len *= 4
header.Cap *= 4
buffer := *(*[]byte)(unsafe.Pointer(&header))
buffer := *(*[]byte)(unsafe.Pointer(header))
copy(data[offset:], buffer)
offset += len(buffer)
}
Expand All @@ -86,10 +86,10 @@ func (wmh *WeightedMinHasher) MarshalBinary() (data []byte, err error) {
writeFloat32Slice(arr)
}
for _, arr := range wmh.betas {
header := *(*reflect.SliceHeader)(unsafe.Pointer(&arr))
header := (*reflect.SliceHeader)(unsafe.Pointer(&arr))
header.Len *= 2
header.Cap *= 2
buffer := *(*[]byte)(unsafe.Pointer(&header))
buffer := *(*[]byte)(unsafe.Pointer(header))
copy(data[offset:], buffer)
offset += len(buffer)
}
Expand All @@ -111,10 +111,10 @@ func (wmh *WeightedMinHasher) UnmarshalBinary(data []byte) error {
wmh.lnCs = make([][]float32, wmh.sampleSize)
wmh.betas = make([][]uint16, wmh.sampleSize)
readFloat32Slice := func(dest []float32, src []byte) {
header := *(*reflect.SliceHeader)(unsafe.Pointer(&src))
header := (*reflect.SliceHeader)(unsafe.Pointer(&src))
header.Len /= 4
header.Cap /= 4
buffer := *(*[]float32)(unsafe.Pointer(&header))
buffer := *(*[]float32)(unsafe.Pointer(header))
copy(dest, buffer)
}
offset := 9
Expand All @@ -134,10 +134,10 @@ func (wmh *WeightedMinHasher) UnmarshalBinary(data []byte) error {
wmh.betas[i] = make([]uint16, wmh.dim)
nextOffset := offset + wmh.dim*2
slice := data[offset:nextOffset]
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
header := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
header.Len /= 2
header.Cap /= 2
buffer := *(*[]uint16)(unsafe.Pointer(&header))
buffer := *(*[]uint16)(unsafe.Pointer(header))
copy(wmh.betas[i], buffer)
offset = nextOffset
}
Expand Down
27 changes: 27 additions & 0 deletions licensedb/licensedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import (
var (
// ErrNoLicenseFound is returned when no license files were found.
ErrNoLicenseFound = errors.New("no license file was found")
// ErrUnknownLicenseID is returned when a license identifier is not known.
// If the identifier is a valid SPDX one, the SPDX data version probably
// needs to be upgraded.
ErrUnknownLicenseID = errors.New("license id is not known")
)

// Detect returns the most probable reference licenses matched for the given
Expand Down Expand Up @@ -63,3 +66,27 @@ func Detect(fs filer.Filer) (map[string]api.Match, error) {
func Preload() {
// Delegates to the internal package's Preload.
internal.Preload()
}

// LicenseURLs returns the URLs known for the given license identifier.
//
// When the internal lookup reports internal.ErrUnknownLicenseID, this
// package's own ErrUnknownLicenseID is returned in its place; any other error
// is passed through as is.
func LicenseURLs(id string) ([]string, error) {
	urls, err := internal.LookupURLs(id)
	switch {
	case err == nil:
		return urls, nil
	case errors.Is(err, internal.ErrUnknownLicenseID):
		// Translate the internal sentinel into the exported one.
		return nil, ErrUnknownLicenseID
	default:
		return nil, err
	}
}

// LicenseName returns the name known for the given license identifier.
//
// When the internal lookup reports internal.ErrUnknownLicenseID, this
// package's own ErrUnknownLicenseID is returned in its place; any other error
// is passed through as is.
func LicenseName(id string) (string, error) {
	name, err := internal.LookupName(id)
	switch {
	case err == nil:
		return name, nil
	case errors.Is(err, internal.ErrUnknownLicenseID):
		// Translate the internal sentinel into the exported one.
		return "", ErrUnknownLicenseID
	default:
		return "", err
	}
}
29 changes: 29 additions & 0 deletions licensedb/licensedb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import (
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/go-enry/go-license-detector/v4/licensedb/filer"
)

Expand Down Expand Up @@ -43,3 +46,29 @@ func pwdFiler() filer.Filer {
}
return f
}

// TestLicenseURLs checks LicenseURLs for an identifier expected to be in the
// database and for one that is not.
func TestLicenseURLs(t *testing.T) {
	t.Run("existing license", func(t *testing.T) {
		want := []string{
			"http://www.opendatacommons.org/licenses/odbl/1.0/",
			"https://opendatacommons.org/licenses/odbl/1-0/",
		}
		got, err := LicenseURLs("ODbL-1.0")
		require.NoError(t, err)
		assert.Equal(t, want, got)
	})

	t.Run("not existing license", func(t *testing.T) {
		_, lookupErr := LicenseURLs("bad-license-key")
		require.Equal(t, ErrUnknownLicenseID, lookupErr)
	})
}

// TestLicenseName checks LicenseName for an identifier expected to be in the
// database and for one that is not.
func TestLicenseName(t *testing.T) {
	t.Run("existing license", func(t *testing.T) {
		const want = "Open Data Commons Open Database License v1.0"
		got, err := LicenseName("ODbL-1.0")
		require.NoError(t, err)
		assert.Equal(t, want, got)
	})

	t.Run("not existing license", func(t *testing.T) {
		_, lookupErr := LicenseName("bad-license-key")
		require.Equal(t, ErrUnknownLicenseID, lookupErr)
	})
}