From 62de344716ecdb48d7f6ed481539a9801220c9b1 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 2 May 2023 13:33:46 -0700 Subject: [PATCH 01/34] :new: Add nvd directory to ignore list --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a339f94..190b94b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .idea +.nvd *.iml pom.xml pom.xml.asc From d9926f35ff833fc5a8ce1541d712bb487e82cd8d Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 2 May 2023 21:46:31 -0700 Subject: [PATCH 02/34] :books: Add note about upgrading to 2.0 --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index dfb2c8a..66a4254 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,14 @@ Express the correct maven dependencies in your `deps.edn`: [lice-comb.spdx :as lcs])) ``` +## Upgrading + +### 1.x -> 2.0 + +Implementing [issue #3](https://github.com/pmonks/lice-comb/issues/3) resulted in the creation of a [new SPDX-specific library (`clj-spdx`)](https://github.com/pmonks/clj-spdx) that leverages [the official SPDX Java library](https://github.com/spdx/Spdx-Java-Library). Because of irreconcilable differences in how that Java library represents license data compared to `lice-comb` v1.x, as well as the addition of support for SPDX license exceptions, it was not possible to retain backwards compatibility. + +The backwards compatibility breaking changes are limited to the `lice-comb.spdx` namespace however, so if you're not using that namespace you should be unaffected. If you are using that namespace, migration involves migrating to [`clj-spdx`](https://github.com/pmonks/clj-spdx). It offers all of the same functionality (and more) as the `lice-comb` v1.x functionality, and by virtue of using the official SPDX Java library is far more battle tested than that code was. 
+ ## Contributor Information [Contributor FAQ](https://github.com/pmonks/lice-comb/wiki/FAQ#contributor-faqs) From 0adf2adc61b70e8c25b0d991b9cdedff80ed2a0e Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 2 May 2023 23:01:56 -0700 Subject: [PATCH 03/34] :arrow_up: Upgrade dependency --- .github/workflows/ci.yml | 2 +- .github/workflows/deploy.yml | 2 +- .github/workflows/docs.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index de52ab7..8589e70 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@10.2 + - uses: DeLaGuardo/setup-clojure@10.3 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 832e3d5..83a2091 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@10.2 + - uses: DeLaGuardo/setup-clojure@10.3 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 228fdd8..2d1d76d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@10.2 + - uses: DeLaGuardo/setup-clojure@10.3 with: cli: latest - uses: actions/cache@v3 From 49be3a63c25bca1c16395347af1ffd4522cc7a76 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 2 May 2023 23:02:17 -0700 Subject: [PATCH 04/34] :construction: WIP commit --- deps.edn | 7 +- pbr.clj | 2 +- src/lice_comb/data.clj | 4 +- src/lice_comb/deps.clj | 56 ++++---- src/lice_comb/files.clj | 24 ++-- src/lice_comb/maven.clj | 58 +++++---- src/lice_comb/spdx.clj | 242 ++++++++++++++++++++++------------- src/lice_comb/utils.clj | 36 +----- 
test/lice_comb/spdx_test.clj | 206 ++++++++++++++--------------- 9 files changed, 346 insertions(+), 289 deletions(-) diff --git a/deps.edn b/deps.edn index 3a8ca62..f6ea19c 100644 --- a/deps.edn +++ b/deps.edn @@ -20,11 +20,10 @@ :deps {org.clojure/tools.logging {:mvn/version "1.2.4"} org.clojure/data.xml {:mvn/version "0.2.0-alpha8"} - cheshire/cheshire {:mvn/version "5.11.0"} clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} - camel-snake-kebab/camel-snake-kebab {:mvn/version "0.4.3"} - tolitius/xml-in {:mvn/version "0.1.1"}} + tolitius/xml-in {:mvn/version "0.1.1"} + com.github.pmonks/clj-spdx {:mvn/version "1.0.35"}} :aliases - {:build {:deps {io.github.clojure/tools.build {:git/tag "v0.9.3" :git/sha "e537cd1"} + {:build {:deps {io.github.clojure/tools.build {:git/tag "v0.9.4" :git/sha "76b78fe"} com.github.pmonks/pbr {:mvn/version "RELEASE"}} :ns-default pbr.build}}} diff --git a/pbr.clj b/pbr.clj index 911d20d..cca13cc 100644 --- a/pbr.clj +++ b/pbr.clj @@ -19,7 +19,7 @@ (def lib 'com.github.pmonks/lice-comb) #_{:clj-kondo/ignore [:unresolved-namespace]} -(def version (format "1.0.%s" (b/git-count-revs nil))) +(def version (format "2.0.%s" (b/git-count-revs nil))) (defn set-opts [opts] diff --git a/src/lice_comb/data.clj b/src/lice_comb/data.clj index 34795b1..623fe13 100644 --- a/src/lice_comb/data.clj +++ b/src/lice_comb/data.clj @@ -18,10 +18,10 @@ (ns lice-comb.data "Data handling functionality." - (:require [lice-comb.utils :as u])) + (:require [lice-comb.utils :as lcu])) (defn uri-for-data "Returns a URI (as a string) for the given data file. May be a local file path or a URI to a remote resource." 
[file] (when file - (str (u/getenv "LICE_COMB_DATA_DIR" "https://raw.githubusercontent.com/pmonks/lice-comb/data") file))) + (str (lcu/getenv "LICE_COMB_DATA_DIR" "https://raw.githubusercontent.com/pmonks/lice-comb/data") file))) diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 25e35aa..f0fbed4 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -21,37 +21,39 @@ (:require [clojure.string :as s] [clojure.reflect :as cr] [clojure.edn :as edn] - [lice-comb.spdx :as spdx] - [lice-comb.maven :as mvn] - [lice-comb.files :as f] - [lice-comb.data :as d] - [lice-comb.utils :as u])) + [spdx.licenses :as sl] + [lice-comb.maven :as lcm] + [lice-comb.files :as lcf] + [lice-comb.data :as lcd] + [lice-comb.utils :as lcu])) -(def ^:private overrides-uri (d/uri-for-data "/deps/overrides.edn")) -(def ^:private overrides (try - (edn/read-string (slurp overrides-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " overrides-uri ". Please check your internet connection and try again.") {} e))))) +(def ^:private overrides-uri (lcd/uri-for-data "/deps/overrides.edn")) +(def ^:private overrides-d (delay + (try + (edn/read-string (slurp overrides-uri)) + (catch Exception e + (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " overrides-uri ". Please check your internet connection and try again.") {} e)))))) -(def ^:private fallbacks-uri (d/uri-for-data "/deps/fallbacks.edn")) -(def ^:private fallbacks (try - (edn/read-string (slurp fallbacks-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " fallbacks-uri ". 
Please check your internet connection and try again.") {} e))))) +(def ^:private fallbacks-uri (lcd/uri-for-data "/deps/fallbacks.edn")) +(def ^:private fallbacks-d (delay + (try + (edn/read-string (slurp fallbacks-uri)) + (catch Exception e + (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " fallbacks-uri ". Please check your internet connection and try again.") {} e)))))) (defn- check-overrides "Checks if an override should be used for the given dep" ([ga] (check-overrides ga nil)) ([ga v] (let [gav (symbol (str ga (when v (str "@" v))))] - (:licenses (get overrides gav (get overrides ga)))))) ; Lookup overrides both with and without the version + (:licenses (get @overrides-d gav (get @overrides-d ga)))))) ; Lookup overrides both with and without the version (defn- check-fallbacks "Checks if a fallback should be used for the given dep, given the set of detected ids" [ga ids] (if (or (empty? ids) - (every? #(not (spdx/spdx-id? %)) ids)) - (:licenses (get fallbacks ga {:licenses ids})) + (every? #(not (sl/listed-id? 
%)) ids)) + (:licenses (get @fallbacks-d ga {:licenses ids})) ids)) (defmulti dep->ids @@ -67,11 +69,11 @@ version (:mvn/version info)] (if-let [override (check-overrides ga version)] override - (let [pom-uri (mvn/pom-uri-for-gav group-id artifact-id version) + (let [pom-uri (lcm/pom-uri-for-gav group-id artifact-id version) license-ids (check-fallbacks ga - (if-let [license-ids (mvn/pom->ids pom-uri)] + (if-let [license-ids (lcm/pom->ids pom-uri)] license-ids - (u/nset (mapcat f/zip->ids (:paths info)))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too + (lcu/nset (mapcat lcf/zip->ids (:paths info)))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too license-ids))))) (defmethod dep->ids :deps @@ -81,7 +83,7 @@ version (:git/sha info)] (if-let [override (check-overrides ga version)] override - (check-fallbacks ga (f/dir->ids (:deps/root info))))))) + (check-fallbacks ga (lcf/dir->ids (:deps/root info))))))) (defmethod dep->ids nil [_]) @@ -96,3 +98,13 @@ (when deps (into {} (pmap #(let [[k v] %] [k (assoc v :lice-comb/licenses (dep->ids [k v]))]) deps)))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." 
+ [] + @overrides-d + @fallbacks-d + nil) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 6999f1e..8845cde 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -21,9 +21,9 @@ (:require [clojure.string :as s] [clojure.set :as set] [clojure.java.io :as io] - [lice-comb.spdx :as spdx] - [lice-comb.maven :as mvn] - [lice-comb.utils :as u])) + [lice-comb.spdx :as lcs] + [lice-comb.maven :as lcm] + [lice-comb.utils :as lcu])) (def ^:private probable-license-filenames #{"pom.xml" "license" "license.txt" "copying" "unlicense"}) ;TODO: consider "license.md" and #".+\.spdx" (see https://github.com/spdx/spdx-maven-plugin for why the latter is important)... @@ -31,7 +31,7 @@ "Returns true if the given file-like thing (String, File, ZipEntry) is a probable license file, false otherwise." [f] (and (not (nil? f)) - (let [fname (s/lower-case (u/filename f))] + (let [fname (s/lower-case (lcu/filename f))] (and (not (s/blank? fname)) (or (contains? probable-license-filenames fname) (s/ends-with? fname ".pom")))))) @@ -43,26 +43,26 @@ (let [dir (io/file dir)] (if (.exists dir) ; Note: we have to do this, because file-seq does weird things when handed a file that doesn't exist (if (.isDirectory dir) - (u/nset (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq (io/file dir)))) + (lcu/nset (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq (io/file dir)))) (throw (java.nio.file.NotDirectoryException. (str dir)))) (throw (java.io.FileNotFoundException. (str dir))))))) (defn file->ids "Attempts to determine the SPDX license identifier(s) (a set) from the given file (an InputStream or something that can have an io/input-stream opened on it). If an InputStream is provided, the associated filename MUST also be provided as the second parameter." 
- ([f] (file->ids f (u/filename f))) + ([f] (file->ids f (lcu/filename f))) ([f fname] (when (and f fname) (let [fname (s/lower-case fname)] - (cond (= fname "pom.xml") (mvn/pom->ids f) - (s/ends-with? fname ".pom") (mvn/pom->ids f) - :else (spdx/text->ids f)))))) + (cond (= fname "pom.xml") (lcm/pom->ids f) + (s/ends-with? fname ".pom") (lcm/pom->ids f) + :else (lcs/text->ids f)))))) (defn dir->ids "Attempt to detect the license(s) in a directory. dir may be a String or a java.io.File, both of which must refer to a directory." [dir] (when dir - (u/nset (mapcat file->ids (probable-license-files dir))))) + (lcu/nset (mapcat file->ids (probable-license-files dir))))) (defn zip->ids "Attempt to detect the license(s) in a ZIP file. zip may be a String or a java.io.File, both of which must refer to a ZIP-format compressed file." @@ -75,6 +75,6 @@ entry (.getNextEntry zip-is)] (if entry (if (probable-license-file? entry) - (recur (set/union licenses (file->ids zip-is (u/filename entry))) (.getNextEntry zip-is)) - (recur licenses (.getNextEntry zip-is))) + (recur (set/union licenses (file->ids zip-is (lcu/filename entry))) (.getNextEntry zip-is)) + (recur licenses (.getNextEntry zip-is))) licenses)))))) diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index efb3a47..9de8da1 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -21,20 +21,21 @@ (:require [clojure.string :as s] [clojure.java.io :as io] [clojure.data.xml :as xml] + [clojure.java.shell :as sh] [clojure.tools.logging :as log] [xml-in.core :as xi] - [clojure.java.shell :as sh] - [lice-comb.spdx :as spdx] - [lice-comb.utils :as u])) + [lice-comb.spdx :as lcs] + [lice-comb.utils :as lcu])) -(def ^:private local-maven-repo - (try - (let [sh-result (sh/sh "mvn" "help:evaluate" "-Dexpression=settings.localRepository" "-q" "-DforceStdout")] - (if (= 0 (:exit sh-result)) - (s/trim (:out sh-result)) - (str (System/getProperty "user.home") "/.m2/repository"))) - (catch java.io.IOException _ 
- (str (System/getProperty "user.home") "/.m2/repository")))) +(def ^:private local-maven-repo-d + (delay + (try + (let [sh-result (sh/sh "mvn" "help:evaluate" "-Dexpression=settings.localRepository" "-q" "-DforceStdout")] + (if (zero? (:exit sh-result)) + (s/trim (:out sh-result)) + (str (System/getProperty "user.home") "/.m2/repository"))) + (catch java.io.IOException _ + (str (System/getProperty "user.home") "/.m2/repository"))))) (def ^:private remote-maven-repos #{"https://repo1.maven.org/maven2" "https://repo.clojars.org"}) @@ -54,7 +55,7 @@ (not (s/blank? artifact-id)) (not (s/blank? version))) (let [gav-path (str (s/replace group-id "." "/") "/" artifact-id "/" version "/" artifact-id "-" version ".pom") - local-pom (io/file (str local-maven-repo "/" gav-path))] + local-pom (io/file (str @local-maven-repo-d "/" gav-path))] (if (and (.exists local-pom) (.isFile local-pom)) (.toURI local-pom) @@ -63,9 +64,9 @@ (defn- licenses-from-pair "Attempts to determine the license(s) (a set) from a POM license name/URL pair." [{:keys [name url]}] - (if-let [license (spdx/uri->id url)] - #{license} - (if-let [licenses (spdx/name->ids name)] + (if-let [licenses (lcs/uri->license-ids url)] + licenses + (if-let [licenses (lcs/fuzzy-match-name->license-ids name)] licenses (when name #{(str "NON-SPDX-Unknown (" name ")")})))) ; Last resort - return the license name @@ -83,19 +84,19 @@ licenses-no-ns (seq (xi/find-all pom-xml [:project :licenses :license]))] ; Note: a few rare pom.xml files are missing the xmlns declation (e.g. 
software.amazon.ion/ion-java) - this case catches those (if (or licenses licenses-no-ns) ; Licenses block exists - process it - (let [name-uri-pairs (u/nset (concat (u/map-pad #(hash-map :name (u/strim %1) :url (u/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) - (u/map-pad #(hash-map :name (u/strim %1) :url (u/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url]))))] - (u/nset (mapcat licenses-from-pair name-uri-pairs))) + (let [name-uri-pairs (lcu/nset (concat (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) + (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url]))))] + (lcu/nset (mapcat licenses-from-pair name-uri-pairs))) ; License block doesn't exist, so attempt to lookup the parent pom and get it from there (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) parent-gav (merge {} - (when parent {:group-id (u/strim (first (xi/find-first parent [::pom/groupId]))) - :artifact-id (u/strim (first (xi/find-first parent [::pom/artifactId]))) - :version (u/strim (first (xi/find-first parent [::pom/version])))}) - (when parent-no-ns {:group-id (u/strim (first (xi/find-first parent-no-ns [:groupId]))) - :artifact-id (u/strim (first (xi/find-first parent-no-ns [:artifactId]))) - :version (u/strim (first (xi/find-first parent-no-ns [:version])))}))] + (when parent {:group-id (lcu/strim (first (xi/find-first parent [::pom/groupId]))) + :artifact-id (lcu/strim (first (xi/find-first parent [::pom/artifactId]))) + :version (lcu/strim (first (xi/find-first parent [::pom/version])))}) + (when parent-no-ns {:group-id (lcu/strim (first (xi/find-first parent-no-ns [:groupId]))) + :artifact-id (lcu/strim (first (xi/find-first parent-no-ns [:artifactId]))) + :version 
(lcu/strim (first (xi/find-first parent-no-ns [:version])))}))] (when-not (empty? parent-gav) (pom->ids (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion @@ -106,3 +107,12 @@ (if-let [pom-licenses (pom->ids pom-is)] pom-licenses (log/info (str "'" pom "'") "contains no license information"))))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." + [] + @local-maven-repo-d + nil) diff --git a/src/lice_comb/spdx.clj b/src/lice_comb/spdx.clj index a20065b..5e83a1e 100644 --- a/src/lice_comb/spdx.clj +++ b/src/lice_comb/spdx.clj @@ -17,124 +17,184 @@ ; (ns lice-comb.spdx - "SPDX related functionality." + "SPDX related functionality that isn't already provided by https://github.com/pmonks/clj-spdx" (:require [clojure.string :as s] + [clojure.set :as set] [clojure.java.io :as io] [clojure.reflect :as cr] [clojure.edn :as edn] [clojure.tools.logging :as log] - [cheshire.core :as json] - [lice-comb.data :as d] - [lice-comb.utils :as u])) - -(def ^:private spdx-license-list-uri "https://spdx.org/licenses/licenses.json") -(def ^:private spdx-license-list (try - (json/parse-string (slurp spdx-license-list-uri) u/clojurise-json-key) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " spdx-license-list-uri ". Please check your internet connection and try again.") {} e))))) - -(def ^:private aliases-uri (d/uri-for-data "/spdx/aliases.edn")) -(def ^:private aliases (try - (edn/read-string (slurp aliases-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " aliases-uri ". 
Please check your internet connection and try again.") {} e))))) - -(def license-list-version - "The version of the license list in use." - (:license-list-version spdx-license-list)) - -(def license-list - "The SPDX license list." - (:licenses spdx-license-list)) - -; Alternative indexes into the SPDX list -(def ^:private idx-id-to-info (into {} (map #(vec [(:license-id %) %]) license-list))) -(def ^:private idx-lname-to-id (apply merge (map #(hash-map (s/trim (s/lower-case (:name %))) (:license-id %)) license-list))) -(def ^:private idx-uri-to-id (into {} (mapcat (fn [lic] (map #(vec [(u/simplify-uri %) (:license-id lic)]) (:see-also lic))) license-list))) -(def ^:private idx-regex-to-id (merge aliases - (apply merge (map #(hash-map (s/replace (u/escape-re (s/lower-case (:name %))) #"\s+" "\\\\s+") #{(:license-id %)}) license-list)))) - -; Store regexes in reverse size order, on the assumption that longer regexes are more specific and should be processed first -; Note: `regexes` actually contains string representations, since regexes in Clojure don't implement equality / hash 🙄 -(def ^:private regexes (reverse (sort-by #(count %) (concat (keys idx-regex-to-id) (keys idx-regex-to-id))))) -(def ^:private re-pattern-mem (memoize re-pattern)) ; So we memomize re-pattern to save having to recompile the regex string representations every time we use them - -(def ids - "All SPDX license identifiers in the list." - (keys idx-id-to-info)) - -(defn id->info - "Returns the SPDX license information for the given SPDX license identifier, or nil if unable to do so." 
- [spdx-id] - (when spdx-id - (get idx-id-to-info spdx-id))) + [spdx.licenses :as sl] + [spdx.exceptions :as se] + [spdx.matching :as sm] + [lice-comb.data :as lcd] + [lice-comb.utils :as lcu])) + +; The lists +(def ^:private license-list-d (delay (map sl/id->info (sl/ids)))) +(def ^:private exception-list-d (delay (map se/id->info (se/ids)))) + +; License name aliases +(def ^:private aliases-uri (lcd/uri-for-data "/spdx/aliases.edn")) +(def ^:private aliases-d (delay + (try + (edn/read-string (slurp aliases-uri)) + (catch Exception e + (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " aliases-uri ". Please check your internet connection and try again.") {} e)))))) + +(defn license-list + "The SPDX license list, as a sequence of maps returned from https://pmonks.github.io/clj-spdx/spdx.licenses.html#var-id-.3Einfo, for all SPDX license identifiers." + [] + @license-list-d) + +(defn exception-list + "The SPDX exception list, as a sequence of maps returned from https://pmonks.github.io/clj-spdx/spdx.exceptions.html#var-id-.3Einfo, for all SPDX exception identifiers." + [] + @exception-list-d) + +(defn name->license-ids + "Returns the SPDX license identifier(s) (a set) for the given license name + (matched case insensitively), or nil if there aren't any. + + Note that SPDX license names are not guaranteed to be unique - see https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" + [name] + (when-not (s/blank? name) + (let [lname (s/trim (s/lower-case name))] + (some-> (seq (map :id (filter #(= lname (s/trim (s/lower-case (:name %)))) @license-list-d))) + set)))) -(defn id->spdx-name - "Returns the official license name for the given SPDX id, or nil if unable to do so." - [spdx-id] - (when spdx-id - (:name (id->info spdx-id)))) +(defn name->exception-ids + "Returns the SPDX exception identifier(s) (a set) for the given exception name + (matched case insensitively), or nil if there aren't any. 
-(defn spdx-name->id - "Returns the SPDX license identifier equivalent of the given license name (matched case insensitively), or nil if unable to do so." + Note that SPDX exception names are not guaranteed to be unique - see https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" [name] - (when name - (get idx-lname-to-id (s/trim (s/lower-case name))))) + (when-not (s/blank? name) + (let [lname (s/trim (s/lower-case name))] + (some-> (seq (map :id (filter #(= lname (s/trim (s/lower-case (:name %)))) @exception-list-d))) + set)))) -(defn uri->id - "Returns the SPDX license identifier equivalent for the given uri, or nil if unable to do so. +(defn uri->license-ids + "Returns the SPDX license identifiers (a set) for the given uri, or nil if + there aren't any. Notes: - 1. this does not perform exact matching; rather it checks whether the given uri matches the start of any of the known license uris. - 2. uris in the SPDX license list are not unique to a license (approximately 70 out of 600 are duplicates)" + 1. this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + 2. SPDX license list URIs are not guaranteed to be unique" [uri] - (when uri - (let [simplified-uri (u/simplify-uri uri) - uri-match (first (filter (partial s/starts-with? simplified-uri) (keys idx-uri-to-id)))] - (get idx-uri-to-id uri-match)))) + (when-not (s/blank? uri) + (let [suri (lcu/simplify-uri uri)] + (some-> (seq (map :id (filter #(some identity (map (fn [see-also] (s/starts-with? suri see-also)) (distinct (map lcu/simplify-uri (get-in % [:see-also :url]))))) + @license-list-d))) + set)))) -(defn spdx-id? - "Is the given identifier an SPDX identifier?" - [id] - (when id - (not (s/starts-with? 
id "NON-SPDX")))) +(defn uri->exception-ids + "Returns the SPDX exception identifiers (a set) for the given uri, or nil if + there aren't any. + + Notes: + 1. this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + 2. SPDX exception list URIs are not guaranteed to be unique" + [uri] + (when-not (s/blank? uri) + (let [suri (lcu/simplify-uri uri)] + (some-> (seq (map :id (filter #(some identity (map (fn [see-also] (s/starts-with? suri see-also)) (distinct (map lcu/simplify-uri (:see-also %))))) + @exception-list-d))) + set)))) (defn id->name - "Returns the license name of the given id; either the official SPDX name or (if the id is not an SPDX id) an unofficial name. Returns the id as-is if unable to determine its name." + "Returns the name of the given license or exception identifier; either the + official SPDX license or exception name or (if the id is not a listed SPDX id + but is used by the library) an unofficial name. Returns the id as-is if unable + to determine a name." [id] - (if (spdx-id? id) - (id->spdx-name id) - (case id - "NON-SPDX-Public-Domain" "Public domain" - id))) - -(defn name->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given license name (a string). Returns nil if unable to do so." + (cond (sl/listed-id? id) (:name (sl/id->info id)) + (se/listed-id? 
id) (:name (se/id->info id)) + (= (some-> id s/lower-case) "non-spdx-public-domain") "Public domain" + :else id)) + + +; Index of alias regexes +(def ^:private idx-regex-to-id-d (delay + (merge @aliases-d + (apply merge (map #(hash-map (s/replace (lcu/escape-re (s/lower-case (:name %))) #"\s+" "\\\\s+") #{(:id %)}) @license-list-d))))) + +; Store regexes in reverse size order, on the assumption that longer regexes are more specific and should be processed first +; Note: `regexes` actually contains string representations, since regexes in Clojure don't implement equality / hash 🙄 +(def ^:private regexes-d (delay (reverse (sort-by count (keys @idx-regex-to-id-d))))) +(def ^:private re-pattern-mem (memoize re-pattern)) ; So we memoize re-pattern to save having to recompile the regex string representations every time we use them + +(defn fuzzy-match-name->license-ids + "Fuzzily attempts to determine the SPDX license identifier(s) (a set) from the + given name (a string), or nil if there aren't any. This involves three steps: + 1. checking if the name is actually an id (this is rare, but sometimes appears + in pom.xml files) + 2. looking up the name using name->license-ids + 3. falling back on a manually maintained list of common name aliases: https://github.com/pmonks/lice-comb/blob/data/spdx/aliases.edn" [name] - (when (not (s/blank? name)) + (when-not (s/blank? name) (let [name (s/trim name)] - (if-let [exact-id-match (id->info name)] ; First we exact match on the id, for those cases where someone has used the SPDX id as the name (e.g. 
in a pom.xml file) - #{(:license-id exact-id-match)} - (if-let [exact-name-match (spdx-name->id name)] ; Then we exact match on the name (albeit case-insensitively) - #{exact-name-match} - (if-let [re-name-match (get idx-regex-to-id (first (filter #(re-find (re-pattern-mem %) (s/lower-case name)) regexes)))] ; Then the last resort is to match on the regexes - re-name-match + (if-let [list-id-match (sl/id->info name)] ; First we exact match on the id, for those (rare) cases where someone has used an SPDX license id as the name (e.g. in a pom.xml file) + #{(:id list-id-match)} + (if-let [list-name-matches (name->license-ids name)] ; Then we look up by name + list-name-matches + (if-let [re-name-matches (get @idx-regex-to-id-d (first (filter #(re-find (re-pattern-mem %) (s/lower-case name)) @regexes-d)))] ; Then the last resort is to match on the regexes + re-name-matches (log/warn "Unable to find a license for" (str "'" name "'")))))))) (defmulti text->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given license text (an InputStream, or something that can have an io/input-stream opened on it)." + "Attempts to determine the SPDX license and/or exception identifier(s) (a set) + within the given license text (a String, Reader, InputStream, or something + that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). + + Notes: + * the caller is expected to close a Reader or InputStream passed to this + function (e.g. 
using clojure.core/with-open) + * you cannot pass a String representation of a filename to this method - you + should pass filenames to clojure.java.io/file first" {:arglists '([text])} type) -; TODO: https://github.com/pmonks/lice-comb/issues/3 +(defmethod text->ids java.lang.String + [s] + ; These clj-spdx APIs are *expensive*, so we paralellise them + (let [f-lic (future (sm/licenses-within-text s)) + f-exc (future (sm/exceptions-within-text s))] + (set/union @f-lic @f-exc))) + +(defmethod text->ids java.io.Reader + [r] + (text->ids (slurp r))) + (defmethod text->ids java.io.InputStream [is] - (let [rdr (io/reader is) ; Note: we don't wrap this in "with-open", since the input-stream we're handed is closed by the calling fn - first-lines (s/trim (s/join " " (take 2 (remove s/blank? (map s/trim (line-seq rdr))))))] ; Take the first two non-blank lines, since many licenses put the name on line 1, and the version on line 2 - (name->ids first-lines))) + (text->ids (io/reader is))) (defmethod text->ids :default [src] (when src - (with-open [is (io/input-stream src)] - (text->ids is)))) + (with-open [r (io/reader src)] + (text->ids r)))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it. + + Note: this method has a substantial performance cost." + [] + (sl/init!) + (se/init!) + @license-list-d + @exception-list-d + @aliases-d + @idx-regex-to-id-d + @regexes-d + nil) diff --git a/src/lice_comb/utils.clj b/src/lice_comb/utils.clj index 65cc648..fc14793 100644 --- a/src/lice_comb/utils.clj +++ b/src/lice_comb/utils.clj @@ -18,37 +18,9 @@ (ns lice-comb.utils "General purpose utility fns that I seem to end up needing in every single project I write..." 
- (:require [clojure.string :as s] + (:require [clojure.string :as s] [clojure.java.io :as io])) -(defn clojurise-json-key - "Converts JSON-style string keys (e.g. \"fullName\") to Clojure keyword keys (e.g. :full-name)." - [k] - (when k - (keyword - (s/replace - (s/join "-" - (map s/lower-case - (s/split k #"(? (seq coll) + set)) (defn escape-re "Escapes the given string for use in a regex." diff --git a/test/lice_comb/spdx_test.clj b/test/lice_comb/spdx_test.clj index fab4bf6..45a4bd5 100644 --- a/test/lice_comb/spdx_test.clj +++ b/test/lice_comb/spdx_test.clj @@ -20,118 +20,118 @@ (:require [clojure.test :refer [deftest testing is use-fixtures]] [clojure.java.io :as io] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.spdx :refer [name->ids uri->id text->ids]])) + [lice-comb.spdx :refer [fuzzy-match-name->license-ids uri->license-ids text->ids]])) (use-fixtures :once fixture) ; Note: these tests should be extended indefinitely, as it exercises the guts of the matching algorithm -(deftest name->ids-tests +(deftest fuzzy-match-name->license-ids-tests (testing "Nil, empty or blank names" - (is (nil? (name->ids nil))) - (is (nil? (name->ids ""))) - (is (nil? (name->ids " "))) - (is (nil? (name->ids "\n"))) - (is (nil? (name->ids "\t")))) + (is (nil? (fuzzy-match-name->license-ids nil))) + (is (nil? (fuzzy-match-name->license-ids ""))) + (is (nil? (fuzzy-match-name->license-ids " "))) + (is (nil? (fuzzy-match-name->license-ids "\n"))) + (is (nil? 
(fuzzy-match-name->license-ids "\t")))) (testing "Names that are SPDX license ids" - (is (= #{"AGPL-3.0"} (name->ids "AGPL-3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "AGPL-3.0-only"))) - (is (= #{"Apache-2.0"} (name->ids " Apache-2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->ids "Apache-2.0"))) - (is (= #{"CC-BY-SA-4.0"} (name->ids "CC-BY-SA-4.0"))) - (is (= #{"GPL-2.0"} (name->ids "GPL-2.0"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GPL-2.0-with-classpath-exception")))) + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL-3.0"))) + (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "AGPL-3.0-only"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache-2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC-BY-SA-4.0"))) + (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GPL-2.0"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GPL-2.0-with-classpath-exception")))) (testing "Names" - (is (= #{"AGPL-3.0"} (name->ids "GNU Affero General Public License (AGPL) version 3.0"))) - (is (= #{"AGPL-3.0"} (name->ids "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0 only"))) - (is (= #{"Apache-1.0"} (name->ids "Apache Software License"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License 1"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License Version 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License, Version 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache Software License - Version 1.0"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License, Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache 
Software License - Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "The MX4J License, version 1.0"))) - (is (= #{"Apache-2.0"} (name->ids " Apache Software License, Version 2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License - Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache v2"))) - (is (= #{"Apache-2.0"} (name->ids "The Apache Software License, Version 2.0"))) - (is (= #{"MIT"} (name->ids "Bouncy Castle Licence"))) ; Note spelling of "licence" - (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->ids "The BSD 3-Clause License (BSD3)"))) - (is (= #{"BSD-3-Clause-Attribution"} (name->ids "BSD 3-Clause Attribution"))) - (is (= #{"CC-BY-3.0"} (name->ids "Attribution 3.0 Unported"))) - (is (= #{"CC-BY-3.0"} (name->ids "Creative Commons Legal Code Attribution 3.0 Unported"))) - (is (= #{"CC-BY-4.0"} (name->ids "Attribution 4.0 International"))) - (is (= #{"CC-BY-SA-4.0"} (name->ids "Creative Commons Attribution Share Alike 4.0 International"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) - (is (= #{"CDDL-1.0"} (name->ids 
"COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) - (is (= #{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) - (is (= #{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License (EPL)"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License, Version 1.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2"))) - (is (= #{"GPL-2.0"} (name->ids "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GNU General Public License v2.0 w/Classpath exception"))) - (is (= #{"JSON"} (name->ids "JSON License"))) - (is (= #{"LGPL-2.0"} (name->ids "GNU Library General Public License"))) - (is (= #{"LGPL-2.1"} (name->ids "GNU Lesser General Public License (LGPL)"))) - (is (= #{"LGPL-2.1"} (name->ids "GNU Lesser General Public License"))) - (is (= #{"MIT"} (name->ids "MIT License"))) - (is (= #{"MIT"} (name->ids "MIT license"))) ; Test capitalisation - (is (= #{"MIT"} (name->ids "The MIT License"))) - (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License Version 2.0"))) - (is (= #{"Plexus"} (name->ids "Similar to Apache License but with 
the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL) version 3.0"))) + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) + (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0 only"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License Version 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License, Version 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.0"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License, Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "The MX4J License, version 1.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache Software License, Version 2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 2.0"))) + (is (= #{"Apache-2.0"} 
(fuzzy-match-name->license-ids "Apache Software License 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "Bouncy Castle Licence"))) ; Note spelling of "licence" + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License (BSD3)"))) + (is (= #{"BSD-3-Clause-Attribution"} (fuzzy-match-name->license-ids "BSD 3-Clause Attribution"))) + (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Attribution 3.0 Unported"))) + (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Creative Commons Legal Code Attribution 3.0 Unported"))) + (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "Attribution 4.0 International"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "Creative Commons Attribution Share Alike 4.0 International"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE 
(CDDL) Version 1.0"))) + (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) + (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License (EPL)"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License, Version 1.0"))) + (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) + (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License version 2"))) + (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License v2.0 w/Classpath exception"))) + (is (= #{"JSON"} (fuzzy-match-name->license-ids "JSON License"))) + (is (= #{"LGPL-2.0"} (fuzzy-match-name->license-ids "GNU Library General Public License"))) + (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) + (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) ; Test 
capitalisation + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) + (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) + (is (= #{"Plexus"} (fuzzy-match-name->license-ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html (testing "Names that appear in licensey things, but are ambiguous" - (is (nil? (name->ids "BSD")))) + (is (nil? (fuzzy-match-name->license-ids "BSD")))) (testing "Names that appear in licensey things, but aren't in the SPDX license list, and don't have identified SPDX identifiers" - (is (= #{"NON-SPDX-Public-Domain"} (name->ids "Public Domain"))) - (is (= #{"NON-SPDX-Public-Domain"} (name->ids "Public domain"))))) + (is (= #{"NON-SPDX-Public-Domain"} (fuzzy-match-name->license-ids "Public Domain"))) + (is (= #{"NON-SPDX-Public-Domain"} (fuzzy-match-name->license-ids "Public domain"))))) -(deftest uri->id-tests +(deftest uri->license-ids-tests (testing "Nil, empty or blank uri" - (is (nil? (uri->id nil))) - (is (nil? (uri->id ""))) - (is (nil? (uri->id " "))) - (is (nil? (uri->id "\n"))) - (is (nil? (uri->id "\t")))) + (is (nil? (uri->license-ids nil))) + (is (nil? (uri->license-ids ""))) + (is (nil? (uri->license-ids " "))) + (is (nil? (uri->license-ids "\n"))) + (is (nil? 
(uri->license-ids "\t")))) (testing "URIs that appear verbatim in the SPDX license list" - (is (= "Apache-2.0" (uri->id "https://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (uri->id "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= "Apache-2.0" (uri->id "https://apache.org/licenses/LICENSE-2.0.txt"))) - (is (= "Apache-2.0" (uri->id " https://www.apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace - (is (let [license-id (uri->id "https://www.gnu.org/licenses/agpl.txt")] + (is (= "Apache-2.0" (uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0"))) + (is (= "Apache-2.0" (uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= "Apache-2.0" (uri->license-ids "https://apache.org/licenses/LICENSE-2.0.txt"))) + (is (= "Apache-2.0" (uri->license-ids " https://www.apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace + (is (let [license-id (uri->license-ids "https://www.gnu.org/licenses/agpl.txt")] (or (= "AGPL-3.0" license-id) (= "AGPL-3.0-only" license-id)))) - (is (= "CC-BY-SA-4.0" (uri->id "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= "GPL-2.0-with-classpath-exception" (uri->id "https://www.gnu.org/software/classpath/license.html")))) + (is (= "CC-BY-SA-4.0" (uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= "GPL-2.0-with-classpath-exception" (uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) (testing "URIs that appear in licensey things, but aren't in the SPDX license list" - (is (= "Apache-2.0" (uri->id "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (uri->id "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (is (= "Apache-2.0" (uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= "Apache-2.0" (uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) (defn- string-text->ids [s] @@ -141,10 +141,14 @@ (deftest text->ids-tests (testing "Nil, empty or 
blank text" (is (nil? (text->ids nil))) - (is (thrown? java.io.FileNotFoundException (text->ids ""))) - (is (thrown? java.io.FileNotFoundException (text->ids " "))) - (is (thrown? java.io.FileNotFoundException (text->ids "\n"))) - (is (thrown? java.io.FileNotFoundException (text->ids "\t")))) + (is (nil? (text->ids ""))) + (is (nil? (text->ids " "))) + (is (nil? (text->ids "\n"))) + (is (nil? (text->ids "\t"))) + (is (thrown? java.io.FileNotFoundException (text->ids (io/file "")))) + (is (thrown? java.io.FileNotFoundException (text->ids (io/file " ")))) + (is (thrown? java.io.FileNotFoundException (text->ids (io/file "\n")))) + (is (thrown? java.io.FileNotFoundException (text->ids (io/file "\t"))))) (testing "Text" (is (= #{"Apache-2.0"} (string-text->ids "Apache License\nVersion 2.0, January 2004"))) (is (= #{"Apache-2.0"} (string-text->ids " Apache License\n Version 2.0, January 2004 "))) From dc09fd62275d4998baddb7606d25929771dfe2b7 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Wed, 31 May 2023 13:31:00 -0700 Subject: [PATCH 05/34] :arrow_up: Upgrade dependencies --- .github/workflows/ci.yml | 2 +- .github/workflows/deploy.yml | 2 +- .github/workflows/docs.yml | 2 +- deps.edn | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8589e70..92fdba4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@10.3 + - uses: DeLaGuardo/setup-clojure@11.0 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 83a2091..6128aff 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@10.3 + - uses: DeLaGuardo/setup-clojure@11.0 with: cli: latest - uses: actions/cache@v3 diff --git 
a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2d1d76d..ad96b57 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@10.3 + - uses: DeLaGuardo/setup-clojure@11.0 with: cli: latest - uses: actions/cache@v3 diff --git a/deps.edn b/deps.edn index f6ea19c..036eb64 100644 --- a/deps.edn +++ b/deps.edn @@ -22,7 +22,7 @@ org.clojure/data.xml {:mvn/version "0.2.0-alpha8"} clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} tolitius/xml-in {:mvn/version "0.1.1"} - com.github.pmonks/clj-spdx {:mvn/version "1.0.35"}} + com.github.pmonks/clj-spdx {:mvn/version "1.0.48"}} :aliases {:build {:deps {io.github.clojure/tools.build {:git/tag "v0.9.4" :git/sha "76b78fe"} com.github.pmonks/pbr {:mvn/version "RELEASE"}} From e7d31472d3a54c57ea0122ef51669b17932a559d Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 2 Jun 2023 14:33:18 -0700 Subject: [PATCH 06/34] :construction: WIP commit while I switch to issue 20 --- README.md | 2 +- src/lice_comb/maven.clj | 11 +- src/lice_comb/spdx.clj | 52 ++------- test/lice_comb/deps_test.clj | 184 ++++++++++++++++---------------- test/lice_comb/maven_test.clj | 2 +- test/lice_comb/spdx_test.clj | 192 +++++++++++++++++----------------- 6 files changed, 203 insertions(+), 240 deletions(-) diff --git a/README.md b/README.md index 66a4254..a045c8f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ # lice-comb -A Clojure library for software license detection. It does this by combing through text, files, and even entire directory structures, and attempting to detect what license(s) they contain. +A Clojure library for software license detection. It does this by combing through `tools.deps` dependency maps, Maven POMs, directory structures & ZIP files, and attempting to detect what license(s) they contain. 
This library leverages, and is inspired by, the *excellent* [SPDX project](https://spdx.dev/). It's a great shame that it doesn't have greater traction in the Java & Clojure (and wider open source) communities. If you're new to SPDX and would prefer to read a primer rather than dry specification documents, I can thoroughly recommend [David A. Wheeler's SPDX Tutorial](https://github.com/david-a-wheeler/spdx-tutorial#spdx-tutorial). diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index 9de8da1..a640fde 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -19,6 +19,7 @@ (ns lice-comb.maven "Maven related functionality, mostly related to POMs." (:require [clojure.string :as s] + [clojure.set :as set] [clojure.java.io :as io] [clojure.data.xml :as xml] [clojure.java.shell :as sh] @@ -64,11 +65,11 @@ (defn- licenses-from-pair "Attempts to determine the license(s) (a set) from a POM license name/URL pair." [{:keys [name url]}] - (if-let [licenses (lcs/uri->license-ids url)] + (if-let [licenses (some-> (seq (set/union (lcs/fuzzy-match-uri->license-ids url) ; Because clojure.set functions are idiotic wrt nils 🙄 + (lcs/fuzzy-match-name->license-ids name))) + set)] licenses - (if-let [licenses (lcs/fuzzy-match-name->license-ids name)] - licenses - (when name #{(str "NON-SPDX-Unknown (" name ")")})))) ; Last resort - return the license name + (when name #{(str "UNKNOWN (" name ")")}))) ; Last resort - return a dummy identifier that includes the name (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") @@ -98,7 +99,7 @@ :artifact-id (lcu/strim (first (xi/find-first parent-no-ns [:artifactId]))) :version (lcu/strim (first (xi/find-first parent-no-ns [:version])))}))] (when-not (empty? 
parent-gav) - (pom->ids (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion + (pom->ids (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep (defmethod pom->ids :default [pom] diff --git a/src/lice_comb/spdx.clj b/src/lice_comb/spdx.clj index 5e83a1e..eb5d452 100644 --- a/src/lice_comb/spdx.clj +++ b/src/lice_comb/spdx.clj @@ -35,24 +35,14 @@ (def ^:private exception-list-d (delay (map sl/id->info (sl/ids)))) ; License name aliases -(def ^:private aliases-uri (lcd/uri-for-data "/spdx/aliases.edn")) +(def ^:private aliases-uri (lcd/uri-for-data "/spdx/aliases.edn")) ; ####TODO: UPGRADE THIS TO USE LicenseRef-lice-comb-public-domain INSTEAD OF NON-SPDX-Public-Domain (def ^:private aliases-d (delay (try (edn/read-string (slurp aliases-uri)) (catch Exception e (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " aliases-uri ". Please check your internet connection and try again.") {} e)))))) -(defn license-list - "The SPDX license list, as a sequence of maps returned from https://pmonks.github.io/clj-spdx/spdx.licenses.html#var-id-.3Einfo, for all SPDX license identifiers." - [] - @license-list-d) - -(defn exception-list - "The SPDX exception list, as a sequence of maps returned from https://pmonks.github.io/clj-spdx/spdx.exceptions.html#var-id-.3Einfo, for all SPDX exception identifiers." - [] - @exception-list-d) - -(defn name->license-ids +(defn- name->license-ids "Returns the SPDX license identifier(s) (a set) for the given license name (matched case insensitively), or nil if there aren't any. @@ -63,18 +53,7 @@ (some-> (seq (map :id (filter #(= lname (s/trim (s/lower-case (:name %)))) @license-list-d))) set)))) -(defn name->exception-ids - "Returns the SPDX exception identifier(s) (a set) for the given exception name - (matched case insensitively), or nil if there aren't any. 
- - Note that SPDX exception names are not guaranteed to be unique - see https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" - [name] - (when-not (s/blank? name) - (let [lname (s/trim (s/lower-case name))] - (some-> (seq (map :id (filter #(= lname (s/trim (s/lower-case (:name %)))) @exception-list-d))) - set)))) - -(defn uri->license-ids +(defn fuzzy-match-uri->license-ids "Returns the SPDX license identifiers (a set) for the given uri, or nil if there aren't any. @@ -91,33 +70,16 @@ @license-list-d))) set)))) -(defn uri->exception-ids - "Returns the SPDX exception identifiers (a set) for the given uri, or nil if - there aren't any. - - Notes: - 1. this does not perform exact matching; rather it simplifies URIs in various - ways to avoid irrelevant differences, including performing a - case-insensitive comparison, ignoring protocol differences (http vs https), - ignoring extensions representing MIME types (.txt vs .html, etc.), etc. - 2. SPDX exception list URIs are not guaranteed to be unique" - [uri] - (when-not (s/blank? uri) - (let [suri (lcu/simplify-uri uri)] - (some-> (seq (map :id (filter #(some identity (map (fn [see-also] (s/starts-with? suri see-also)) (distinct (map lcu/simplify-uri (:see-also %))))) - @exception-list-d))) - set)))) - (defn id->name "Returns the name of the given license or exception identifier; either the official SPDX license or exception name or (if the id is not a listed SPDX id but is used by the library) an unofficial name. Returns the id as-is if unable to determine a name." [id] - (cond (sl/listed-id? id) (:name (sl/id->info id)) - (se/listed-id? id) (:name (se/id->info id)) - (= (s/lower-case id) "non-spdx-public-domain") "Public domain" - :else id)) + (cond (sl/listed-id? id) (:name (sl/id->info id)) + (se/listed-id? 
id) (:name (se/id->info id)) + (= (s/lower-case id) "licenseref-lice-comb-public-domain") "Public domain" + :else id)) ; Index of alias regexes diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index 2604110..b806ade 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -34,98 +34,98 @@ (is (nil? (dep->ids ['com.github.pmonks/invalid-project {:deps/manifest :mvn :mvn/version "0.0.1"}]))) ; Invalid GA (is (nil? (dep->ids ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.0.0-SNAPSHOT"}])))) ; Invalid V (testing "Valid deps - single license" - (is (= #{"Apache-2.0"} (dep->ids ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) - (is (= #{"NON-SPDX-Public-Domain"} (dep->ids ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile {:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} 
(dep->ids ['com.google.guava/guava {:deps/manifest :mvn :mvn/version "31.0.1-jre"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"CDDL-1.0"} (dep->ids ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) - (is (= #{"CDDL-1.0"} (dep->ids ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['javax.enterprise/cdi-api {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) - (is (= #{"CC0-1.0"} (dep->ids ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids 
['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids 
['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.codec {:deps/manifest :mvn :mvn/version "0.1.1"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.json {:deps/manifest :mvn :mvn/version "2.4.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer 
{:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) - (is (= #{"MIT"} (dep->ids ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn :mvn/version "2.6.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-component-annotations {:deps/manifest :mvn :mvn/version "2.1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) - (is (= #{"Plexus"} (dep->ids ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version 
"2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "1.7"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) - (is (= #{"NON-SPDX-Public-Domain"} (dep->ids ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) + (is (= #{"Apache-2.0"} (dep->ids ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm 
{:deps/manifest :mvn :mvn/version "5.2"}]))) + (is (= #{"LicenseRef-lice-comb-public-domain"} (dep->ids ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['com.google.guava/guava {:deps/manifest :mvn :mvn/version "31.0.1-jre"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"CDDL-1.0"} (dep->ids ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) + (is (= #{"CDDL-1.0"} (dep->ids ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['javax.enterprise/cdi-api {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['junit/junit 
{:deps/manifest :mvn :mvn/version "4.13.2"}]))) + (is (= #{"CC0-1.0"} (dep->ids ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids 
['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcprov-jdk15on 
{:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.codec {:deps/manifest :mvn :mvn/version "0.1.1"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.json {:deps/manifest :mvn :mvn/version "2.4.0"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) + (is (= #{"MIT"} (dep->ids ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn 
:mvn/version "2.6.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-component-annotations {:deps/manifest :mvn :mvn/version "2.1.0"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn :mvn/version "0.3.5"}]))) + (is (= #{"EPL-1.0"} (dep->ids ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) + (is (= #{"Plexus"} (dep->ids ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= #{"MIT"} (dep->ids ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-cipher 
{:deps/manifest :mvn :mvn/version "1.7"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) + (is (= #{"LicenseRef-lice-comb-public-domain"} (dep->ids ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) + (is (= #{"Apache-2.0"} (dep->ids ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) (testing "Valid deps - no licenses in deployed artifacts -> leverage fallbacks" (is (= #{"EPL-1.0"} (dep->ids ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) (is (= #{"EPL-1.0"} (dep->ids ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) diff --git a/test/lice_comb/maven_test.clj b/test/lice_comb/maven_test.clj index df7287b..f78ca49 100644 --- a/test/lice_comb/maven_test.clj +++ b/test/lice_comb/maven_test.clj @@ -42,7 +42,7 @@ (is (= #{"Apache-2.0"} (pom->ids (str test-data-path "/asf-cat-1.0.12.pom"))))) (testing "Real pom files - remote" (is (= #{"Apache-2.0"} (pom->ids "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) - (is (= #{"NON-SPDX-Public-Domain"} (pom->ids "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX + (is (= #{"LicenseRef-lice-comb-public-domain"} (pom->ids "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX (is (= #{"EPL-1.0"} (pom->ids "https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) (is (= #{"Apache-2.0"} (pom->ids "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) (is (= #{"Apache-2.0"} (pom->ids "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) diff --git a/test/lice_comb/spdx_test.clj b/test/lice_comb/spdx_test.clj index 45a4bd5..32cb4bd 100644 --- 
a/test/lice_comb/spdx_test.clj +++ b/test/lice_comb/spdx_test.clj @@ -20,118 +20,118 @@ (:require [clojure.test :refer [deftest testing is use-fixtures]] [clojure.java.io :as io] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.spdx :refer [fuzzy-match-name->license-ids uri->license-ids text->ids]])) + [lice-comb.spdx :refer [fuzzy-match-name->license-ids fuzzy-match-uri->license-ids text->ids]])) (use-fixtures :once fixture) -; Note: these tests should be extended indefinitely, as it exercises the guts of the matching algorithm +; Note: these tests should be extended indefinitely, as it exercises the most-utilised part of the library (matching license names found in POMs) (deftest fuzzy-match-name->license-ids-tests (testing "Nil, empty or blank names" - (is (nil? (fuzzy-match-name->license-ids nil))) - (is (nil? (fuzzy-match-name->license-ids ""))) - (is (nil? (fuzzy-match-name->license-ids " "))) - (is (nil? (fuzzy-match-name->license-ids "\n"))) - (is (nil? (fuzzy-match-name->license-ids "\t")))) + (is (nil? (fuzzy-match-name->license-ids nil))) + (is (nil? (fuzzy-match-name->license-ids ""))) + (is (nil? (fuzzy-match-name->license-ids " "))) + (is (nil? (fuzzy-match-name->license-ids "\n"))) + (is (nil? 
(fuzzy-match-name->license-ids "\t")))) (testing "Names that are SPDX license ids" - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL-3.0"))) - (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "AGPL-3.0-only"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache-2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC-BY-SA-4.0"))) - (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GPL-2.0"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GPL-2.0-with-classpath-exception")))) + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL-3.0"))) + (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "AGPL-3.0-only"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache-2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC-BY-SA-4.0"))) + (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GPL-2.0"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GPL-2.0-with-classpath-exception")))) (testing "Names" - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL) version 3.0"))) - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0 only"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License Version 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License, Version 1.0"))) - (is (= 
#{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.0"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License, Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "The MX4J License, version 1.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache Software License, Version 2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "Bouncy Castle Licence"))) ; Note spelling of 
"licence" - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License (BSD3)"))) - (is (= #{"BSD-3-Clause-Attribution"} (fuzzy-match-name->license-ids "BSD 3-Clause Attribution"))) - (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Attribution 3.0 Unported"))) - (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Creative Commons Legal Code Attribution 3.0 Unported"))) - (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "Attribution 4.0 International"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "Creative Commons Attribution Share Alike 4.0 International"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) - (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) - (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License (EPL)"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License, Version 1.0"))) - (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} 
(fuzzy-match-name->license-ids "Eclipse Public License version 2"))) - (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License v2.0 w/Classpath exception"))) - (is (= #{"JSON"} (fuzzy-match-name->license-ids "JSON License"))) - (is (= #{"LGPL-2.0"} (fuzzy-match-name->license-ids "GNU Library General Public License"))) - (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) - (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) ; Test capitalisation - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) - (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) - (is (= #{"Plexus"} (fuzzy-match-name->license-ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL) version 3.0"))) + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) + (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0 only"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License"))) + (is (= 
#{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License Version 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License, Version 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.0"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License, Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "The MX4J License, version 1.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache Software License, Version 2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2.0"))) + (is (= #{"Apache-2.0"} 
(fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "Bouncy Castle Licence"))) ; Note spelling of "licence" + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License (BSD3)"))) + (is (= #{"BSD-3-Clause-Attribution"} (fuzzy-match-name->license-ids "BSD 3-Clause Attribution"))) + (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Attribution 3.0 Unported"))) + (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Creative Commons Legal Code Attribution 3.0 Unported"))) + (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "Attribution 4.0 International"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "Creative Commons Attribution Share Alike 4.0 International"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) + (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) + (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License (EPL)"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) + (is (= #{"EPL-1.0"} 
(fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License, Version 1.0"))) + (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) + (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License version 2"))) + (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License v2.0 w/Classpath exception"))) + (is (= #{"JSON"} (fuzzy-match-name->license-ids "JSON License"))) + (is (= #{"LGPL-2.0"} (fuzzy-match-name->license-ids "GNU Library General Public License"))) + (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) + (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) ; Test capitalisation + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) + (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) + (is (= #{"Plexus"} (fuzzy-match-name->license-ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html (testing "Names that appear in licensey things, but are ambiguous" 
- (is (nil? (fuzzy-match-name->license-ids "BSD")))) + (is (nil? (fuzzy-match-name->license-ids "BSD")))) (testing "Names that appear in licensey things, but aren't in the SPDX license list, and don't have identified SPDX identifiers" - (is (= #{"NON-SPDX-Public-Domain"} (fuzzy-match-name->license-ids "Public Domain"))) - (is (= #{"NON-SPDX-Public-Domain"} (fuzzy-match-name->license-ids "Public domain"))))) + (is (= #{"LicenseRef-lice-comb-public-domain"} (fuzzy-match-name->license-ids "Public Domain"))) + (is (= #{"LicenseRef-lice-comb-public-domain"} (fuzzy-match-name->license-ids "Public domain"))))) (deftest uri->license-ids-tests (testing "Nil, empty or blank uri" - (is (nil? (uri->license-ids nil))) - (is (nil? (uri->license-ids ""))) - (is (nil? (uri->license-ids " "))) - (is (nil? (uri->license-ids "\n"))) - (is (nil? (uri->license-ids "\t")))) + (is (nil? (fuzzy-match-uri->license-ids nil))) + (is (nil? (fuzzy-match-uri->license-ids ""))) + (is (nil? (fuzzy-match-uri->license-ids " "))) + (is (nil? (fuzzy-match-uri->license-ids "\n"))) + (is (nil? 
(fuzzy-match-uri->license-ids "\t")))) (testing "URIs that appear verbatim in the SPDX license list" - (is (= "Apache-2.0" (uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= "Apache-2.0" (uri->license-ids "https://apache.org/licenses/LICENSE-2.0.txt"))) - (is (= "Apache-2.0" (uri->license-ids " https://www.apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace - (is (let [license-id (uri->license-ids "https://www.gnu.org/licenses/agpl.txt")] + (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0"))) + (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "https://apache.org/licenses/LICENSE-2.0.txt"))) + (is (= "Apache-2.0" (fuzzy-match-uri->license-ids " https://www.apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace + (is (let [license-id (fuzzy-match-uri->license-ids "https://www.gnu.org/licenses/agpl.txt")] (or (= "AGPL-3.0" license-id) (= "AGPL-3.0-only" license-id)))) - (is (= "CC-BY-SA-4.0" (uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= "GPL-2.0-with-classpath-exception" (uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) + (is (= "CC-BY-SA-4.0" (fuzzy-match-uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= "GPL-2.0-with-classpath-exception" (fuzzy-match-uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) (testing "URIs that appear in licensey things, but aren't in the SPDX license list" - (is (= "Apache-2.0" (uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= 
"Apache-2.0" (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) (defn- string-text->ids [s] From 37bae30066bd1a6b3829298a255653a22aa96541 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Thu, 29 Jun 2023 12:14:38 -0700 Subject: [PATCH 07/34] :construction: Migration to clj-spdx and fix fallout WIP --- README.md | 2 +- deps.edn | 5 +- resources/lice_comb/deps/fallbacks.edn | 5 ++ resources/lice_comb/deps/overrides.edn | 3 + resources/lice_comb/spdx/aliases.edn | 62 +++++++++++++++++++ src/lice_comb/data.clj | 27 --------- src/lice_comb/deps.clj | 44 ++++++-------- src/lice_comb/files.clj | 40 ++++++++----- src/lice_comb/impl/data.clj | 53 +++++++++++++++++ src/lice_comb/{ => impl}/utils.clj | 18 ++++-- src/lice_comb/maven.clj | 32 ++++++---- src/lice_comb/spdx.clj | 82 ++++++++++++++++---------- test/lice_comb/files_test.clj | 36 +++++------ test/lice_comb/spdx_test.clj | 34 +++++------ 14 files changed, 287 insertions(+), 156 deletions(-) create mode 100644 resources/lice_comb/deps/fallbacks.edn create mode 100644 resources/lice_comb/deps/overrides.edn create mode 100644 resources/lice_comb/spdx/aliases.edn delete mode 100644 src/lice_comb/data.clj create mode 100644 src/lice_comb/impl/data.clj rename src/lice_comb/{ => impl}/utils.clj (88%) diff --git a/README.md b/README.md index a045c8f..21b481e 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Express the correct maven dependencies in your `deps.edn`: Implementing [issue #3](https://github.com/pmonks/lice-comb/issues/3) resulted in the creation of a [new SPDX-specific library (`clj-spdx`)](https://github.com/pmonks/clj-spdx) that leverages [the official SPDX Java library](https://github.com/spdx/Spdx-Java-Library). Because of irreconcilable differences in how that Java library represents license data compared to `lice-comb` v1.x, as well as the addition of support for SPDX license exceptions, it was not possible to retain backwards compatibility. 
-The backwards compatibility breaking changes are limited to the `lice-comb.spdx` namespace however, so if you're not using that namespace you should be unaffected. If you are using that namespace, migration involves migrating to [`clj-spdx`](https://github.com/pmonks/clj-spdx). It offers all of the same functionality (and more) as the `lice-comb` v1.x functionality, and by virtue of using the official SPDX Java library is far more battle tested than that code was. +The backwards compatibility breaking changes are limited to the `lice-comb.spdx` namespace however, so if you're not using that namespace you should be unaffected. If you are using that namespace, migration involves migrating to [`clj-spdx`](https://github.com/pmonks/clj-spdx). It offers all of the same functionality (and more) as the `lice-comb` v1.x functionality, and by virtue of using the official SPDX Java library is far more battle tested than the earlier code. ## Contributor Information diff --git a/deps.edn b/deps.edn index 036eb64..f85178e 100644 --- a/deps.edn +++ b/deps.edn @@ -22,8 +22,7 @@ org.clojure/data.xml {:mvn/version "0.2.0-alpha8"} clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} tolitius/xml-in {:mvn/version "0.1.1"} - com.github.pmonks/clj-spdx {:mvn/version "1.0.48"}} + com.github.pmonks/clj-spdx {:mvn/version "1.0.74"}} :aliases - {:build {:deps {io.github.clojure/tools.build {:git/tag "v0.9.4" :git/sha "76b78fe"} - com.github.pmonks/pbr {:mvn/version "RELEASE"}} + {:build {:deps {com.github.pmonks/pbr {:mvn/version "RELEASE"}} :ns-default pbr.build}}} diff --git a/resources/lice_comb/deps/fallbacks.edn b/resources/lice_comb/deps/fallbacks.edn new file mode 100644 index 0000000..2f61dea --- /dev/null +++ b/resources/lice_comb/deps/fallbacks.edn @@ -0,0 +1,5 @@ +{ + borkdude/sci.impl.reflector {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/borkdude/sci/blob/master/LICENSE"} + org.ow2.asm/asm {:spdx true :licenses #{"BSD-3-Clause"} :evidence 
"https://asm.ow2.io/license.html"} + slipset/deps-deploy {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/slipset/deps-deploy/blob/master/LICENSE"} +} \ No newline at end of file diff --git a/resources/lice_comb/deps/overrides.edn b/resources/lice_comb/deps/overrides.edn new file mode 100644 index 0000000..f7052bc --- /dev/null +++ b/resources/lice_comb/deps/overrides.edn @@ -0,0 +1,3 @@ +{ + javax.mail/mail {:spdx true :licenses #{"CDDL-1.1" "GPL-2.0-with-classpath-exception"} :evidence "https://javaee.github.io/javamail/JavaMail-License"} +} \ No newline at end of file diff --git a/resources/lice_comb/spdx/aliases.edn b/resources/lice_comb/spdx/aliases.edn new file mode 100644 index 0000000..4b656e5 --- /dev/null +++ b/resources/lice_comb/spdx/aliases.edn @@ -0,0 +1,62 @@ +; Important note: we can't use regexes as map keys as they don't implement equality / hash 🙄 +; Instead we use the string representation, and compile-on-demand+memoize + +; Note escaping of \, as these are string, not regex, literals +{ + "3-clause\\s+bsd\\s+license" #{"BSD-3-Clause"} + "apache(\\s+software)?(\\s+license(s)?(\\s*[,-])?)?(\\s+v(ersion)?)?\\s*1(\\.0)?" #{"Apache-1.0"} + "apache(\\s+software)?(\\s+license(s)?(\\s*[,-])?)?(\\s+v(ersion)?)?\\s*1\\.1" #{"Apache-1.1"} + "apache(\\s+software)?(\\s+license(s)?(\\s*[,-])?)?(\\s+v(ersion)?)?\\s*2(\\.0)?" #{"Apache-2.0"} + "apache(\\s+software)?(\\s+license(s)?)?" #{"Apache-1.0"} ; Assume earliest version + "attribution\\s+3\\.0\\s+unported" #{"CC-BY-3.0"} + "attribution\\s+4\\.0\\s+international" #{"CC-BY-4.0"} + "bouncy\\s+castle\\s+licence" #{"MIT"} ; See https://github.com/spdx/license-list-XML/issues/910 + "bsd\\s+3-clause\\s+attribution" #{"BSD-3-Clause-Attribution"} + "bsd\\s+3-clause\\s+license" #{"BSD-3-Clause"} + "bsd\\s+license\\s+3" #{"BSD-3-Clause"} + "cc0(\\s+1(\\.0)?)?(\\s+universal)?" 
#{"CC0-1.0"} + "cddl" #{"CDDL-1.0"} + "cddl/gplv2\\+ce" #{"CDDL-1.0" "GPL-2.0-with-classpath-exception"} + "cddl\\+gpl\\s+license" #{"CDDL-1.0" "GPL-2.0"} + "cddl\\s+1(\\.0)?\\+gpl\\s+license" #{"CDDL-1.1" "GPL-2.0"} + "cddl\\s+1\\.1\\+gpl\\s+license" #{"CDDL-1.1" "GPL-2.0"} + "cddl\\s+\\+\\s+gpl\\s*v2\\s+with\\s+classpath\\s+exception" #{"CDDL-1.0" "GPL-2.0-with-classpath-exception"} + "common\\s+development\\s+and\\s+distribution\\s+license(\\s+\\(cddl\\))?\\s+version\\s+1(\\.0|\\s+|\\z)" #{"CDDL-1.0"} + "common\\s+development\\s+and\\s+distribution\\s+license(\\s+\\(cddl\\))?\\s+version\\s+1\\.1" #{"CDDL-1.1"} + "copyright(\\s+\\(c\\)|©)?\\s+2011\\s+matthew\\s+lee\\s+hinman" #{"MIT"} + "copyright\\s+\\(c\\)\\s+2000-2012\\s+jason\\s+hunter\\s+\\&\\s+brett\\s+mclaughlin" #{"NON-SPDX-JDOM"} ; Note: not an SPDX license + "creative\\s+commons(\\s+legal\\s+code)?\\s+attribution\\s+3\\.0\\s+unported" #{"CC-BY-3.0"} + "creative\\s+commons\\s+attribution-sharealike\\s+4\\.0\\s+international\\s+public\\s+license" #{"CC-BY-SA-4.0"} + "cup\\s+parser\\s+generator\\s+copyright\\s+notice,\\s+license,\\s+and\\s+disclaimer" #{"MIT"} ; See https://www.apache.org/legal/resolved.html#category-a + "do\\s+what\\s+the\\s+fuck\\s+you\\s+want\\s+to\\s+public\\s+license" #{"WTFPL"} + "dual\\s+license\\s+consisting\\s+of\\s+the\\s+cddl\\s+v1(\\.0)?\\s+and\\s+gpl\\s+v2" #{"CDDL-1.0" "GPL-2.0"} + "dual\\s+license\\s+consisting\\s+of\\s+the\\s+cddl\\s+v1\\.1\\s+and\\s+gpl\\s+v2" #{"CDDL-1.1" "GPL-2.0"} + "eclipse\\s+distribution\\s+license\\s+-\\s+v\\s+1\\.0" #{"BSD-3-Clause"} ; See https://wiki.spdx.org/view/Legal_Team/License_List/Licenses_Under_Consideration#Processed_License_Requests + "eclipse\\s+public\\s+license" #{"EPL-1.0"} ; Assume earliest version + "eclipse\\s+public\\s+license(\\s*[,-])?\\s*v(ersion)?\\s*1(\\.0|\\s+|\\z)" #{"EPL-1.0"} + "eclipse\\s+public\\s+license(\\s*[,-])?\\s*v(ersion)?\\s*1\\.1" #{"EPL-1.1"} + 
"eclipse\\s+public\\s+license(\\s*[,-])?\\s*v(ersion)?\\s*2(\\.0|\\s+|\\z)" #{"EPL-2.0"} + "json\\.org" #{"JSON"} + "gnu\\s+affero\\s+general\\s+public\\s+license" #{"AGPL-3.0"} ; Assume earliest version + "gnu\\s+affero\\s+general\\s+public\\s+license(\\s+v(ersion)?)?\\s+3" #{"AGPL-3.0"} + "gnu\\s+general\\s+public\\s+license" #{"GPL-1.0"} ; Assume earliest version + "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+1" #{"GPL-1.0"} + "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+2" #{"GPL-2.0"} + "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+2(\\s+\\(gpl2\\))?(\\s*[,-])?\\s+with\\s+the(\\s+gnu)?\\s+classpath\\s+exception" #{"GPL-2.0-with-classpath-exception"} + "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+3" #{"GPL-3.0"} + "gnu\\s+lesser\\s+general\\s+public\\s+license" #{"LGPL-2.1"} ; Assume earliest version (note: "lesser" didn't appear until v2.1 - it was "library before that") + "gnu\\s+lesser\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+2\\.1" #{"LGPL-2.1"} + "gnu\\s+lesser\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+3" #{"LGPL-3.0"} + "gnu\\s+library\\s+general\\s+public\\s+license" #{"LGPL-2.0"} ; There was only ever one version of the "GNU Library General Public License" (v2.0) - "Library" was renamed to "Lesser" as of v2.1 + "gwt\\s+terms" #{"Apache-2.0"} ; See http://www.gwtproject.org/terms.html + "lesser\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+3\\s+or\\s+greater" #{"LGPL-3.0"} + "mozilla\\s+public\\s+license" #{"MPL-1.0"} + "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+1(\\.0|\\s+|\\z)" #{"MPL-1.0"} + "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+1.1" #{"MPL-1.1"} + "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+2(\\.0|\\s+|\\z)" #{"MPL-2.0"} + "new\\s+bsd\\s+license" #{"BSD-3-Clause"} + "public\\s+domain" #{"LicenseRef-lice-comb-public-domain"} + 
"similar\\s+to\\s+apache\\s+license\\s+but\\s+with\\s+the\\s+acknowledgment\\s+clause\\s+removed" #{"Plexus"} ; Note: see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + "the\\s+mx4j\\s+license(\\s*[,-])?\\s+version\\s+1\\.0" #{"Apache-1.1"} ; See https://wiki.spdx.org/view/Legal_Team/License_List/Licenses_Under_Consideration#Processed_License_Requests + "this\\s+is\\s+free\\s+and\\s+unencumbered\\s+software\\s+released\\s+into\\s+the\\s+public\\s+domain\\." #{"Unlicense"} +} \ No newline at end of file diff --git a/src/lice_comb/data.clj b/src/lice_comb/data.clj deleted file mode 100644 index 623fe13..0000000 --- a/src/lice_comb/data.clj +++ /dev/null @@ -1,27 +0,0 @@ -; -; Copyright © 2021 Peter Monks -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; SPDX-License-Identifier: Apache-2.0 -; - -(ns lice-comb.data - "Data handling functionality." - (:require [lice-comb.utils :as lcu])) - -(defn uri-for-data - "Returns a URI (as a string) for the given data file. May be a local file path or a URI to a remote resource." - [file] - (when file - (str (lcu/getenv "LICE_COMB_DATA_DIR" "https://raw.githubusercontent.com/pmonks/lice-comb/data") file))) diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 137b2a9..0938458 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -17,29 +17,17 @@ ; (ns lice-comb.deps - "deps (in tools.deps lib-map format) related functionality." 
- (:require [clojure.string :as s] - [clojure.reflect :as cr] - [clojure.edn :as edn] - [spdx.licenses :as sl] - [lice-comb.maven :as lcm] - [lice-comb.files :as lcf] - [lice-comb.data :as lcd] - [lice-comb.utils :as lcu])) + "Functionality related to finding and determining license information from + deps in tools.deps lib-map format." + (:require [clojure.string :as s] + [spdx.licenses :as sl] + [lice-comb.maven :as lcm] + [lice-comb.files :as lcf] + [lice-comb.impl.data :as lcd] + [lice-comb.impl.utils :as lcu])) -(def ^:private overrides-uri (lcd/uri-for-data "/deps/overrides.edn")) -(def ^:private overrides-d (delay - (try - (edn/read-string (slurp overrides-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " overrides-uri ". Please check your internet connection and try again.") {} e)))))) - -(def ^:private fallbacks-uri (lcd/uri-for-data "/deps/fallbacks.edn")) -(def ^:private fallbacks-d (delay - (try - (edn/read-string (slurp fallbacks-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " fallbacks-uri ". Please check your internet connection and try again.") {} e)))))) +(def ^:private overrides-d (delay (lcd/load-edn-resource "lice_comb/deps/overrides.edn"))) +(def ^:private fallbacks-d (delay (lcd/load-edn-resource "lice_comb/deps/fallbacks.edn"))) (defn- check-overrides "Checks if an override should be used for the given dep" @@ -49,7 +37,8 @@ (:licenses (get @overrides-d gav (get @overrides-d ga)))))) ; Lookup overrides both with and without the version (defn- check-fallbacks - "Checks if a fallback should be used for the given dep, given the set of detected ids" + "Checks if a fallback should be used for the given dep, given the set of + detected ids" [ga ids] (if (or (empty? ids) (every? #(not (sl/listed-id? %)) ids)) @@ -57,13 +46,15 @@ ids)) (defn- normalise-dep - "Normalises a dep, by removing any classifier suffixes from the artifact-id (e.g. 
the $blah suffix in com.foo/bar$blah)." + "Normalises a dep, by removing any classifier suffixes from the artifact-id + (e.g. the $blah suffix in com.foo/bar$blah)." [[ga info]] (when ga [(symbol (first (s/split (str ga) #"\$"))) info])) (defmulti dep->ids - "Attempt to detect the license(s) in a tools.deps style dep (a MapEntry or two-element sequence of [groupId/artifactId dep-info])." + "Attempt to detect the license(s) in a tools.deps style dep (a MapEntry or + two-element sequence of [groupId/artifactId dep-info])." {:arglists '([[ga info]])} (fn [[_ info]] (:deps/manifest info))) @@ -99,7 +90,8 @@ (throw (ex-info (str "Unexpected manifest type '" (:deps/manifest (second dep)) "' for dependency " dep) {:dep dep}))) (defn deps-licenses - "Attempt to detect the license(s) in a tools.deps 'lib map', returning a new lib map with the licenses assoc'ed in (in key :lice-comb/licenses)" + "Attempt to detect the license(s) in a tools.deps 'lib map', returning a new + lib map with the licenses assoc'ed in (in key :lice-comb/licenses)" [deps] (when deps (into {} diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 8845cde..5b5c550 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -17,18 +17,20 @@ ; (ns lice-comb.files - "Files related functionality." - (:require [clojure.string :as s] - [clojure.set :as set] - [clojure.java.io :as io] - [lice-comb.spdx :as lcs] - [lice-comb.maven :as lcm] - [lice-comb.utils :as lcu])) + "Functionality related to finding and determining license information from + files and directories." + (:require [clojure.string :as s] + [clojure.set :as set] + [clojure.java.io :as io] + [lice-comb.spdx :as lcs] + [lice-comb.maven :as lcm] + [lice-comb.impl.utils :as lcu])) (def ^:private probable-license-filenames #{"pom.xml" "license" "license.txt" "copying" "unlicense"}) ;TODO: consider "license.md" and #".+\.spdx" (see https://github.com/spdx/spdx-maven-plugin for why the latter is important)... 
(defn probable-license-file? - "Returns true if the given file-like thing (String, File, ZipEntry) is a probable license file, false otherwise." + "Returns true if the given file-like thing (String, File, ZipEntry) is a + probable license file, false otherwise." [f] (and (not (nil? f)) (let [fname (s/lower-case (lcu/filename f))] @@ -37,7 +39,9 @@ (s/ends-with? fname ".pom")))))) (defn probable-license-files - "Returns all probable license files in the given directory, recursively, as a set of java.io.File objects. dir may be a String or a java.io.File, both of which must refer to a directory." + "Returns all probable license files in the given directory, recursively, as a + set of java.io.File objects. dir may be a String or a java.io.File, both of + which must refer to a directory." [dir] (when dir (let [dir (io/file dir)] @@ -48,28 +52,32 @@ (throw (java.io.FileNotFoundException. (str dir))))))) (defn file->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given file (an InputStream or something that can have an io/input-stream opened on it). - If an InputStream is provided, the associated filename MUST also be provided as the second parameter." + "Attempts to determine the SPDX license identifier(s) (a set) from the given + file (an InputStream or something that can have an io/input-stream opened on + it). If an InputStream is provided, the associated filename MUST also be + provided as the second parameter." ([f] (file->ids f (lcu/filename f))) ([f fname] (when (and f fname) (let [fname (s/lower-case fname)] (cond (= fname "pom.xml") (lcm/pom->ids f) (s/ends-with? fname ".pom") (lcm/pom->ids f) - :else (lcs/text->ids f)))))) + :else (lcs/text->ids (io/input-stream f))))))) ; Default is to assume it's a plain text file containing license text(s) (defn dir->ids - "Attempt to detect the license(s) in a directory. dir may be a String or a java.io.File, both of which must refer to a directory." 
+ "Attempt to detect the license(s) in a directory. dir may be a String or a + java.io.File, both of which must refer to a directory." [dir] (when dir (lcu/nset (mapcat file->ids (probable-license-files dir))))) (defn zip->ids - "Attempt to detect the license(s) in a ZIP file. zip may be a String or a java.io.File, both of which must refer to a ZIP-format compressed file." + "Attempt to detect the license(s) in a ZIP file. zip may be a String or a + java.io.File, both of which must refer to a ZIP-format compressed file." [zip] (when zip (let [zip-file (io/file zip)] - (java.util.zip.ZipFile. zip-file) ; This forces validation of the zip file - ZipInputStream does not reliably perform validation + (java.util.zip.ZipFile. zip-file) ; This no-op forces validation of the zip file - ZipInputStream does not reliably perform validation (with-open [zip-is (java.util.zip.ZipInputStream. (io/input-stream zip-file))] (loop [licenses nil entry (.getNextEntry zip-is)] @@ -77,4 +85,4 @@ (if (probable-license-file? entry) (recur (set/union licenses (file->ids zip-is (lcu/filename entry))) (.getNextEntry zip-is)) (recur licenses (.getNextEntry zip-is))) - licenses)))))) + (doall (some-> (seq licenses) set)))))))) ; Realise the result before we exit the `with-open` scope diff --git a/src/lice_comb/impl/data.clj b/src/lice_comb/impl/data.clj new file mode 100644 index 0000000..cd490d9 --- /dev/null +++ b/src/lice_comb/impl/data.clj @@ -0,0 +1,53 @@ +; +; Copyright © 2021 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.data + "Data handling functionality. Note: this namespace is not part of the public + API of lice-comb and may change without notice." + (:require [clojure.string :as s] + [clojure.java.io :as io] + [clojure.reflect :as cr] + [clojure.edn :as edn])) + +(defn load-string-resource + "Loads the given classpath resources from the classpath, returning it as a + String. Throws ex-info on error. + + Notes: + * Classpath resource paths must not start with a forward slash ('/'). + * The JVM does not support hyphens ('-') in classpath resource path elements. + Use underscore ('_') instead. + * Unlike during class loading, Clojure does not automatically switch hyphens + in classpath resource path elements to underscores. This inconsistency can + be a time-wasting trap." + [path] + (when-not (s/blank? path) + (try + (if-let [resource (io/resource path)] + (slurp resource) + (throw (ex-info (str "No resource found in classpath at " path) {}))) + (catch clojure.lang.ExceptionInfo ie + (throw ie)) + (catch Exception e + (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " path) {} e)))))) + +(defn load-edn-resource + "Loads and parses the given EDN file from the classpath." + [path] + (when-let [edn-string (load-string-resource path)] + (edn/read-string edn-string))) diff --git a/src/lice_comb/utils.clj b/src/lice_comb/impl/utils.clj similarity index 88% rename from src/lice_comb/utils.clj rename to src/lice_comb/impl/utils.clj index fc14793..b64c8a1 100644 --- a/src/lice_comb/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -16,13 +16,16 @@ ; SPDX-License-Identifier: Apache-2.0 ; -(ns lice-comb.utils - "General purpose utility fns that I seem to end up needing in every single project I write..." 
+(ns lice-comb.impl.utils + "General purpose utility fns that I seem to end up needing in every single + project I write... Note: this namespace is not part of the public API of + lice-comb and may change without notice." (:require [clojure.string :as s] [clojure.java.io :as io])) (defn map-pad - "Like map, but when presented with multiple collections of different lengths, 'pads out' the missing elements with nil rather than terminating early." + "Like map, but when presented with multiple collections of different lengths, + 'pads out' the missing elements with nil rather than terminating early." [f & cs] (loop [result nil firsts (map first cs) @@ -70,7 +73,8 @@ }))) (defn simplify-uri - "Simplifies a URI (which can be a string, java.net.URL, or java.net.URI). Returns a string." + "Simplifies a URI (which can be a string, java.net.URL, or java.net.URI). + Returns a string." [uri] (when uri (s/replace (s/replace (s/lower-case (s/trim (str uri))) @@ -78,7 +82,8 @@ "://www." "://"))) (defmulti filename - "Returns just the name component of the given file or path string, excluding any parents." + "Returns just the name component of the given file or path string, excluding + any parents." type) (defmethod filename nil @@ -105,7 +110,8 @@ (filename (.getPath url))) (defn getenv - "Obtain the given environment variable, returning default (or nil, if default is not provided) if it isn't set." + "Obtain the given environment variable, returning default (or nil, if default + is not provided) if it isn't set." ([var] (getenv var nil)) ([var default] (let [val (System/getenv var)] diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index a640fde..79c25c8 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -17,16 +17,16 @@ ; (ns lice-comb.maven - "Maven related functionality, mostly related to POMs." + "Functionality related to finding and determining license information from + Maven POMs." 
(:require [clojure.string :as s] - [clojure.set :as set] [clojure.java.io :as io] [clojure.data.xml :as xml] [clojure.java.shell :as sh] [clojure.tools.logging :as log] [xml-in.core :as xi] [lice-comb.spdx :as lcs] - [lice-comb.utils :as lcu])) + [lice-comb.impl.utils :as lcu])) (def ^:private local-maven-repo-d (delay @@ -38,7 +38,8 @@ (catch java.io.IOException _ (str (System/getProperty "user.home") "/.m2/repository"))))) -(def ^:private remote-maven-repos #{"https://repo1.maven.org/maven2" "https://repo.clojars.org"}) +; TODO: make this configurable +(def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"}) (defn- uri-resolves? "Does the given URI resolve (i.e. does the resource it points to exist)?" @@ -49,7 +50,9 @@ (= 200 (.getResponseCode http))))) (defn pom-uri-for-gav - "Attempts to locate the POM for the given GAV, which is a URI that may point to a file in the local Maven repository or a remote Maven repository (e.g. on Maven Central or Clojars)." + "Attempts to locate the POM for the given GAV, which is a URI that may point + to a file in the local Maven repository or a remote Maven repository (e.g. on + Maven Central or Clojars)." ([{:keys [group-id artifact-id version]}] (pom-uri-for-gav group-id artifact-id version)) ([group-id artifact-id version] (when (and (not (s/blank? group-id)) @@ -65,24 +68,31 @@ (defn- licenses-from-pair "Attempts to determine the license(s) (a set) from a POM license name/URL pair." 
[{:keys [name url]}] - (if-let [licenses (some-> (seq (set/union (lcs/fuzzy-match-uri->license-ids url) ; Because clojure.set functions are idiotic wrt nils 🙄 - (lcs/fuzzy-match-name->license-ids name))) - set)] + ; Attempt to find a match by URL first + (if-let [licenses (lcs/fuzzy-match-uri->license-ids url)] licenses - (when name #{(str "UNKNOWN (" name ")")}))) ; Last resort - return a dummy identifier that includes the name + ; Then match by name + (if-let [licenses (lcs/fuzzy-match-name->license-ids name)] + licenses + #{(lcs/unlisted-license-id name)}))) ; Last resort - return an unlisted identifier that includes the name (if any) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") (defmulti pom->ids - "Attempt to detect the license(s) reported in a pom.xml file. pom may be a java.io.InputStream, or anything that can be opened by clojure.java.io/input-stream." + "Attempt to detect the license(s) reported in a pom.xml file. pom may be a + java.io.InputStream, or anything that can be opened by clojure.java.io/input-stream. + + Note: if an InputStream is provided, it's the caller's responsibility to open + and close it." {:arglists '([pom])} type) +; Note: a few rare pom.xml files are missing the xmlns declaration (e.g. software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags here (defmethod pom->ids java.io.InputStream [pom-is] (let [pom-xml (xml/parse pom-is) licenses (seq (xi/find-all pom-xml [::pom/project ::pom/licenses ::pom/license])) - licenses-no-ns (seq (xi/find-all pom-xml [:project :licenses :license]))] ; Note: a few rare pom.xml files are missing the xmlns declation (e.g. 
software.amazon.ion/ion-java) - this case catches those + licenses-no-ns (seq (xi/find-all pom-xml [:project :licenses :license]))] (if (or licenses licenses-no-ns) ; Licenses block exists - process it (let [name-uri-pairs (lcu/nset (concat (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) diff --git a/src/lice_comb/spdx.clj b/src/lice_comb/spdx.clj index eb5d452..12554cd 100644 --- a/src/lice_comb/spdx.clj +++ b/src/lice_comb/spdx.clj @@ -17,36 +17,32 @@ ; (ns lice-comb.spdx - "SPDX related functionality that isn't already provided by https://github.com/pmonks/clj-spdx" + "SPDX related functionality that isn't already provided by + https://github.com/pmonks/clj-spdx" (:require [clojure.string :as s] [clojure.set :as set] [clojure.java.io :as io] - [clojure.reflect :as cr] - [clojure.edn :as edn] [clojure.tools.logging :as log] [spdx.licenses :as sl] [spdx.exceptions :as se] [spdx.matching :as sm] - [lice-comb.data :as lcd] - [lice-comb.utils :as lcu])) + [spdx.expressions :as sexp] + [lice-comb.impl.data :as lcd] + [lice-comb.impl.utils :as lcu])) ; The lists (def ^:private license-list-d (delay (map sl/id->info (sl/ids)))) (def ^:private exception-list-d (delay (map sl/id->info (sl/ids)))) ; License name aliases -(def ^:private aliases-uri (lcd/uri-for-data "/spdx/aliases.edn")) ; ####TODO: UPGRADE THIS TO USE LicenseRef-lice-comb-public-domain INSTEAD OF NON-SPDX-Public-Domain -(def ^:private aliases-d (delay - (try - (edn/read-string (slurp aliases-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " aliases-uri ". Please check your internet connection and try again.") {} e)))))) +(def ^:private aliases-d (delay (lcd/load-edn-resource "lice_comb/spdx/aliases.edn"))) (defn- name->license-ids "Returns the SPDX license identifier(s) (a set) for the given license name (matched case insensitively), or nil if there aren't any. 
- Note that SPDX license names are not guaranteed to be unique - see https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" + Note that SPDX license names are not guaranteed to be unique - see + https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" [name] (when-not (s/blank? name) (let [lname (s/trim (s/lower-case name))] @@ -66,20 +62,32 @@ [uri] (when-not (s/blank? uri) (let [suri (lcu/simplify-uri uri)] - (some-> (seq (map :id (filter #(some identity (map (fn [see-also] (s/starts-with? suri see-also)) (distinct (map lcu/simplify-uri (get-in % [:see-also :url]))))) + (some-> (seq (map :id (filter #(some identity (map (fn [see-also] (s/starts-with? suri see-also)) (distinct (map lcu/simplify-uri (concat (:see-also %) (get-in % [:cross-refs :url])))))) @license-list-d))) set)))) +(def public-domain-license-id "LicenseRef-lice-comb-PUBLIC-DOMAIN") +(def ^:private unlisted-license-id-prefix "LicenseRef-lice-comb-UNLISTED") + +(defn unlisted-license-id + "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) for an + unlisted license, using the given suffix." + [suffix] + (str unlisted-license-id-prefix (when-not (s/blank? suffix) (str "-" (s/replace (s/trim suffix) #"\s+" "-"))))) + (defn id->name - "Returns the name of the given license or exception identifier; either the - official SPDX license or exception name or (if the id is not a listed SPDX id - but is used by the library) an unofficial name. Returns the id as-is if unable - to determine a name." + "Returns the human readable name of the given license or exception identifier; + either the official SPDX license or exception name or (if the id is not a + listed SPDX id but is used by the library) an unofficial name. Returns the id + as-is if unable to determine a name." [id] - (cond (sl/listed-id? id) (:name (sl/id->info id)) - (se/listed-id? 
id) (:name (se/id->info id)) - (= (s/lower-case id) "licenseref-lice-comb-public-domain") "Public domain" - :else id)) + (cond (sl/listed-id? id) (:name (sl/id->info id)) + (se/listed-id? id) (:name (se/id->info id)) + (= (s/lower-case id) (s/lower-case public-domain-license-id)) "Public domain" + (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-id-prefix)) (str "Unlisted" + (when (> (count id) (count unlisted-license-id-prefix)) + (str " (" (s/replace (subs id (inc (count unlisted-license-id-prefix))) "-" " ") ")"))) + :else id)) ; Index of alias regexes @@ -92,23 +100,33 @@ (def ^:private regexes-d (delay (reverse (sort-by #(count %) (concat (keys @idx-regex-to-id-d) (keys @idx-regex-to-id-d)))))) (def ^:private re-pattern-mem (memoize re-pattern)) ; So we memomize re-pattern to save having to recompile the regex string representations every time we use them +(defn- parse-expression-and-extract-ids + [s] + (when-let [expression (sexp/parse s)] + (sexp/extract-ids expression))) + (defn fuzzy-match-name->license-ids "Fuzzily attempts to determine the SPDX license identifier(s) (a set) from the given name (a string), or nil if there aren't any. This involves three steps: - 1. checking if the name is actually an id (this is rare, but sometimes appears - in pom.xml files) + 1. checking if the name is actually an SPDX expression (this is rare, but + sometimes an SPDX identifier (which is also a valid expression) appears in + a pom.xml file) 2. looking up the name using name->license-ids - 3. falling back on a manually maintained list of common name aliases: https://github.com/pmonks/lice-comb/blob/data/spdx/aliases.edn" + 3. falling back on a manually maintained list of common name aliases: + https://github.com/pmonks/lice-comb/blob/data/spdx/aliases.edn" [name] (when-not (s/blank? 
name) (let [name (s/trim name)] - (if-let [list-id-match (sl/id->info name)] ; First we exact match on the id, for those (rare) cases where someone has used an SPDX license id as the name (e.g. in a pom.xml file) - #{(:id list-id-match)} - (if-let [list-name-matches (name->license-ids name)] ; Then we look up by name + ; 1. Parse the name as an SPDX expression, and if that succeeds, return all ids in the expression + (if-let [ids-in-expression (parse-expression-and-extract-ids name)] + ids-in-expression + ; 2. Then we look up by name + (if-let [list-name-matches (name->license-ids name)] list-name-matches - (if-let [re-name-matches (get @idx-regex-to-id-d (first (filter #(re-find (re-pattern-mem %) (s/lower-case name)) @regexes-d)))] ; Then the last resort is to match on the regexes + ; 3. Then the last resort is to match on the name regexes + (if-let [re-name-matches (get @idx-regex-to-id-d (first (filter #(re-find (re-pattern-mem %) (s/lower-case name)) @regexes-d)))] re-name-matches - (log/warn "Unable to find a license for" (str "'" name "'")))))))) + (log/warn "Unable to find a listed SPDX license for" (str "'" name "'")))))))) (defmulti text->ids "Attempts to determine the SPDX license and/or exception identifier(s) (a set) @@ -119,7 +137,7 @@ * the caller is expected to close a Reader or InputStream passed to this function (e.g. 
using clojure.core/with-open) * you cannot pass a String representation of a filename to this method - you - should pass filenames to clojure.java.io/file first" + should pass filenames through clojure.java.io/file first" {:arglists '([text])} type) @@ -132,7 +150,9 @@ (defmethod text->ids java.io.Reader [r] - (text->ids (slurp r))) + (let [sw (java.io.StringWriter.)] + (io/copy r sw) + (text->ids (str sw)))) (defmethod text->ids java.io.InputStream [is] diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index 493d058..ce269f4 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -83,23 +83,23 @@ (is (thrown? java.io.FileNotFoundException (file->ids "this_file_does_not_exist")))) (testing "License files" ; (is (= #{"Apache-1.0"} (file->ids "https://www.apache.org/licenses/LICENSE-1.0"))) ; Note: this page incorrectly lists itself as Apache 1.1 - (is (= #{"CC-BY-4.0"} (file->ids (str test-data-path "/CC-BY-4.0/LICENSE")))) - (is (= #{"MPL-2.0"} (file->ids (str test-data-path "/MPL-2.0/LICENSE")))) - (is (= #{"Apache-1.1"} (file->ids "https://www.apache.org/licenses/LICENSE-1.1"))) - (is (= #{"Apache-2.0"} (file->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"EPL-1.0"} (file->ids "https://www.eclipse.org/org/documents/epl-1.0/EPL-1.0.txt"))) - (is (= #{"EPL-2.0"} (file->ids "https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt"))) - (is (= #{"CDDL-1.0"} (file->ids "https://spdx.org/licenses/CDDL-1.0.txt"))) - (is (= #{"CDDL-1.1"} (file->ids "https://spdx.org/licenses/CDDL-1.1.txt"))) - (is (= #{"GPL-1.0"} (file->ids "https://www.gnu.org/licenses/gpl-1.0.txt"))) - (is (= #{"GPL-2.0"} (file->ids "https://www.gnu.org/licenses/gpl-2.0.txt"))) - (is (= #{"GPL-3.0"} (file->ids "https://www.gnu.org/licenses/gpl-3.0.txt"))) - (is (= #{"LGPL-2.0"} (file->ids "https://www.gnu.org/licenses/lgpl-2.0.txt"))) - (is (= #{"LGPL-2.1"} (file->ids "https://www.gnu.org/licenses/lgpl-2.1.txt"))) - (is (= 
#{"LGPL-3.0"} (file->ids "https://www.gnu.org/licenses/lgpl-3.0.txt"))) - (is (= #{"AGPL-3.0"} (file->ids "https://www.gnu.org/licenses/agpl-3.0.txt"))) - (is (= #{"Unlicense"} (file->ids "https://unlicense.org/UNLICENSE"))) - (is (= #{"WTFPL"} (file->ids "http://www.wtfpl.net/txt/copying/")))) + (is (= #{"CC-BY-4.0"} (file->ids (str test-data-path "/CC-BY-4.0/LICENSE")))) + (is (= #{"MPL-2.0" "MPL-2.0-no-copyleft-exception"} (file->ids (str test-data-path "/MPL-2.0/LICENSE")))) + (is (= #{"Apache-1.1"} (file->ids "https://www.apache.org/licenses/LICENSE-1.1"))) + (is (= #{"Apache-2.0"} (file->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"EPL-1.0"} (file->ids "https://www.eclipse.org/org/documents/epl-1.0/EPL-1.0.txt"))) + (is (= #{"EPL-2.0"} (file->ids "https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt"))) + (is (= #{"CDDL-1.0"} (file->ids "https://spdx.org/licenses/CDDL-1.0.txt"))) + (is (= #{"CDDL-1.1"} (file->ids "https://spdx.org/licenses/CDDL-1.1.txt"))) + (is (= #{"GPL-1.0"} (file->ids "https://www.gnu.org/licenses/gpl-1.0.txt"))) + (is (= #{"GPL-2.0"} (file->ids "https://www.gnu.org/licenses/gpl-2.0.txt"))) + (is (= #{"GPL-3.0"} (file->ids "https://www.gnu.org/licenses/gpl-3.0.txt"))) + (is (= #{"LGPL-2.0"} (file->ids "https://www.gnu.org/licenses/lgpl-2.0.txt"))) + (is (= #{"LGPL-2.1"} (file->ids "https://www.gnu.org/licenses/lgpl-2.1.txt"))) + (is (= #{"LGPL-3.0"} (file->ids "https://www.gnu.org/licenses/lgpl-3.0.txt"))) + (is (= #{"AGPL-3.0"} (file->ids "https://www.gnu.org/licenses/agpl-3.0.txt"))) + (is (= #{"Unlicense"} (file->ids "https://unlicense.org/UNLICENSE"))) + (is (= #{"WTFPL"} (file->ids "http://www.wtfpl.net/txt/copying/")))) (testing "POM files" (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/simple.pom")))) (is (= #{"BSD-3-Clause"} (file->ids (str test-data-path "/no-xml-ns.pom")))) @@ -117,7 +117,7 @@ (is (thrown? 
java.io.FileNotFoundException (dir->ids "this_directory_does_not_exist"))) (is (thrown? java.nio.file.NotDirectoryException (dir->ids "deps.edn")))) (testing "Valid directory" - (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "CC-BY-4.0"} (dir->ids "."))))) + (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "MPL-2.0-no-copyleft-exception" "CC-BY-4.0"} (dir->ids "."))))) (deftest zip->ids-tests (testing "Nil, empty, or blank zip file name" diff --git a/test/lice_comb/spdx_test.clj b/test/lice_comb/spdx_test.clj index 32cb4bd..1fb68d1 100644 --- a/test/lice_comb/spdx_test.clj +++ b/test/lice_comb/spdx_test.clj @@ -20,7 +20,7 @@ (:require [clojure.test :refer [deftest testing is use-fixtures]] [clojure.java.io :as io] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.spdx :refer [fuzzy-match-name->license-ids fuzzy-match-uri->license-ids text->ids]])) + [lice-comb.spdx :refer [fuzzy-match-name->license-ids fuzzy-match-uri->license-ids]])) (use-fixtures :once fixture) @@ -114,25 +114,24 @@ (deftest uri->license-ids-tests (testing "Nil, empty or blank uri" - (is (nil? (fuzzy-match-uri->license-ids nil))) - (is (nil? (fuzzy-match-uri->license-ids ""))) - (is (nil? (fuzzy-match-uri->license-ids " "))) - (is (nil? (fuzzy-match-uri->license-ids "\n"))) - (is (nil? (fuzzy-match-uri->license-ids "\t")))) + (is (nil? (fuzzy-match-uri->license-ids nil))) + (is (nil? (fuzzy-match-uri->license-ids ""))) + (is (nil? (fuzzy-match-uri->license-ids " "))) + (is (nil? (fuzzy-match-uri->license-ids "\n"))) + (is (nil? 
(fuzzy-match-uri->license-ids "\t")))) (testing "URIs that appear verbatim in the SPDX license list" - (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "https://apache.org/licenses/LICENSE-2.0.txt"))) - (is (= "Apache-2.0" (fuzzy-match-uri->license-ids " https://www.apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace - (is (let [license-id (fuzzy-match-uri->license-ids "https://www.gnu.org/licenses/agpl.txt")] - (or (= "AGPL-3.0" license-id) - (= "AGPL-3.0-only" license-id)))) - (is (= "CC-BY-SA-4.0" (fuzzy-match-uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= "GPL-2.0-with-classpath-exception" (fuzzy-match-uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids " http://apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace + (is (= #{"AGPL-3.0-or-later" "AGPL-3.0-only" "AGPL-3.0"} (fuzzy-match-uri->license-ids "https://www.gnu.org/licenses/agpl.txt"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) (testing "URIs that appear in licensey things, but aren't in the SPDX license list" - (is (= "Apache-2.0" (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (fuzzy-match-uri->license-ids 
"https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) +(comment ; We don't test text->ids, since it's been moved to clj-spdx, where it is far more extensively tested (defn- string-text->ids [s] (with-open [is (io/input-stream (.getBytes s "UTF-8"))] @@ -155,3 +154,4 @@ (is (= #{"AGPL-3.0"} (string-text->ids "GNU AFFERO GENERAL PUBLIC LICENSE\nVersion 3, 19 November 2007"))) (is (= #{"CC-BY-SA-4.0"} (string-text->ids "Creative Commons Attribution-ShareAlike\n4.0 International Public License"))) (is (= #{"JSON"} (string-text->ids "Copyright (c) 2002 JSON.org"))))) +) \ No newline at end of file From 4bd9bbb845a5fe4f38d1448771fb81ecc373a065 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 14 Jul 2023 15:14:25 -0700 Subject: [PATCH 08/34] :construction: Add 579 license names obtained from pom.xml files on Clojars, and start working on fixing ensuing test failures --- README.md | 39 +- deps.edn | 4 +- .../lice_comb/{spdx => matching}/aliases.edn | 2 +- src/lice_comb/deps.clj | 6 +- src/lice_comb/files.clj | 14 +- src/lice_comb/impl/utils.clj | 19 +- src/lice_comb/{spdx.clj => matching.clj} | 59 +- src/lice_comb/maven.clj | 8 +- test/lice_comb/files_test.clj | 30 +- test/lice_comb/matching_test.clj | 682 ++++++++++++++++++ test/lice_comb/spdx_test.clj | 157 ---- test/lice_comb/untitled.clj | 33 + 12 files changed, 825 insertions(+), 228 deletions(-) rename resources/lice_comb/{spdx => matching}/aliases.edn (99%) rename src/lice_comb/{spdx.clj => matching.clj} (79%) create mode 100644 test/lice_comb/matching_test.clj delete mode 100644 test/lice_comb/spdx_test.clj create mode 100644 test/lice_comb/untitled.clj diff --git a/README.md b/README.md index 21b481e..fb810f6 100644 --- a/README.md +++ b/README.md @@ -13,39 +13,44 @@ A Clojure library for software 
license detection. It does this by combing throu This library leverages, and is inspired by, the *excellent* [SPDX project](https://spdx.dev/). It's a great shame that it doesn't have greater traction in the Java & Clojure (and wider open source) communities. If you're new to SPDX and would prefer to read a primer rather than dry specification documents, I can thoroughly recommend [David A. Wheeler's SPDX Tutorial](https://github.com/david-a-wheeler/spdx-tutorial#spdx-tutorial). -## Using the library +## Installation -### Documentation +`lice-comb` is available as a Maven artifact from [Clojars](https://clojars.org/com.github.pmonks/lice-comb). -[API documentation is available here](https://pmonks.github.io/lice-comb/). +### Trying it Out -[An FAQ is available here](https://github.com/pmonks/lice-comb/wiki/FAQ). +#### Clojure CLI -### Dependency +```shell +$ # Where #.#.# is replaced with an actual version number (see badge above) +$ clj -Sdeps '{:deps {com.github.pmonks/lice-comb {:mvn/version "#.#.#"}}}' +``` -Express the correct maven dependencies in your `deps.edn`: +#### Leiningen -```edn -{:deps {com.github.pmonks/lice-comb {:mvn/version "LATEST_CLOJARS_VERSION"}}} +```shell +$ lein try com.github.pmonks/lice-comb ``` -### Require one or more of the namespaces +#### deps-try -```clojure -(ns your.ns - (:require [lice-comb.deps :as lcd] - [lice-comb.files :as lcf] - [lice-comb.maven :as lcm] - [lice-comb.spdx :as lcs])) +```shell +$ deps-try com.github.pmonks/lice-comb ``` +### API Documentation + +[API documentation is available here](https://pmonks.github.io/lice-comb/), or [here on cljdoc](https://cljdoc.org/d/com.github.pmonks/lice-comb/). + +[An FAQ is available here](https://github.com/pmonks/lice-comb/wiki/FAQ). 
+ ## Upgrading -### 1.x -> 2.0 +### 1.x -> 2.x Implementing [issue #3](https://github.com/pmonks/lice-comb/issues/3) resulted in the creation of a [new SPDX-specific library (`clj-spdx`)](https://github.com/pmonks/clj-spdx) that leverages [the official SPDX Java library](https://github.com/spdx/Spdx-Java-Library). Because of irreconcilable differences in how that Java library represents license data compared to `lice-comb` v1.x, as well as the addition of support for SPDX license exceptions, it was not possible to retain backwards compatibility. -The backwards compatibility breaking changes are limited to the `lice-comb.spdx` namespace however, so if you're not using that namespace you should be unaffected. If you are using that namespace, migration involves migrating to [`clj-spdx`](https://github.com/pmonks/clj-spdx). It offers all of the same functionality (and more) as the `lice-comb` v1.x functionality, and by virtue of using the official SPDX Java library is far more battle tested than the earlier code. +The backwards compatibility breaking changes are limited to the (removed) `lice-comb.spdx` namespace however, so if you're not using that namespace you should be unaffected. If you are using that namespace, migration involves migrating to [`clj-spdx`](https://github.com/pmonks/clj-spdx), and (possibly) the `lice-comb.matching` namespace. 
## Contributor Information diff --git a/deps.edn b/deps.edn index f85178e..b85084a 100644 --- a/deps.edn +++ b/deps.edn @@ -22,7 +22,9 @@ org.clojure/data.xml {:mvn/version "0.2.0-alpha8"} clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} tolitius/xml-in {:mvn/version "0.1.1"} - com.github.pmonks/clj-spdx {:mvn/version "1.0.74"}} + miikka/clj-base62 {:mvn/version "0.1.0"} + com.github.pmonks/clj-spdx {:mvn/version "1.0.74"} + com.github.pmonks/rencg {:mvn/version "1.0.26"}} :aliases {:build {:deps {com.github.pmonks/pbr {:mvn/version "RELEASE"}} :ns-default pbr.build}}} diff --git a/resources/lice_comb/spdx/aliases.edn b/resources/lice_comb/matching/aliases.edn similarity index 99% rename from resources/lice_comb/spdx/aliases.edn rename to resources/lice_comb/matching/aliases.edn index 4b656e5..a04832a 100644 --- a/resources/lice_comb/spdx/aliases.edn +++ b/resources/lice_comb/matching/aliases.edn @@ -55,7 +55,7 @@ "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+1.1" #{"MPL-1.1"} "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+2(\\.0|\\s+|\\z)" #{"MPL-2.0"} "new\\s+bsd\\s+license" #{"BSD-3-Clause"} - "public\\s+domain" #{"LicenseRef-lice-comb-public-domain"} + "public\\s+domain" #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} "similar\\s+to\\s+apache\\s+license\\s+but\\s+with\\s+the\\s+acknowledgment\\s+clause\\s+removed" #{"Plexus"} ; Note: see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html "the\\s+mx4j\\s+license(\\s*[,-])?\\s+version\\s+1\\.0" #{"Apache-1.1"} ; See https://wiki.spdx.org/view/Legal_Team/License_List/Licenses_Under_Consideration#Processed_License_Requests "this\\s+is\\s+free\\s+and\\s+unencumbered\\s+software\\s+released\\s+into\\s+the\\s+public\\s+domain\\." #{"Unlicense"} diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 0938458..8bee5c3 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -21,7 +21,7 @@ deps in tools.deps lib-map format." 
(:require [clojure.string :as s] [spdx.licenses :as sl] - [lice-comb.maven :as lcm] + [lice-comb.maven :as lcmvn] [lice-comb.files :as lcf] [lice-comb.impl.data :as lcd] [lice-comb.impl.utils :as lcu])) @@ -66,9 +66,9 @@ version (:mvn/version info)] (if-let [override (check-overrides ga version)] override - (let [pom-uri (lcm/pom-uri-for-gav group-id artifact-id version) + (let [pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) license-ids (check-fallbacks ga - (if-let [license-ids (lcm/pom->ids pom-uri)] + (if-let [license-ids (lcmvn/pom->ids pom-uri)] license-ids (lcu/nset (mapcat lcf/zip->ids (:paths info)))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too license-ids))))) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 5b5c550..0935a81 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -22,8 +22,8 @@ (:require [clojure.string :as s] [clojure.set :as set] [clojure.java.io :as io] - [lice-comb.spdx :as lcs] - [lice-comb.maven :as lcm] + [lice-comb.matching :as lcmtch] + [lice-comb.maven :as lcmvn] [lice-comb.impl.utils :as lcu])) (def ^:private probable-license-filenames #{"pom.xml" "license" "license.txt" "copying" "unlicense"}) ;TODO: consider "license.md" and #".+\.spdx" (see https://github.com/spdx/spdx-maven-plugin for why the latter is important)... @@ -54,15 +54,15 @@ (defn file->ids "Attempts to determine the SPDX license identifier(s) (a set) from the given file (an InputStream or something that can have an io/input-stream opened on - it). If an InputStream is provided, the associated filename MUST also be - provided as the second parameter." + it). If an InputStream is provided, the associated filename should also be + provided as the second parameter (it is unnecessary in other cases)." ([f] (file->ids f (lcu/filename f))) ([f fname] (when (and f fname) (let [fname (s/lower-case fname)] - (cond (= fname "pom.xml") (lcm/pom->ids f) - (s/ends-with? 
fname ".pom") (lcm/pom->ids f) - :else (lcs/text->ids (io/input-stream f))))))) ; Default is to assume it's a plain text file containing license text(s) + (cond (= fname "pom.xml") (lcmvn/pom->ids f) + (s/ends-with? fname ".pom") (lcmvn/pom->ids f) + :else (lcmtch/text->ids (io/input-stream f))))))) ; Default is to assume it's a plain text file containing license text(s) (defn dir->ids "Attempt to detect the license(s) in a directory. dir may be a String or a diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index b64c8a1..0c7945c 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -21,7 +21,8 @@ project I write... Note: this namespace is not part of the public API of lice-comb and may change without notice." (:require [clojure.string :as s] - [clojure.java.io :as io])) + [clojure.java.io :as io] + [clj-base62.core :as base62])) (defn map-pad "Like map, but when presented with multiple collections of different lengths, @@ -72,6 +73,18 @@ \> "\\>" }))) +(defn base62-encode + "Encodes the given string to Base62/UTF-8." + [^String s] + (when s + (base62/encode (.getBytes s (java.nio.charset.StandardCharsets/UTF_8))))) + +(defn base62-decode + "Decodes the given Base62/UTF-8 string." + [^String s] + (when s + (java.lang.String. ^bytes (base62/decode s) (java.nio.charset.StandardCharsets/UTF_8)))) + (defn simplify-uri "Simplifies a URI (which can be a string, java.net.URL, or java.net.URI). Returns a string." @@ -109,6 +122,10 @@ [^java.net.URL url] (filename (.getPath url))) +(defmethod filename java.io.InputStream + [_] + (throw (ex-info "Cannot determine filename of an InputStream - did you forget to provide it separately?" {}))) + (defn getenv "Obtain the given environment variable, returning default (or nil, if default is not provided) if it isn't set." 
diff --git a/src/lice_comb/spdx.clj b/src/lice_comb/matching.clj similarity index 79% rename from src/lice_comb/spdx.clj rename to src/lice_comb/matching.clj index 12554cd..ff98940 100644 --- a/src/lice_comb/spdx.clj +++ b/src/lice_comb/matching.clj @@ -16,8 +16,8 @@ ; SPDX-License-Identifier: Apache-2.0 ; -(ns lice-comb.spdx - "SPDX related functionality that isn't already provided by +(ns lice-comb.matching + "Matching functionality, a lot of which is provided by https://github.com/pmonks/clj-spdx" (:require [clojure.string :as s] [clojure.set :as set] @@ -35,7 +35,13 @@ (def ^:private exception-list-d (delay (map sl/id->info (sl/ids)))) ; License name aliases -(def ^:private aliases-d (delay (lcd/load-edn-resource "lice_comb/spdx/aliases.edn"))) +(def ^:private aliases-d (delay (lcd/load-edn-resource "lice_comb/matching/aliases.edn"))) + +; Regexes used for license name matching - these are expected to include named-capturing groups called "name" and "version" +(def ^:private license-name-regexes [ + #"(?i)\s*(The\s+)?(?<name>Apache)(\s+Software)?(\s+License(s)?)?(\s*[,-])?(\s+V(ersion)?)?\s*(?<version>\d+(\.\d+)?)?\s*" + ]) + (defn- name->license-ids "Returns the SPDX license identifier(s) (a set) for the given license name @@ -69,25 +75,44 @@ (def public-domain-license-id "LicenseRef-lice-comb-PUBLIC-DOMAIN") (def ^:private unlisted-license-id-prefix "LicenseRef-lice-comb-UNLISTED") -(defn unlisted-license-id +(defn public-domain? + "Is the given id lice-comb's custom 'public domain' LicenseRef?" + [id] + (= (s/lower-case id) (s/lower-case public-domain-license-id))) + +(defn unlisted? + "Is the given id a lice-comb custom 'unlisted' LicenseRef?" + [id] + (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-id-prefix))) + +(defn name->unlisted "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) for an - unlisted license, using the given suffix." - [suffix] - (str unlisted-license-id-prefix (when-not (s/blank? 
suffix) (str "-" (s/replace (s/trim suffix) #"\s+" "-"))))) + unlisted license, with the given name appended as Base62 (since clj-spdx + identifiers are basically constrained to [A-Z][a-z][0-9] i.e. Base62)." + [name] + (str unlisted-license-id-prefix (when-not (s/blank? name) (str "-" (lcu/base62-encode name))))) + +(defn unlisted->name + "Get the original name of the given unlisted license. Returns nil if id is nil + or is not a lice-comb unlisted LicenseRef." + [id] + (when (and id (unlisted? id)) + (str "Unlisted" + (when (> (count id) (count unlisted-license-id-prefix)) + (str " (" (lcu/base62-decode (subs id (+ 2 (count unlisted-license-id-prefix)))) ")"))))) (defn id->name "Returns the human readable name of the given license or exception identifier; - either the official SPDX license or exception name or (if the id is not a - listed SPDX id but is used by the library) an unofficial name. Returns the id - as-is if unable to determine a name." + either the official SPDX license or exception name or (if the id is a + lice-comb specific LicenseRef) an unofficial name. Returns the id verbatim if + unable to determine a name." [id] - (cond (sl/listed-id? id) (:name (sl/id->info id)) - (se/listed-id? id) (:name (se/id->info id)) - (= (s/lower-case id) (s/lower-case public-domain-license-id)) "Public domain" - (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-id-prefix)) (str "Unlisted" - (when (> (count id) (count unlisted-license-id-prefix)) - (str " (" (s/replace (subs id (inc (count unlisted-license-id-prefix))) "-" " ") ")"))) - :else id)) + (when-not (s/blank? id) + (cond (sl/listed-id? id) (:name (sl/id->info id)) + (se/listed-id? id) (:name (se/id->info id)) + (public-domain? id) "Public domain" + (unlisted? 
id) (str "Unlisted" (when-let [original (unlisted->name id)] (str " (" original ")"))) + :else id))) ; Index of alias regexes diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index 79c25c8..61c9158 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -25,7 +25,7 @@ [clojure.java.shell :as sh] [clojure.tools.logging :as log] [xml-in.core :as xi] - [lice-comb.spdx :as lcs] + [lice-comb.matching :as lcmtch] [lice-comb.impl.utils :as lcu])) (def ^:private local-maven-repo-d @@ -69,12 +69,12 @@ "Attempts to determine the license(s) (a set) from a POM license name/URL pair." [{:keys [name url]}] ; Attempt to find a match by URL first - (if-let [licenses (lcs/fuzzy-match-uri->license-ids url)] + (if-let [licenses (lcmtch/fuzzy-match-uri->license-ids url)] licenses ; Then match by name - (if-let [licenses (lcs/fuzzy-match-name->license-ids name)] + (if-let [licenses (lcmtch/fuzzy-match-name->license-ids name)] licenses - #{(lcs/unlisted-license-id name)}))) ; Last resort - return an unlisted identifier that includes the name (if any) + #{(lcmtch/name->unlisted name)}))) ; Last resort - return an unlisted identifier that includes the name (if any) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index ce269f4..8db6d83 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -81,25 +81,15 @@ (is (thrown? java.io.FileNotFoundException (file->ids "\t")))) (testing "Non-existent files" (is (thrown? 
java.io.FileNotFoundException (file->ids "this_file_does_not_exist")))) - (testing "License files" -; (is (= #{"Apache-1.0"} (file->ids "https://www.apache.org/licenses/LICENSE-1.0"))) ; Note: this page incorrectly lists itself as Apache 1.1 - (is (= #{"CC-BY-4.0"} (file->ids (str test-data-path "/CC-BY-4.0/LICENSE")))) - (is (= #{"MPL-2.0" "MPL-2.0-no-copyleft-exception"} (file->ids (str test-data-path "/MPL-2.0/LICENSE")))) - (is (= #{"Apache-1.1"} (file->ids "https://www.apache.org/licenses/LICENSE-1.1"))) - (is (= #{"Apache-2.0"} (file->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"EPL-1.0"} (file->ids "https://www.eclipse.org/org/documents/epl-1.0/EPL-1.0.txt"))) - (is (= #{"EPL-2.0"} (file->ids "https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt"))) - (is (= #{"CDDL-1.0"} (file->ids "https://spdx.org/licenses/CDDL-1.0.txt"))) - (is (= #{"CDDL-1.1"} (file->ids "https://spdx.org/licenses/CDDL-1.1.txt"))) - (is (= #{"GPL-1.0"} (file->ids "https://www.gnu.org/licenses/gpl-1.0.txt"))) - (is (= #{"GPL-2.0"} (file->ids "https://www.gnu.org/licenses/gpl-2.0.txt"))) - (is (= #{"GPL-3.0"} (file->ids "https://www.gnu.org/licenses/gpl-3.0.txt"))) - (is (= #{"LGPL-2.0"} (file->ids "https://www.gnu.org/licenses/lgpl-2.0.txt"))) - (is (= #{"LGPL-2.1"} (file->ids "https://www.gnu.org/licenses/lgpl-2.1.txt"))) - (is (= #{"LGPL-3.0"} (file->ids "https://www.gnu.org/licenses/lgpl-3.0.txt"))) - (is (= #{"AGPL-3.0"} (file->ids "https://www.gnu.org/licenses/agpl-3.0.txt"))) - (is (= #{"Unlicense"} (file->ids "https://unlicense.org/UNLICENSE"))) - (is (= #{"WTFPL"} (file->ids "http://www.wtfpl.net/txt/copying/")))) + (testing "Files on disk" + (is (= #{"CC-BY-4.0"} (file->ids (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (= #{"MPL-2.0" "MPL-2.0-no-copyleft-exception"} (file->ids (str test-data-path "/MPL-2.0/LICENSE"))))) + (testing "URLs" + (is (= #{"Apache-2.0"} (file->ids 
"https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (file->ids (io/as-url "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (testing "InputStreams" + (is (thrown? clojure.lang.ExceptionInfo (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->ids is)))) + (is (= #{"Apache-2.0"} (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->ids is "LICENSE_2.0.txt"))))) (testing "POM files" (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/simple.pom")))) (is (= #{"BSD-3-Clause"} (file->ids (str test-data-path "/no-xml-ns.pom")))) @@ -117,7 +107,7 @@ (is (thrown? java.io.FileNotFoundException (dir->ids "this_directory_does_not_exist"))) (is (thrown? java.nio.file.NotDirectoryException (dir->ids "deps.edn")))) (testing "Valid directory" - (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "MPL-2.0-no-copyleft-exception" "CC-BY-4.0"} (dir->ids "."))))) + (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "MPL-2.0-no-copyleft-exception" "CC-BY-4.0"} (dir->ids "."))))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 (deftest zip->ids-tests (testing "Nil, empty, or blank zip file name" diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj new file mode 100644 index 0000000..c9a3e6a --- /dev/null +++ b/test/lice_comb/matching_test.clj @@ -0,0 +1,682 @@ +; +; Copyright © 2021 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.matching-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] + [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.matching :refer [unlisted? fuzzy-match-name->license-ids fuzzy-match-uri->license-ids]])) + +(use-fixtures :once fixture) + +(defn unlisted-only? + "Does the given set of ids contain only a single unlisted license?" + [ids] + (and (= 1 (count ids)) + (unlisted? (first ids)))) + +; Note: these tests should be extended indefinitely, as they exercise the most-utilised part of the library (matching license names found in POMs) +(deftest fuzzy-match-name->license-ids-tests + (testing "Nil, empty or blank names" + (is (nil? (fuzzy-match-name->license-ids nil))) + (is (nil? (fuzzy-match-name->license-ids ""))) + (is (nil? (fuzzy-match-name->license-ids " "))) + (is (nil? (fuzzy-match-name->license-ids "\n"))) + (is (nil?
(fuzzy-match-name->license-ids "\t")))) + (testing "Names that are SPDX license ids" + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL-3.0"))) + (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "AGPL-3.0-only"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache-2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC-BY-SA-4.0"))) + (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GPL-2.0"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GPL-2.0-with-classpath-exception")))) + (testing "Names that are SPDX expressions" + (is (= #{"GPL-2.0" "Classpath-exception-2.0"} (fuzzy-match-name->license-ids "GPL-2.0 WITH Classpath-exception-2.0")))) + (testing "Names" + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL) version 3.0"))) + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) + (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0 only"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License Version 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License, Version 1.0"))) + (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.0"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License, Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache 
Software License - Version 1.1"))) + (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "The MX4J License, version 1.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache Software License, Version 2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "Bouncy Castle Licence"))) ; Note spelling of "licence" + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License (BSD3)"))) + (is (= #{"BSD-3-Clause-Attribution"} (fuzzy-match-name->license-ids 
"BSD 3-Clause Attribution"))) + (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Attribution 3.0 Unported"))) + (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Creative Commons Legal Code Attribution 3.0 Unported"))) + (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "Attribution 4.0 International"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "Creative Commons Attribution Share Alike 4.0 International"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) + (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) + (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License (EPL)"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License, Version 1.0"))) + (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) + (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License version 2"))) + (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) + (is (= 
#{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License v2.0 w/Classpath exception"))) + (is (= #{"JSON"} (fuzzy-match-name->license-ids "JSON License"))) + (is (= #{"LGPL-2.0"} (fuzzy-match-name->license-ids "GNU Library General Public License"))) + (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) + (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) ; Test capitalisation + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) + (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) + (is (= #{"Plexus"} (fuzzy-match-name->license-ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (testing "Names that appear in POMs on Clojars" ; as of 2023-07-13 + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache License, Version 2.0"))) + (is (= #{"GPL-3.0"} (fuzzy-match-name->license-ids " GNU GENERAL PUBLIC LICENSE Version 3"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids " MIT License"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "${license.id}"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "2-Clause BSD"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "2-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-clause BSD license"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-clause license (New BSD License or Modified BSD License)"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "A Clojure library for Google Cloud Pub/Sub."))) + (is (= #{} (fuzzy-match-name->license-ids "AGPL"))) ; Listed license missing version + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL v3"))) + (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPLv3"))) + (is (= #{} (fuzzy-match-name->license-ids "APACHE"))) ; Listed license missing version + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "APACHE LICENSE, VERSION 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "APGL"))) ; Probable typo + (is (= #{} (fuzzy-match-name->license-ids "ASL"))) ; Listed license missing version + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "ASL 2.0"))) + (is (= #{"AFL-3.0"} (fuzzy-match-name->license-ids "Academic Free License 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Aduna BSD license"))) ; Listed license missing clause info + (is (= #{} (fuzzy-match-name->license-ids "Affero GNU Public License v3"))) + (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License v3 or later (at your option)"))) + (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License version 3 or lator"))) + (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License,"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "All Rights Reserved"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "All rights reserved"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "Amazon Software License"))) + (is (= #{} (fuzzy-match-name->license-ids "Apache"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2 License"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2 Public License"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2, see LICENSE"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0 License"))) + (is (= #{"Apache-2.0" "LLVM-exception"} (fuzzy-match-name->license-ids "Apache 2.0 with LLVM Exception"))) + (is (= #{} (fuzzy-match-name->license-ids "Apache Licence"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Licence 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Licence, Version 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Apache License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License - Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License - v 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License - v2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License V2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License V2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0, January 2004"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License v 2.0"))) + (is (= #{"Apache-2.0"} 
(fuzzy-match-name->license-ids "Apache License v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License v2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0."))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, version 2."))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, version 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Apache Public License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License, version 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Apache Software License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - v 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Apache Software Licesne"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Sofware Licencse 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Sofware License 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache V2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache V2 License"))) + (is (= #{"Apache-2.0"} 
(fuzzy-match-name->license-ids "Apache Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache license version 2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache license, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2 License"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0 License"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache2 License"))) + (is (= #{} (fuzzy-match-name->license-ids "Artistic License"))) ; Listed license missing version + (is (= #{} (fuzzy-match-name->license-ids "Artistic License/GPL"))) ; Listed license missing version + (is (= #{"Artistic-2.0"} (fuzzy-match-name->license-ids "Artistic-2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) + (is (= #{} (fuzzy-match-name->license-ids "BSD"))) ; Listed license missing clause info + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD (2 Clause)"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD (2-Clause)"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD (Type 2) Public License"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2 Clause"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2 clause license"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause \"Simplified\" License"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause Licence"))) + (is (= #{"BSD-2-Clause"} 
(fuzzy-match-name->license-ids "BSD 2-Clause License"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause license"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-clause \"Simplified\" License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3 Clause"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause \"New\" or \"Revised\" License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause 'New' or 'Revised' License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause license"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-clause"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-clause License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-clause license"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD C2"))) + (is (= #{} (fuzzy-match-name->license-ids "BSD License"))) ; Listed license missing clause info + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD New, Version 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "BSD Standard License"))) ; Listed license missing clause info + (is (= #{} (fuzzy-match-name->license-ids "BSD license"))) ; Listed license missing clause info + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD-2-Clause"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD-3"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD-3-Clause"))) + (is (= #{} (fuzzy-match-name->license-ids "BSD-style"))) ; Listed license missing clause info + (is (unlisted-only? (fuzzy-match-name->license-ids "BankersBox License"))) + (is (= #{"Beerware"} (fuzzy-match-name->license-ids "Beerware 42"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "Bespoke"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Bloomberg Open API"))) + (is (= #{"BSL-1.0"} (fuzzy-match-name->license-ids "Boost Software License - Version 1.0"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Bostock"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Built In Project License"))) + (is (= #{} (fuzzy-match-name->license-ids "CC Attribution 4.0 International with exception for binary distribution"))) + (is (= #{} (fuzzy-match-name->license-ids "CC BY-NC"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC BY-SA 4.0"))) + (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "CC-BY-4.0"))) + (is (= #{"CC0-1.0"} (fuzzy-match-name->license-ids "CC0"))) + (is (= #{"CC0-1.0"} (fuzzy-match-name->license-ids "CC0 1.0 Universal"))) + (is (= #{"CC0-1.0"} (fuzzy-match-name->license-ids "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "CRAPL License"))) + (is (= #{} (fuzzy-match-name->license-ids "CeCILL License"))) ; Listed license, but need a version + (is (= #{} (fuzzy-match-name->license-ids "Common Development and Distribution License"))) + (is (= #{} (fuzzy-match-name->license-ids "Common Development and Distribution License (CDDL)"))) + (is (= #{} (fuzzy-match-name->license-ids "Common Public License - v 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Common Public License Version 1.0"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Contact JMonkeyEngine forums for license details"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright & all rights reserved Lean Pixel"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (C) 2014 Mathieu Gauthron. 
Distributed under the Eclipse Public License."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (C) 2015 by Glowbox LLC"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (c) 2011 Drew Colthorp"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (c) 2017, Lingchao Xin"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2013 The Fresh Diet. All rights reserved."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2016, klaraHealth, Inc."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2017 All Rights Reserved"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2017 Zensight"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 4A Volcano. 2015."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright Ona Systems Inc."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright meissa GmbH"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright © SparX 2014"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution 2.5 License"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution License"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-NonCommercial 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 Unported"))) + (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Custom"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "Cydeas Public License"))) + (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) + (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "DO-WTF-U-WANT-2"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "Distributed under an MIT-style license (see LICENSE for details)."))) + (is (= #{} (fuzzy-match-name->license-ids "Distributed under the Eclipse Public License, the same as Clojure."))) + (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "Do What The Fuck You Want To Public License"))) + (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "Do What The Fuck You Want To Public License, Version 2"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Don't steal my stuff"))) + (is (= #{"Apache-2.0" "EPL-1.0"} (fuzzy-match-name->license-ids "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Dropbox ToS"))) + (is (= #{} (fuzzy-match-name->license-ids "Dual MIT & Proprietary"))) + (is (= #{} (fuzzy-match-name->license-ids "Dual: EPL and LGPL"))) + (is (= #{} (fuzzy-match-name->license-ids "ECLIPSE PUBLIC LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-3.0-or-later"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "EPL-v1.0"))) + (is (= #{} 
(fuzzy-match-name->license-ids "EPLv2"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse License"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public Licence"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License (EPL)"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License (EPL) - v 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - Version 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - v 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - v1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 1.0 (EPL-1.0)"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2.0,"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License v 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License v1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License v2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License version 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License version 2"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License version 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License, v. 
2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License, v2"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License, version 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public Licese - v 1.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public MIT"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse Pulic License"))) + (is (= #{} (fuzzy-match-name->license-ids "Eclipse public license, the same as Clojure"))) + (is (= #{"EUPL-1.1"} (fuzzy-match-name->license-ids "European Union Public Licence (EUPL v.1.1)"))) + (is (= #{"EUPL-1.2"} (fuzzy-match-name->license-ids "European Union Public Licence v. 1.2"))) + (is (= #{} (fuzzy-match-name->license-ids "European Union Public License"))) + (is (= #{"EUPL-1.2"} (fuzzy-match-name->license-ids "European Union Public License 1.2 or later"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "Expat (MIT) license"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "FIXME: choose"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Firebase ToS"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "FreeBSD License"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "GG Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU AGPL-V3 or later"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU AGPLv3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public Licence"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL)"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License 3.0 (AGPL-3.0)"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License Version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License Version 3; Other commercial licenses available."))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License v3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License, Version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License, version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GENERAL PUBLIC LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL V2+"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v. 
3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v3+"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPL, version 3, 29 June 2007"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU GPLv3+"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Lesser Public License (LGPL) version 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License (GPL)"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License 2"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License V3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License Version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License v3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License v3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License v3.0 or later"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, Version 2, with the Classpath Exception"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, Version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, Version 3 (or later)"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, version 3 (GPLv3)"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License,version 2.0 or (at your option) any later version"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) + (is (= #{} 
(fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL v2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL v3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL-3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU LGPLv3 "))) ; Note trailing space + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser GPL"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public Licence"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public Licence 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL) Version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License - v 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License - v 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License - v3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License 2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License v2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License v3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License version 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, Version 2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, Version 
3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, Version 3 or later"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, v. 3 or later"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, version 2.1 or newer"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, version 3 or later"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Pulic License v2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser Genereal Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Library or Lesser General Public License (LGPL)"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Library or Lesser General Public License (LGPL) 2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Library or Lesser General Public License (LGPL) V2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Public License V. 
3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Public License V3"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Public License v2"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Public License, Version 2"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Public License, Version 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU Public License, v2"))) + (is (= #{} (fuzzy-match-name->license-ids "GNU public licence V3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GNUv3"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL 2.0+"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL V3"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL V3+"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL v2"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL v2+ or Swiss Ephemeris"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL v3"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL version 3"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL-2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL-3"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL-3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL-3.0-only"))) + (is (= #{} (fuzzy-match-name->license-ids "GPL3"))) + (is (= #{} (fuzzy-match-name->license-ids "GPLv2"))) + (is (= #{} (fuzzy-match-name->license-ids "GPLv2 with Classpath exception"))) + (is (= #{} (fuzzy-match-name->license-ids "GPLv3"))) + (is (= #{} (fuzzy-match-name->license-ids "General Public License 3"))) + (is (= #{} (fuzzy-match-name->license-ids "General Public License v3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Gnu Lesser Public License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Google Maps ToS"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "GraphiQL license"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Hackthorn Innovation Ltd"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Hackthorn Innovation copyright"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Heap ToS"))) + (is (= #{"Hippocratic-2.1"} (fuzzy-match-name->license-ids "Hippocratic License"))) + (is (= #{"ISC"} (fuzzy-match-name->license-ids "ISC"))) + (is (= #{"ISC"} (fuzzy-match-name->license-ids "ISC Licence"))) + (is (= #{"ISC"} (fuzzy-match-name->license-ids "ISC License"))) + (is (= #{"ISC" "Classpath-exception-2.0"} (fuzzy-match-name->license-ids "ISC WITH Classpath-exception-2.0"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Interel"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "JLGL Backend"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Jedis License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Jiegao Owned"))) + (is (= #{} (fuzzy-match-name->license-ids "L GPL 3"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL 2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL 3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL 3.0 (GNU Lesser General Public License)"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL License"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL Open Source license"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL v3"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL-2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL-2.1-only"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL-3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL-3.0-only"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPL-3.0-or-later"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPLv2.1"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPLv3"))) + (is (= #{} (fuzzy-match-name->license-ids "LGPLv3+"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "Lesser GPL"))) + (is (= #{} (fuzzy-match-name->license-ids "Lesser General Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "Lesser General Public License (LGPL)"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Libre Uso MX"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "License of respective package"))) + (is (= #{} (fuzzy-match-name->license-ids "Licensed under GNU Lesser General Public License Version 3 or later (the "))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Like Clojure."))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT LICENSE"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT Licence"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT Licens"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License (MIT)"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT Public License"))) + (is (= #{"X11"} (fuzzy-match-name->license-ids "MIT X11 License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT public License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT public license"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT-style license (see LICENSE for details)."))) + (is (= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (fuzzy-match-name->license-ids "MIT/Apache-2.0/BSD-3-Clause"))) + (is (= #{"ISC"} (fuzzy-match-name->license-ids "MIT/ISC"))) + (is (= #{"ISC"} (fuzzy-match-name->license-ids "MIT/ISC License"))) + (is (= #{"X11"} (fuzzy-match-name->license-ids "MIT/X11"))) + (is (= #{} (fuzzy-match-name->license-ids "MPL"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL 2"))) + (is (= #{"MPL-2.0"} 
(fuzzy-match-name->license-ids "MPL 2.0"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL v2"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL-2.0"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL-v2.0"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL2.0"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Mixed"))) + (is (= #{} (fuzzy-match-name->license-ids "Modified BSD License"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public Licence 2.0"))) + (is (= #{} (fuzzy-match-name->license-ids "Mozilla Public License"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License (Version 2.0)"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License 2.0"))) + (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 1.0"))) + (is (= #{"MPL-1.1"} (fuzzy-match-name->license-ids "Mozilla Public License Version 1.1"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License v2.0"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License v2.0+"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License version 2"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License version 2.0"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License, v. 2.0"))) + (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License, version 2.0"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "Multiple"))) + (is (= #{"NASA-1.3"} (fuzzy-match-name->license-ids "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) + (is (= #{"NASA-1.3"} (fuzzy-match-name->license-ids "NASA Open Source Agreement, Version 1.3"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "New BSD 2-clause license"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "New BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "New BSD License or Modified BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "New BSD license"))) + (is (= #{"BSD-3-Clause" "MIT"} (fuzzy-match-name->license-ids "New-BSD / MIT"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Not fit for public use so formally proprietary software - this is not open-source"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "OTN License Agreement"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Open Source Community License - Type C version 1.0"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Other License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Private"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Private License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Proprietary"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Proprietary License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Provisdom"))) + (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (fuzzy-match-name->license-ids "Public Domain"))) + (is (= #{"CC0"} (fuzzy-match-name->license-ids "Public domain (CC0)"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Research License 1.0"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "Restricted Distribution."))) + (is (= #{} (fuzzy-match-name->license-ids "Revised BSD"))) + (is (= #{"Ruby"} (fuzzy-match-name->license-ids "Ruby License"))) + (is (= #{} (fuzzy-match-name->license-ids "SGI"))) + (is (= #{"SMPPL"} (fuzzy-match-name->license-ids "SMPPL"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "SYNNEX China Owned"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "See the LICENSE file"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Shen License"))) + (is (= #{} (fuzzy-match-name->license-ids "Simplified BSD License"))) + (is (= #{} (fuzzy-match-name->license-ids "Simplified BSD license"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Slick2D License"))) + (is (= #{} (fuzzy-match-name->license-ids "Some Eclipse Public License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Stripe ToS"))) + (is (= #{"Beerware"} (fuzzy-match-name->license-ids "THE BEER-WARE LICENSE"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "THE MIT LICENSE"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "TODO"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "TODO: Choose a license"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The 3-Clause BSD License"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache 2 License"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "The BSD 2-Clause License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License"))) + (is (= #{} (fuzzy-match-name->license-ids "The BSD License"))) + (is (= #{"EUPL-1.1"} (fuzzy-match-name->license-ids "The European Union Public License, Version 1.1"))) + (is (= #{} (fuzzy-match-name->license-ids "The GNU General Public License"))) + (is (= #{} (fuzzy-match-name->license-ids "The GNU General Public License v3.0"))) + (is (= #{} (fuzzy-match-name->license-ids "The GNU General Public License, Version 2"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "The I Haven't Got Around To This Yet License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT Licence"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License (MIT)"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License (MIT) "))) ; Note trailing space + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License (MIT) | Open Source Initiative"))) + (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License."))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The New BSD License"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The New BSD license"))) + (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "The UnLicense"))) + (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "The Unlicence"))) + (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "The Unlicense"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "Three Clause BSD-like License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "To ill!"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Tulos Commercial License"))) + (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "Two clause BSD license"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "UNLICENSED"))) + (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "UnLicense"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "University of Buffalo Public License"))) + (is (= #{"NCSA"} (fuzzy-match-name->license-ids "University of Illinois/NCSA Open Source License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Unknown"))) + (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "Unlicense"))) + (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "Unlicense License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "VNETLPL - Limited Public License"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "VNet PL"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Various"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Vimeo License"))) + (is (= #{"W3C"} (fuzzy-match-name->license-ids "W3C Software license"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "WIP"))) + (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "WTFPL"))) + (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "WTFPL v2"))) + (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "WTFPL – Do What the Fuck You Want to Public License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "Wildbit Proprietary License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "YouTube ToS"))) + (is (= #{"Zlib"} (fuzzy-match-name->license-ids "Zlib License"))) + (is (= #{} (fuzzy-match-name->license-ids "apache"))) + (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "apache-2.0"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "avi license"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "esl-sdk-external-signer-verification"))) + (is (= #{} (fuzzy-match-name->license-ids "http://opensource.org/licenses/MIT"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/cmiles74/uio/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/jaycfields/jry/blob/master/README.md#license"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/clafka/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids 
"https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/party/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/radix/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/riverford/datagrep/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) + (is (= #{} (fuzzy-match-name->license-ids "https://opensource.org/licenses/BSD-3-Clause"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "jank license"))) + (is (= #{} (fuzzy-match-name->license-ids "lgpl_v2_1"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "name"))) + (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "new BSD License"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "none"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "proprietary"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "state-node license"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "trove"))) + (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "unlicense"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "url"))) + (is (unlisted-only? (fuzzy-match-name->license-ids "wisdragon"))) + (is (unlisted-only? 
(fuzzy-match-name->license-ids "wiseloong"))) + (is (= #{"Zlib"} (fuzzy-match-name->license-ids "zlib License"))) + (is (= #{"Zlib"} (fuzzy-match-name->license-ids "zlib license"))) + (is (= #{"Libpng"} (fuzzy-match-name->license-ids "zlib/libpng License")))) + (testing "Names that appear in licensey things, but are ambiguous" + (is (nil? (fuzzy-match-name->license-ids "BSD")))) + (testing "Names that appear in licensey things, but aren't in the SPDX license list" + (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (fuzzy-match-name->license-ids "Public Domain"))) + (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (fuzzy-match-name->license-ids "Public domain"))))) + +(deftest uri->license-ids-tests + (testing "Nil, empty or blank uri" + (is (nil? (fuzzy-match-uri->license-ids nil))) + (is (nil? (fuzzy-match-uri->license-ids ""))) + (is (nil? (fuzzy-match-uri->license-ids " "))) + (is (nil? (fuzzy-match-uri->license-ids "\n"))) + (is (nil? (fuzzy-match-uri->license-ids "\t")))) + (testing "URIs that appear verbatim in the SPDX license list" + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace + (is (= #{"AGPL-3.0-or-later" "AGPL-3.0-only" "AGPL-3.0"} (fuzzy-match-uri->license-ids "https://www.gnu.org/licenses/agpl.txt"))) + (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) + (testing "URI variations that should be handled identically" + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} 
(fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) + (testing "URIs that appear in licensey things, but aren't in the SPDX license list" + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) diff --git a/test/lice_comb/spdx_test.clj b/test/lice_comb/spdx_test.clj deleted file mode 100644 index 1fb68d1..0000000 --- a/test/lice_comb/spdx_test.clj +++ /dev/null @@ -1,157 +0,0 @@ -; -; Copyright © 2021 Peter Monks -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; SPDX-License-Identifier: Apache-2.0 -; - -(ns lice-comb.spdx-test - (:require [clojure.test :refer [deftest testing is use-fixtures]] - [clojure.java.io :as io] - [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.spdx :refer [fuzzy-match-name->license-ids fuzzy-match-uri->license-ids]])) - -(use-fixtures :once fixture) - -; Note: these tests should be extended indefinitely, as it exercises the most-utilised part of the library (matching license names found in POMs) -(deftest fuzzy-match-name->license-ids-tests - (testing "Nil, empty or blank names" - (is (nil? (fuzzy-match-name->license-ids nil))) - (is (nil? (fuzzy-match-name->license-ids ""))) - (is (nil? (fuzzy-match-name->license-ids " "))) - (is (nil? 
(fuzzy-match-name->license-ids "\n"))) - (is (nil? (fuzzy-match-name->license-ids "\t")))) - (testing "Names that are SPDX license ids" - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL-3.0"))) - (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "AGPL-3.0-only"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache-2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC-BY-SA-4.0"))) - (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GPL-2.0"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GPL-2.0-with-classpath-exception")))) - (testing "Names" - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL) version 3.0"))) - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0 only"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License Version 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License, Version 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.0"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License, Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "The MX4J License, version 
1.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache Software License, Version 2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "Bouncy Castle Licence"))) ; Note spelling of "licence" - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License (BSD3)"))) - (is (= #{"BSD-3-Clause-Attribution"} (fuzzy-match-name->license-ids "BSD 3-Clause Attribution"))) - (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Attribution 3.0 Unported"))) - (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Creative Commons 
Legal Code Attribution 3.0 Unported"))) - (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "Attribution 4.0 International"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "Creative Commons Attribution Share Alike 4.0 International"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) - (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) - (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License (EPL)"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License, Version 1.0"))) - (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License version 2"))) - (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} 
(fuzzy-match-name->license-ids "GNU General Public License v2.0 w/Classpath exception"))) - (is (= #{"JSON"} (fuzzy-match-name->license-ids "JSON License"))) - (is (= #{"LGPL-2.0"} (fuzzy-match-name->license-ids "GNU Library General Public License"))) - (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) - (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) ; Test capitalisation - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) - (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) - (is (= #{"Plexus"} (fuzzy-match-name->license-ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (testing "Names that appear in licensey things, but are ambiguous" - (is (nil? (fuzzy-match-name->license-ids "BSD")))) - (testing "Names that appear in licensey things, but aren't in the SPDX license list, and don't have identified SPDX identifiers" - (is (= #{"LicenseRef-lice-comb-public-domain"} (fuzzy-match-name->license-ids "Public Domain"))) - (is (= #{"LicenseRef-lice-comb-public-domain"} (fuzzy-match-name->license-ids "Public domain"))))) - -(deftest uri->license-ids-tests - (testing "Nil, empty or blank uri" - (is (nil? (fuzzy-match-uri->license-ids nil))) - (is (nil? (fuzzy-match-uri->license-ids ""))) - (is (nil? (fuzzy-match-uri->license-ids " "))) - (is (nil? (fuzzy-match-uri->license-ids "\n"))) - (is (nil? 
(fuzzy-match-uri->license-ids "\t")))) - (testing "URIs that appear verbatim in the SPDX license list" - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids " http://apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace - (is (= #{"AGPL-3.0-or-later" "AGPL-3.0-only" "AGPL-3.0"} (fuzzy-match-uri->license-ids "https://www.gnu.org/licenses/agpl.txt"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) - (testing "URIs that appear in licensey things, but aren't in the SPDX license list" - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) - -(comment ; We don't test text->ids, since it's been moved to clj-spdx, where it is far more extensively tested -(defn- string-text->ids - [s] - (with-open [is (io/input-stream (.getBytes s "UTF-8"))] - (text->ids is))) - -(deftest text->ids-tests - (testing "Nil, empty or blank text" - (is (nil? (text->ids nil))) - (is (nil? (text->ids ""))) - (is (nil? (text->ids " "))) - (is (nil? (text->ids "\n"))) - (is (nil? (text->ids "\t"))) - (is (thrown? java.io.FileNotFoundException (text->ids (io/file "")))) - (is (thrown? java.io.FileNotFoundException (text->ids (io/file " ")))) - (is (thrown? java.io.FileNotFoundException (text->ids (io/file "\n")))) - (is (thrown? 
java.io.FileNotFoundException (text->ids (io/file "\t"))))) - (testing "Text" - (is (= #{"Apache-2.0"} (string-text->ids "Apache License\nVersion 2.0, January 2004"))) - (is (= #{"Apache-2.0"} (string-text->ids " Apache License\n Version 2.0, January 2004 "))) - (is (= #{"AGPL-3.0"} (string-text->ids "GNU AFFERO GENERAL PUBLIC LICENSE\nVersion 3, 19 November 2007"))) - (is (= #{"CC-BY-SA-4.0"} (string-text->ids "Creative Commons Attribution-ShareAlike\n4.0 International Public License"))) - (is (= #{"JSON"} (string-text->ids "Copyright (c) 2002 JSON.org"))))) -) \ No newline at end of file diff --git a/test/lice_comb/untitled.clj b/test/lice_comb/untitled.clj new file mode 100644 index 0000000..1066a5c --- /dev/null +++ b/test/lice_comb/untitled.clj @@ -0,0 +1,33 @@ +(defn re-named-groups + "Returns a sequence of the names of all of the named capturing groups in the + given regular expression, or nil if there are none. + + Note: workaround for https://bugs.openjdk.org/browse/JDK-7032377 (fixed + in JDK 20)" + [re] + (seq (map second (re-seq #"\(\?<([a-zA-Z][a-zA-Z0-9]*)>" (str re))))) + + +(defn re-matches-ncg + "Returns the match, if any, of string to pattern, using + java.util.regex.Matcher.matches(). Returns a (potentially + empty) map of the named-capturing groups in the regex if there + was a match, or nil otherwise. Each key in the map is the name + of a name-capturing group, and each value is the corresponding + value in the string that matched that group." + [re s] + (let [matcher (re-matcher re s)] + (when (.matches matcher) + (let [ncgs (re-named-groups re)] + (loop [result {} + f (first ncgs) + r (rest ncgs)] + (if f + (let [v (try (.group matcher f) (catch java.lang.IllegalArgumentException _ nil))] + (recur (merge result (when v {f v})) + (first r) + (rest r))) + result)))))) + + +(re-matches-ncg #"(?i)(?Apache)(\s+Software)?(\s+License(s)?(\s*[,-])?)?(\s+V(ersion)?)?\s*(?\d+(\.\d+)?)?" 
"Apache 2.0") From 82eef19af7ad7913f10030bbe6bc6c821a52f0d8 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 14 Jul 2023 15:17:05 -0700 Subject: [PATCH 09/34] :fire: Delete accidentally added file --- test/lice_comb/untitled.clj | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 test/lice_comb/untitled.clj diff --git a/test/lice_comb/untitled.clj b/test/lice_comb/untitled.clj deleted file mode 100644 index 1066a5c..0000000 --- a/test/lice_comb/untitled.clj +++ /dev/null @@ -1,33 +0,0 @@ -(defn re-named-groups - "Returns a sequence of the names of all of the named capturing groups in the - given regular expression, or nil if there are none. - - Note: workaround for https://bugs.openjdk.org/browse/JDK-7032377 (fixed - in JDK 20)" - [re] - (seq (map second (re-seq #"\(\?<([a-zA-Z][a-zA-Z0-9]*)>" (str re))))) - - -(defn re-matches-ncg - "Returns the match, if any, of string to pattern, using - java.util.regex.Matcher.matches(). Returns a (potentially - empty) map of the named-capturing groups in the regex if there - was a match, or nil otherwise. Each key in the map is the name - of a name-capturing group, and each value is the corresponding - value in the string that matched that group." - [re s] - (let [matcher (re-matcher re s)] - (when (.matches matcher) - (let [ncgs (re-named-groups re)] - (loop [result {} - f (first ncgs) - r (rest ncgs)] - (if f - (let [v (try (.group matcher f) (catch java.lang.IllegalArgumentException _ nil))] - (recur (merge result (when v {f v})) - (first r) - (rest r))) - result)))))) - - -(re-matches-ncg #"(?i)(?Apache)(\s+Software)?(\s+License(s)?(\s*[,-])?)?(\s+V(ersion)?)?\s*(?\d+(\.\d+)?)?" 
"Apache 2.0") From d3218362989c380401ca19d89d0592a1ccece98f Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 1 Aug 2023 15:18:33 -0700 Subject: [PATCH 10/34] :construction: Ongoing work on issue #3 --- README.md | 2 +- deps.edn | 6 +- src/lice_comb/impl/utils.clj | 30 +- src/lice_comb/matching.clj | 528 +++++++++--- src/lice_comb/maven.clj | 7 +- test/lice_comb/matching_test.clj | 1318 +++++++++++++++--------------- test/lice_comb/utils_test.clj | 60 ++ 7 files changed, 1195 insertions(+), 756 deletions(-) create mode 100644 test/lice_comb/utils_test.clj diff --git a/README.md b/README.md index 5154636..6631808 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ A Clojure library for software license detection. It does this by combing throu This library leverages, and is inspired by, the *excellent* [SPDX project](https://spdx.dev/). It's a great shame that it doesn't have greater traction in the Java & Clojure (and wider open source) communities. If you're new to SPDX and would prefer to read a primer rather than dry specification documents, I can thoroughly recommend [David A. Wheeler's SPDX Tutorial](https://github.com/david-a-wheeler/spdx-tutorial#spdx-tutorial). -## Requirements +## System Requirements * `lice-comb` (all versions) requires an internet connection. 
diff --git a/deps.edn b/deps.edn index b85084a..c79c32b 100644 --- a/deps.edn +++ b/deps.edn @@ -19,12 +19,14 @@ {:paths ["src" "resources"] :deps {org.clojure/tools.logging {:mvn/version "1.2.4"} + commons-validator/commons-validator {:mvn/version "1.7"} org.clojure/data.xml {:mvn/version "0.2.0-alpha8"} clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} tolitius/xml-in {:mvn/version "0.1.1"} + hato/hato {:mvn/version "0.9.0"} miikka/clj-base62 {:mvn/version "0.1.0"} - com.github.pmonks/clj-spdx {:mvn/version "1.0.74"} - com.github.pmonks/rencg {:mvn/version "1.0.26"}} + com.github.pmonks/clj-spdx {:mvn/version "1.0.86"} + com.github.pmonks/rencg {:mvn/version "1.0.32"}} :aliases {:build {:deps {com.github.pmonks/pbr {:mvn/version "RELEASE"}} :ns-default pbr.build}}} diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index 0c7945c..d8701b7 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -24,6 +24,14 @@ [clojure.java.io :as io] [clj-base62.core :as base62])) +(defn mapfonv + "Returns a new map where f has been applied to all of the values of m." + [f m] + (when m + (into {} + (for [[k v] m] + [k (f v)])))) + (defn map-pad "Like map, but when presented with multiple collections of different lengths, 'pads out' the missing elements with nil rather than terminating early." @@ -85,14 +93,24 @@ (when s (java.lang.String. ^bytes (base62/decode s) (java.nio.charset.StandardCharsets/UTF_8)))) +(defn valid-http-uri? + "Returns true if given string is a valid HTTP or HTTPS URI." + [^String s] + ; Note: no nil check needed since the isValid method handles nil sanely + (.isValid (org.apache.commons.validator.routines.UrlValidator. ^"[Ljava.lang.String;" (into-array String ["http" "https"])) s)) + (defn simplify-uri - "Simplifies a URI (which can be a string, java.net.URL, or java.net.URI). - Returns a string." 
+ "Simplifies a URI (which can be a string, java.net.URL, or java.net.URI) if + possible, returning a String. Returns nil if the input is nil or blank." [uri] - (when uri - (s/replace (s/replace (s/lower-case (s/trim (str uri))) - "https://" "http://") - "://www." "://"))) + (let [uri (str uri)] + (when-not (s/blank? uri) + (let [luri (s/lower-case (s/trim uri))] + (if (valid-http-uri? luri) + (-> luri + (s/replace #"\Ahttps?://(www\.)?" "http://") ; Normalise to http and strip any www. extension on hostname + (s/replace #"\.[\p{Alnum}]{3,}\z" "")) ; Strip file type extension (if any) + luri))))) (defmulti filename "Returns just the name component of the given file or path string, excluding diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index ff98940..7b5fc5c 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -17,150 +17,85 @@ ; (ns lice-comb.matching - "Matching functionality, a lot of which is provided by + "Matching functionality, some of which is provided by https://github.com/pmonks/clj-spdx" (:require [clojure.string :as s] [clojure.set :as set] [clojure.java.io :as io] - [clojure.tools.logging :as log] + [hato.client :as hc] [spdx.licenses :as sl] [spdx.exceptions :as se] [spdx.matching :as sm] [spdx.expressions :as sexp] - [lice-comb.impl.data :as lcd] + [rencg.api :as rencg] [lice-comb.impl.utils :as lcu])) -; The lists +; The license and exception lists (def ^:private license-list-d (delay (map sl/id->info (sl/ids)))) -(def ^:private exception-list-d (delay (map sl/id->info (sl/ids)))) +(def ^:private exception-list-d (delay (map se/id->info (se/ids)))) -; License name aliases -(def ^:private aliases-d (delay (lcd/load-edn-resource "lice_comb/matching/aliases.edn"))) - -; Regexes used for license name matching - these are expected to include named-capturing group called "name" and "version" -(def ^:private license-name-regexes [ - 
#"(?i)\s*(The\s+)?(?Apache)(\s+Software)?(\s+License(s)?)?(\s*[,-])?(\s+V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*" - ]) - - -(defn- name->license-ids - "Returns the SPDX license identifier(s) (a set) for the given license name - (matched case insensitively), or nil if there aren't any. - - Note that SPDX license names are not guaranteed to be unique - see - https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" - [name] - (when-not (s/blank? name) - (let [lname (s/trim (s/lower-case name))] - (some-> (seq (map :id (filter #(= lname (s/trim (s/lower-case (:name %)))) @license-list-d))) - set)))) - -(defn fuzzy-match-uri->license-ids - "Returns the SPDX license identifiers (a set) for the given uri, or nil if - there aren't any. - - Notes: - 1. this does not perform exact matching; rather it simplifies URIs in various - ways to avoid irrelevant differences, including performing a - case-insensitive comparison, ignoring protocol differences (http vs https), - ignoring extensions representing MIME types (.txt vs .html, etc.), etc. - 2. SPDX license list URIs are not guaranteed to be unique" - [uri] - (when-not (s/blank? uri) - (let [suri (lcu/simplify-uri uri)] - (some-> (seq (map :id (filter #(some identity (map (fn [see-also] (s/starts-with? suri see-also)) (distinct (map lcu/simplify-uri (concat (:see-also %) (get-in % [:cross-refs :url])))))) - @license-list-d))) - set)))) - -(def public-domain-license-id "LicenseRef-lice-comb-PUBLIC-DOMAIN") -(def ^:private unlisted-license-id-prefix "LicenseRef-lice-comb-UNLISTED") +; The unlisted license refs lice-comb uses (note: the unlisted one usually has a base62 suffix appended) +(def ^:private public-domain-license-ref "LicenseRef-lice-comb-PUBLIC-DOMAIN") +(def ^:private unlisted-license-ref-prefix "LicenseRef-lice-comb-UNLISTED") (defn public-domain? "Is the given id lice-comb's custom 'public domain' LicenseRef?" 
[id] - (= (s/lower-case id) (s/lower-case public-domain-license-id))) + (= (s/lower-case id) (s/lower-case public-domain-license-ref))) + +(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) + representing public domain." + :arglists '([])} + public-domain + (constantly public-domain-license-ref)) (defn unlisted? "Is the given id a lice-comb custom 'unlisted' LicenseRef?" [id] - (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-id-prefix))) + (when id + (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-ref-prefix)))) (defn name->unlisted "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) for an unlisted license, with the given name appended as Base62 (since clj-spdx identifiers are basically constrained to [A-Z][a-z][0-9] ie. Base62)." [name] - (str unlisted-license-id-prefix (when-not (s/blank? name) (str "-" (lcu/base62-encode name))))) + (str unlisted-license-ref-prefix (when-not (s/blank? name) (str "-" (lcu/base62-encode name))))) (defn unlisted->name "Get the original name of the given unlisted license. Returns nil if id is nil or is not a lice-comb's unlisted LicenseRef." [id] - (when (and id (unlisted? id)) - (str "Unlisted" - (when (> (count id) (count unlisted-license-id-prefix)) - (str " (" (lcu/base62-decode (subs id (+ 2 (count unlisted-license-id-prefix)))) ")"))))) + (when (unlisted? id) + (str "Unlisted (" + (if (> (count id) (count unlisted-license-ref-prefix)) + (lcu/base62-decode (subs id (+ 2 (count unlisted-license-ref-prefix)))) + "-original name not available-") + ")"))) (defn id->name "Returns the human readable name of the given license or exception identifier; - either the official SPDX license or exception name or (if the id is a - lice-comb specific LicenseRef an unofficial name. Returns the id verbatim if - unable to determine a name." + either the official SPDX license or exception name or, if the id is a + lice-comb specific LicenseRef, a lice-comb specific name. 
Returns the id + verbatim if unable to determine a name. Returns nil if the id is blank." [id] (when-not (s/blank? id) (cond (sl/listed-id? id) (:name (sl/id->info id)) (se/listed-id? id) (:name (se/id->info id)) (public-domain? id) "Public domain" - (unlisted? id) (str "Unlisted" (when-let [original (unlisted->name id)] (str " (" original ")"))) + (unlisted? id) (unlisted->name id) :else id))) - -; Index of alias regexes -(def ^:private idx-regex-to-id-d (delay - (merge @aliases-d - (apply merge (map #(hash-map (s/replace (lcu/escape-re (s/lower-case (:name %))) #"\s+" "\\\\s+") #{(:id %)}) @license-list-d))))) - -; Store regexes in reverse size order, on the assumption that longer regexes are more specific and should be processed first -; Note: `regexes` actually contains string representations, since regexes in Clojure don't implement equality / hash 🙄 -(def ^:private regexes-d (delay (reverse (sort-by #(count %) (concat (keys @idx-regex-to-id-d) (keys @idx-regex-to-id-d)))))) -(def ^:private re-pattern-mem (memoize re-pattern)) ; So we memomize re-pattern to save having to recompile the regex string representations every time we use them - -(defn- parse-expression-and-extract-ids - [s] - (when-let [expression (sexp/parse s)] - (sexp/extract-ids expression))) - -(defn fuzzy-match-name->license-ids - "Fuzzily attempts to determine the SPDX license identifier(s) (a set) from the - given name (a string), or nil if there aren't any. This involves three steps: - 1. checking if the name is actually an SPDX expression (this is rare, but - sometimes an SPDX identifier (which is also a valid expression) appears in - a pom.xml file) - 2. looking up the name using name->license-ids - 3. falling back on a manually maintained list of common name aliases: - https://github.com/pmonks/lice-comb/blob/data/spdx/aliases.edn" - [name] - (when-not (s/blank? name) - (let [name (s/trim name)] - ; 1. 
Parse the name as an SPDX exception, and if that succeeds, return all ids in the expression - (if-let [ids-in-expression (parse-expression-and-extract-ids name)] - ids-in-expression - ; 2. Then we look up by name - (if-let [list-name-matches (name->license-ids name)] - list-name-matches - ; 3. Then the last resort is to match on the name regexes - (if-let [re-name-matches (get @idx-regex-to-id-d (first (filter #(re-find (re-pattern-mem %) (s/lower-case name)) @regexes-d)))] - re-name-matches - (log/warn "Unable to find a listed SPDX license for" (str "'" name "'")))))))) - (defmulti text->ids "Attempts to determine the SPDX license and/or exception identifier(s) (a set) within the given license text (a String, Reader, InputStream, or something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). Notes: - * the caller is expected to close a Reader or InputStream passed to this - function (e.g. using clojure.core/with-open) + * this function implements the SPDX matching guidelines (via clj-spdx). + See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ + * the caller is expected to open & close a Reader or InputStream passed to + this function (e.g. using clojure.core/with-open) * you cannot pass a String representation of a filename to this method - you should pass filenames through clojure.java.io/file first" {:arglists '([text])} @@ -189,6 +124,399 @@ (with-open [r (io/reader src)] (text->ids r)))) +(defn- urls-to-id-tuples + "Extracts all urls for a given list (license or exception) entry." + [list-entry] + (let [id (:id list-entry) + simplified-uris (map lcu/simplify-uri (filter (complement s/blank?) 
(concat (:see-also list-entry) (get-in list-entry [:cross-refs :url]))))] + (map #(vec [% id]) simplified-uris))) + +(def ^:private index-uri-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @license-list-d))) + (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @exception-list-d)))))) + +(def ^:private http-client-d (delay (hc/build-http-client {:connect-timeout 1000 + :redirect-policy :always + :cookie-policy :none}))) + +(defn- github-raw-uri + "Converts a GitHub 'UI' URI into a 'raw' (CDN) GitHub URI. + + e.g. https://github.com/pmonks/lice-comb/blob/main/LICENSE -> https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE + + If the given URI is not a GitHub 'UI' URI, returns the URI unchanged." + [uri] + (if-let [uri-obj (try (io/as-url uri) (catch Exception _ nil))] + (if (= "github.com" (s/lower-case (.getHost uri-obj))) + (-> uri + (s/replace "github.com" "raw.githubusercontent.com") + (s/replace "/blob/" "/")) + uri) + uri)) + +(defn- attempt-text-http-get + "Attempts to get plain text as a String from the given URI, returning nil if + unable to do so (including for error conditions - there is no way to + disambiguate errors from non-text content, for example)." + [uri] + (when (lcu/valid-http-uri? uri) + (try + (when-let [response (hc/get (github-raw-uri uri) + {:http-client @http-client-d + :accept "text/plain;q=1,*/*;q=0"})] ; Kindly request server to only return text/plain... ...even though this gets ignored a lot of the time 🙄 + (when (= :text/plain (:content-type response)) + (:body response))) + (catch Exception _ + nil)))) + +(defn uri->ids + "Returns the SPDX license and/or exception identifiers (a set) for the given + uri, or nil if there aren't any. It does this via two steps: + 1. Seeing if the given URI is in the license or exception list, and returning + the ids of the associated licenses and/or exceptions if so + 2. 
Attempting to retrieve the plain text content of the given URI and + performing full SPDX license matching on the result if there was one + + Notes on step 1: + 1. this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + See lice-comb.impl.utils/simplify-uri for exact details. + 2. URIs in the SPDX license and exception lists are not unique - the same URI + may represent multiple licenses and/or exceptions." + [uri] + (when-let [suri (lcu/simplify-uri uri)] + ; First, see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) + (if-let [result (get @index-uri-to-id-d suri)] + result + ; Second, attempt to retrieve it as text/plain and perform full license matching on it + (when-let [license-text (attempt-text-http-get uri)] + (text->ids license-text))))) + +(defn- name-to-id-tuple + [list-entry] + [(s/lower-case (s/trim (:name list-entry))) (:id list-entry)]) + +(def ^:private index-name-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @license-list-d))) + (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @exception-list-d)))))) + +(defn- listed-name->ids + "Returns the SPDX license and/or exception identifier(s) (a set) for the given license name + (matched case insensitively), or nil if there aren't any. + + Note that SPDX license names are not guaranteed to be unique - see + https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" + [name] + (when-not (s/blank? name) + (get @index-name-to-id-d (s/trim (s/lower-case name))))) + +(defn- parse-expression-and-extract-ids + "Parse s as if it were an SPDX expression, and if it is, extract all ids + (for licenses and exceptions) out of it." 
+ [s] + (when-let [expression (sexp/parse s)] + (sexp/extract-ids expression))) + +(defn- get-rencgs + "Get a value for an re-ncg, potentially looking at multiple ncgs in order until a non-blank value is found. Also trims and lower-cases the value." + ([m names] (get-rencgs m names nil)) + ([m names default] + (loop [f (first names) + r (rest names)] + (if f + (let [value (get m f)] + (if (s/blank? value) + (recur (first r) (rest r)) + (s/lower-case (s/trim value)))) + default)))) + +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! +(defn- generic-id-constructor + [m] + (when m + (str (:id m) + (when-let [ver (get-rencgs m ["version"] (:latest-ver m))] + (str "-" + ver + (when (and (:pad-ver? m) + (not (s/includes? ver "."))) + (let [pad (last (s/split (:latest-ver m) #"\."))] + (when-not (s/blank? pad) + (str "." pad))))))))) + +(defn- number-name-to-number + "Converts the name of a number to that number (as a string). e.g. \"two\" -> \"2\". Returns s unchanged if it's not a number name." + [^String s] + (when s + (case s + "two" "2" + "three" "3" + "four" "4" + s))) + +(defn- is-digits? + "Does the given string contains digits only?" + [^String s] + (boolean ; Eliminate nil-punning, since we use the output of this method in case + (when s + (every? #(Character/isDigit ^Character %) s)))) + +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! +(defn- bsd-id-constructor + [m] + (let [clause-count1 (number-name-to-number (get-rencgs m ["clausecount1"])) + clause-count2 (number-name-to-number (get-rencgs m ["clausecount2"])) + preferred-clause-count (case [(is-digits? clause-count1) (is-digits? clause-count2)] + [true true] clause-count1 + [true false] clause-count1 + [false true] clause-count2 + (if (contains? 
#{"simplified" "new" "revised" "modified" "aduna"} clause-count1) + clause-count1 + clause-count2)) + clause-count (case preferred-clause-count + ("2" "simplified") "2" + ("3" "new" "revised" "modified" "aduna") "3" + "4")] ; Note: we default to 4 clause, since it was the original form of the BSD license + (str (:id m) "-" clause-count "-Clause"))) + +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! +(defn- cc-id-constructor + [m] + (let [nc? (not (s/blank? (get-rencgs m ["noncommercial"]))) + nd? (not (s/blank? (get-rencgs m ["noderivatives"]))) + sa? (not (s/blank? (get-rencgs m ["sharealike"]))) + version (get-rencgs m ["version"] (:latest-ver m)) + base-id (str "CC-BY-" + (when nc? "NC-") + (when nd? "ND-") + (when (and (not nd?) sa?) "SA-") ; SA and ND are incompatible (and have no SPDX id as a result), and if both are (erroneously) specified we conservatively choose ND + version) + region (case (get-rencgs m ["region"]) + "australia" "AU" + "austria" "AT" + ("england" "england and wales" "england & wales" "uk") "UK" + "france" "FR" + "germany" "DE" + "igo" "IGO" + "japan" "JP" + "netherlands" "NL" + ("united states" "usa" "us") "US" + nil) + id-with-region (str base-id (when-not (s/blank? region) (str "-" region)))] + (if (contains? (sl/ids) id-with-region) ; Not all license variants and versions have a region specific identifier, so check that it's valid before returning it + id-with-region + (if (contains? (sl/ids) base-id) + base-id + (throw (ex-info "Invalid Creative Commons license information found" (dissoc m :id :regex :fn :pad-ver? :latest-ver))))))) + +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! +(defn- gpl-id-constructor + [m] + (let [id (case (get-rencgs m ["edition1" "edition2"]) + ("affero" "agpl") "AGPL" + ("lesser" "library" "lgpl") "LGPL" + "GPL") + version (let [ver (get-rencgs m ["version"] (:latest-ver m))] + (if (s/includes? 
ver ".") + ver + (str ver ".0"))) + suffix (case (get-rencgs m ["suffix"]) + ("later" "newer" "+") "or-later" + ("only") "only" + "only")] ; Note: we (conservatively) default to "only" when we don't have an explicit suffix + (str id "-" version (when-not (= id "AGPL") (str "-" suffix))))) + +(defn- simple-regex-match + "Constructs a 'simple' name match structure" + [s] + {:id s + :regex (re-pattern (str "(?i)\\b" s "\\b")) + :fn (constantly s)}) + +; Regexes used for license name matching, along with functions for constructing an SPDX id +(def ^:private license-name-matching (concat + ; By default we add every single id as a "simple" regex match, excluding MIT and Zlib (they're explicitly handled below) + (map simple-regex-match (disj (sl/ids) "MIT" "Zlib")) + (map simple-regex-match (se/ids)) + [ + {:id "AFL" + :regex #"(?i)\bAcademic(\s+Free)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "3.0"} + {:id "Apache" + :regex #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "Artistic" + :regex #"(?i)\bArtistic\s+Licen[cs]e(\s*V(ersion)?)?[\s,-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "Beerware" + :regex #"(?i)\bBeer-?ware\b" + :fn (constantly "Beerware")} + {:id "BSL" + :regex #"(?i)\bBoost(\s+Software)?(\s+Licen[cs]e)?[\s,-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.0"} + {:id "BSD" + :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?" + :fn bsd-id-constructor} + {:id "CC0" + :regex #"(?i)\bCC\s*0" + :fn (constantly "CC0-1.0")} + {:id "CECILL" + :regex #"(?i)\bCeCILL(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?(\s+Agreement)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "2.1"} + {:id "Classpath-exception" + :regex #"(?i)\bClasspath[\s-]+exception(\s*V(ersion)?)?[\s-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "CDDL" + :regex #"(?i)(CDDL|Common\s+Development\s+(and|\&)?\s+Distribution\s+Licen[cs]e)(\s+\(?CDDL\)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.1"} + {:id "CPL" + :regex #"(?i)Common\s+Public\s+Licen[cs]e[\s,-]*(\s*V(ersion)?)?(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.0"} + {:id "Creative commons family" + :regex #"(?i)\b(CC([\s-]+BY)?\b|(Creative\s+Commons\s+(Attribution)?|Attribution))([\s,-]*((?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(\s+Unported|International|Generic)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?\b" + :fn cc-id-constructor + :pad-ver? true + :latest-ver "4.0"} + {:id "EPL" ; Eclipse Public License (EPL) - v 1.0 + :regex #"(?i)\b(EPL|Eclipse(\s+Public)?(\s+Licen?[cs]e)?)(\s*\(EPL\))?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" ; Note: optional "n" in "license" is because of a known typo + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "EUPL" + :regex #"(?i)\bEuropean\s+Union(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(\(?EUPL\)?)?[\s,-]*(V(ersion)?)?(\.)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "1.2"} + {:id "FreeBSD" + :regex #"(?i)\bFreeBSD\b" + :fn (constantly "BSD-2-Clause-FreeBSD")} + {:id "GNU license family" + :regex #"(?i)\b(?(Affero|Lesser|Library|LGPL|AGPL)\s+)?(GPL|GNU|General\s+Pub?lic\s+Licen[cs]e)(?\s+(Affero|Lesser|Library))?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?(A|L)?GPL\)?)?([\s,-]*V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(or(\s+\(?at\s+your\s+option\)?)?)?(\s+any)?(\s*(?later|newer|only|\+))?\b" + :fn gpl-id-constructor + :pad-ver? true + :latest-ver 3.0} + {:id "Hippocratic" + :regex #"(?i)\bHippocratic\b" + :fn (constantly "Hippocratic-2.1")} ; There are no other listed versions of this license + {:id "LLVM-exception" + :regex #"(?i)\bLLVM[\s-]+Exception\b" + :fn (constantly "LLVM-exception")} + {:id "MIT" + :regex #"(?i)\bMIT(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" + :fn (constantly "MIT")} + {:id "MPL" + :regex #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "NASA" + :regex #"(?i)\bNASA(\s+Open)?(\s+Source)?(\s+Agreement)?[\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.3"} + {:id "Public Domain" + :regex #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)" + :fn (constantly public-domain-license-ref)} + {:id "Ruby" + :regex #"(?i)\bRuby(\s+Licen[cs]e)?\b" + :fn (constantly "Ruby")} + {:id "SGI-B" + :regex #"(?i)\bSGI(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?)?\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "2.0"} + {:id "Unlicense" + :regex #"(?i)\bUnlicen[cs]e\b" + :fn (constantly "Unlicense")} + {:id "WTFPL" + :regex #"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b" + :fn (constantly "WTFPL")} + {:id "Zlib" + :regex #"\b(?i)zlib(?![\s/]+libpng)\b" + :fn (constantly "Zlib")} + ])) + +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! +(defn- match-regex + "Returns the SPDX license-id for the given elem from license-name-matching, if a match occurred, or nil if there was no match." + [name elem] + (when-let [matches (rencg/re-find-ncg (:regex elem) name)] + ((:fn elem) (merge {:name name} elem matches)))) + +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! +(defn- match-regexes + "Returns all of the matched SPDX license-id for the given name, or nil if there were no matches." + [name] + (some-> (seq (filter identity (pmap (partial match-regex name) license-name-matching))) + set)) + +(defn- fix-public-domain-cc0 + [ids] + (if (and (contains? ids public-domain-license-ref) + (contains? ids "CC0-1.0")) + (disj ids public-domain-license-ref) + ids)) + +(defn- fix-classpath-exception + [ids] + (if (contains? ids "GPL-2.0-with-classpath-exception") + (conj (disj ids "GPL-2.0-with-classpath-exception") "GPL-2.0-only" "Classpath-exception-2.0") + ids)) + +(defn- manual-fixes + "Manually fix certain combinations of license identifiers." + [ids] + (when ids + (-> ids + fix-public-domain-cc0 + fix-classpath-exception))) + +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AND SOURCE!!!! +(defn name->ids + "Attempts to determine the SPDX license identifier(s) (a set) from the given + name (a string), or nil if there aren't any. This involves: + 1. 
checking if the name is actually an SPDX expression (this is rare, but + sometimes an SPDX identifier or expression appears in a pom.xml file) + 2. looking up the name case insensitively in the SPDX license list + 3. matching lice-comb specific 'name matching' regexes against the name + 4. if the name is actually a URI, running it through uri->ids + + If those steps all fail, a lice-comb custom 'unlisted' LicenseRef is returned + instead (which can be checked using the unlisted? fn)." + [name] + (when-not (s/blank? name) + (manual-fixes + (let [name (s/trim name)] + ; 1. Parse the name as an SPDX exception, and if that succeeds, return all ids in the expression + (if-let [ids-in-expression (parse-expression-and-extract-ids name)] + ids-in-expression + ; 2. Then we look up by name + (if-let [listed-name-matches (listed-name->ids name)] + listed-name-matches + ; 3. Then we fallback on regex name matching + (if-let [re-name-matches (match-regexes name)] + re-name-matches + ; 4. Then we see if it's actually a URI, and URI match if so - this is to handle some dumb corner cases that exist in the real world + (if-let [uri-matches (uri->ids name)] + uri-matches + #{(name->unlisted name)})))))))) + (defn init! "Initialises this namespace upon first call (and does nothing on subsequent calls), returning nil. Consumers of this namespace are not required to call @@ -201,7 +529,7 @@ (se/init!) @license-list-d @exception-list-d - @aliases-d - @idx-regex-to-id-d - @regexes-d + @index-uri-to-id-d + @index-name-to-id-d + @http-client-d nil) diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index 61c9158..845b0c4 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -65,16 +65,15 @@ (.toURI local-pom) (first (filter uri-resolves? (map #(java.net.URI. (str % "/" gav-path)) remote-maven-repos)))))))) +;####TODO: Check both URI and name and merge the results! 
(defn- licenses-from-pair "Attempts to determine the license(s) (a set) from a POM license name/URL pair." [{:keys [name url]}] ; Attempt to find a match by URL first - (if-let [licenses (lcmtch/fuzzy-match-uri->license-ids url)] + (if-let [licenses (lcmtch/uri->ids url)] licenses ; Then match by name - (if-let [licenses (lcmtch/fuzzy-match-name->license-ids name)] - licenses - #{(lcmtch/name->unlisted name)}))) ; Last resort - return an unlisted identifier that includes the name (if any) + (lcmtch/name->ids name))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index c9a3e6a..8657b66 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -19,7 +19,9 @@ (ns lice-comb.matching-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.matching :refer [unlisted? fuzzy-match-name->license-ids fuzzy-match-uri->license-ids]])) + [lice-comb.matching :refer [unlisted? name->unlisted text->ids name->ids uri->ids]] + [spdx.licenses :as sl] + [spdx.exceptions :as se])) (use-fixtures :once fixture) @@ -29,654 +31,684 @@ (and (= 1 (count ids)) (unlisted? (first ids)))) +(deftest unlisted?-tests + (testing "Nil, empty or blank ids" + (is (nil? (unlisted? nil))) + (is (false? (unlisted? ""))) + (is (false? (unlisted? " "))) + (is (false? (unlisted? "\n"))) + (is (false? (unlisted? "\t")))) + (testing "Unlisted ids" + (is (true? (unlisted? (name->unlisted "foo"))))) + (testing "Listed ids" + (is (true? (every? false? (map unlisted? (sl/ids))))) + (is (true? (every? false? (map unlisted? (se/ids))))))) + ; Note: these tests should be extended indefinitely, as it exercises the most-utilised part of the library (matching license names found in POMs) -(deftest fuzzy-match-name->license-ids-tests +(deftest name->ids-tests (testing "Nil, empty or blank names" - (is (nil? 
(fuzzy-match-name->license-ids nil))) - (is (nil? (fuzzy-match-name->license-ids ""))) - (is (nil? (fuzzy-match-name->license-ids " "))) - (is (nil? (fuzzy-match-name->license-ids "\n"))) - (is (nil? (fuzzy-match-name->license-ids "\t")))) + (is (nil? (name->ids nil))) + (is (nil? (name->ids ""))) + (is (nil? (name->ids " "))) + (is (nil? (name->ids "\n"))) + (is (nil? (name->ids "\t")))) (testing "Names that are SPDX license ids" - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL-3.0"))) - (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "AGPL-3.0-only"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache-2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC-BY-SA-4.0"))) - (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GPL-2.0"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GPL-2.0-with-classpath-exception")))) + (is (= #{"AGPL-3.0-only"} (name->ids "AGPL-3.0"))) + (is (= #{"AGPL-3.0-only"} (name->ids "AGPL-3.0-only"))) + (is (= #{"Apache-2.0"} (name->ids " Apache-2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (name->ids "Apache-2.0"))) + (is (= #{"CC-BY-SA-4.0"} (name->ids "CC-BY-SA-4.0"))) + (is (= #{"GPL-2.0-only"} (name->ids "GPL-2.0"))) + (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GPL-2.0-with-classpath-exception")))) (testing "Names that are SPDX expressions" - (is (= #{"GPL-2.0" "Classpath-exception-2.0"} (fuzzy-match-name->license-ids "GPL-2.0 WITH Classpath-exception-2.0")))) - (testing "Names" - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL) version 3.0"))) - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0 only"))) - (is (= #{"Apache-1.0"} 
(fuzzy-match-name->license-ids "Apache Software License"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License Version 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache License, Version 1.0"))) - (is (= #{"Apache-1.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.0"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache License, Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "Apache Software License - Version 1.1"))) - (is (= #{"Apache-1.1"} (fuzzy-match-name->license-ids "The MX4J License, version 1.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache Software License, Version 2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software 
License v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License v2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "Bouncy Castle Licence"))) ; Note spelling of "licence" - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License (BSD3)"))) - (is (= #{"BSD-3-Clause-Attribution"} (fuzzy-match-name->license-ids "BSD 3-Clause Attribution"))) - (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Attribution 3.0 Unported"))) - (is (= #{"CC-BY-3.0"} (fuzzy-match-name->license-ids "Creative Commons Legal Code Attribution 3.0 Unported"))) - (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "Attribution 4.0 International"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "Creative Commons Attribution Share Alike 4.0 International"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) - (is (= #{"CDDL-1.0"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) - (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) - (is (= #{"CDDL-1.1"} (fuzzy-match-name->license-ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License 
(EPL)"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (fuzzy-match-name->license-ids "Eclipse Public License, Version 1.0"))) - (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (fuzzy-match-name->license-ids "Eclipse Public License version 2"))) - (is (= #{"GPL-2.0"} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-name->license-ids "GNU General Public License v2.0 w/Classpath exception"))) - (is (= #{"JSON"} (fuzzy-match-name->license-ids "JSON License"))) - (is (= #{"LGPL-2.0"} (fuzzy-match-name->license-ids "GNU Library General Public License"))) - (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) - (is (= #{"LGPL-2.1"} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) ; Test capitalisation - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) - (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) - (is (= #{"Plexus"} (fuzzy-match-name->license-ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see 
https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (testing "Names that appear in POMs on Clojars" ; as of 2023-07-13 - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids " Apache License, Version 2.0"))) - (is (= #{"GPL-3.0"} (fuzzy-match-name->license-ids " GNU GENERAL PUBLIC LICENSE Version 3"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids " MIT License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "${license.id}"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "2-Clause BSD"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "2-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-clause BSD license"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "3-clause license (New BSD License or Modified BSD License)"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "A Clojure library for Google Cloud Pub/Sub."))) - (is (= #{} (fuzzy-match-name->license-ids "AGPL"))) ; Listed license missing version - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPL v3"))) - (is (= #{"AGPL-3.0"} (fuzzy-match-name->license-ids "AGPLv3"))) - (is (= #{} (fuzzy-match-name->license-ids "APACHE"))) ; Listed license missing version - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "APACHE LICENSE, VERSION 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "APGL"))) ; Probable typo - (is (= #{} (fuzzy-match-name->license-ids "ASL"))) ; Listed license missing version - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "ASL 2.0"))) - (is (= #{"AFL-3.0"} (fuzzy-match-name->license-ids "Academic Free License 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Aduna BSD license"))) ; Listed license missing clause info - (is (= #{} (fuzzy-match-name->license-ids "Affero GNU Public License v3"))) - (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License v3 or later (at your option)"))) - (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License version 3 or lator"))) - (is (= #{} (fuzzy-match-name->license-ids "Affero General Public License,"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "All Rights Reserved"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "All rights reserved"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "Amazon Software License"))) - (is (= #{} (fuzzy-match-name->license-ids "Apache"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2 License"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2 Public License"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2, see LICENSE"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache 2.0 License"))) - (is (= #{"Apache-2.0" "LLVM-exception"} (fuzzy-match-name->license-ids "Apache 2.0 with LLVM Exception"))) - (is (= #{} (fuzzy-match-name->license-ids "Apache Licence"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Licence 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Licence, Version 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Apache License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License - Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License - v 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License - v2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License V2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License V2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License Version 2.0, January 2004"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License v 2.0"))) - (is (= #{"Apache-2.0"} 
(fuzzy-match-name->license-ids "Apache License v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License v2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, Version 2.0."))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, version 2."))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache License, version 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Apache Public License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Public License, version 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Apache Software License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License - v 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Software License, Version 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Apache Software Licesne"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Sofware Licencse 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache Sofware License 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache V2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache V2 License"))) - (is (= #{"Apache-2.0"} 
(fuzzy-match-name->license-ids "Apache Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache license version 2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache license, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2 License"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache v2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache-2.0 License"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "Apache2 License"))) - (is (= #{} (fuzzy-match-name->license-ids "Artistic License"))) ; Listed license missing version - (is (= #{} (fuzzy-match-name->license-ids "Artistic License/GPL"))) ; Listed license missing version - (is (= #{"Artistic-2.0"} (fuzzy-match-name->license-ids "Artistic-2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) - (is (= #{} (fuzzy-match-name->license-ids "BSD"))) ; Listed license missing clause info - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD (2 Clause)"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD (2-Clause)"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD (Type 2) Public License"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2 Clause"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2 clause license"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause \"Simplified\" License"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause Licence"))) - (is (= #{"BSD-2-Clause"} 
(fuzzy-match-name->license-ids "BSD 2-Clause License"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-Clause license"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD 2-clause \"Simplified\" License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3 Clause"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause \"New\" or \"Revised\" License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause 'New' or 'Revised' License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-Clause license"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-clause"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-clause License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD 3-clause license"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD C2"))) - (is (= #{} (fuzzy-match-name->license-ids "BSD License"))) ; Listed license missing clause info - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD New, Version 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "BSD Standard License"))) ; Listed license missing clause info - (is (= #{} (fuzzy-match-name->license-ids "BSD license"))) ; Listed license missing clause info - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "BSD-2-Clause"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD-3"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "BSD-3-Clause"))) - (is (= #{} (fuzzy-match-name->license-ids "BSD-style"))) ; Listed license missing clause info - (is (unlisted-only? (fuzzy-match-name->license-ids "BankersBox License"))) - (is (= #{"Beerware"} (fuzzy-match-name->license-ids "Beerware 42"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "Bespoke"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Bloomberg Open API"))) - (is (= #{"BSL-1.0"} (fuzzy-match-name->license-ids "Boost Software License - Version 1.0"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Bostock"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Built In Project License"))) - (is (= #{} (fuzzy-match-name->license-ids "CC Attribution 4.0 International with exception for binary distribution"))) - (is (= #{} (fuzzy-match-name->license-ids "CC BY-NC"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-name->license-ids "CC BY-SA 4.0"))) - (is (= #{"CC-BY-4.0"} (fuzzy-match-name->license-ids "CC-BY-4.0"))) - (is (= #{"CC0-1.0"} (fuzzy-match-name->license-ids "CC0"))) - (is (= #{"CC0-1.0"} (fuzzy-match-name->license-ids "CC0 1.0 Universal"))) - (is (= #{"CC0-1.0"} (fuzzy-match-name->license-ids "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "CRAPL License"))) - (is (= #{} (fuzzy-match-name->license-ids "CeCILL License"))) ; Listed license, but need a version - (is (= #{} (fuzzy-match-name->license-ids "Common Development and Distribution License"))) - (is (= #{} (fuzzy-match-name->license-ids "Common Development and Distribution License (CDDL)"))) - (is (= #{} (fuzzy-match-name->license-ids "Common Public License - v 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Common Public License Version 1.0"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Contact JMonkeyEngine forums for license details"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright & all rights reserved Lean Pixel"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (C) 2014 Mathieu Gauthron. 
Distributed under the Eclipse Public License."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (C) 2015 by Glowbox LLC"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (c) 2011 Drew Colthorp"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright (c) 2017, Lingchao Xin"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2013 The Fresh Diet. All rights reserved."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2016, klaraHealth, Inc."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2017 All Rights Reserved"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 2017 Zensight"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright 4A Volcano. 2015."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright Ona Systems Inc."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright meissa GmbH"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Copyright © SparX 2014"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution 2.5 License"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution License"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-NonCommercial 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 Unported"))) - (is (= #{} (fuzzy-match-name->license-ids "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Custom"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "Cydeas Public License"))) - (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) - (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "DO-WTF-U-WANT-2"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "Distributed under an MIT-style license (see LICENSE for details)."))) - (is (= #{} (fuzzy-match-name->license-ids "Distributed under the Eclipse Public License, the same as Clojure."))) - (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "Do What The Fuck You Want To Public License"))) - (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "Do What The Fuck You Want To Public License, Version 2"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Don't steal my stuff"))) - (is (= #{"Apache-2.0" "EPL-1.0"} (fuzzy-match-name->license-ids "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Dropbox ToS"))) - (is (= #{} (fuzzy-match-name->license-ids "Dual MIT & Proprietary"))) - (is (= #{} (fuzzy-match-name->license-ids "Dual: EPL and LGPL"))) - (is (= #{} (fuzzy-match-name->license-ids "ECLIPSE PUBLIC LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-3.0-or-later"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "EPL-v1.0"))) - (is (= #{} 
(fuzzy-match-name->license-ids "EPLv2"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse License"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public Licence"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License (EPL)"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License (EPL) - v 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - Version 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - v 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - v 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License - v1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 1.0 (EPL-1.0)"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License 2.0,"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License v 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License v1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License v2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License version 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License version 2"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License version 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License, v. 
2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License, v2"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public License, version 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public Licese - v 1.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Public MIT"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse Pulic License"))) - (is (= #{} (fuzzy-match-name->license-ids "Eclipse public license, the same as Clojure"))) - (is (= #{"EUPL-1.1"} (fuzzy-match-name->license-ids "European Union Public Licence (EUPL v.1.1)"))) - (is (= #{"EUPL-1.2"} (fuzzy-match-name->license-ids "European Union Public Licence v. 1.2"))) - (is (= #{} (fuzzy-match-name->license-ids "European Union Public License"))) - (is (= #{"EUPL-1.2"} (fuzzy-match-name->license-ids "European Union Public License 1.2 or later"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "Expat (MIT) license"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "FIXME: choose"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Firebase ToS"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "FreeBSD License"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "GG Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU AGPL-V3 or later"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU AGPLv3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public Licence"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License (AGPL)"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License 3.0 (AGPL-3.0)"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License Version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License Version 3; Other commercial licenses available."))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License v3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License v3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License, Version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Affero General Public License, version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GENERAL PUBLIC LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL V2+"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v. 
3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v3+"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL v3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPL, version 3, 29 June 2007"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU GPLv3+"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Lesser Public License (LGPL) version 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License (GPL)"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License 2"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License V3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License Version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License v3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License v3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License v3.0 or later"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, Version 2, with the Classpath Exception"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, Version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, Version 3 (or later)"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, version 2"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License, version 3 (GPLv3)"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU General Public License,version 2.0 or (at your option) any later version"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) - (is (= #{} 
(fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL v2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL v3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LGPL-3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU LGPLv3 "))) ; Note trailing space - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser GPL"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public Licence"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public Licence 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL)"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License (LGPL) Version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License - v 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License - v 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License - v3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License 2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License v2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License v3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License version 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, Version 2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, Version 
3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, Version 3 or later"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, v. 3 or later"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, version 2.1 or newer"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, version 3 or later"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser General Pulic License v2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser Genereal Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Lesser Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Library or Lesser General Public License (LGPL)"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Library or Lesser General Public License (LGPL) 2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Library or Lesser General Public License (LGPL) V2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Public License V. 
3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Public License V3"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Public License v2"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Public License, Version 2"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Public License, Version 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU Public License, v2"))) - (is (= #{} (fuzzy-match-name->license-ids "GNU public licence V3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GNUv3"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL 2.0+"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL V3"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL V3+"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL v2"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL v2+ or Swiss Ephemeris"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL v3"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL version 3"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL-2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL-3"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL-3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL-3.0-only"))) - (is (= #{} (fuzzy-match-name->license-ids "GPL3"))) - (is (= #{} (fuzzy-match-name->license-ids "GPLv2"))) - (is (= #{} (fuzzy-match-name->license-ids "GPLv2 with Classpath exception"))) - (is (= #{} (fuzzy-match-name->license-ids "GPLv3"))) - (is (= #{} (fuzzy-match-name->license-ids "General Public License 3"))) - (is (= #{} (fuzzy-match-name->license-ids "General Public License v3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Gnu Lesser Public License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Google Maps ToS"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "GraphiQL license"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Hackthorn Innovation Ltd"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Hackthorn Innovation copyright"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Heap ToS"))) - (is (= #{"Hippocratic-2.1"} (fuzzy-match-name->license-ids "Hippocratic License"))) - (is (= #{"ISC"} (fuzzy-match-name->license-ids "ISC"))) - (is (= #{"ISC"} (fuzzy-match-name->license-ids "ISC Licence"))) - (is (= #{"ISC"} (fuzzy-match-name->license-ids "ISC License"))) - (is (= #{"ISC" "Classpath-exception-2.0"} (fuzzy-match-name->license-ids "ISC WITH Classpath-exception-2.0"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Interel"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "JLGL Backend"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Jedis License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Jiegao Owned"))) - (is (= #{} (fuzzy-match-name->license-ids "L GPL 3"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL 2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL 3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL 3.0 (GNU Lesser General Public License)"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL License"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL Open Source license"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL v3"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL-2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL-2.1-only"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL-3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL-3.0-only"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPL-3.0-or-later"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPLv2.1"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPLv3"))) - (is (= #{} (fuzzy-match-name->license-ids "LGPLv3+"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "Lesser GPL"))) - (is (= #{} (fuzzy-match-name->license-ids "Lesser General Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "Lesser General Public License (LGPL)"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Libre Uso MX"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "License of respective package"))) - (is (= #{} (fuzzy-match-name->license-ids "Licensed under GNU Lesser General Public License Version 3 or later (the "))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Like Clojure."))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT LICENSE"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT Licence"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT Licens"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT License (MIT)"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT Public License"))) - (is (= #{"X11"} (fuzzy-match-name->license-ids "MIT X11 License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT license"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT public License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT public license"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "MIT-style license (see LICENSE for details)."))) - (is (= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (fuzzy-match-name->license-ids "MIT/Apache-2.0/BSD-3-Clause"))) - (is (= #{"ISC"} (fuzzy-match-name->license-ids "MIT/ISC"))) - (is (= #{"ISC"} (fuzzy-match-name->license-ids "MIT/ISC License"))) - (is (= #{"X11"} (fuzzy-match-name->license-ids "MIT/X11"))) - (is (= #{} (fuzzy-match-name->license-ids "MPL"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL 2"))) - (is (= #{"MPL-2.0"} 
(fuzzy-match-name->license-ids "MPL 2.0"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL v2"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL-2.0"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL-v2.0"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "MPL2.0"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Mixed"))) - (is (= #{} (fuzzy-match-name->license-ids "Modified BSD License"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public Licence 2.0"))) - (is (= #{} (fuzzy-match-name->license-ids "Mozilla Public License"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License (Version 2.0)"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License 2.0"))) - (is (= #{"MPL-1.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 1.0"))) - (is (= #{"MPL-1.1"} (fuzzy-match-name->license-ids "Mozilla Public License Version 1.1"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License Version 2.0"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License v2.0"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License v2.0+"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License version 2"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License version 2.0"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License, v. 2.0"))) - (is (= #{"MPL-2.0"} (fuzzy-match-name->license-ids "Mozilla Public License, version 2.0"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "Multiple"))) - (is (= #{"NASA-1.3"} (fuzzy-match-name->license-ids "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) - (is (= #{"NASA-1.3"} (fuzzy-match-name->license-ids "NASA Open Source Agreement, Version 1.3"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "New BSD 2-clause license"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "New BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "New BSD License or Modified BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "New BSD license"))) - (is (= #{"BSD-3-Clause" "MIT"} (fuzzy-match-name->license-ids "New-BSD / MIT"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Not fit for public use so formally proprietary software - this is not open-source"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "OTN License Agreement"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Open Source Community License - Type C version 1.0"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Other License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Private"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Private License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Proprietary"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Proprietary License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Provisdom"))) - (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (fuzzy-match-name->license-ids "Public Domain"))) - (is (= #{"CC0"} (fuzzy-match-name->license-ids "Public domain (CC0)"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Research License 1.0"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "Restricted Distribution."))) - (is (= #{} (fuzzy-match-name->license-ids "Revised BSD"))) - (is (= #{"Ruby"} (fuzzy-match-name->license-ids "Ruby License"))) - (is (= #{} (fuzzy-match-name->license-ids "SGI"))) - (is (= #{"SMPPL"} (fuzzy-match-name->license-ids "SMPPL"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "SYNNEX China Owned"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "See the LICENSE file"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Shen License"))) - (is (= #{} (fuzzy-match-name->license-ids "Simplified BSD License"))) - (is (= #{} (fuzzy-match-name->license-ids "Simplified BSD license"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Slick2D License"))) - (is (= #{} (fuzzy-match-name->license-ids "Some Eclipse Public License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Stripe ToS"))) - (is (= #{"Beerware"} (fuzzy-match-name->license-ids "THE BEER-WARE LICENSE"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "THE MIT LICENSE"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "TODO"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "TODO: Choose a license"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The 3-Clause BSD License"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache 2 License"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "The Apache Software License, Version 2.0"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "The BSD 2-Clause License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The BSD 3-Clause License"))) - (is (= #{} (fuzzy-match-name->license-ids "The BSD License"))) - (is (= #{"EUPL-1.1"} (fuzzy-match-name->license-ids "The European Union Public License, Version 1.1"))) - (is (= #{} (fuzzy-match-name->license-ids "The GNU General Public License"))) - (is (= #{} (fuzzy-match-name->license-ids "The GNU General Public License v3.0"))) - (is (= #{} (fuzzy-match-name->license-ids "The GNU General Public License, Version 2"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "The I Haven't Got Around To This Yet License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT Licence"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License (MIT)"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License (MIT) "))) ; Note trailing space - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License (MIT) | Open Source Initiative"))) - (is (= #{"MIT"} (fuzzy-match-name->license-ids "The MIT License."))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The New BSD License"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "The New BSD license"))) - (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "The UnLicense"))) - (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "The Unlicence"))) - (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "The Unlicense"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "Three Clause BSD-like License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "To ill!"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Tulos Commercial License"))) - (is (= #{"BSD-2-Clause"} (fuzzy-match-name->license-ids "Two clause BSD license"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "UNLICENSED"))) - (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "UnLicense"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "University of Buffalo Public License"))) - (is (= #{"NCSA"} (fuzzy-match-name->license-ids "University of Illinois/NCSA Open Source License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Unknown"))) - (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "Unlicense"))) - (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "Unlicense License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "VNETLPL - Limited Public License"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "VNet PL"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Various"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Vimeo License"))) - (is (= #{"W3C"} (fuzzy-match-name->license-ids "W3C Software license"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "WIP"))) - (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "WTFPL"))) - (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "WTFPL v2"))) - (is (= #{"WTFPL"} (fuzzy-match-name->license-ids "WTFPL – Do What the Fuck You Want to Public License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "Wildbit Proprietary License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "YouTube ToS"))) - (is (= #{"Zlib"} (fuzzy-match-name->license-ids "Zlib License"))) - (is (= #{} (fuzzy-match-name->license-ids "apache"))) - (is (= #{"Apache-2.0"} (fuzzy-match-name->license-ids "apache-2.0"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "avi license"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "esl-sdk-external-signer-verification"))) - (is (= #{} (fuzzy-match-name->license-ids "http://opensource.org/licenses/MIT"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/cmiles74/uio/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/jaycfields/jry/blob/master/README.md#license"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/clafka/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids 
"https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/party/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/mixradio/radix/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/riverford/datagrep/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) - (is (= #{} (fuzzy-match-name->license-ids "https://opensource.org/licenses/BSD-3-Clause"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "jank license"))) - (is (= #{} (fuzzy-match-name->license-ids "lgpl_v2_1"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "name"))) - (is (= #{"BSD-3-Clause"} (fuzzy-match-name->license-ids "new BSD License"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "none"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "proprietary"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "state-node license"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "trove"))) - (is (= #{"Unlicense"} (fuzzy-match-name->license-ids "unlicense"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "url"))) - (is (unlisted-only? (fuzzy-match-name->license-ids "wisdragon"))) - (is (unlisted-only? 
(fuzzy-match-name->license-ids "wiseloong"))) - (is (= #{"Zlib"} (fuzzy-match-name->license-ids "zlib License"))) - (is (= #{"Zlib"} (fuzzy-match-name->license-ids "zlib license"))) - (is (= #{"Libpng"} (fuzzy-match-name->license-ids "zlib/libpng License")))) + (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GPL-2.0 WITH Classpath-exception-2.0"))) + (is (= #{"Apache-2.0" "GPL-3.0-only"} (name->ids "Apache-2.0 OR GPL-3.0"))) + (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0" "MIT" "BSD-3-Clause" "Apache-2.0"} + (name->ids "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) +(comment ; ####TODO: RE-ENABLE ME!!!! + (testing "Names, with an emphasis on those seen in POMs on Maven Central" + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License (AGPL) version 3.0"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0 only"))) + (is (= #{"Apache-1.0"} (name->ids "Apache Software License"))) + (is (= #{"Apache-1.0"} (name->ids "Apache License 1"))) + (is (= #{"Apache-1.0"} (name->ids "Apache License 1.0"))) + (is (= #{"Apache-1.0"} (name->ids "Apache License Version 1.0"))) + (is (= #{"Apache-1.0"} (name->ids "Apache License, Version 1.0"))) + (is (= #{"Apache-1.0"} (name->ids "Apache Software License - Version 1.0"))) + (is (= #{"Apache-1.1"} (name->ids "Apache License 1.1"))) + (is (= #{"Apache-1.1"} (name->ids "Apache License Version 1.1"))) + (is (= #{"Apache-1.1"} (name->ids "Apache License, Version 1.1"))) + (is (= #{"Apache-1.1"} (name->ids "Apache Software License - Version 1.1"))) + (is (= #{"Apache-1.1"} (name->ids "The MX4J License, version 1.0"))) + (is (= #{"Apache-2.0"} (name->ids " Apache Software License, Version 2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (name->ids "Apache 2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache 2.0"))) + (is (= 
#{"Apache-2.0"} (name->ids "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License - Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache v2"))) + (is (= #{"Apache-2.0"} (name->ids "The Apache Software License, Version 2.0"))) + (is (= #{"MIT"} (name->ids "Bouncy Castle Licence"))) ; Note spelling of "licence" + (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (name->ids "The BSD 3-Clause License (BSD3)"))) + (is (= #{"BSD-3-Clause-Attribution"} (name->ids "BSD 3-Clause Attribution"))) + (is (= #{"CC-BY-3.0"} (name->ids "Attribution 3.0 Unported"))) + (is (= #{"CC-BY-3.0"} (name->ids "Creative Commons Legal Code Attribution 3.0 Unported"))) + (is (= #{"CC-BY-4.0"} (name->ids "Attribution 4.0 International"))) + (is (= #{"CC-BY-SA-4.0"} (name->ids "Creative Commons Attribution Share Alike 4.0 International"))) + (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) + (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) + (is (= #{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) + (is (= 
#{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License (EPL)"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License, Version 1.0"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU General Public License, version 2"))) + (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) + (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License v2.0 w/Classpath exception"))) + (is (= #{"JSON"} (name->ids "JSON License"))) + (is (= #{"LGPL-2.0-only"} (name->ids "GNU Library General Public License"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License (LGPL)"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License"))) + (is (= #{"MIT"} (name->ids "MIT License"))) + (is (= #{"MIT"} (name->ids "MIT license"))) ; Test capitalisation + (is (= #{"MIT"} (name->ids "The MIT License"))) + (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License Version 2.0"))) + (is (= #{"Plexus"} (name->ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html (testing "Names that appear in licensey things, but are ambiguous" - (is (nil? 
(fuzzy-match-name->license-ids "BSD")))) + (is (nil? (name->ids "BSD")))) (testing "Names that appear in licensey things, but aren't in the SPDX license list" - (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (fuzzy-match-name->license-ids "Public Domain"))) - (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (fuzzy-match-name->license-ids "Public domain"))))) + (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (name->ids "Public Domain"))) + (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (name->ids "Public domain")))) +) + (testing "Distinct license names that appear in POMs on Clojars" ; synced from Clojars 2023-07-13 +;####TODO: SORT ALL OF THESE!!!! + (is (= #{"AFL-3.0"} (name->ids "Academic Free License 3.0"))) +(comment ;####TODO: UNCOMMENT THIS!!!! + (is (= #{"AGPL-3.0-only"} (name->ids "AGPL v3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "AGPLv3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-only"} (name->ids "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU AGPLv3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License 3.0 (AGPL-3.0)"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License Version 3; Other commercial licenses available."))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! 
+ (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License, Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License, version 3"))) + (is (= #{"AGPL-3.0-or-later"} (name->ids "AGPL"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License v3 or later (at your option)"))) + (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License version 3 or lator"))) + (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License"))) + (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License,"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU AGPL-V3 or later"))) + (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest +) + (is (= #{"Apache-2.0" "EPL-2.0"} (name->ids "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0" "LLVM-exception"} (name->ids "Apache 2.0 with LLVM Exception"))) + (is (= #{"Apache-2.0"} (name->ids " Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) + (is (= #{"Apache-2.0"} (name->ids "APACHE LICENSE, VERSION 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "APACHE"))) ; Listed license missing version - we assume the latest + (is (= 
#{"Apache-2.0"} (name->ids "ASL 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "ASL"))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0"} (name->ids "Apache 2 License"))) + (is (= #{"Apache-2.0"} (name->ids "Apache 2 Public License"))) + (is (= #{"Apache-2.0"} (name->ids "Apache 2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache 2, see LICENSE"))) + (is (= #{"Apache-2.0"} (name->ids "Apache 2.0 License"))) + (is (= #{"Apache-2.0"} (name->ids "Apache 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Licence 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Licence"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->ids "Apache Licence, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License - Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License - v 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License - v2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License 2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License V2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License V2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0, January 2004"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License v 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License v2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License v2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->ids "Apache License, 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0."))) + (is (= #{"Apache-2.0"} (name->ids "Apache License, version 2."))) + (is (= #{"Apache-2.0"} 
(name->ids "Apache License, version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Public License 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Public License v2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Public License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->ids "Apache Public License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Public License, version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License - v 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->ids "Apache Software License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Software Licesne"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->ids "Apache Sofware Licencse 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Sofware License 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache V2 License"))) + (is (= #{"Apache-2.0"} (name->ids "Apache V2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache license version 2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache license, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache v2 License"))) + (is (= #{"Apache-2.0"} (name->ids "Apache v2"))) + (is (= #{"Apache-2.0"} (name->ids "Apache v2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->ids "Apache, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache-2.0 License"))) + (is (= #{"Apache-2.0"} (name->ids "Apache-2.0"))) + (is (= #{"Apache-2.0"} (name->ids "Apache2 License"))) + (is (= #{"Apache-2.0"} (name->ids "The Apache 2 License"))) + (is (= #{"Apache-2.0"} (name->ids "The Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->ids "The Apache Software License, Version 2.0"))) + (is (= 
#{"Apache-2.0"} (name->ids "apache"))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0"} (name->ids "apache-2.0"))) + (is (= #{"Artistic-2.0" "GPL-3.0-only"} (name->ids "Artistic License/GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"Artistic-2.0"} (name->ids "Artistic License"))) ; Listed license missing version - we assume the latest + (is (= #{"Artistic-2.0"} (name->ids "Artistic-2.0"))) + (is (= #{"BSD-2-Clause"} (name->ids "2-Clause BSD License"))) + (is (= #{"BSD-2-Clause"} (name->ids "2-Clause BSD"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD (2 Clause)"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD (2-Clause)"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD (Type 2) Public License"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2 Clause"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2 clause license"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause Licence"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause License"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause \"Simplified\" License"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause license"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-clause \"Simplified\" License"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD C2"))) + (is (= #{"BSD-2-Clause"} (name->ids "BSD-2-Clause"))) + (is (= #{"BSD-2-Clause"} (name->ids "New BSD 2-clause license"))) + (is (= #{"BSD-2-Clause"} (name->ids "Simplified BSD License"))) + (is (= #{"BSD-2-Clause"} (name->ids "Simplified BSD license"))) + (is (= #{"BSD-2-Clause"} (name->ids "The BSD 2-Clause License"))) + (is (= #{"BSD-2-Clause"} (name->ids "Two clause BSD license"))) + (is (= #{"BSD-3-Clause" "MIT"} (name->ids "New-BSD / MIT"))) + (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD"))) + (is (= #{"BSD-3-Clause"} (name->ids 
"3-clause BSD licence (Revised BSD licence), also included in the jar file"))) + (is (= #{"BSD-3-Clause"} (name->ids "3-clause BSD license"))) + (is (= #{"BSD-3-Clause"} (name->ids "3-clause license (New BSD License or Modified BSD License)"))) + (is (= #{"BSD-3-Clause"} (name->ids "Aduna BSD license"))) ; Listed license missing clause info, but the license text shows BSD-3-Clause + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3 Clause"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause 'New' or 'Revised' License"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause \"New\" or \"Revised\" License"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause license"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-clause License"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-clause license"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-clause"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD New, Version 3.0"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD-3"))) + (is (= #{"BSD-3-Clause"} (name->ids "BSD-3-Clause"))) + (is (= #{"BSD-3-Clause"} (name->ids "Modified BSD License"))) + (is (= #{"BSD-3-Clause"} (name->ids "New BSD License or Modified BSD License"))) + (is (= #{"BSD-3-Clause"} (name->ids "New BSD License"))) + (is (= #{"BSD-3-Clause"} (name->ids "New BSD license"))) + (is (= #{"BSD-3-Clause"} (name->ids "Revised BSD"))) + (is (= #{"BSD-3-Clause"} (name->ids "The 3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (name->ids "The BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (name->ids "The New BSD License"))) + (is (= #{"BSD-3-Clause"} (name->ids "The New BSD license"))) + (is (= #{"BSD-3-Clause"} (name->ids "Three Clause BSD-like License"))) + (is (unlisted-only? 
(name->ids "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/clafka/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/party/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/radix/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/riverford/datagrep/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) ; Failing due to 
https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 + (is (= #{"BSD-3-Clause"} (name->ids "https://opensource.org/licenses/BSD-3-Clause"))) + (is (= #{"BSD-3-Clause"} (name->ids "new BSD License"))) + (is (= #{"BSD-4-Clause"} (name->ids "BSD License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->ids "BSD Standard License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->ids "BSD license"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->ids "BSD"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->ids "BSD-style"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->ids "The BSD License"))) + (is (= #{"BSL-1.0"} (name->ids "Boost Software License - Version 1.0"))) + (is (= #{"Beerware"} (name->ids "Beerware 42"))) + (is (= #{"Beerware"} (name->ids "THE BEER-WARE LICENSE"))) + (is (= #{"CC-BY-2.5"} (name->ids "Creative Commons Attribution 2.5 License"))) + (is (= #{"CC-BY-3.0"} (name->ids "Creative Commons 3.0"))) + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported"))) + (is (= #{"CC-BY-SA-3.0"} (name->ids 
"Creative Commons Attribution-ShareAlike 3.0"))) + (is (= #{"CC-BY-4.0"} (name->ids "CC Attribution 4.0 International with exception for binary distribution"))) + (is (= #{"CC-BY-4.0"} (name->ids "CC-BY-4.0"))) + (is (= #{"CC-BY-4.0"} (name->ids "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest + (is (= #{"CC-BY-NC-3.0"} (name->ids "Creative Commons Attribution-NonCommercial 3.0"))) + (is (= #{"CC-BY-NC-4.0"} (name->ids "CC BY-NC"))) ; Listed license missing version - we assume the latest + (is (= #{"CC-BY-NC-ND-3.0"} (name->ids "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) + (is (= #{"CC-BY-SA-4.0"} (name->ids "CC BY-SA 4.0"))) + (is (= #{"CC0-1.0"} (name->ids "Public domain (CC0)"))) + (is (= #{"CC0-1.0"} (name->ids "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) + (is (= #{"CC0-1.0"} (name->ids "CC0 1.0 Universal"))) + (is (= #{"CC0-1.0"} (name->ids "CC0"))) + (is (= #{"CDDL-1.1"} (name->ids "Common Development and Distribution License (CDDL)"))) ; Listed license missing clause info + (is (= #{"CDDL-1.1"} (name->ids "Common Development and Distribution License"))) ; Listed license missing clause info + (is (= #{"CECILL-2.1"} (name->ids "CeCILL License"))) ; Listed license missing version - we assume the latest + (is (= #{"CPL-1.0"} (name->ids "Common Public License - v 1.0"))) + (is (= #{"CPL-1.0"} (name->ids "Common Public License Version 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "EPL 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "EPL-1.0"))) + (is (= #{"EPL-1.0"} (name->ids "EPL-v1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License (EPL) - v 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - Version 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License 1.0 (EPL-1.0)"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License 
1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License v 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License v1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License version 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License, version 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "Eclipse Public Licese - v 1.0"))) + (is (= #{"EPL-1.0"} (name->ids "https://github.com/cmiles74/uio/blob/master/LICENSE"))) + (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) +; (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! +; (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest + (is (= #{"EPL-2.0" "GPL-2.0-or-later"} (name->ids "EPL-2.0 OR GPL-2.0-or-later"))) + (is (= #{"EPL-2.0" "GPL-3.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{"EPL-2.0" "GPL-3.0-or-later"} (name->ids "EPL-2.0 OR GPL-3.0-or-later"))) +; (is (= #{"EPL-2.0" "LGPL-3.0-or-later"} (name->ids "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0" "MIT"} (name->ids "Eclipse Public MIT"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) + (is (= #{"EPL-2.0"} (name->ids "Copyright (C) 2014 Mathieu Gauthron. 
Distributed under the Eclipse Public License."))) + (is (= #{"EPL-2.0"} (name->ids "Distributed under the Eclipse Public License, the same as Clojure."))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "ECLIPSE PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "EPL"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "EPL-2.0"))) + (is (= #{"EPL-2.0"} (name->ids "EPLv2"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse License"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public Licence"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License - v 2.0"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0,"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License v2.0"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2.0"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License, v. 
2.0"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License, v2"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse Pulic License"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "Eclipse public license, the same as Clojure"))) + (is (= #{"EPL-2.0"} (name->ids "Eclipse"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->ids "Some Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"EUPL-1.1"} (name->ids "European Union Public Licence (EUPL v.1.1)"))) + (is (= #{"EUPL-1.1"} (name->ids "The European Union Public License, Version 1.1"))) + (is (= #{"EUPL-1.2"} (name->ids "European Union Public Licence v. 1.2"))) + (is (= #{"EUPL-1.2"} (name->ids "European Union Public License 1.2 or later"))) + (is (= #{"EUPL-1.2"} (name->ids "European Union Public License"))) ; Listed license missing version - we assume the latest +(comment ;####TODO: UNCOMMENT THIS!!!! + (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License, Version 2, with the Classpath Exception"))) + (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GPLv2 with Classpath exception"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU General Public License 2"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU General Public License, version 2"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License v2"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License, Version 2"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License, Version 2.0"))) + (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License, v2"))) + (is (= #{"GPL-2.0-only"} (name->ids "GPL v2"))) + (is (= #{"GPL-2.0-only"} (name->ids "GPL-2.0"))) + (is (= #{"GPL-2.0-only"} (name->ids "GPLv2"))) + (is (= #{"GPL-2.0-only"} (name->ids "The GNU General Public License, Version 2"))) + (is (= 
#{"GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{"GPL-2.0-or-later"} (name->ids "GNU GPL V2+"))) + (is (= #{"GPL-2.0-or-later"} (name->ids "GPL 2.0+"))) + (is (= #{"GPL-2.0-or-later"} (name->ids "GPL v2+ or Swiss Ephemeris"))) ; ####TODO: THINK MORE ABOUT THIS + (is (= #{"GPL-3.0-only"} (name->ids " GNU GENERAL PUBLIC LICENSE Version 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v 3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v. 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL, version 3, 29 June 2007"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License V3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License Version 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License v3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License v3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License, Version 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License, version 3 (GPLv3)"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License, version 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU Public License V. 
3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU Public License V3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNU public licence V3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "GNUv3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL 3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL V3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL v3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL version 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL-3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL-3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL-3.0-only"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPL3"))) + (is (= #{"GPL-3.0-only"} (name->ids "GPLv3"))) + (is (= #{"GPL-3.0-only"} (name->ids "General Public License 3"))) + (is (= #{"GPL-3.0-only"} (name->ids "General Public License v3.0"))) + (is (= #{"GPL-3.0-only"} (name->ids "The GNU General Public License v3.0"))) + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GPL v3+"))) + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GPLv3+"))) + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License (GPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License v3.0 or later"))) + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License, Version 3 (or later)"))) + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License,version 2.0 or (at your option) any later version"))) + (is (= #{"GPL-3.0-or-later"} (name->ids "GNU Public License"))) ; Listed license missing version - we assume the latest + (is (= 
#{"GPL-3.0-or-later"} (name->ids "GNU"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->ids "GPL V3+"))) + (is (= #{"GPL-3.0-or-later"} (name->ids "GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->ids "The GNU General Public License"))) ; Listed license missing version - we assume the latest +) + (is (= #{"Hippocratic-2.1"} (name->ids "Hippocratic License"))) + (is (= #{"ISC" "Classpath-exception-2.0"} (name->ids "ISC WITH Classpath-exception-2.0"))) + (is (= #{"ISC"} (name->ids "ISC Licence"))) + (is (= #{"ISC"} (name->ids "ISC License"))) + (is (= #{"ISC"} (name->ids "ISC"))) + (is (= #{"ISC"} (name->ids "MIT/ISC License"))) + (is (= #{"ISC"} (name->ids "MIT/ISC"))) +(comment ;####TODO: UNCOMMENT THIS!!!! + (is (= #{"LGPL-2.1-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU LGPL v2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License v2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License, Version 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Pulic License v2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Library or Lesser General Public License (LGPL) 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "GNU Library or Lesser General Public License (LGPL) V2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "LGPL 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "LGPL-2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "LGPL-2.1-only"))) + (is (= #{"LGPL-2.1-only"} (name->ids "LGPLv2.1"))) + (is (= #{"LGPL-2.1-only"} (name->ids "lgpl_v2_1"))) + (is (= #{"LGPL-2.1-or-later"} (name->ids "GNU Lesser General Public License, version 2.1 or newer"))) + (is (= 
#{"LGPL-3.0-only"} (name->ids "GNU General Lesser Public License (LGPL) version 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL 3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL v3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL version 3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL-3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPLv3 "))) ; Note trailing space + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public Licence 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public Licence"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License (LGPL) Version 3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License - v 3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License - v 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License - v3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License v3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License version 3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License version 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License, Version 3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser Genereal Public 
License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "GNU Library or Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "Gnu Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "L GPL 3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL 3.0 (GNU Lesser General Public License)"))) + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL Open Source license"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL v3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL-3.0"))) + (is (= #{"LGPL-3.0-only"} (name->ids "LGPL-3.0-only"))) + (is (= #{"LGPL-3.0-only"} (name->ids "LGPLv3"))) + (is (= #{"LGPL-3.0-only"} (name->ids "Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->ids "Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, Version 3 or later"))) + (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, v. 
3 or later"))) + (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, version 3 or later"))) + (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) + (is (= #{"LGPL-3.0-or-later"} (name->ids "LGPL-3.0-or-later"))) + (is (= #{"LGPL-3.0-or-later"} (name->ids "LGPLv3+"))) + (is (= #{"LGPL-3.0-or-later"} (name->ids "Licensed under GNU Lesser General Public License Version 3 or later (the "))) ; Note trailing space +) + (is (= #{"Libpng"} (name->ids "zlib/libpng License"))) + (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (name->ids "Public Domain"))) + (is (= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (name->ids "MIT/Apache-2.0/BSD-3-Clause"))) + (is (= #{"MIT"} (name->ids " MIT License"))) + (is (= #{"MIT"} (name->ids "Distributed under an MIT-style license (see LICENSE for details)."))) + (is (= #{"MIT"} (name->ids "Dual MIT & Proprietary"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! + (is (= #{"MIT"} (name->ids "Expat (MIT) license"))) + (is (= #{"MIT"} (name->ids "MIT LICENSE"))) + (is (= #{"MIT"} (name->ids "MIT Licence"))) + (is (= #{"MIT"} (name->ids "MIT Licens"))) + (is (= #{"MIT"} (name->ids "MIT License (MIT)"))) + (is (= #{"MIT"} (name->ids "MIT License"))) + (is (= #{"MIT"} (name->ids "MIT Public License"))) + (is (= #{"MIT"} (name->ids "MIT license"))) + (is (= #{"MIT"} (name->ids "MIT public License"))) + (is (= #{"MIT"} (name->ids "MIT public license"))) + (is (= #{"MIT"} (name->ids "MIT"))) + (is (= #{"MIT"} (name->ids "MIT-style license (see LICENSE for details)."))) + (is (= #{"MIT"} (name->ids "THE MIT LICENSE"))) + (is (= #{"MIT"} (name->ids "The MIT Licence"))) + (is (= #{"MIT"} (name->ids "The MIT License (MIT) "))) ; Note trailing space + (is (= #{"MIT"} (name->ids "The MIT License (MIT) | Open Source Initiative"))) + (is (= #{"MIT"} (name->ids "The MIT License (MIT)"))) + (is (= #{"MIT"} (name->ids "The MIT License"))) + (is (= #{"MIT"} (name->ids 
"The MIT License."))) +;####TODO: UNCOMMENT ONCE URL DETECTION AND RESOLUTION IS IMPLEMENTED!!!! +; (is (= #{"MIT"} (name->ids "http://opensource.org/licenses/MIT"))) +; (is (= #{"MIT"} (name->ids "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) + (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License Version 1.0"))) + (is (= #{"MPL-1.1"} (name->ids "Mozilla Public License Version 1.1"))) + (is (= #{"MPL-2.0"} (name->ids "MPL 2"))) + (is (= #{"MPL-2.0"} (name->ids "MPL 2.0"))) + (is (= #{"MPL-2.0"} (name->ids "MPL v2"))) + (is (= #{"MPL-2.0"} (name->ids "MPL"))) ; Listed license missing version - we assume the latest + (is (= #{"MPL-2.0"} (name->ids "MPL-2.0"))) + (is (= #{"MPL-2.0"} (name->ids "MPL-v2.0"))) + (is (= #{"MPL-2.0"} (name->ids "MPL2.0"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public Licence 2.0"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License (Version 2.0)"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License 2.0"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License Version 2.0"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License v2.0"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License v2.0+"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License version 2"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License version 2.0"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License, v. 
2.0"))) + (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License, version 2.0"))) + (is (= #{"NASA-1.3"} (name->ids "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) + (is (= #{"NASA-1.3"} (name->ids "NASA Open Source Agreement, Version 1.3"))) + (is (= #{"NCSA"} (name->ids "University of Illinois/NCSA Open Source License"))) + (is (= #{"Ruby"} (name->ids "Ruby License"))) + (is (= #{"SGI-B-2.0"} (name->ids "SGI"))) ; Listed license missing version - we assume the latest + (is (= #{"SMPPL"} (name->ids "SMPPL"))) + (is (= #{"Unlicense"} (name->ids "The UnLicense"))) + (is (= #{"Unlicense"} (name->ids "The Unlicence"))) + (is (= #{"Unlicense"} (name->ids "The Unlicense"))) + (is (= #{"Unlicense"} (name->ids "UnLicense"))) + (is (= #{"Unlicense"} (name->ids "Unlicense License"))) + (is (= #{"Unlicense"} (name->ids "Unlicense"))) + (is (= #{"Unlicense"} (name->ids "unlicense"))) + (is (= #{"W3C"} (name->ids "W3C Software license"))) + (is (= #{"WTFPL"} (name->ids "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) + (is (= #{"WTFPL"} (name->ids "DO-WTF-U-WANT-2"))) + (is (= #{"WTFPL"} (name->ids "Do What The Fuck You Want To Public License"))) + (is (= #{"WTFPL"} (name->ids "Do What The Fuck You Want To Public License, Version 2"))) + (is (= #{"WTFPL"} (name->ids "WTFPL v2"))) + (is (= #{"WTFPL"} (name->ids "WTFPL – Do What the Fuck You Want to Public License"))) + (is (= #{"WTFPL"} (name->ids "WTFPL"))) + (is (= #{"X11"} (name->ids "MIT X11 License"))) + (is (= #{"X11"} (name->ids "MIT/X11"))) + (is (= #{"Zlib"} (name->ids "Zlib License"))) + (is (= #{"Zlib"} (name->ids "zlib License"))) + (is (= #{"Zlib"} (name->ids "zlib license"))) + (is (unlisted-only? (name->ids "${license.id}"))) +;####TODO: UNCOMMENT ME!!!! +; (is (unlisted-only? (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (unlisted-only? (name->ids "A Clojure library for Google Cloud Pub/Sub."))) + (is (unlisted-only? (name->ids "APGL"))) ; Probable typo + (is (unlisted-only? 
(name->ids "All Rights Reserved"))) + (is (unlisted-only? (name->ids "All rights reserved"))) + (is (unlisted-only? (name->ids "Amazon Software License"))) + (is (unlisted-only? (name->ids "BankersBox License"))) + (is (unlisted-only? (name->ids "Bespoke"))) + (is (unlisted-only? (name->ids "Bloomberg Open API"))) + (is (unlisted-only? (name->ids "Bostock"))) + (is (unlisted-only? (name->ids "Built In Project License"))) + (is (unlisted-only? (name->ids "CRAPL License"))) + (is (unlisted-only? (name->ids "Contact JMonkeyEngine forums for license details"))) + (is (unlisted-only? (name->ids "Copyright & all rights reserved Lean Pixel"))) + (is (unlisted-only? (name->ids "Copyright (C) 2015 by Glowbox LLC"))) + (is (unlisted-only? (name->ids "Copyright (c) 2011 Drew Colthorp"))) + (is (unlisted-only? (name->ids "Copyright (c) 2017, Lingchao Xin"))) + (is (unlisted-only? (name->ids "Copyright 2013 The Fresh Diet. All rights reserved."))) + (is (unlisted-only? (name->ids "Copyright 2016, klaraHealth, Inc."))) + (is (unlisted-only? (name->ids "Copyright 2017 All Rights Reserved"))) + (is (unlisted-only? (name->ids "Copyright 2017 Zensight"))) + (is (unlisted-only? (name->ids "Copyright 4A Volcano. 2015."))) + (is (unlisted-only? (name->ids "Copyright Ona Systems Inc."))) + (is (unlisted-only? (name->ids "Copyright meissa GmbH"))) + (is (unlisted-only? (name->ids "Copyright © SparX 2014"))) + (is (unlisted-only? (name->ids "Copyright"))) + (is (unlisted-only? (name->ids "Custom"))) + (is (unlisted-only? (name->ids "Cydeas Public License"))) + (is (unlisted-only? (name->ids "Don't steal my stuff"))) + (is (unlisted-only? (name->ids "Dropbox ToS"))) + (is (unlisted-only? (name->ids "FIXME: choose"))) + (is (unlisted-only? (name->ids "Firebase ToS"))) + (is (= #{"BSD-2-Clause-FreeBSD"} (name->ids "FreeBSD License"))) + (is (unlisted-only? (name->ids "GG Public License"))) + (is (unlisted-only? (name->ids "Google Maps ToS"))) + (is (unlisted-only? 
(name->ids "GraphiQL license"))) + (is (unlisted-only? (name->ids "Hackthorn Innovation Ltd"))) + (is (unlisted-only? (name->ids "Hackthorn Innovation copyright"))) + (is (unlisted-only? (name->ids "Heap ToS"))) + (is (unlisted-only? (name->ids "Interel"))) + (is (unlisted-only? (name->ids "JLGL Backend"))) + (is (unlisted-only? (name->ids "Jedis License"))) + (is (unlisted-only? (name->ids "Jiegao Owned"))) + (is (unlisted-only? (name->ids "LICENSE"))) + (is (unlisted-only? (name->ids "Libre Uso MX"))) + (is (unlisted-only? (name->ids "License of respective package"))) + (is (unlisted-only? (name->ids "License"))) + (is (unlisted-only? (name->ids "Like Clojure."))) + (is (unlisted-only? (name->ids "Mixed"))) + (is (unlisted-only? (name->ids "Multiple"))) + (is (unlisted-only? (name->ids "Not fit for public use so formally proprietary software - this is not open-source"))) + (is (unlisted-only? (name->ids "OTN License Agreement"))) + (is (unlisted-only? (name->ids "Open Source Community License - Type C version 1.0"))) + (is (unlisted-only? (name->ids "Other License"))) + (is (unlisted-only? (name->ids "Private License"))) + (is (unlisted-only? (name->ids "Private"))) + (is (unlisted-only? (name->ids "Proprietary License"))) + (is (unlisted-only? (name->ids "Proprietary"))) + (is (unlisted-only? (name->ids "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) + (is (unlisted-only? (name->ids "Provisdom"))) + (is (unlisted-only? (name->ids "Research License 1.0"))) + (is (unlisted-only? (name->ids "Restricted Distribution."))) + (is (unlisted-only? (name->ids "SYNNEX China Owned"))) + (is (unlisted-only? (name->ids "See the LICENSE file"))) + (is (unlisted-only? (name->ids "Shen License"))) + (is (unlisted-only? (name->ids "Slick2D License"))) + (is (unlisted-only? (name->ids "Stripe ToS"))) + (is (unlisted-only? (name->ids "TODO"))) + (is (unlisted-only? (name->ids "TODO: Choose a license"))) + (is (unlisted-only? 
(name->ids "The I Haven't Got Around To This Yet License"))) + (is (unlisted-only? (name->ids "To ill!"))) + (is (unlisted-only? (name->ids "Tulos Commercial License"))) + (is (unlisted-only? (name->ids "UNLICENSED"))) + (is (unlisted-only? (name->ids "University of Buffalo Public License"))) + (is (unlisted-only? (name->ids "Unknown"))) + (is (unlisted-only? (name->ids "VNETLPL - Limited Public License"))) + (is (unlisted-only? (name->ids "VNet PL"))) + (is (unlisted-only? (name->ids "Various"))) + (is (unlisted-only? (name->ids "Vimeo License"))) + (is (unlisted-only? (name->ids "WIP"))) + (is (unlisted-only? (name->ids "Wildbit Proprietary License"))) + (is (unlisted-only? (name->ids "YouTube ToS"))) + (is (unlisted-only? (name->ids "avi license"))) + (is (unlisted-only? (name->ids "esl-sdk-external-signer-verification"))) + (is (unlisted-only? (name->ids "jank license"))) + (is (unlisted-only? (name->ids "name"))) + (is (unlisted-only? (name->ids "none"))) + (is (unlisted-only? (name->ids "proprietary"))) + (is (unlisted-only? (name->ids "state-node license"))) + (is (unlisted-only? (name->ids "trove"))) + (is (unlisted-only? (name->ids "url"))) + (is (unlisted-only? (name->ids "wisdragon"))) + (is (unlisted-only? (name->ids "wiseloong"))))) -(deftest uri->license-ids-tests +(deftest uri->ids-tests (testing "Nil, empty or blank uri" - (is (nil? (fuzzy-match-uri->license-ids nil))) - (is (nil? (fuzzy-match-uri->license-ids ""))) - (is (nil? (fuzzy-match-uri->license-ids " "))) - (is (nil? (fuzzy-match-uri->license-ids "\n"))) - (is (nil? 
(fuzzy-match-uri->license-ids "\t")))) - (testing "URIs that appear verbatim in the SPDX license list" - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace - (is (= #{"AGPL-3.0-or-later" "AGPL-3.0-only" "AGPL-3.0"} (fuzzy-match-uri->license-ids "https://www.gnu.org/licenses/agpl.txt"))) - (is (= #{"CC-BY-SA-4.0"} (fuzzy-match-uri->license-ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (fuzzy-match-uri->license-ids "https://www.gnu.org/software/classpath/license.html")))) + (is (nil? (uri->ids nil))) + (is (nil? (uri->ids ""))) + (is (nil? (uri->ids " "))) + (is (nil? (uri->ids "\n"))) + (is (nil? (uri->ids "\t")))) + (testing "URIs that appear verbatim in the SPDX license or exception lists" + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace + (is (= #{"AGPL-3.0-or-later" "AGPL-3.0-only" "AGPL-3.0"} (uri->ids "https://www.gnu.org/licenses/agpl.txt"))) + (is (= #{"CC-BY-SA-4.0"} (uri->ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= #{"Classpath-exception-2.0"} (uri->ids "https://www.gnu.org/software/classpath/license.html")))) (testing "URI variations that should be handled identically" - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) - (testing "URIs that appear in licensey 
things, but aren't in the SPDX license list" - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= #{"Apache-2.0"} (fuzzy-match-uri->license-ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (uri->ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) + (testing "URIs that appear in licensey things, but aren't in the SPDX license list as shown" + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt")))) + (testing "URIs that aren't in the SPDX license list, but do match via retrieval and full text matching" + (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))))) ; ####TODO: Not sure about this one diff --git a/test/lice_comb/utils_test.clj b/test/lice_comb/utils_test.clj new file mode 100644 index 0000000..67511a3 --- /dev/null +++ b/test/lice_comb/utils_test.clj @@ -0,0 +1,60 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.utils-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] + [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.impl.utils :refer [simplify-uri]])) + +(use-fixtures :once fixture) + +(def simplified-apache2-uri "http://apache.org/licenses/license-2.0") + +(deftest simplify-uri-tests + (testing "Nil, empty or blank values" + (is (nil? (simplify-uri nil))) + (is (nil? (simplify-uri ""))) + (is (nil? (simplify-uri " "))) + (is (nil? (simplify-uri "\n"))) + (is (nil? (simplify-uri "\t")))) + (testing "Values that are not uris" + (is (= "foo" (simplify-uri "FOO"))) + (is (= "foo" (simplify-uri "foo"))) + (is (= "foobar" (simplify-uri " FoObAr ")))) + (testing "Values that are non-http(s) uris" + (is (= "ftp://user@host/foo/bar.txt" (simplify-uri "ftp://user@host/foo/bar.txt"))) + (is (= "ftp://user@host/foo/bar.txt" (simplify-uri "FTP://USER@HOST/FOO/BAR.TXT"))) + (is (= "mailto:someone@example.com?subject=this%20is%20the%20subject&cc=someone_else@example.com&body=this%20is%20the%20body" + (simplify-uri "mailto:someone@example.com?subject=This%20is%20the%20subject&cc=someone_else@example.com&body=This%20is%20the%20body")))) + (testing "Valid uris that don't get simplified" + (is (= simplified-apache2-uri (simplify-uri simplified-apache2-uri))) + (is (= "http://creativecommons.org/licenses/by-sa/4.0/legalcode" (simplify-uri "http://creativecommons.org/licenses/by-sa/4.0/legalcode")))) + (testing "Valid uris that get simplified" + (is (= simplified-apache2-uri (simplify-uri "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/LICENSE-2.0"))) + (is (= simplified-apache2-uri (simplify-uri "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= simplified-apache2-uri (simplify-uri 
"http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/license-2.0.txt"))) + (is (= simplified-apache2-uri (simplify-uri "http://apache.org/licenses/LICENSE-2.0.pdf"))) + (is (= simplified-apache2-uri (simplify-uri " http://www.apache.org/licenses/LICENSE-2.0.html "))) + (is (= "http://gnu.org/licenses/agpl" (simplify-uri "https://www.gnu.org/licenses/agpl.txt"))) + (is (= "http://gnu.org/software/classpath/license" (simplify-uri "https://www.gnu.org/software/classpath/license.html"))) + (is (= "http://raw.githubusercontent.com/pmonks/lice-comb/main/license" (simplify-uri "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) + (is (= "http://github.com/pmonks/lice-comb/blob/main/license" (simplify-uri "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))))) From 9868d9f214455a9d893f1da029d5ec39f3b30983 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Sat, 12 Aug 2023 18:02:24 -0700 Subject: [PATCH 11/34] :arrow_up: Add new dependencies and upgrade others --- deps.edn | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deps.edn b/deps.edn index c79c32b..a4007e6 100644 --- a/deps.edn +++ b/deps.edn @@ -24,9 +24,9 @@ clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} tolitius/xml-in {:mvn/version "0.1.1"} hato/hato {:mvn/version "0.9.0"} - miikka/clj-base62 {:mvn/version "0.1.0"} - com.github.pmonks/clj-spdx {:mvn/version "1.0.86"} - com.github.pmonks/rencg {:mvn/version "1.0.32"}} + miikka/clj-base62 {:mvn/version "0.1.1"} + com.github.pmonks/clj-spdx {:mvn/version "1.0.88"} + com.github.pmonks/rencg {:mvn/version "1.0.34"}} :aliases {:build {:deps {com.github.pmonks/pbr {:mvn/version "RELEASE"}} :ns-default pbr.build}}} From 90656db1320a9662162a3cd821ba526797dc5cbe Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Sat, 12 Aug 2023 18:02:46 -0700 
Subject: [PATCH 12/34] :construction: Ongoing work on issue #3 --- src/lice_comb/impl/utils.clj | 2 +- src/lice_comb/matching.clj | 312 +++++++++---- test/lice_comb/matching_test.clj | 743 +++++++++++++++++++++++++++++-- 3 files changed, 922 insertions(+), 135 deletions(-) diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index d8701b7..79feb7c 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -96,7 +96,7 @@ (defn valid-http-uri? "Returns true if given string is a valid HTTP or HTTPS URI." [^String s] - ; Note: no nil check needed since the isValid method handles nil sanely + ; Note: no nil check needed since the isValid method handles null sanely (.isValid (org.apache.commons.validator.routines.UrlValidator. ^"[Ljava.lang.String;" (into-array String ["http" "https"])) s)) (defn simplify-uri diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index 7b5fc5c..3196bff 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -35,8 +35,13 @@ (def ^:private exception-list-d (delay (map se/id->info (se/ids)))) ; The unlisted license refs lice-comb uses (note: the unlisted one usually has a base62 suffix appended) -(def ^:private public-domain-license-ref "LicenseRef-lice-comb-PUBLIC-DOMAIN") -(def ^:private unlisted-license-ref-prefix "LicenseRef-lice-comb-UNLISTED") +(def ^:private public-domain-license-ref "LicenseRef-lice-comb-PUBLIC-DOMAIN") +(def ^:private proprietary-commercial-license-ref "LicenseRef-lice-comb-PROPRIETARY-OR-COMMERCIAL") +(def ^:private unlisted-license-ref-prefix "LicenseRef-lice-comb-UNLISTED") + +; Lower case id map +(def ^:private spdx-ids-d (delay (merge (into {} (map #(vec [(s/lower-case %) %]) (sl/ids))) + (into {} (map #(vec [(s/lower-case %) %]) (se/ids)))))) (defn public-domain? "Is the given id lice-comb's custom 'public domain' LicenseRef?" @@ -49,6 +54,17 @@ public-domain (constantly public-domain-license-ref)) +(defn proprietary-or-commercial? 
+ "Is the given id lice-comb's custom 'proprietary or commercial' LicenseRef?" + [id] + (= (s/lower-case id) (s/lower-case proprietary-commercial-license-ref))) + +(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) + representing a proprietary or commercial license." + :arglists '([])} + proprietary-or-commercial + (constantly proprietary-commercial-license-ref)) + (defn unlisted? "Is the given id a lice-comb custom 'unlisted' LicenseRef?" [id] @@ -80,11 +96,42 @@ verbatim if unable to determine a name. Returns nil if the id is blank." [id] (when-not (s/blank? id) - (cond (sl/listed-id? id) (:name (sl/id->info id)) - (se/listed-id? id) (:name (se/id->info id)) - (public-domain? id) "Public domain" - (unlisted? id) (unlisted->name id) - :else id))) + (cond (sl/listed-id? id) (:name (sl/id->info id)) + (se/listed-id? id) (:name (se/id->info id)) + (public-domain? id) "Public domain" + (proprietary-or-commercial? id) "Proprietary or commercial" + (unlisted? id) (unlisted->name id) + :else id))) + +(defn- fix-public-domain-cc0 + [ids] + (if (and (contains? ids public-domain-license-ref) + (contains? ids "CC0-1.0")) + (disj ids public-domain-license-ref) + ids)) + +(defn- fix-ids-that-end-with-plus + [ids] + (some-> (seq (map #(s/replace % #"\+\z" "-or-later") ids)) ; Note: assumes that all SPDX license identifiers that end in '+' also have a variant that ends in '-or-later' (which is known to be true up to 2023-07-01, and I expect to remain true going forward thanks to SPDX expressions) + set)) + +(defn- fix-classpath-exception + [ids] + (if (contains? ids "GPL-2.0-with-classpath-exception") + (conj (disj ids "GPL-2.0-with-classpath-exception") "GPL-2.0-only" "Classpath-exception-2.0") + ids)) + +(defn- manual-fixes + "Manually fix certain combinations of license identifiers." 
+ [ids] + (when ids + (-> ids + fix-public-domain-cc0 + fix-ids-that-end-with-plus + fix-classpath-exception))) + +; Only match against SPDX license identifiers that do _not_ end with "+" - these are all duplicate/old/deprecated ids that pre-date license expressions (where "+" gained independent semantics) +(def ^:private license-ids-for-matching-d (delay (filter #(not (s/ends-with? % "+")) (sl/ids)))) (defmulti text->ids "Attempts to determine the SPDX license and/or exception identifier(s) (a set) @@ -104,9 +151,9 @@ (defmethod text->ids java.lang.String [s] ; These clj-spdx APIs are *expensive*, so we paralellise them - (let [f-lic (future (sm/licenses-within-text s)) + (let [f-lic (future (sm/licenses-within-text s @license-ids-for-matching-d)) f-exc (future (sm/exceptions-within-text s))] - (set/union @f-lic @f-exc))) + (manual-fixes (set/union @f-lic @f-exc)))) (defmethod text->ids java.io.Reader [r] @@ -139,17 +186,16 @@ :cookie-policy :none}))) (defn- github-raw-uri - "Converts a GitHub 'UI' URI into a 'raw' (CDN) GitHub URI. - + "Converts a GitHub UI URI into a GitHub CDN URI. e.g. https://github.com/pmonks/lice-comb/blob/main/LICENSE -> https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE - If the given URI is not a GitHub 'UI' URI, returns the URI unchanged." + If the given URI is not a GitHub UI URI, returns the input unchanged." [uri] - (if-let [uri-obj (try (io/as-url uri) (catch Exception _ nil))] + (if-let [^java.net.URL uri-obj (try (io/as-url uri) (catch Exception _ nil))] (if (= "github.com" (s/lower-case (.getHost uri-obj))) (-> uri - (s/replace "github.com" "raw.githubusercontent.com") - (s/replace "/blob/" "/")) + (s/replace #"(?i)github\.com" "raw.githubusercontent.com") + (s/replace "/blob/" "/")) uri) uri)) @@ -162,7 +208,8 @@ (try (when-let [response (hc/get (github-raw-uri uri) {:http-client @http-client-d - :accept "text/plain;q=1,*/*;q=0"})] ; Kindly request server to only return text/plain... 
...even though this gets ignored a lot of the time 🙄 + :accept "text/plain;q=1,*/*;q=0" ; Kindly request that the server only return text/plain... ...even though this gets ignored a lot of the time 🙄 + :headers {"user-agent" "com.github.pmonks/lice-comb"}})] (when (= :text/plain (:content-type response)) (:body response))) (catch Exception _ @@ -185,13 +232,15 @@ 2. URIs in the SPDX license and exception lists are not unique - the same URI may represent multiple licenses and/or exceptions." [uri] - (when-let [suri (lcu/simplify-uri uri)] - ; First, see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) - (if-let [result (get @index-uri-to-id-d suri)] - result - ; Second, attempt to retrieve it as text/plain and perform full license matching on it - (when-let [license-text (attempt-text-http-get uri)] - (text->ids license-text))))) + (when-not (s/blank? uri) + (manual-fixes + (let [suri (lcu/simplify-uri uri)] + ; First, see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) + (if-let [result (get @index-uri-to-id-d suri)] + result + ; Second, attempt to retrieve the text/plain contents of the uri and perform full license matching on it + (when-let [license-text (attempt-text-http-get uri)] + (text->ids license-text))))))) (defn- name-to-id-tuple [list-entry] @@ -218,7 +267,7 @@ (sexp/extract-ids expression))) (defn- get-rencgs - "Get a value for an re-ncg, potentially looking at multiple ncgs in order until a non-blank value is found. Also trims and lower-cases the value." + "Get a value for an re-ncg, potentially looking at multiple ncgs in order until a non-blank value is found. Also trims and lower-cases the value, and replaces all whitespace with a single space." ([m names] (get-rencgs m names nil)) ([m names default] (loop [f (first names) @@ -227,10 +276,12 @@ (let [value (get m f)] (if (s/blank?
value) (recur (first r) (rest r)) - (s/lower-case (s/trim value)))) + (-> value + (s/trim) + (s/lower-case) + (s/replace #"\s+" " ")))) default)))) -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! (defn- generic-id-constructor [m] (when m @@ -240,9 +291,7 @@ ver (when (and (:pad-ver? m) (not (s/includes? ver "."))) - (let [pad (last (s/split (:latest-ver m) #"\."))] - (when-not (s/blank? pad) - (str "." pad))))))))) + ".0")))))) (defn- number-name-to-number "Converts the name of a number to that number (as a string). e.g. \"two\" -> \"2\". Returns s unchanged if it's not a number name." @@ -261,7 +310,6 @@ (when s (every? #(Character/isDigit ^Character %) s)))) -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! (defn- bsd-id-constructor [m] (let [clause-count1 (number-name-to-number (get-rencgs m ["clausecount1"])) @@ -276,16 +324,34 @@ clause-count (case preferred-clause-count ("2" "simplified") "2" ("3" "new" "revised" "modified" "aduna") "3" - "4")] ; Note: we default to 4 clause, since it was the original form of the BSD license - (str (:id m) "-" clause-count "-Clause"))) + "4") ; Note: we default to 4 clause, since it was the original form of the BSD license + suffix (case (get-rencgs m ["suffix"]) + "patent" "Patent" + "views" "Views" + "attribution" "Attribution" + "clear" "Clear" + "lbnl" "LBNL" + "modification" "Modification" + ("no military license" "no military licence") "No-Military-License" + ("no nuclear license" "no nuclear licence") "No-Nuclear-License" + ("no nuclear license 2014" "no nuclear licence 2014") "No-Nuclear-License-2014" + "no nuclear warranty" "No-Nuclear-Warranty" + "open mpi" "Open-MPI" + "shortened" "Shortened" + "uc" "UC" + nil) + base-id (str (:id m) "-" clause-count "-Clause") + id-with-suffix (str base-id "-" suffix)] + (if (contains? 
(sl/ids) id-with-suffix) ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it + id-with-suffix + base-id))) -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! (defn- cc-id-constructor [m] (let [nc? (not (s/blank? (get-rencgs m ["noncommercial"]))) nd? (not (s/blank? (get-rencgs m ["noderivatives"]))) sa? (not (s/blank? (get-rencgs m ["sharealike"]))) - version (get-rencgs m ["version"] (:latest-ver m)) + version (get-rencgs m ["version1" "version2"] (:latest-ver m)) base-id (str "CC-BY-" (when nc? "NC-") (when nd? "ND-") @@ -309,7 +375,6 @@ base-id (throw (ex-info "Invalid Creative Commons license information found" (dissoc m :id :regex :fn :pad-ver? :latest-ver))))))) -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! (defn- gpl-id-constructor [m] (let [id (case (get-rencgs m ["edition1" "edition2"]) @@ -324,7 +389,7 @@ ("later" "newer" "+") "or-later" ("only") "only" "only")] ; Note: we (conservatively) default to "only" when we don't have an explicit suffix - (str id "-" version (when-not (= id "AGPL") (str "-" suffix))))) + (str id "-" version "-" suffix))) (defn- simple-regex-match "Constructs a 'simple' name match structure" @@ -345,7 +410,7 @@ :pad-ver? true :latest-ver "3.0"} {:id "Apache" - :regex #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :regex #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?!.*acknowledgment\s+clause\s+removed)\b" :fn generic-id-constructor :pad-ver? true :latest-ver "2.0"} @@ -363,7 +428,7 @@ :pad-ver? true :latest-ver "1.0"} {:id "BSD" - :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?" 
+ :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?" :fn bsd-id-constructor} {:id "CC0" :regex #"(?i)\bCC\s*0" @@ -389,7 +454,7 @@ :pad-ver? true :latest-ver "1.0"} {:id "Creative commons family" - :regex #"(?i)\b(CC([\s-]+BY)?\b|(Creative\s+Commons\s+(Attribution)?|Attribution))([\s,-]*((?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(\s+Unported|International|Generic)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?\b" + :regex #"(?i)\b(CC([\s-]+BY)?\b|(Creative\s+Commons(\s+Legal\s+Code)?(\s+Attribution)?|Attribution\s+(?\d(.\d)?)))([\s,-]*((?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(\s+Unported|International|Generic)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?\b" :fn cc-id-constructor :pad-ver? 
true :latest-ver "4.0"} @@ -407,7 +472,7 @@ :regex #"(?i)\bFreeBSD\b" :fn (constantly "BSD-2-Clause-FreeBSD")} {:id "GNU license family" - :regex #"(?i)\b(?(Affero|Lesser|Library|LGPL|AGPL)\s+)?(GPL|GNU|General\s+Pub?lic\s+Licen[cs]e)(?\s+(Affero|Lesser|Library))?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?(A|L)?GPL\)?)?([\s,-]*V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(or(\s+\(?at\s+your\s+option\)?)?)?(\s+any)?(\s*(?later|newer|only|\+))?\b" + :regex #"(?i)\b(?(Affero|Lesser|Library|LGPL|AGPL)\s+)?(GPL|GNU(?!\s*Classpath)|General\s+Pub?lic\s+Licen[cs]e)(?\s+(Affero|Lesser|Library))?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?(A|L)?GPL\)?)?([\s,-]*V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(or(\s+\(?at\s+your\s+(option|discretion)\)?)?)?(\s+any)?(\s*(?later|newer|only|\+))?\b" :fn gpl-id-constructor :pad-ver? true :latest-ver 3.0} @@ -418,7 +483,7 @@ :regex #"(?i)\bLLVM[\s-]+Exception\b" :fn (constantly "LLVM-exception")} {:id "MIT" - :regex #"(?i)\bMIT(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" + :regex #"(?i)\b(MIT|Bouncy\s+Castle)(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" :fn (constantly "MIT")} {:id "MPL" :regex #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" @@ -430,9 +495,15 @@ :fn generic-id-constructor :pad-ver? true :latest-ver "1.3"} + {:id "Plexus" + :regex #"(?i)\bApache\s+Licen[cs]e(\s+but)?(\s+with)?(\s+the)?\s+acknowledgment\s+clause\s+removed\b" + :fn (constantly "Plexus")} + {:id "Proprietary or commercial" + :regex #"(?i)\b(Propriet[ao]ry|Commercial|All\s+Rights\s+Reserved|Private)\b" + :fn proprietary-or-commercial} {:id "Public Domain" :regex #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)" - :fn (constantly public-domain-license-ref)} + :fn public-domain} {:id "Ruby" :regex #"(?i)\bRuby(\s+Licen[cs]e)?\b" :fn (constantly "Ruby")} @@ -452,42 +523,115 @@ :fn (constantly "Zlib")} ])) -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! 
(defn- match-regex - "Returns the SPDX license-id for the given elem from license-name-matching, if a match occurred, or nil if there was no match." - [name elem] - (when-let [matches (rencg/re-find-ncg (:regex elem) name)] - ((:fn elem) (merge {:name name} elem matches)))) + "Returns a map containing the SPDX :id and :start index of the given + regex in the string if a match occurred, or nil if there was no match." + [s elem] + (when-let [match (rencg/re-find-ncg (:regex elem) s)] + {:id ((:fn elem) (merge {:name s} elem match)) + :start (:start match)})) -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! (defn- match-regexes - "Returns all of the matched SPDX license-id for the given name, or nil if there were no matches." - [name] - (some-> (seq (filter identity (pmap (partial match-regex name) license-name-matching))) - set)) - -(defn- fix-public-domain-cc0 - [ids] - (if (and (contains? ids public-domain-license-ref) - (contains? ids "CC0-1.0")) - (disj ids public-domain-license-ref) - ids)) - -(defn- fix-classpath-exception - [ids] - (if (contains? ids "GPL-2.0-with-classpath-exception") - (conj (disj ids "GPL-2.0-with-classpath-exception") "GPL-2.0-only" "Classpath-exception-2.0") - ids)) + "Returns a sequence (NOT A SET!) of the matched SPDX license or + exception ids for the given string, or nil if there were no matches. + Results are in the order in which they appear in the string." + [s] + (some->> (seq (filter identity (pmap (partial match-regex s) license-name-matching))) + (sort-by :start) + (map :id))) + +(defn- split-on-operators + "Case insensitively splits a string based on license operators (and, + or, with), but only if they're not also part of a license name (e.g. + 'Common Development and Distribution License', 'GNU General Public + License version 2.0 or (at your option) any later version', etc.)." + [s] + (when-not (s/blank? s) + (map #(if (keyword? %) % (s/trim %)) + (mapcat #(if (keyword? 
%) [%] (interpose :with (s/split % #"(?i)\b(with|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)"))) + (mapcat #(if (keyword? %) [%] (interpose :or (s/split % #"(?i)\bor(?!\s+(later|lator|newer|lesser|library))\b"))) + (interpose :and (s/split s #"(?i)\b(and|\&)(?!(\s+distribution))\b"))))))) -(defn- manual-fixes - "Manually fix certain combinations of license identifiers." - [ids] - (when ids - (-> ids - fix-public-domain-cc0 - fix-classpath-exception))) +;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! +(defn- string->ids + "Converts the given String into a sequence (NOT A SET!) of SPDX + identifier(s), each of which is a listed SPDX license or exception id + if the value is recognised, or a lice-comb specific 'unlisted' + LicenseRef if not. This involves: + 1. Seeing if it's a listed license or exception id + 2. Looking up the value in the names in the SPDX license and exception + lists + 3. If the value is a URI, performing URI matching with it + 4. Using regexes to attempt to identify the license(s) and/or + exception(s) + 5. Returning a lice-comb specific 'unlisted' LicenseRef" + [s] + (when-not (s/blank? s) + ; 1. Is it an SPDX license or exception id? + (let [s (s/trim s)] + (if-let [spdx-id (get @spdx-ids-d (s/lower-case s))] + [spdx-id] + ; 2. Is it an SPDX license or exception name? + (if-let [name-match (listed-name->ids s)] + [name-match] + ; 3. If it's a URI, perform URI matching on it (this is to handle some dumb corner cases that do exist in the real world) + (if-let [uri-matches (uri->ids s)] + (vec uri-matches) + ; 4. Attempt regex name matching + (if-let [re-name-matches (match-regexes s)] + re-name-matches + ; 5. Give up and return a lice-comb "unlisted" LicenseRef + [(name->unlisted s)]))))))) + +(defn- process-expression-element + "Processes a single new element being added to l, and will combine it + with earlier elements in l where appropriate." + [l e] + (if (keyword? 
e) + (conj l e) + (case (count (take-while keyword? l)) + 0 (if (= (first l) e) l (conj l e)) + 1 (let [kw (s/upper-case (name (first l))) + prior (second l) + earlier (rest (rest l))] + (if (nil? prior) + (conj earlier e) + (conj earlier (s/join " " [prior kw e])))) + (let [earlier (drop-while keyword? l)] + (conj earlier e))))) + +(defn- build-spdx-expressions + "Builds a set of SPDX expression(s) from the given list containing strings and keywords." + [l] + (let [l (drop-while keyword? l)] + (loop [result '() + f (first l) + r (rest l)] + (if f + (recur (process-expression-element result f) (first r) (rest r)) + (some-> (seq (reverse (drop-while keyword? result))) + set))))) ;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AND SOURCE!!!! +(defn name->expressions + "Attempts to determine the SPDX license expression(s) (a set of Strings) + from the given 'license name' (a String), or nil if there aren't any. + This involves: + 1. Determining whether the name is a valid SPDX license expression, and if so + normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it + 2. Constructing one or more SPDX license expressions by splitting the name on license operators (and, or, with), matching each resulting fragment to SPDX identifier(s) individually, and recombining the matches" + [name] + (when-not (s/blank? name) + (let [name (s/trim name)] + ; 1. If it's a valid SPDX expression, return the normalised rendition of it in a set + (if-let [normalised-expression (sexp/normalise name)] + #{normalised-expression} + ; 2. Attempt to build SPDX expression(s) from the name + (some->> (split-on-operators name) + (mapcat #(if (keyword? %) [%] (string->ids %))) + (map #(if (and (coll? %) (= 1 (count %))) (first %) %)) + build-spdx-expressions))))) + (defn name->ids "Attempts to determine the SPDX license identifier(s) (a set) from the given name (a string), or nil if there aren't any. This involves: @@ -506,16 +650,7 @@ ; 1.
Parse the name as an SPDX exception, and if that succeeds, return all ids in the expression (if-let [ids-in-expression (parse-expression-and-extract-ids name)] ids-in-expression - ; 2. Then we look up by name - (if-let [listed-name-matches (listed-name->ids name)] - listed-name-matches - ; 3. Then we fallback on regex name matching - (if-let [re-name-matches (match-regexes name)] - re-name-matches - ; 4. Then we see if it's actually a URI, and URI match if so - this is to handle some dumb corner cases that exist in the real world - (if-let [uri-matches (uri->ids name)] - uri-matches - #{(name->unlisted name)})))))))) + (string->ids name)))))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent @@ -525,10 +660,15 @@ Note: this method has a substantial performance cost." [] - (sl/init!) - (se/init!) - @license-list-d - @exception-list-d + ; Parallelise initialisation of the license and exception lists, as they're both sloooooooow + (future + (sl/init!) + @license-list-d) + (future + (se/init!) + @exception-list-d) + @spdx-ids-d + @license-ids-for-matching-d @index-uri-to-id-d @index-name-to-id-d @http-client-d diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 8657b66..d7c91e0 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -19,7 +19,7 @@ (ns lice-comb.matching-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.matching :refer [unlisted? name->unlisted text->ids name->ids uri->ids]] + [lice-comb.matching :refer [unlisted? proprietary-or-commercial? name->unlisted public-domain proprietary-or-commercial text->ids name->expressions name->ids uri->ids]] [spdx.licenses :as sl] [spdx.exceptions :as se])) @@ -44,6 +44,660 @@ (is (true? (every? false? (map unlisted? (sl/ids))))) (is (true? (every? false? (map unlisted? 
(se/ids))))))) +(deftest name->expressions-tests + (testing "Nil, empty or blank" + (is (nil? (name->expressions nil))) + (is (nil? (name->expressions ""))) + (is (nil? (name->expressions " "))) + (is (nil? (name->expressions "\n"))) + (is (nil? (name->expressions "\t")))) + (testing "SPDX license ids" + (is (= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0-only"))) + (is (= #{"Apache-2.0"} (name->expressions " Apache-2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) + (is (= #{"CC-BY-SA-4.0"} (name->expressions "CC-BY-SA-4.0"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) + (is (= #{"GPL-2.0-with-classpath-exception"} (name->expressions "GPL-2.0-with-classpath-exception")))) + (testing "Public domain and proprietary/commercial" + (is (= #{(public-domain)} (name->expressions "Public Domain"))) + (is (= #{(public-domain)} (name->expressions "Public domain"))) ; Test lower case + (is (= #{(public-domain)} (name->expressions " Public domain "))) ; Test whitespace + (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Commercial"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "All rights reserved")))) + (testing "Expressions that are valid SPDX" + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0 WITH Classpath-exception-2.0"))) + (is (= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache-2.0 OR GPL-3.0"))) + (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) + (testing "Single expressions that are not valid SPDX" + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath 
Exception"))) + (is (= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache License version 2.0 or GNU General Public License version 3"))) + (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)"))) + (is (= #{"Apache-2.0 AND MIT"} (name->expressions "Apache & MIT licence"))) + (is (= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution Licence")))) + (testing "Expressions with weird operators" + (is (= #{"Apache-2.0"} (name->expressions "and and and Apache License 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0 or or or"))) + (is (= #{"Apache-2.0 or MIT"} (name->expressions "Apache License 2.0 or or or or or or or or MIT license"))) + (is (= #{"Apache-2.0" "MIT"} (name->expressions "Apache License 2.0 and/or MIT licence")))) + (testing "Multiple expressions that are not valid SPDX" + (is (= #{"MIT" "BSD-4-Clause"} (name->expressions "MIT / BSD"))) + (is (= #{"Apache-2.0" "GPL-3.0-only"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3"))) + (is (= #{"Apache-2.0" "GPL-3.0-only WITH Classpath-exception-2.0"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3 with classpath exception"))) + (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR BSD-3-Clause AND Apache-2.0"} (name->expressions "Eclipse Public License or General Public License 2.0 or (at your discretion) later w/ classpath exception or MIT Licence or three clause bsd and Apache Licence")))) + (testing "Names seen in select POMs on Maven Central" + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License (AGPL) version 3.0"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0 only"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero 
General Public License v3.0"))) + (is (= #{"Apache-1.0"} (name->expressions "Apache License 1"))) + (is (= #{"Apache-1.0"} (name->expressions "Apache License 1.0"))) + (is (= #{"Apache-1.0"} (name->expressions "Apache License Version 1.0"))) + (is (= #{"Apache-1.0"} (name->expressions "Apache License, Version 1.0"))) + (is (= #{"Apache-1.0"} (name->expressions "Apache Software License - Version 1.0"))) + (is (= #{"Apache-1.1"} (name->expressions "Apache License 1.1"))) + (is (= #{"Apache-1.1"} (name->expressions "Apache License Version 1.1"))) + (is (= #{"Apache-1.1"} (name->expressions "Apache License, Version 1.1"))) + (is (= #{"Apache-1.1"} (name->expressions "Apache Software License - Version 1.1"))) + (is (= #{"Apache-1.1"} (name->expressions "The MX4J License, version 1.0"))) + (is (= #{"Apache-2.0"} (name->expressions " Apache Software License, Version 2.0 "))) ; Test whitespace + (is (= #{"Apache-2.0"} (name->expressions "Apache 2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License - Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License 2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License Version 2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License v2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License v2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License, 
Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache v2"))) + (is (= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) + (is (= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License (BSD3)"))) + (is (= #{"BSD-3-Clause-Attribution"} (name->expressions "BSD 3-Clause Attribution"))) + (is (= #{"BSD-4-Clause"} (name->expressions "BSD"))) + (is (= #{"CC-BY-3.0"} (name->expressions "Attribution 3.0 Unported"))) + (is (= #{"CC-BY-3.0"} (name->expressions "Creative Commons Legal Code Attribution 3.0 Unported"))) + (is (= #{"CC-BY-4.0"} (name->expressions "Attribution 4.0 International"))) + (is (= #{"CC-BY-SA-4.0"} (name->expressions "Creative Commons Attribution Share Alike 4.0 International"))) + (is (= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) + (is (= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) + (is (= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) + (is (= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License, Version 1.0"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public 
License"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License v2.0 w/Classpath exception"))) + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 (GPL2), with the classpath exception"))) + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) + (is (= #{"JSON"} (name->expressions "JSON License"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Library General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"MIT"} (name->expressions "Bouncy Castle Licence"))) ; Note spelling of "licence" + (is (= #{"MIT"} (name->expressions "MIT License"))) + (is (= #{"MIT"} (name->expressions "MIT license"))) ; Test capitalisation + (is (= #{"MIT"} (name->expressions "The MIT License"))) + (is (= #{"MPL-1.0"} (name->expressions "Mozilla Public License 1"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License Version 2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"Plexus"} (name->expressions "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (testing "All 
names seen in POMs on Clojars as of 2023-07-13" +(comment + (is (= #{"AFL-3.0"} (name->expressions "Academic Free License 3.0"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "AGPL v3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "AGPLv3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU AGPLv3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License 3.0 (AGPL-3.0)"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3; Other commercial licenses available."))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! 
+ (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, Version 3"))) + (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, version 3"))) + (is (= #{"AGPL-3.0-or-later"} (name->expressions "AGPL"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License v3 or later (at your option)"))) + (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License version 3 or lator"))) ; Typo in "lator" + (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License"))) + (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License,"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU AGPL-V3 or later"))) + (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0 WITH LLVM-exception"} (name->expressions "Apache 2.0 with LLVM Exception"))) + (is (= #{"Apache-2.0"} (name->expressions " Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) + (is (= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "APACHE"))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0"} (name->expressions "ASL 2.0"))) + (is (= 
#{"Apache-2.0"} (name->expressions "ASL"))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0"} (name->expressions "Apache 2 License"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache 2 Public License"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache 2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache 2, see LICENSE"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache 2.0 License"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Licence"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->expressions "Apache Licence, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License - Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License - v 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License - v2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License 2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License V2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License V2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0, January 2004"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License v 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License v2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License v2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->expressions "Apache License, 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License, Version 
2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0."))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License, version 2."))) + (is (= #{"Apache-2.0"} (name->expressions "Apache License, version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Public License 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Public License v2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Public License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->expressions "Apache Public License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Public License, version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License - v 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->expressions "Apache Software License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Software Licesne"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->expressions "Apache Sofware Licencse 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Sofware License 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache V2 License"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache V2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache license version 2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache license, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache v2 License"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache v2"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache v2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache"))) ; Listed license missing clause info + (is (= #{"Apache-2.0"} (name->expressions "Apache, Version 2.0"))) + (is (= #{"Apache-2.0"} 
(name->expressions "Apache-2.0 License"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "Apache2 License"))) + (is (= #{"Apache-2.0"} (name->expressions "The Apache 2 License"))) + (is (= #{"Apache-2.0"} (name->expressions "The Apache License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) + (is (= #{"Apache-2.0"} (name->expressions "apache"))) ; Listed license missing version - we assume the latest + (is (= #{"Apache-2.0"} (name->expressions "apache-2.0"))) + (is (= #{"Artistic-2.0" "GPL-3.0-only"} (name->expressions "Artistic License/GPL"))) ; Missing conjunction, so return 2 (singleton) expressions + (is (= #{"Artistic-2.0"} (name->expressions "Artistic License"))) ; Listed license missing version - we assume the latest + (is (= #{"Artistic-2.0"} (name->expressions "Artistic-2.0"))) + (is (= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD License"))) + (is (= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD (2 Clause)"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD (2-Clause)"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD (Type 2) Public License"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2 Clause"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2 clause license"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause Licence"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause License"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause \"Simplified\" License"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause license"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-clause \"Simplified\" License"))) + (is (= #{"BSD-2-Clause"} (name->expressions "BSD C2"))) + (is (= #{"BSD-2-Clause"} 
(name->expressions "BSD-2-Clause"))) + (is (= #{"BSD-2-Clause"} (name->expressions "New BSD 2-clause license"))) + (is (= #{"BSD-2-Clause"} (name->expressions "Simplified BSD License"))) + (is (= #{"BSD-2-Clause"} (name->expressions "Simplified BSD license"))) + (is (= #{"BSD-2-Clause"} (name->expressions "The BSD 2-Clause License"))) + (is (= #{"BSD-2-Clause"} (name->expressions "Two clause BSD license"))) + (is (= #{"BSD-2-Clause-FreeBSD"} (name->expressions "FreeBSD License"))) + (is (= #{"BSD-3-Clause" "MIT"} (name->expressions "New-BSD / MIT"))) ; Missing conjunction, so return 2 (singleton) expressions + (is (= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD"))) + (is (= #{"BSD-3-Clause"} (name->expressions "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) + (is (= #{"BSD-3-Clause"} (name->expressions "3-clause BSD license"))) + (is (= #{"BSD-3-Clause"} (name->expressions "3-clause license (New BSD License or Modified BSD License)"))) + (is (= #{"BSD-3-Clause"} (name->expressions "Aduna BSD license"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3 Clause"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause 'New' or 'Revised' License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause \"New\" or \"Revised\" License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause license"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause license"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD New, Version 3.0"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD-3"))) + (is (= #{"BSD-3-Clause"} (name->expressions "BSD-3-Clause"))) + (is (= 
#{"BSD-3-Clause"} (name->expressions "Modified BSD License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "New BSD License or Modified BSD License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "New BSD License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "New BSD license"))) + (is (= #{"BSD-3-Clause"} (name->expressions "Revised BSD"))) + (is (= #{"BSD-3-Clause"} (name->expressions "The 3-Clause BSD License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "The New BSD License"))) + (is (= #{"BSD-3-Clause"} (name->expressions "The New BSD license"))) + (is (= #{"BSD-3-Clause"} (name->expressions "Three Clause BSD-like License"))) +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/clafka/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) ; Failing due to 
https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/party/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/radix/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/datagrep/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 + (is (= #{"BSD-3-Clause"} (name->expressions "https://opensource.org/licenses/BSD-3-Clause"))) + (is (= #{"BSD-3-Clause"} (name->expressions "new BSD License"))) + (is (= #{"BSD-4-Clause"} (name->expressions "BSD License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->expressions "BSD Standard License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->expressions "BSD license"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->expressions "BSD"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->expressions "BSD-style"))) ; Listed license missing clause info - we assume original (4 clause) + (is (= #{"BSD-4-Clause"} (name->expressions "The BSD License"))) + (is (= #{"BSL-1.0"} (name->expressions "Boost Software License - Version 1.0"))) + (is (= #{"Beerware"} (name->expressions "Beerware 42"))) + (is (= 
#{"Beerware"} (name->expressions "THE BEER-WARE LICENSE"))) + (is (= #{"CC-BY-2.5"} (name->expressions "Creative Commons Attribution 2.5 License"))) + (is (= #{"CC-BY-3.0"} (name->expressions "Creative Commons 3.0"))) + (is (= #{"CC-BY-4.0"} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) + (is (= #{"CC-BY-4.0"} (name->expressions "CC-BY-4.0"))) + (is (= #{"CC-BY-4.0"} (name->expressions "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest + (is (= #{"CC-BY-NC-3.0"} (name->expressions "Creative Commons Attribution-NonCommercial 3.0"))) + (is (= #{"CC-BY-NC-4.0"} (name->expressions "CC BY-NC"))) ; Listed license missing version - we assume the latest + (is (= #{"CC-BY-NC-ND-3.0"} (name->expressions "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) + (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) + (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported"))) + (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0"))) + (is (= #{"CC-BY-SA-4.0"} (name->expressions "CC BY-SA 4.0"))) + (is (= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) + (is (= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal"))) + (is (= #{"CC0-1.0"} (name->expressions "CC0"))) + (is (= #{"CC0-1.0"} (name->expressions "Public domain (CC0)"))) + (is (= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License (CDDL)"))) ; 
Listed license missing clause info + (is (= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License"))) ; Listed license missing clause info + (is (= #{"CECILL-2.1"} (name->expressions "CeCILL License"))) ; Listed license missing version - we assume the latest + (is (= #{"CPL-1.0"} (name->expressions "Common Public License - v 1.0"))) + (is (= #{"CPL-1.0"} (name->expressions "Common Public License Version 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "EPL 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "EPL-1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "EPL-v1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License (EPL) - v 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - Version 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0 (EPL-1.0)"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License v 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License v1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License version 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License, version 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public Licese - v 1.0"))) + (is (= #{"EPL-1.0"} (name->expressions "https://github.com/cmiles74/uio/blob/master/LICENSE"))) + (is (= #{"EPL-2.0 AND LGPL-3.0-or-later"} (name->expressions "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0 OR Apache-2.0"} (name->expressions "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} 
(name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest + (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! + (is (= #{"EPL-2.0 OR GPL-2.0-or-later"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later"))) + (is (= #{"EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{"EPL-2.0 OR GPL-3.0-or-later"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later"))) + (is (= #{"EPL-2.0" "MIT"} (name->expressions "Eclipse Public MIT"))) ; Listed license missing version - we assume the latest ; Missing conjunction, so return 2 (singleton) expressions + (is (= #{"EPL-2.0"} (name->expressions "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) + (is (= #{"EPL-2.0"} (name->expressions "Copyright (C) 2014 Mathieu Gauthron. 
Distributed under the Eclipse Public License."))) + (is (= #{"EPL-2.0"} (name->expressions "Distributed under the Eclipse Public License, the same as Clojure."))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "ECLIPSE PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "EPL"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "EPL-2.0"))) + (is (= #{"EPL-2.0"} (name->expressions "EPLv2"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse License"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public Licence"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License - v 2.0"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0,"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License v2.0"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2.0"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License, v. 
2.0"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License, v2"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse Pulic License"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Eclipse public license, the same as Clojure"))) + (is (= #{"EPL-2.0"} (name->expressions "Eclipse"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0"} (name->expressions "Some Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"EUPL-1.1"} (name->expressions "European Union Public Licence (EUPL v.1.1)"))) + (is (= #{"EUPL-1.1"} (name->expressions "The European Union Public License, Version 1.1"))) + (is (= #{"EUPL-1.2"} (name->expressions "European Union Public Licence v. 1.2"))) + (is (= #{"EUPL-1.2"} (name->expressions "European Union Public License 1.2 or later"))) + (is (= #{"EUPL-1.2"} (name->expressions "European Union Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, Version 2, with the Classpath Exception"))) + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2 with Classpath exception"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU General Public License 2"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License v2"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2.0"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License, v2"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GPL v2"))) + (is (= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) + (is (= 
#{"GPL-2.0-only"} (name->expressions "GPLv2"))) + (is (= #{"GPL-2.0-only"} (name->expressions "The GNU General Public License, Version 2"))) + (is (= #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (= #{"GPL-2.0-or-later"} (name->expressions "GNU GPL V2+"))) + (is (= #{"GPL-2.0-or-later"} (name->expressions "GPL 2.0+"))) + (is (= #{"GPL-3.0-only"} (name->expressions " GNU GENERAL PUBLIC LICENSE Version 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v 3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v. 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL, version 3, 29 June 2007"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License V3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License Version 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, Version 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3 (GPLv3)"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU Public License V. 
3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU Public License V3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNU public licence V3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GNUv3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL 3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL V3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL v3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL version 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL-3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL-3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL-3.0-only"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPL3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "GPLv3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "General Public License 3"))) + (is (= #{"GPL-3.0-only"} (name->expressions "General Public License v3.0"))) + (is (= #{"GPL-3.0-only"} (name->expressions "The GNU General Public License v3.0"))) + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GPL v3+"))) + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GPLv3+"))) + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License (GPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License v3.0 or later"))) + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License, Version 3 (or later)"))) + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public 
License,version 2.0 or (at your option) any later version"))) + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->expressions "GPL V3+"))) + (is (= #{"GPL-3.0-or-later"} (name->expressions "GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"GPL-3.0-or-later"} (name->expressions "The GNU General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"Hippocratic-2.1"} (name->expressions "Hippocratic License"))) + (is (= #{"ISC WITH Classpath-exception-2.0"} (name->expressions "ISC WITH Classpath-exception-2.0"))) + (is (= #{"ISC"} (name->expressions "ISC Licence"))) + (is (= #{"ISC"} (name->expressions "ISC License"))) + (is (= #{"ISC"} (name->expressions "ISC"))) + (is (= #{"ISC"} (name->expressions "MIT/ISC License"))) + (is (= #{"ISC"} (name->expressions "MIT/ISC"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU LGPL v2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License v2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License, Version 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Pulic License v2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) 2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) V2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "LGPL 2.1"))) + (is 
(= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1-only"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "LGPLv2.1"))) + (is (= #{"LGPL-2.1-only"} (name->expressions "lgpl_v2_1"))) + (is (= #{"LGPL-2.1-or-later"} (name->expressions "GNU Lesser General Public License, version 2.1 or newer"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU General Lesser Public License (LGPL) version 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL 3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL v3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL version 3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL-3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPLv3 "))) ; Note trailing space + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL) Version 3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser 
General Public License v3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License, Version 3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Genereal Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "Gnu Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "L GPL 3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0 (GNU Lesser General Public License)"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL Open Source license"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL v3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0-only"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "LGPLv3"))) + (is (= #{"LGPL-3.0-only"} (name->expressions "Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (= 
#{"LGPL-3.0-only"} (name->expressions "Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-only"} (name->expressions "Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, Version 3 or later"))) + (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, v. 3 or later"))) + (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3 or later"))) + (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) + (is (= #{"LGPL-3.0-or-later"} (name->expressions "LGPL-3.0-or-later"))) + (is (= #{"LGPL-3.0-or-later"} (name->expressions "LGPLv3+"))) + (is (= #{"LGPL-3.0-or-later"} (name->expressions "Licensed under GNU Lesser General Public License Version 3 or later (the "))) ; Note trailing space + (is (= #{"Libpng"} (name->expressions "zlib/libpng License"))) + (is (= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (name->expressions "MIT/Apache-2.0/BSD-3-Clause"))) + (is (= #{"MIT"} (name->expressions " MIT License"))) + (is (= #{"MIT"} (name->expressions "Distributed under an MIT-style license (see LICENSE for details)."))) + (is (= #{"MIT"} (name->expressions "Expat (MIT) license"))) + (is (= #{"MIT"} (name->expressions "MIT LICENSE"))) + (is (= #{"MIT"} (name->expressions "MIT Licence"))) + (is (= #{"MIT"} (name->expressions "MIT Licens"))) + (is (= #{"MIT"} (name->expressions "MIT License (MIT)"))) + (is (= #{"MIT"} (name->expressions "MIT License"))) + (is (= #{"MIT"} (name->expressions "MIT Public License"))) + (is (= #{"MIT"} (name->expressions "MIT license"))) + (is (= #{"MIT"} (name->expressions "MIT public License"))) + (is (= #{"MIT"} (name->expressions "MIT public license"))) + (is (= #{"MIT"} (name->expressions "MIT"))) + (is (= #{"MIT"} 
(name->expressions "MIT-style license (see LICENSE for details)."))) + (is (= #{"MIT"} (name->expressions "THE MIT LICENSE"))) + (is (= #{"MIT"} (name->expressions "The MIT Licence"))) + (is (= #{"MIT"} (name->expressions "The MIT License (MIT) "))) ; Note trailing space + (is (= #{"MIT"} (name->expressions "The MIT License (MIT) | Open Source Initiative"))) + (is (= #{"MIT"} (name->expressions "The MIT License (MIT)"))) + (is (= #{"MIT"} (name->expressions "The MIT License"))) + (is (= #{"MIT"} (name->expressions "The MIT License."))) + (is (= #{"MIT"} (name->expressions "http://opensource.org/licenses/MIT"))) +; (is (= #{"MIT"} (name->expressions "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 + (is (= #{"MPL-1.0"} (name->expressions "Mozilla Public License Version 1.0"))) + (is (= #{"MPL-1.1"} (name->expressions "Mozilla Public License Version 1.1"))) + (is (= #{"MPL-2.0"} (name->expressions "MPL 2"))) + (is (= #{"MPL-2.0"} (name->expressions "MPL 2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "MPL v2"))) + (is (= #{"MPL-2.0"} (name->expressions "MPL"))) ; Listed license missing version - we assume the latest + (is (= #{"MPL-2.0"} (name->expressions "MPL-2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "MPL-v2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "MPL2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public Licence 2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License (Version 2.0)"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License 2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License Version 2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0+"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License version 2"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License version 
2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License, v. 2.0"))) + (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License, version 2.0"))) + (is (= #{"NASA-1.3"} (name->expressions "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) + (is (= #{"NASA-1.3"} (name->expressions "NASA Open Source Agreement, Version 1.3"))) + (is (= #{"NCSA"} (name->expressions "University of Illinois/NCSA Open Source License"))) + (is (= #{"Ruby"} (name->expressions "Ruby License"))) + (is (= #{"SGI-B-2.0"} (name->expressions "SGI"))) ; Listed license missing version - we assume the latest + (is (= #{"SMPPL"} (name->expressions "SMPPL"))) + (is (= #{"Unlicense"} (name->expressions "The UnLicense"))) + (is (= #{"Unlicense"} (name->expressions "The Unlicence"))) + (is (= #{"Unlicense"} (name->expressions "The Unlicense"))) + (is (= #{"Unlicense"} (name->expressions "UnLicense"))) + (is (= #{"Unlicense"} (name->expressions "Unlicense License"))) + (is (= #{"Unlicense"} (name->expressions "Unlicense"))) + (is (= #{"Unlicense"} (name->expressions "unlicense"))) + (is (= #{"W3C"} (name->expressions "W3C Software license"))) + (is (= #{"WTFPL"} (name->expressions "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) + (is (= #{"WTFPL"} (name->expressions "DO-WTF-U-WANT-2"))) + (is (= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License"))) + (is (= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License, Version 2"))) + (is (= #{"WTFPL"} (name->expressions "WTFPL v2"))) + (is (= #{"WTFPL"} (name->expressions "WTFPL – Do What the Fuck You Want to Public License"))) + (is (= #{"WTFPL"} (name->expressions "WTFPL"))) + (is (= #{"X11"} (name->expressions "MIT X11 License"))) + (is (= #{"X11"} (name->expressions "MIT/X11"))) + (is (= #{"Zlib"} (name->expressions "Zlib License"))) + (is (= #{"Zlib"} (name->expressions 
"zlib License"))) + (is (= #{"Zlib"} (name->expressions "zlib license"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "All Rights Reserved"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "All rights reserved"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Copyright & all rights reserved Lean Pixel"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Copyright 2013 The Fresh Diet. All rights reserved."))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Copyright 2017 All Rights Reserved"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Not fit for public use so formally proprietary software - this is not open-source"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Private License"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Private"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietary License"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Tulos Commercial License"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "Wildbit Proprietary License"))) + (is (= #{(proprietary-or-commercial)} (name->expressions "proprietary"))) + (is (= #{(public-domain)} (name->expressions "Public Domain"))) + (is (= #{(str "GPL-2.0-or-later OR " (name->unlisted "Swiss Ephemeris"))} (name->expressions "GPL v2+ or Swiss Ephemeris"))) + (is (= #{(str "MIT AND " (proprietary-or-commercial))} (name->expressions "Dual MIT & Proprietary"))) + (is (unlisted-only? (name->expressions "${license.id}"))) + (is (unlisted-only? (name->expressions "A Clojure library for Google Cloud Pub/Sub."))) + (is (unlisted-only? (name->expressions "APGL"))) ; Probable typo + (is (unlisted-only? 
(name->expressions "Amazon Software License"))) + (is (unlisted-only? (name->expressions "BankersBox License"))) + (is (unlisted-only? (name->expressions "Bespoke"))) + (is (unlisted-only? (name->expressions "Bloomberg Open API"))) + (is (unlisted-only? (name->expressions "Bostock"))) + (is (unlisted-only? (name->expressions "Built In Project License"))) + (is (unlisted-only? (name->expressions "CRAPL License"))) + (is (unlisted-only? (name->expressions "Contact JMonkeyEngine forums for license details"))) + (is (unlisted-only? (name->expressions "Copyright (C) 2015 by Glowbox LLC"))) + (is (unlisted-only? (name->expressions "Copyright (c) 2011 Drew Colthorp"))) + (is (unlisted-only? (name->expressions "Copyright (c) 2017, Lingchao Xin"))) + (is (unlisted-only? (name->expressions "Copyright 2016, klaraHealth, Inc."))) + (is (unlisted-only? (name->expressions "Copyright 2017 Zensight"))) + (is (unlisted-only? (name->expressions "Copyright 4A Volcano. 2015."))) + (is (unlisted-only? (name->expressions "Copyright Ona Systems Inc."))) + (is (unlisted-only? (name->expressions "Copyright meissa GmbH"))) + (is (unlisted-only? (name->expressions "Copyright © SparX 2014"))) + (is (unlisted-only? (name->expressions "Copyright"))) + (is (unlisted-only? (name->expressions "Custom"))) + (is (unlisted-only? (name->expressions "Cydeas Public License"))) + (is (unlisted-only? (name->expressions "Don't steal my stuff"))) + (is (unlisted-only? (name->expressions "Dropbox ToS"))) + (is (unlisted-only? (name->expressions "FIXME: choose"))) + (is (unlisted-only? (name->expressions "Firebase ToS"))) + (is (unlisted-only? (name->expressions "GG Public License"))) + (is (unlisted-only? (name->expressions "Google Maps ToS"))) + (is (unlisted-only? (name->expressions "GraphiQL license"))) + (is (unlisted-only? (name->expressions "Hackthorn Innovation Ltd"))) + (is (unlisted-only? (name->expressions "Hackthorn Innovation copyright"))) + (is (unlisted-only? 
(name->expressions "Heap ToS"))) + (is (unlisted-only? (name->expressions "Interel"))) + (is (unlisted-only? (name->expressions "JLGL Backend"))) + (is (unlisted-only? (name->expressions "Jedis License"))) + (is (unlisted-only? (name->expressions "Jiegao Owned"))) + (is (unlisted-only? (name->expressions "LICENSE"))) + (is (unlisted-only? (name->expressions "Libre Uso MX"))) + (is (unlisted-only? (name->expressions "License of respective package"))) + (is (unlisted-only? (name->expressions "License"))) + (is (unlisted-only? (name->expressions "Like Clojure."))) + (is (unlisted-only? (name->expressions "Mixed"))) + (is (unlisted-only? (name->expressions "Multiple"))) + (is (unlisted-only? (name->expressions "OTN License Agreement"))) + (is (unlisted-only? (name->expressions "Open Source Community License - Type C version 1.0"))) + (is (unlisted-only? (name->expressions "Other License"))) + (is (unlisted-only? (name->expressions "Provisdom"))) + (is (unlisted-only? (name->expressions "Research License 1.0"))) + (is (unlisted-only? (name->expressions "Restricted Distribution."))) + (is (unlisted-only? (name->expressions "SYNNEX China Owned"))) + (is (unlisted-only? (name->expressions "See the LICENSE file"))) + (is (unlisted-only? (name->expressions "Shen License"))) + (is (unlisted-only? (name->expressions "Slick2D License"))) + (is (unlisted-only? (name->expressions "Stripe ToS"))) + (is (unlisted-only? (name->expressions "TODO"))) + (is (unlisted-only? (name->expressions "TODO: Choose a license"))) + (is (unlisted-only? (name->expressions "The I Haven't Got Around To This Yet License"))) + (is (unlisted-only? (name->expressions "To ill!"))) + (is (unlisted-only? (name->expressions "UNLICENSED"))) + (is (unlisted-only? (name->expressions "University of Buffalo Public License"))) + (is (unlisted-only? (name->expressions "Unknown"))) + (is (unlisted-only? (name->expressions "VNETLPL - Limited Public License"))) + (is (unlisted-only? 
(name->expressions "VNet PL"))) + (is (unlisted-only? (name->expressions "Various"))) + (is (unlisted-only? (name->expressions "Vimeo License"))) + (is (unlisted-only? (name->expressions "WIP"))) + (is (unlisted-only? (name->expressions "YouTube ToS"))) + (is (unlisted-only? (name->expressions "avi license"))) + (is (unlisted-only? (name->expressions "esl-sdk-external-signer-verification"))) + (is (unlisted-only? (name->expressions "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet + (is (unlisted-only? (name->expressions "jank license"))) + (is (unlisted-only? (name->expressions "name"))) + (is (unlisted-only? (name->expressions "none"))) + (is (unlisted-only? (name->expressions "state-node license"))) + (is (unlisted-only? (name->expressions "trove"))) + (is (unlisted-only? (name->expressions "url"))) + (is (unlisted-only? (name->expressions "wisdragon"))) + (is (unlisted-only? (name->expressions "wiseloong"))))) +) + +(comment ; Note: these tests should be extended indefinitely, as it exercises the most-utilised part of the library (matching license names found in POMs) (deftest name->ids-tests (testing "Nil, empty or blank names" @@ -65,7 +719,6 @@ (is (= #{"Apache-2.0" "GPL-3.0-only"} (name->ids "Apache-2.0 OR GPL-3.0"))) (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0" "MIT" "BSD-3-Clause" "Apache-2.0"} (name->ids "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) -(comment ; ####TODO: RE-ENABLE ME!!!! 
(testing "Names, with an emphasis on those seen in POMs on Maven Central" (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License (AGPL) version 3.0"))) (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0"))) @@ -132,17 +785,14 @@ (is (= #{"MIT"} (name->ids "The MIT License"))) (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License"))) (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License Version 2.0"))) - (is (= #{"Plexus"} (name->ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (is (= #{"Plexus"} (name->ids "Similar to Apache License but with the acknowledgment clause removed")))) ; This is used by JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html (testing "Names that appear in licensey things, but are ambiguous" (is (nil? (name->ids "BSD")))) (testing "Names that appear in licensey things, but aren't in the SPDX license list" - (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (name->ids "Public Domain"))) - (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (name->ids "Public domain")))) -) + (is (= #{(public-domain)} (name->ids "Public Domain"))) + (is (= #{(public-domain)} (name->ids "Public domain")))) (testing "Distinct license names that appear in POMs on Clojars" ; synced from Clojars 2023-07-13 -;####TODO: SORT ALL OF THESE!!!! (is (= #{"AFL-3.0"} (name->ids "Academic Free License 3.0"))) -(comment ;####TODO: UNCOMMENT THIS!!!! 
(is (= #{"AGPL-3.0-only"} (name->ids "AGPL v3"))) (is (= #{"AGPL-3.0-only"} (name->ids "AGPLv3"))) (is (= #{"AGPL-3.0-only"} (name->ids "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest @@ -158,14 +808,13 @@ (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License, version 3"))) (is (= #{"AGPL-3.0-or-later"} (name->ids "AGPL"))) ; Listed license missing version - we assume the latest (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License v3 or later (at your option)"))) - (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License version 3 or lator"))) + (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License version 3 or lator"))) ; Typo in "lator" (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License"))) (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License,"))) ; Listed license missing version - we assume the latest (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU AGPL-V3 or later"))) (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest -) (is (= #{"Apache-2.0" "EPL-2.0"} (name->ids "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest (is (= #{"Apache-2.0" "LLVM-exception"} (name->ids "Apache 2.0 with LLVM Exception"))) (is (= #{"Apache-2.0"} (name->ids " Apache License, Version 2.0"))) @@ -257,13 +906,14 @@ (is (= #{"BSD-2-Clause"} (name->ids "Simplified BSD license"))) (is (= #{"BSD-2-Clause"} (name->ids "The BSD 2-Clause License"))) (is (= #{"BSD-2-Clause"} 
(name->ids "Two clause BSD license"))) + (is (= #{"BSD-2-Clause-FreeBSD"} (name->ids "FreeBSD License"))) (is (= #{"BSD-3-Clause" "MIT"} (name->ids "New-BSD / MIT"))) (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD License"))) (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD"))) (is (= #{"BSD-3-Clause"} (name->ids "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) (is (= #{"BSD-3-Clause"} (name->ids "3-clause BSD license"))) (is (= #{"BSD-3-Clause"} (name->ids "3-clause license (New BSD License or Modified BSD License)"))) - (is (= #{"BSD-3-Clause"} (name->ids "Aduna BSD license"))) ; Listed license missing clause info, but the license text shows BSD-3-Clause + (is (= #{"BSD-3-Clause"} (name->ids "Aduna BSD license"))) (is (= #{"BSD-3-Clause"} (name->ids "BSD 3 Clause"))) (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause 'New' or 'Revised' License"))) (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause License"))) @@ -286,7 +936,6 @@ (is (= #{"BSD-3-Clause"} (name->ids "The New BSD License"))) (is (= #{"BSD-3-Clause"} (name->ids "The New BSD license"))) (is (= #{"BSD-3-Clause"} (name->ids "Three Clause BSD-like License"))) - (is (unlisted-only? 
(name->ids "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet ; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/clafka/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 ; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 ; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 @@ -312,22 +961,22 @@ (is (= #{"Beerware"} (name->ids "THE BEER-WARE LICENSE"))) (is (= #{"CC-BY-2.5"} (name->ids "Creative Commons Attribution 2.5 License"))) (is (= #{"CC-BY-3.0"} (name->ids "Creative Commons 3.0"))) - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported"))) - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0"))) (is (= #{"CC-BY-4.0"} (name->ids "CC Attribution 4.0 International with exception for binary distribution"))) (is (= #{"CC-BY-4.0"} (name->ids "CC-BY-4.0"))) (is (= #{"CC-BY-4.0"} (name->ids "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest (is (= #{"CC-BY-NC-3.0"} (name->ids "Creative Commons Attribution-NonCommercial 3.0"))) (is (= #{"CC-BY-NC-4.0"} (name->ids "CC BY-NC"))) ; Listed license missing version - we 
assume the latest (is (= #{"CC-BY-NC-ND-3.0"} (name->ids "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported"))) + (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0"))) (is (= #{"CC-BY-SA-4.0"} (name->ids "CC BY-SA 4.0"))) - (is (= #{"CC0-1.0"} (name->ids "Public domain (CC0)"))) (is (= #{"CC0-1.0"} (name->ids "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) (is (= #{"CC0-1.0"} (name->ids "CC0 1.0 Universal"))) (is (= #{"CC0-1.0"} (name->ids "CC0"))) + (is (= #{"CC0-1.0"} (name->ids "Public domain (CC0)"))) (is (= #{"CDDL-1.1"} (name->ids "Common Development and Distribution License (CDDL)"))) ; Listed license missing clause info (is (= #{"CDDL-1.1"} (name->ids "Common Development and Distribution License"))) ; Listed license missing clause info (is (= #{"CECILL-2.1"} (name->ids "CeCILL License"))) ; Listed license missing version - we assume the latest @@ -348,13 +997,13 @@ (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License, version 1.0"))) (is (= #{"EPL-1.0"} (name->ids "Eclipse Public Licese - v 1.0"))) (is (= #{"EPL-1.0"} (name->ids "https://github.com/cmiles74/uio/blob/master/LICENSE"))) + (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids 
"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) -; (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! -; (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest + (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! (is (= #{"EPL-2.0" "GPL-2.0-or-later"} (name->ids "EPL-2.0 OR GPL-2.0-or-later"))) (is (= #{"EPL-2.0" "GPL-3.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) (is (= #{"EPL-2.0" "GPL-3.0-or-later"} (name->ids "EPL-2.0 OR GPL-3.0-or-later"))) -; (is (= #{"EPL-2.0" "LGPL-3.0-or-later"} (name->ids "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest + (is (= #{"EPL-2.0" "LGPL-3.0-or-later"} (name->ids "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest (is (= #{"EPL-2.0" "MIT"} (name->ids "Eclipse Public MIT"))) ; Listed license missing version - we assume the latest (is (= #{"EPL-2.0"} (name->ids "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) (is (= #{"EPL-2.0"} (name->ids "Copyright (C) 2014 Mathieu Gauthron. Distributed under the Eclipse Public License."))) @@ -385,7 +1034,6 @@ (is (= #{"EUPL-1.2"} (name->ids "European Union Public Licence v. 1.2"))) (is (= #{"EUPL-1.2"} (name->ids "European Union Public License 1.2 or later"))) (is (= #{"EUPL-1.2"} (name->ids "European Union Public License"))) ; Listed license missing version - we assume the latest -(comment ;####TODO: UNCOMMENT THIS!!!! 
(is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License, Version 2, with the Classpath Exception"))) (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GPLv2 with Classpath exception"))) (is (= #{"GPL-2.0-only"} (name->ids "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) @@ -448,7 +1096,6 @@ (is (= #{"GPL-3.0-or-later"} (name->ids "GPL V3+"))) (is (= #{"GPL-3.0-or-later"} (name->ids "GPL"))) ; Listed license missing version - we assume the latest (is (= #{"GPL-3.0-or-later"} (name->ids "The GNU General Public License"))) ; Listed license missing version - we assume the latest -) (is (= #{"Hippocratic-2.1"} (name->ids "Hippocratic License"))) (is (= #{"ISC" "Classpath-exception-2.0"} (name->ids "ISC WITH Classpath-exception-2.0"))) (is (= #{"ISC"} (name->ids "ISC Licence"))) @@ -456,7 +1103,6 @@ (is (= #{"ISC"} (name->ids "ISC"))) (is (= #{"ISC"} (name->ids "MIT/ISC License"))) (is (= #{"ISC"} (name->ids "MIT/ISC"))) -(comment ;####TODO: UNCOMMENT THIS!!!! 
(is (= #{"LGPL-2.1-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) (is (= #{"LGPL-2.1-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) (is (= #{"LGPL-2.1-only"} (name->ids "GNU LGPL v2.1"))) @@ -517,13 +1163,12 @@ (is (= #{"LGPL-3.0-or-later"} (name->ids "LGPL-3.0-or-later"))) (is (= #{"LGPL-3.0-or-later"} (name->ids "LGPLv3+"))) (is (= #{"LGPL-3.0-or-later"} (name->ids "Licensed under GNU Lesser General Public License Version 3 or later (the "))) ; Note trailing space -) (is (= #{"Libpng"} (name->ids "zlib/libpng License"))) - (is (= #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} (name->ids "Public Domain"))) + (is (= #{(public-domain)} (name->ids "Public Domain"))) (is (= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (name->ids "MIT/Apache-2.0/BSD-3-Clause"))) (is (= #{"MIT"} (name->ids " MIT License"))) (is (= #{"MIT"} (name->ids "Distributed under an MIT-style license (see LICENSE for details)."))) - (is (= #{"MIT"} (name->ids "Dual MIT & Proprietary"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! + (is (= #{"MIT" (proprietary-or-commercial)} (name->ids "Dual MIT & Proprietary"))) (is (= #{"MIT"} (name->ids "Expat (MIT) license"))) (is (= #{"MIT"} (name->ids "MIT LICENSE"))) (is (= #{"MIT"} (name->ids "MIT Licence"))) @@ -543,9 +1188,8 @@ (is (= #{"MIT"} (name->ids "The MIT License (MIT)"))) (is (= #{"MIT"} (name->ids "The MIT License"))) (is (= #{"MIT"} (name->ids "The MIT License."))) -;####TODO: UNCOMMENT ONCE URL DETECTION AND RESOLUTION IS IMPLEMENTED!!!! 
-; (is (= #{"MIT"} (name->ids "http://opensource.org/licenses/MIT"))) -; (is (= #{"MIT"} (name->ids "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) + (is (= #{"MIT"} (name->ids "http://opensource.org/licenses/MIT"))) +; (is (= #{"MIT"} (name->ids "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License Version 1.0"))) (is (= #{"MPL-1.1"} (name->ids "Mozilla Public License Version 1.1"))) (is (= #{"MPL-2.0"} (name->ids "MPL 2"))) @@ -593,12 +1237,11 @@ (is (= #{"Zlib"} (name->ids "zlib License"))) (is (= #{"Zlib"} (name->ids "zlib license"))) (is (unlisted-only? (name->ids "${license.id}"))) -;####TODO: UNCOMMENT ME!!!! -; (is (unlisted-only? (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (unlisted-only? (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) (is (unlisted-only? (name->ids "A Clojure library for Google Cloud Pub/Sub."))) (is (unlisted-only? (name->ids "APGL"))) ; Probable typo - (is (unlisted-only? (name->ids "All Rights Reserved"))) - (is (unlisted-only? (name->ids "All rights reserved"))) + (is (= #{(proprietary-or-commercial)} (name->ids "All Rights Reserved"))) + (is (= #{(proprietary-or-commercial)} (name->ids "All rights reserved"))) (is (unlisted-only? (name->ids "Amazon Software License"))) (is (unlisted-only? (name->ids "BankersBox License"))) (is (unlisted-only? (name->ids "Bespoke"))) @@ -607,13 +1250,13 @@ (is (unlisted-only? (name->ids "Built In Project License"))) (is (unlisted-only? (name->ids "CRAPL License"))) (is (unlisted-only? (name->ids "Contact JMonkeyEngine forums for license details"))) - (is (unlisted-only? (name->ids "Copyright & all rights reserved Lean Pixel"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Copyright & all rights reserved Lean Pixel"))) (is (unlisted-only? 
(name->ids "Copyright (C) 2015 by Glowbox LLC"))) (is (unlisted-only? (name->ids "Copyright (c) 2011 Drew Colthorp"))) (is (unlisted-only? (name->ids "Copyright (c) 2017, Lingchao Xin"))) - (is (unlisted-only? (name->ids "Copyright 2013 The Fresh Diet. All rights reserved."))) + (is (= #{(proprietary-or-commercial)} (name->ids "Copyright 2013 The Fresh Diet. All rights reserved."))) (is (unlisted-only? (name->ids "Copyright 2016, klaraHealth, Inc."))) - (is (unlisted-only? (name->ids "Copyright 2017 All Rights Reserved"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Copyright 2017 All Rights Reserved"))) (is (unlisted-only? (name->ids "Copyright 2017 Zensight"))) (is (unlisted-only? (name->ids "Copyright 4A Volcano. 2015."))) (is (unlisted-only? (name->ids "Copyright Ona Systems Inc."))) @@ -626,7 +1269,6 @@ (is (unlisted-only? (name->ids "Dropbox ToS"))) (is (unlisted-only? (name->ids "FIXME: choose"))) (is (unlisted-only? (name->ids "Firebase ToS"))) - (is (= #{"BSD-2-Clause-FreeBSD"} (name->ids "FreeBSD License"))) (is (unlisted-only? (name->ids "GG Public License"))) (is (unlisted-only? (name->ids "Google Maps ToS"))) (is (unlisted-only? (name->ids "GraphiQL license"))) @@ -644,15 +1286,15 @@ (is (unlisted-only? (name->ids "Like Clojure."))) (is (unlisted-only? (name->ids "Mixed"))) (is (unlisted-only? (name->ids "Multiple"))) - (is (unlisted-only? (name->ids "Not fit for public use so formally proprietary software - this is not open-source"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Not fit for public use so formally proprietary software - this is not open-source"))) (is (unlisted-only? (name->ids "OTN License Agreement"))) (is (unlisted-only? (name->ids "Open Source Community License - Type C version 1.0"))) (is (unlisted-only? (name->ids "Other License"))) - (is (unlisted-only? (name->ids "Private License"))) - (is (unlisted-only? (name->ids "Private"))) - (is (unlisted-only? (name->ids "Proprietary License"))) - (is (unlisted-only? 
(name->ids "Proprietary"))) - (is (unlisted-only? (name->ids "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) + (is (= #{(proprietary-or-commercial)} (name->ids "Private License"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Private"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Proprietary License"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Proprietary"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) (is (unlisted-only? (name->ids "Provisdom"))) (is (unlisted-only? (name->ids "Research License 1.0"))) (is (unlisted-only? (name->ids "Restricted Distribution."))) @@ -665,7 +1307,7 @@ (is (unlisted-only? (name->ids "TODO: Choose a license"))) (is (unlisted-only? (name->ids "The I Haven't Got Around To This Yet License"))) (is (unlisted-only? (name->ids "To ill!"))) - (is (unlisted-only? (name->ids "Tulos Commercial License"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Tulos Commercial License"))) (is (unlisted-only? (name->ids "UNLICENSED"))) (is (unlisted-only? (name->ids "University of Buffalo Public License"))) (is (unlisted-only? (name->ids "Unknown"))) @@ -674,14 +1316,15 @@ (is (unlisted-only? (name->ids "Various"))) (is (unlisted-only? (name->ids "Vimeo License"))) (is (unlisted-only? (name->ids "WIP"))) - (is (unlisted-only? (name->ids "Wildbit Proprietary License"))) + (is (= #{(proprietary-or-commercial)} (name->ids "Wildbit Proprietary License"))) (is (unlisted-only? (name->ids "YouTube ToS"))) (is (unlisted-only? (name->ids "avi license"))) (is (unlisted-only? (name->ids "esl-sdk-external-signer-verification"))) + (is (unlisted-only? (name->ids "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet (is (unlisted-only? (name->ids "jank license"))) (is (unlisted-only? (name->ids "name"))) (is (unlisted-only? (name->ids "none"))) - (is (unlisted-only? 
(name->ids "proprietary"))) + (is (= #{(proprietary-or-commercial)} (name->ids "proprietary"))) (is (unlisted-only? (name->ids "state-node license"))) (is (unlisted-only? (name->ids "trove"))) (is (unlisted-only? (name->ids "url"))) @@ -711,4 +1354,8 @@ (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt")))) (testing "URIs that aren't in the SPDX license list, but do match via retrieval and full text matching" (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) - (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))))) ; ####TODO: Not sure about this one + (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "HTTPS://GITHUB.COM/pmonks/lice-comb/blob/main/LICENSE"))))) + + +) From d8a54f0857ce23a9539abe973f17766be064be49 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Thu, 17 Aug 2023 22:01:48 -0700 Subject: [PATCH 13/34] :construction: Ongoing work on issue #3 --- NOTICE | 4 + src/lice_comb/impl/3rd_party.clj | 37 + src/lice_comb/matching.clj | 468 ++++++------ test/lice_comb/matching_test.clj | 1143 +++++++++++++++--------------- 4 files changed, 883 insertions(+), 769 deletions(-) create mode 100644 NOTICE create mode 100644 src/lice_comb/impl/3rd_party.clj diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..223bc5f --- /dev/null +++ b/NOTICE @@ -0,0 +1,4 @@ +lice-comb +Copyright © 2021 Peter Monks (https://github.com/pmonks) + +This project contains source code for rdrop-while, which is copyright Joshua Suskalo (https://github.com/IGJoshua) 2023 and licensed as "CC0-1.0 OR MIT". For details, see https://discord.com/channels/729136623421227082/732641743723298877/1141786961875583097. 
diff --git a/src/lice_comb/impl/3rd_party.clj b/src/lice_comb/impl/3rd_party.clj new file mode 100644 index 0000000..72067cc --- /dev/null +++ b/src/lice_comb/impl/3rd_party.clj @@ -0,0 +1,37 @@ +;;;; lice_comb.impl.3rd_party.clj +;;; +;;; Code obtained from third party sources, but not available via standard +;;; package-consumption mechanisms (i.e. as Maven artifacts) +;;; +;;; Copyright and license information is on a per-code-snippet basis, and +;;; is communicated inline via further comments. +;;; +(ns lice-comb.impl.3rd-party) + +;; rdrop-while is copyright © Joshua Suskalo (https://github.com/IGJoshua) 2023 and licensed as "CC0-1.0 OR MIT" +;; +;; Source: https://discord.com/channels/729136623421227082/732641743723298877/1141786961875583097 +;; Link to request access: https://discord.gg/discljord +;; +;; Note that the lice-comb project elects to consume this code under the MIT license +(defn rdrop-while + "As for clojure.core/drop-while, but drops from the end of the + sequence backwards, rather than the front forwards. More efficient + when provided with a vector rather than a list." + ([pred coll] + (if (reversible? coll) + (take (- (count coll) (count (take-while pred (rseq coll)))) coll) + (reverse (drop-while pred (reverse coll))))) + ([pred] + (fn [rf] + (let [stash (volatile! [])] + (fn + ([] (rf)) + ([acc] (rf acc)) + ([acc elt] + (if (pred elt) + (do (vswap! stash conj elt) + acc) + (let [res (reduce rf acc (conj @stash elt))] + (vreset! 
stash []) + res)))))))) diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index 3196bff..751a2ff 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -19,20 +19,31 @@ (ns lice-comb.matching "Matching functionality, some of which is provided by https://github.com/pmonks/clj-spdx" - (:require [clojure.string :as s] - [clojure.set :as set] - [clojure.java.io :as io] - [hato.client :as hc] - [spdx.licenses :as sl] - [spdx.exceptions :as se] - [spdx.matching :as sm] - [spdx.expressions :as sexp] - [rencg.api :as rencg] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [clojure.set :as set] + [clojure.java.io :as io] + [clojure.pprint :as pp] + [hato.client :as hc] + [spdx.licenses :as sl] + [spdx.exceptions :as se] + [spdx.matching :as sm] + [spdx.expressions :as sexp] + [rencg.api :as rencg] + [lice-comb.impl.3rd-party :as lc3] + [lice-comb.impl.utils :as lcu])) + +; The subset of SPDX license identifiers that we use; specifically excludes the deprecated 'historical oddity' GPL family identifiers +(def ^:private license-ids-d + (delay + (disj (set (filter #(not (s/ends-with? 
% "+")) (sl/ids))) + "AGPL-1.0" "AGPL-3.0" "GPL-1.0" "GPL-2.0" "GPL-3.0" "LGPL-2.0" "LGPL-2.1" "LGPL-3.0"))) + +; The subset of SPDX exception identifiers that we use; right now this is all of them (this is a placeholder) +(def ^:private exception-ids-d (delay (se/ids))) ; The license and exception lists -(def ^:private license-list-d (delay (map sl/id->info (sl/ids)))) -(def ^:private exception-list-d (delay (map se/id->info (se/ids)))) +(def ^:private license-list-d (delay (map sl/id->info @license-ids-d))) +(def ^:private exception-list-d (delay (map se/id->info @exception-ids-d))) ; The unlisted license refs lice-comb uses (note: the unlisted one usually has a base62 suffix appended) (def ^:private public-domain-license-ref "LicenseRef-lice-comb-PUBLIC-DOMAIN") @@ -40,8 +51,8 @@ (def ^:private unlisted-license-ref-prefix "LicenseRef-lice-comb-UNLISTED") ; Lower case id map -(def ^:private spdx-ids-d (delay (merge (into {} (map #(vec [(s/lower-case %) %]) (sl/ids))) - (into {} (map #(vec [(s/lower-case %) %]) (se/ids)))))) +(def ^:private spdx-ids-d (delay (merge (into {} (map #(vec [(s/lower-case %) %]) @license-ids-d)) + (into {} (map #(vec [(s/lower-case %) %]) @exception-ids-d))))) (defn public-domain? "Is the given id lice-comb's custom 'public domain' LicenseRef?" @@ -130,9 +141,6 @@ fix-ids-that-end-with-plus fix-classpath-exception))) -; Only match against SPDX license identifiers that do _not_ end with "+" - these are all duplicate/old/deprecated ids that pre-date license expressions (where "+" gained independent semantics) -(def ^:private license-ids-for-matching-d (delay (filter #(not (s/ends-with? 
% "+")) (sl/ids)))) - (defmulti text->ids "Attempts to determine the SPDX license and/or exception identifier(s) (a set) within the given license text (a String, Reader, InputStream, or something @@ -151,7 +159,7 @@ (defmethod text->ids java.lang.String [s] ; These clj-spdx APIs are *expensive*, so we paralellise them - (let [f-lic (future (sm/licenses-within-text s @license-ids-for-matching-d)) + (let [f-lic (future (sm/licenses-within-text s @license-ids-d)) f-exc (future (sm/exceptions-within-text s))] (manual-fixes (set/union @f-lic @f-exc)))) @@ -282,16 +290,24 @@ (s/replace #"\s+" " ")))) default)))) +(defn- assert-valid-id + [id] + (if (or (contains? @license-ids-d id) + (contains? (se/ids) id)) + id + (throw (ex-info "Invalid SPDX id constructed" {:id id})))) + (defn- generic-id-constructor [m] (when m - (str (:id m) - (when-let [ver (get-rencgs m ["version"] (:latest-ver m))] - (str "-" - ver - (when (and (:pad-ver? m) - (not (s/includes? ver "."))) - ".0")))))) + (let [id (str (:id m) + (when-let [ver (get-rencgs m ["version"] (:latest-ver m))] + (str "-" + ver + (when (and (:pad-ver? m) + (not (s/includes? ver "."))) + ".0"))))] + (assert-valid-id id)))) (defn- number-name-to-number "Converts the name of a number to that number (as a string). e.g. \"two\" -> \"2\". Returns s unchanged if it's not a number name." @@ -342,9 +358,9 @@ nil) base-id (str (:id m) "-" clause-count "-Clause") id-with-suffix (str base-id "-" suffix)] - (if (contains? (sl/ids) id-with-suffix) ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it + (if (contains? @license-ids-d id-with-suffix) ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it id-with-suffix - base-id))) + (assert-valid-id base-id)))) (defn- cc-id-constructor [m] @@ -369,15 +385,13 @@ ("united states" "usa" "us") "US" nil) id-with-region (str base-id (when-not (s/blank? region) (str "-" region)))] - (if (contains? 
(sl/ids) id-with-region) ; Not all license variants and versions have a region specific identifier, so check that it's valid before returning it + (if (contains? @license-ids-d id-with-region) ; Not all license variants and versions have a region specific identifier, so check that it's valid before returning it id-with-region - (if (contains? (sl/ids) base-id) - base-id - (throw (ex-info "Invalid Creative Commons license information found" (dissoc m :id :regex :fn :pad-ver? :latest-ver))))))) + (assert-valid-id base-id)))) (defn- gpl-id-constructor [m] - (let [id (case (get-rencgs m ["edition1" "edition2"]) + (let [variant (case (get-rencgs m ["edition1" "edition2"]) ("affero" "agpl") "AGPL" ("lesser" "library" "lgpl") "LGPL" "GPL") @@ -385,11 +399,12 @@ (if (s/includes? ver ".") ver (str ver ".0"))) - suffix (case (get-rencgs m ["suffix"]) + suffix (case (get-rencgs m ["suffix1" "suffix2"]) ("later" "newer" "+") "or-later" ("only") "only" - "only")] ; Note: we (conservatively) default to "only" when we don't have an explicit suffix - (str id "-" version "-" suffix))) + "only") ; Note: we (conservatively) default to "only" when we don't have an explicit suffix + id (str variant "-" version "-" suffix)] + (assert-valid-id id))) (defn- simple-regex-match "Constructs a 'simple' name match structure" @@ -399,129 +414,133 @@ :fn (constantly s)}) ; Regexes used for license name matching, along with functions for constructing an SPDX id -(def ^:private license-name-matching (concat - ; By default we add every single id as a "simple" regex match, excluding MIT and Zlib (they're explicitly handled below) - (map simple-regex-match (disj (sl/ids) "MIT" "Zlib")) - (map simple-regex-match (se/ids)) - [ - {:id "AFL" - :regex #"(?i)\bAcademic(\s+Free)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? 
true - :latest-ver "3.0"} - {:id "Apache" - :regex #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?!.*acknowledgment\s+clause\s+removed)\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "Artistic" - :regex #"(?i)\bArtistic\s+Licen[cs]e(\s*V(ersion)?)?[\s,-]*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "Beerware" - :regex #"(?i)\bBeer-?ware\b" - :fn (constantly "Beerware")} - {:id "BSL" - :regex #"(?i)\bBoost(\s+Software)?(\s+Licen[cs]e)?[\s,-]*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.0"} - {:id "BSD" - :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?" - :fn bsd-id-constructor} - {:id "CC0" - :regex #"(?i)\bCC\s*0" - :fn (constantly "CC0-1.0")} - {:id "CECILL" - :regex #"(?i)\bCeCILL(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?(\s+Agreement)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.1"} - {:id "Classpath-exception" - :regex #"(?i)\bClasspath[\s-]+exception(\s*V(ersion)?)?[\s-]*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "CDDL" - :regex #"(?i)(CDDL|Common\s+Development\s+(and|\&)?\s+Distribution\s+Licen[cs]e)(\s+\(?CDDL\)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.1"} - {:id "CPL" - :regex #"(?i)Common\s+Public\s+Licen[cs]e[\s,-]*(\s*V(ersion)?)?(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? 
true - :latest-ver "1.0"} - {:id "Creative commons family" - :regex #"(?i)\b(CC([\s-]+BY)?\b|(Creative\s+Commons(\s+Legal\s+Code)?(\s+Attribution)?|Attribution\s+(?\d(.\d)?)))([\s,-]*((?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(\s+Unported|International|Generic)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?\b" - :fn cc-id-constructor - :pad-ver? true - :latest-ver "4.0"} - {:id "EPL" ; Eclipse Public License (EPL) - v 1.0 - :regex #"(?i)\b(EPL|Eclipse(\s+Public)?(\s+Licen?[cs]e)?)(\s*\(EPL\))?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" ; Note: optional "n" in "license" is because of a known typo - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "EUPL" - :regex #"(?i)\bEuropean\s+Union(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(\(?EUPL\)?)?[\s,-]*(V(ersion)?)?(\.)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.2"} - {:id "FreeBSD" - :regex #"(?i)\bFreeBSD\b" - :fn (constantly "BSD-2-Clause-FreeBSD")} - {:id "GNU license family" - :regex #"(?i)\b(?(Affero|Lesser|Library|LGPL|AGPL)\s+)?(GPL|GNU(?!\s*Classpath)|General\s+Pub?lic\s+Licen[cs]e)(?\s+(Affero|Lesser|Library))?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?(A|L)?GPL\)?)?([\s,-]*V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(or(\s+\(?at\s+your\s+(option|discretion)\)?)?)?(\s+any)?(\s*(?later|newer|only|\+))?\b" - :fn gpl-id-constructor - :pad-ver? 
true - :latest-ver 3.0} - {:id "Hippocratic" - :regex #"(?i)\bHippocratic\b" - :fn (constantly "Hippocratic-2.1")} ; There are no other listed versions of this license - {:id "LLVM-exception" - :regex #"(?i)\bLLVM[\s-]+Exception\b" - :fn (constantly "LLVM-exception")} - {:id "MIT" - :regex #"(?i)\b(MIT|Bouncy\s+Castle)(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" - :fn (constantly "MIT")} - {:id "MPL" - :regex #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "NASA" - :regex #"(?i)\bNASA(\s+Open)?(\s+Source)?(\s+Agreement)?[\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.3"} - {:id "Plexus" - :regex #"(?i)\bApache\s+Licen[cs]e(\s+but)?(\s+with)?(\s+the)?\s+acknowledgment\s+clause\s+removed\b" - :fn (constantly "Plexus")} - {:id "Proprietary or commercial" - :regex #"(?i)\b(Propriet[ao]ry|Commercial|All\s+Rights\s+Reserved|Private)\b" - :fn proprietary-or-commercial} - {:id "Public Domain" - :regex #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)" - :fn public-domain} - {:id "Ruby" - :regex #"(?i)\bRuby(\s+Licen[cs]e)?\b" - :fn (constantly "Ruby")} - {:id "SGI-B" - :regex #"(?i)\bSGI(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?)?\b" - :fn generic-id-constructor - :pad-ver? 
true - :latest-ver "2.0"} - {:id "Unlicense" - :regex #"(?i)\bUnlicen[cs]e\b" - :fn (constantly "Unlicense")} - {:id "WTFPL" - :regex #"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b" - :fn (constantly "WTFPL")} - {:id "Zlib" - :regex #"\b(?i)zlib(?![\s/]+libpng)\b" - :fn (constantly "Zlib")} - ])) +(def ^:private license-name-matching-d (delay + (concat + ; By default we add most SPDX ids as "simple" regex matches + (map simple-regex-match (disj @license-ids-d "MIT" "Zlib")) ; We remove MIT and Zlib as they're special-cased below + (map simple-regex-match (se/ids)) + [ + {:id "AFL" + :regex #"(?i)\bAcademic(\s+Free)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "3.0"} + {:id "Apache" + :regex #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?!.*acknowledgment\s+clause\s+removed)\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "Artistic" + :regex #"(?i)\bArtistic\s+Licen[cs]e(\s*V(ersion)?)?[\s,-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "Beerware" + :regex #"(?i)\bBeer-?ware\b" + :fn (constantly "Beerware")} + {:id "BSL" + :regex #"(?i)\bBoost(\s+Software)?(\s+Licen[cs]e)?[\s,-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.0"} + {:id "BSD" + :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?" 
+ :fn bsd-id-constructor} + {:id "CC0" + :regex #"(?i)\bCC\s*0" + :fn (constantly "CC0-1.0")} + {:id "CECILL" + :regex #"(?i)\bCeCILL(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?(\s+Agreement)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.1"} + {:id "Classpath-exception" + :regex #"(?i)\bClasspath[\s-]+exception(\s*V(ersion)?)?[\s-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "CDDL" + :regex #"(?i)(CDDL|Common\s+Development\s+(and|\&)?\s+Distribution\s+Licen[cs]e)(\s+\(?CDDL\)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.1"} + {:id "CPL" + :regex #"(?i)Common\s+Public\s+Licen[cs]e[\s,-]*(\s*V(ersion)?)?(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.0"} + {:id "Creative commons family" + :regex #"(?i)\b(CC([\s-]+BY)?\b|(Creative\s+Commons(\s+Legal\s+Code)?(\s+Attribution)?|Attribution\s+(?\d(.\d)?)))([\s,-]*((?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(\s+Unported|International|Generic)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?\b" + :fn cc-id-constructor + :pad-ver? true + :latest-ver "4.0"} + {:id "EPL" ; Eclipse Public License (EPL) - v 1.0 + :regex #"(?i)\b(EPL|Eclipse(\s+Public)?(\s+Licen?[cs]e)?)(\s*\(EPL\))?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" ; Note: optional "n" in "license" is because of a known typo + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "EUPL" + :regex #"(?i)\bEuropean\s+Union(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(\(?EUPL\)?)?[\s,-]*(V(ersion)?)?(\.)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "1.2"} + {:id "FreeBSD" + :regex #"(?i)\bFreeBSD\b" + :fn (constantly "BSD-2-Clause-FreeBSD")} + {:id "GNU license family" + :regex #"(?i)\b(?(Affero|Lesser|Library|LGPL|AGPL)\s+)?(GPL|GNU(?!\s*Classpath)|General\s+Pub?lic\s+Licen[cs]e)(?\s+(Affero|Lesser|Library))?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?(A|L)?GPL\)?)?([\s,-]*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?\+)?\s*(or(\s+\(?at\s+your\s+(option|discretion)\)?)?)?(\s+any)?(\s*(?later|newer|only))?" + :fn gpl-id-constructor + :pad-ver? true + :latest-ver 3.0} + {:id "Hippocratic" + :regex #"(?i)\bHippocratic\b" + :fn (constantly "Hippocratic-2.1")} ; There are no other listed versions of this license + {:id "LLVM-exception" + :regex #"(?i)\bLLVM[\s-]+Exception\b" + :fn (constantly "LLVM-exception")} + {:id "MIT" + :regex #"(?i)\b(MIT|Bouncy\s+Castle)(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" + :fn (constantly "MIT")} + {:id "MPL" + :regex #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "MX4J" + :regex #"(?i)\bMX4J\s+Licen[cs]e(,?\s+v(ersion)?\s*1\.0)?\b" + :fn (constantly "Apache-1.1")} ; See https://github.com/spdx/license-list-XML/pull/594 - the MX4J license *is* the Apache-1.1 license, according to SPDX + {:id "NASA" + :regex #"(?i)\bNASA(\s+Open)?(\s+Source)?(\s+Agreement)?[\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "1.3"} + {:id "Plexus" + :regex #"(?i)\bApache\s+Licen[cs]e(\s+but)?(\s+with)?(\s+the)?\s+acknowledgment\s+clause\s+removed\b" + :fn (constantly "Plexus")} + {:id "Proprietary or commercial" + :regex #"(?i)\b(Propriet[ao]ry|Commercial|All\s+Rights\s+Reserved|Private)\b" + :fn proprietary-or-commercial} + {:id "Public Domain" + :regex #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)" + :fn public-domain} + {:id "Ruby" + :regex #"(?i)\bRuby(\s+Licen[cs]e)?\b" + :fn (constantly "Ruby")} + {:id "SGI-B" + :regex #"(?i)\bSGI(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "Unlicense" + :regex #"(?i)\bUnlicen[cs]e\b" + :fn (constantly "Unlicense")} + {:id "WTFPL" + :regex #"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b" + :fn (constantly "WTFPL")} + {:id "Zlib" + :regex #"\b(?i)zlib(?![\s/]+libpng)\b" + :fn (constantly "Zlib")} + ]))) (defn- match-regex "Returns a map containing the SPDX :id and :start index of the given @@ -536,10 +555,28 @@ exception ids for the given string, or nil if there were no matches. Results are in the order in which they appear in the string." [s] - (some->> (seq (filter identity (pmap (partial match-regex s) license-name-matching))) + (some->> (seq (filter identity (pmap (partial match-regex s) @license-name-matching-d))) (sort-by :start) (map :id))) +(defn- filter-blanks + "Filter blank strings out of coll" + [coll] + (when (seq coll) + (seq (filter #(or (not (string? %)) (not (s/blank? %))) coll)))) + +(defn- map-split-and-interpose + "Maps over the given sequence, splitting strings using the given regex + and interposing the given value, returning a (flattened) sequence." + [re int coll] + (mapcat #(if-not (string? %) + [%] + (let [splits (s/split % re)] + (if (nil? 
int) + splits + (interpose int splits)))) + coll)) + (defn- split-on-operators "Case insensitively splits a string based on license operators (and, or, with), but only if they're not also part of a license name (e.g. @@ -547,10 +584,11 @@ License version 2.0 or (at your option) any later version', etc.)." [s] (when-not (s/blank? s) - (map #(if (keyword? %) % (s/trim %)) - (mapcat #(if (keyword? %) [%] (interpose :with (s/split % #"(?i)\b(with|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)"))) - (mapcat #(if (keyword? %) [%] (interpose :or (s/split % #"(?i)\bor(?!\s+(later|lator|newer|lesser|library))\b"))) - (interpose :and (s/split s #"(?i)\b(and|\&)(?!(\s+distribution))\b"))))))) + (->> (s/split (s/trim s) #"(?i)\band[/-\\]+or\b") + (map-split-and-interpose #"(?i)(\band|\&)(?!\s+(distribution|all\s+rights\s+reserved))" :and) + (map-split-and-interpose #"(?i)\bor(?!\s*(-?later|lator|newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?))" :or) + (map-split-and-interpose #"(?i)\b(with|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)" :with) + filter-blanks))) ;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! (defn- string->ids @@ -572,45 +610,56 @@ (if-let [spdx-id (get @spdx-ids-d (s/lower-case s))] [spdx-id] ; 2. Is it an SPDX license or exception name? - (if-let [name-match (listed-name->ids s)] - [name-match] + (if-let [name-matches (listed-name->ids s)] + (vec name-matches) ; 3. If it's a URI, perform URI matching on it (this is to handle some dumb corner cases that do exist in the real world) (if-let [uri-matches (uri->ids s)] (vec uri-matches) ; 4. Attempt regex name matching (if-let [re-name-matches (match-regexes s)] - re-name-matches - ; 5. Give up and return a lice-comb "unlisted" LicenseRef - [(name->unlisted s)]))))))) + (vec re-name-matches) + ; 5. 
Give up and return a lice-comb "unlisted" LicenseRef + [(name->unlisted s)]))))))) + +(def ^:private push conj) ; Because I won't remember in X years when I come back to this code that with lists-as-stacks conj == push (defn- process-expression-element - "Processes a single new element being added to l, and will combine it - with earlier elements in l where appropriate." - [l e] + "Processes a single new expression element e (either a keyword representing + an SPDX operator, or an SPDX identifier) in the context of stack (list) s." + [s e] (if (keyword? e) - (conj l e) - (case (count (take-while keyword? l)) - 0 (if (= (peek l) e) l (conj l e)) - 1 (let [kw (s/upper-case (name (first l))) - prior (second l) - earlier (rest (rest l))] + ; e is a keyword (SPDX operator): only push a keyword if the prior element was an id, or it's different to the prior keyword + (if (= (peek s) e) + s + (push s e)) + ; e is a string (SPDX identifier): depending on how many keywords are currently at the top of s... + (case (count (take-while keyword? s)) + ; No keywords? Push e onto s + 0 (push s e) + ; One keyword? See if we should "collapse" the prior value, the keyword and e into an SPDX expression fragment and push the result onto s + 1 (let [kw (peek s) + operator (s/upper-case (name kw)) + s-minus-1 (pop s) + prior (peek s-minus-1) + s-minus-2 (pop s-minus-1)] (if (nil? prior) - (conj earlier e) - (conj earlier (s/join " " [prior kw e])))) - (let [earlier (drop-while keyword? l)] - (conj earlier e))))) + (push s-minus-2 e) ; s had one keyword on it (which is invalid), so drop it and push e on + (if (or (not= :with kw) ; If the prior keyword was :and or :or, or :with and the current element is a listed exception id, build an SPDX expression fragment and push the result onto s + (se/listed-id? 
e)) + (push s-minus-2 (s/join " " [prior operator e])) + (push s-minus-1 e)))) ; We had a :with operator without a valid exception id following it, so simply drop the :with keyword from the stack and push the current element on + ; Many keywords? That's invalid, so drop all of them and push e onto s + (push (drop-while keyword? s) e)))) ; Multiple keywords were found sequentially, so drop all of them and push the current element on (defn- build-spdx-expressions - "Builds a set of SPDX expression(s) from the given list containing strings and keywords." + "Builds a list of SPDX expression(s) from the given list containing strings and keywords." [l] - (let [l (drop-while keyword? l)] - (loop [result '() - f (first l) - r (rest l)] - (if f - (recur (process-expression-element result f) (first r) (rest r)) - (some-> (seq (reverse (drop-while keyword? result))) - set))))) + (loop [result '() + f (first l) + r (rest l)] + (if f + (recur (process-expression-element result f) (first r) (rest r)) + (seq (reverse result))))) ; Remember to reverse the result, since lists-as-stacks grow at the front, not the end ;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AND SOURCE!!!! (defn name->expressions @@ -626,11 +675,20 @@ ; 1. If it's a valid SPDX expression, return the normalised rendition of it in a set (if-let [normalised-expression (sexp/normalise name)] #{normalised-expression} - ; 2. Attempt to build SPDX expression(s) from the name - (some->> (split-on-operators name) - (mapcat #(if (keyword? %) [%] (string->ids %))) - (map #(if (and (coll? %) (= 1 (count %))) (first %) %)) - build-spdx-expressions))))) + ; 2. Is it an SPDX license or exception name? + (if-let [name-matches (listed-name->ids name)] + name-matches + ; 3. If it's a URI, perform URI matching on it (this is to handle some dumb corner cases that do exist in the real world) + (if-let [uri-matches (uri->ids name)] + uri-matches + ; 4. 
Attempt to build SPDX expression(s) from the name + (some->> (split-on-operators name) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + (map #(if (keyword? %) % (string->ids %))) + flatten + build-spdx-expressions + set))))))) (defn name->ids "Attempts to determine the SPDX license identifier(s) (a set) from the given @@ -660,16 +718,20 @@ Note: this method has a substantial performance cost." [] - ; Parallelise initialisation of the license and exception lists, as they're both sloooooooow - (future - (sl/init!) - @license-list-d) - (future - (se/init!) - @exception-list-d) + ; Parallelise initialisation of the spdx.licenses and spdx.exceptions namespaces, as they're both sloooooooow (~1.5 mins total) + (let [sl-init (future (sl/init!)) + se-init (future (se/init!))] + @sl-init + @se-init) + + ; Serially initialise this namespace's dependent state - they're all pretty fast (< 1s) + @license-ids-d + @exception-ids-d + @license-list-d + @exception-list-d @spdx-ids-d - @license-ids-for-matching-d @index-uri-to-id-d @index-name-to-id-d @http-client-d + @license-name-matching-d nil) diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index d7c91e0..56ee5f0 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -21,7 +21,8 @@ [lice-comb.test-boilerplate :refer [fixture]] [lice-comb.matching :refer [unlisted? proprietary-or-commercial? name->unlisted public-domain proprietary-or-commercial text->ids name->expressions name->ids uri->ids]] [spdx.licenses :as sl] - [spdx.exceptions :as se])) + [spdx.exceptions :as se] + [spdx.expressions :as sexp])) (use-fixtures :once fixture) @@ -44,578 +45,589 @@ (is (true? (every? false? (map unlisted? (sl/ids))))) (is (true? (every? false? (map unlisted? (se/ids))))))) +(defn valid= + "Returns true if all of the SPDX exceptions in s2 are valid, and also + that s1 equals s2." + [s1 s2] + (and (set? s2) + (= s1 s2) + (every? true? (map sexp/valid? 
s2)))) + (deftest name->expressions-tests (testing "Nil, empty or blank" - (is (nil? (name->expressions nil))) - (is (nil? (name->expressions ""))) - (is (nil? (name->expressions " "))) - (is (nil? (name->expressions "\n"))) - (is (nil? (name->expressions "\t")))) + (is (nil? (name->expressions nil))) + (is (nil? (name->expressions ""))) + (is (nil? (name->expressions " "))) + (is (nil? (name->expressions "\n"))) + (is (nil? (name->expressions "\t")))) (testing "SPDX license ids" - (is (= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0-only"))) - (is (= #{"Apache-2.0"} (name->expressions " Apache-2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) - (is (= #{"CC-BY-SA-4.0"} (name->expressions "CC-BY-SA-4.0"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->expressions "GPL-2.0-with-classpath-exception")))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0-only"))) + (is (valid= #{"Apache-2.0"} (name->expressions " Apache-2.0 "))) ; Test whitespace + (is (valid= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) + (is (valid= #{"CC-BY-SA-4.0"} (name->expressions "CC-BY-SA-4.0"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) + (is (valid= #{"GPL-2.0-with-classpath-exception"} (name->expressions "GPL-2.0-with-classpath-exception")))) (testing "Public domain and proprietary/commercial" - (is (= #{(public-domain)} (name->expressions "Public Domain"))) - (is (= #{(public-domain)} (name->expressions "Public domain"))) ; Test lower case - (is (= #{(public-domain)} (name->expressions " Public domain "))) ; Test whitespace - (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Commercial"))) - (is (= #{(proprietary-or-commercial)} 
(name->expressions "All rights reserved")))) + (is (valid= #{(public-domain)} (name->expressions "Public Domain"))) + (is (valid= #{(public-domain)} (name->expressions "Public domain"))) ; Test lower case + (is (valid= #{(public-domain)} (name->expressions " Public domain "))) ; Test whitespace + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Commercial"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "All rights reserved")))) (testing "Expressions that are valid SPDX" - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0 WITH Classpath-exception-2.0"))) - (is (= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache-2.0 OR GPL-3.0"))) - (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0 WITH Classpath-exception-2.0"))) + (is (valid= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache-2.0 OR GPL-3.0"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) (testing "Single expressions that are not valid SPDX" - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache License version 2.0 or GNU General Public License version 3"))) - (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR 
(BSD-3-Clause AND Apache-2.0)"))) - (is (= #{"Apache-2.0 AND MIT"} (name->expressions "Apache & MIT licence"))) - (is (= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution Licence")))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (valid= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache License version 2.0 or GNU General Public License version 3"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)"))) + (is (valid= #{"Apache-2.0 AND MIT"} (name->expressions "Apache & MIT licence"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution Licence")))) (testing "Expressions with weird operators" - (is (= #{"Apache-2.0"} (name->expressions "and and and Apache License 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0 or or or"))) - (is (= #{"Apache-2.0 or MIT"} (name->expressions "Apache License 2.0 or or or or or or or or MIT license"))) - (is (= #{"Apache-2.0" "MIT"} (name->expressions "Apache License 2.0 and/or MIT licence")))) - (testing "Multiple expressions that are not valid SPDX" - (is (= #{"MIT" "BSD-4-Clause"} (name->expressions "MIT / BSD"))) - (is (= #{"Apache-2.0" "GPL-3.0-only"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3"))) - (is (= #{"Apache-2.0" "GPL-3.0-only WITH Classpath-exception-2.0"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3 with classpath exception"))) - (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR BSD-3-Clause AND Apache-2.0"} (name->expressions "Eclipse Public License or General Public License 2.0 or (at your discretion) later w/ classpath exception or MIT 
Licence or three clause bsd and Apache Licence")))) - (testing "Names seen in select POMs on Maven Central" - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License (AGPL) version 3.0"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0 only"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) - (is (= #{"Apache-1.0"} (name->expressions "Apache License 1"))) - (is (= #{"Apache-1.0"} (name->expressions "Apache License 1.0"))) - (is (= #{"Apache-1.0"} (name->expressions "Apache License Version 1.0"))) - (is (= #{"Apache-1.0"} (name->expressions "Apache License, Version 1.0"))) - (is (= #{"Apache-1.0"} (name->expressions "Apache Software License - Version 1.0"))) - (is (= #{"Apache-1.1"} (name->expressions "Apache License 1.1"))) - (is (= #{"Apache-1.1"} (name->expressions "Apache License Version 1.1"))) - (is (= #{"Apache-1.1"} (name->expressions "Apache License, Version 1.1"))) - (is (= #{"Apache-1.1"} (name->expressions "Apache Software License - Version 1.1"))) - (is (= #{"Apache-1.1"} (name->expressions "The MX4J License, version 1.0"))) - (is (= #{"Apache-2.0"} (name->expressions " Apache Software License, Version 2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->expressions "Apache 2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License - Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License 2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License Version 2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License 
Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License v2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License v2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache v2"))) - (is (= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) - (is (= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License (BSD3)"))) - (is (= #{"BSD-3-Clause-Attribution"} (name->expressions "BSD 3-Clause Attribution"))) - (is (= #{"BSD-4-Clause"} (name->expressions "BSD"))) - (is (= #{"CC-BY-3.0"} (name->expressions "Attribution 3.0 Unported"))) - (is (= #{"CC-BY-3.0"} (name->expressions "Creative Commons Legal Code Attribution 3.0 Unported"))) - (is (= #{"CC-BY-4.0"} (name->expressions "Attribution 4.0 International"))) - (is (= #{"CC-BY-SA-4.0"} (name->expressions "Creative Commons Attribution Share Alike 4.0 International"))) - (is (= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) - (is (= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) - (is (= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) - (is (= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) - (is (= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public 
License, Version 1.0"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License v2.0 w/Classpath exception"))) - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 (GPL2), with the classpath exception"))) - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) - (is (= #{"JSON"} (name->expressions "JSON License"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Library General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"MIT"} (name->expressions "Bouncy Castle Licence"))) ; Note spelling of "licence" - (is (= #{"MIT"} (name->expressions "MIT License"))) - (is (= #{"MIT"} (name->expressions "MIT license"))) ; Test capitalisation - (is (= #{"MIT"} (name->expressions "The MIT License"))) - (is (= #{"MPL-1.0"} (name->expressions "Mozilla Public License 1"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License 
Version 2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"Plexus"} (name->expressions "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (is (valid= #{"Apache-2.0"} (name->expressions "and and and Apache License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0 or or or"))) + (is (valid= #{"Apache-2.0 OR MIT"} (name->expressions "Apache License 2.0 or or or or or or or or MIT license"))) + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "Apache License 2.0 or and or and or and or and MIT license"))) + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "or and Apache Licence 2.0 or and or and or and or and MIT and or and"))) + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "Apache License 2.0 and/or MIT licence")))) + (testing "Multiple expressions" + (is (valid= #{"MIT" "BSD-4-Clause"} (name->expressions "MIT / BSD"))) + (is (valid= #{"Apache-2.0" "GPL-3.0-only"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3"))) + (is (valid= #{"Apache-2.0" "GPL-3.0-only WITH Classpath-exception-2.0"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3 with classpath exception"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR BSD-3-Clause AND Apache-2.0"} (name->expressions "Eclipse Public License or General Public License 2.0 or (at your discretion) later w/ classpath exception or MIT Licence or three clause bsd and Apache Licence")))) + (testing "Messed up license expressions" + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "Apache with MIT")))) + (testing "Names seen in handpicked POMs on Maven Central" + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License (AGPL) version 3.0"))) 
+ (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0 only"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License 1"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License 1.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License Version 1.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License, Version 1.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache Software License - Version 1.0"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache License 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache License Version 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache License, Version 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache Software License - Version 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "The MX4J License, version 1.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions " Apache Software License, Version 2.0 "))) ; Test whitespace + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License - Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License Version 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License v2"))) + 
(is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License v2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License (BSD3)"))) + (is (valid= #{"BSD-3-Clause-Attribution"} (name->expressions "BSD 3-Clause Attribution"))) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD"))) + (is (valid= #{"CC-BY-3.0"} (name->expressions "Attribution 3.0 Unported"))) + (is (valid= #{"CC-BY-3.0"} (name->expressions "Creative Commons Legal Code Attribution 3.0 Unported"))) + (is (valid= #{"CC-BY-4.0"} (name->expressions "Attribution 4.0 International"))) + (is (valid= #{"CC-BY-SA-4.0"} (name->expressions "Creative Commons Attribution Share Alike 4.0 International"))) + (is (valid= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) + (is (valid= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) + (is (valid= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions 
"Eclipse Public License, Version 1.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License v2.0 w/Classpath exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 (GPL2), with the classpath exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) + (is (valid= #{"JSON"} (name->expressions "JSON License"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Library General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"MIT"} (name->expressions "Bouncy Castle Licence"))) ; Note spelling of "licence" + (is (valid= #{"MIT"} (name->expressions "MIT License"))) + (is (valid= #{"MIT"} (name->expressions "MIT license"))) ; Test capitalisation + (is (valid= #{"MIT"} (name->expressions "The MIT License"))) + (is (valid= #{"MPL-1.0"} 
(name->expressions "Mozilla Public License 1"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License Version 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Plexus"} (name->expressions "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html (testing "All names seen in POMs on Clojars as of 2023-07-13" -(comment - (is (= #{"AFL-3.0"} (name->expressions "Academic Free License 3.0"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "AGPL v3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "AGPLv3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU AGPLv3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License 3.0 (AGPL-3.0)"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3; Other commercial licenses available."))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! 
- (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, version 3"))) - (is (= #{"AGPL-3.0-or-later"} (name->expressions "AGPL"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License v3 or later (at your option)"))) - (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License version 3 or lator"))) ; Typo in "lator" - (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License"))) - (is (= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License,"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU AGPL-V3 or later"))) - (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0 WITH LLVM-exception"} (name->expressions "Apache 2.0 with LLVM Exception"))) - (is (= #{"Apache-2.0"} (name->expressions " Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) - (is (= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "APACHE"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0"} (name->expressions "ASL 2.0"))) - (is (= 
#{"Apache-2.0"} (name->expressions "ASL"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0"} (name->expressions "Apache 2 License"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache 2 Public License"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache 2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache 2, see LICENSE"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache 2.0 License"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Licence"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->expressions "Apache Licence, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License - Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License - v 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License - v2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License 2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License V2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License V2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0, January 2004"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License v 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License v2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License v2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->expressions "Apache License, 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License, Version 
2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0."))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License, version 2."))) - (is (= #{"Apache-2.0"} (name->expressions "Apache License, version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Public License 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Public License v2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Public License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->expressions "Apache Public License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Public License, version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License - v 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->expressions "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Software Licesne"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->expressions "Apache Sofware Licencse 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Sofware License 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache V2 License"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache V2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache license version 2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache license, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache v2 License"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache v2"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache v2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->expressions "Apache, Version 2.0"))) - (is (= #{"Apache-2.0"} 
(name->expressions "Apache-2.0 License"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "Apache2 License"))) - (is (= #{"Apache-2.0"} (name->expressions "The Apache 2 License"))) - (is (= #{"Apache-2.0"} (name->expressions "The Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->expressions "apache"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0"} (name->expressions "apache-2.0"))) - (is (= #{"Artistic-2.0" "GPL-3.0-only"} (name->expressions "Artistic License/GPL"))) ; Missing conjunction, so return 2 (singleton) expressions - (is (= #{"Artistic-2.0"} (name->expressions "Artistic License"))) ; Listed license missing version - we assume the latest - (is (= #{"Artistic-2.0"} (name->expressions "Artistic-2.0"))) - (is (= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD License"))) - (is (= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD (2 Clause)"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD (2-Clause)"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD (Type 2) Public License"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2 Clause"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2 clause license"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause Licence"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause License"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause \"Simplified\" License"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause license"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD 2-clause \"Simplified\" License"))) - (is (= #{"BSD-2-Clause"} (name->expressions "BSD C2"))) - (is (= #{"BSD-2-Clause"} 
(name->expressions "BSD-2-Clause"))) - (is (= #{"BSD-2-Clause"} (name->expressions "New BSD 2-clause license"))) - (is (= #{"BSD-2-Clause"} (name->expressions "Simplified BSD License"))) - (is (= #{"BSD-2-Clause"} (name->expressions "Simplified BSD license"))) - (is (= #{"BSD-2-Clause"} (name->expressions "The BSD 2-Clause License"))) - (is (= #{"BSD-2-Clause"} (name->expressions "Two clause BSD license"))) - (is (= #{"BSD-2-Clause-FreeBSD"} (name->expressions "FreeBSD License"))) - (is (= #{"BSD-3-Clause" "MIT"} (name->expressions "New-BSD / MIT"))) ; Missing conjunction, so return 2 (singleton) expressions - (is (= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD"))) - (is (= #{"BSD-3-Clause"} (name->expressions "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) - (is (= #{"BSD-3-Clause"} (name->expressions "3-clause BSD license"))) - (is (= #{"BSD-3-Clause"} (name->expressions "3-clause license (New BSD License or Modified BSD License)"))) - (is (= #{"BSD-3-Clause"} (name->expressions "Aduna BSD license"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3 Clause"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause 'New' or 'Revised' License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause \"New\" or \"Revised\" License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause license"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause license"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD New, Version 3.0"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD-3"))) - (is (= #{"BSD-3-Clause"} (name->expressions "BSD-3-Clause"))) - (is (= 
#{"BSD-3-Clause"} (name->expressions "Modified BSD License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "New BSD License or Modified BSD License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "New BSD License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "New BSD license"))) - (is (= #{"BSD-3-Clause"} (name->expressions "Revised BSD"))) - (is (= #{"BSD-3-Clause"} (name->expressions "The 3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "The New BSD License"))) - (is (= #{"BSD-3-Clause"} (name->expressions "The New BSD license"))) - (is (= #{"BSD-3-Clause"} (name->expressions "Three Clause BSD-like License"))) -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/clafka/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) ; Failing due to 
https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/party/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/radix/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/datagrep/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->expressions "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 - (is (= #{"BSD-3-Clause"} (name->expressions "https://opensource.org/licenses/BSD-3-Clause"))) - (is (= #{"BSD-3-Clause"} (name->expressions "new BSD License"))) - (is (= #{"BSD-4-Clause"} (name->expressions "BSD License"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->expressions "BSD Standard License"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->expressions "BSD license"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->expressions "BSD"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->expressions "BSD-style"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->expressions "The BSD License"))) - (is (= #{"BSL-1.0"} (name->expressions "Boost Software License - Version 1.0"))) - (is (= #{"Beerware"} (name->expressions "Beerware 42"))) - (is (= 
#{"Beerware"} (name->expressions "THE BEER-WARE LICENSE"))) - (is (= #{"CC-BY-2.5"} (name->expressions "Creative Commons Attribution 2.5 License"))) - (is (= #{"CC-BY-3.0"} (name->expressions "Creative Commons 3.0"))) - (is (= #{"CC-BY-4.0"} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) - (is (= #{"CC-BY-4.0"} (name->expressions "CC-BY-4.0"))) - (is (= #{"CC-BY-4.0"} (name->expressions "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest - (is (= #{"CC-BY-NC-3.0"} (name->expressions "Creative Commons Attribution-NonCommercial 3.0"))) - (is (= #{"CC-BY-NC-4.0"} (name->expressions "CC BY-NC"))) ; Listed license missing version - we assume the latest - (is (= #{"CC-BY-NC-ND-3.0"} (name->expressions "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) - (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id - (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id - (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) - (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported"))) - (is (= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0"))) - (is (= #{"CC-BY-SA-4.0"} (name->expressions "CC BY-SA 4.0"))) - (is (= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) - (is (= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal"))) - (is (= #{"CC0-1.0"} (name->expressions "CC0"))) - (is (= #{"CC0-1.0"} (name->expressions "Public domain (CC0)"))) - (is (= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License (CDDL)"))) ; 
Listed license missing clause info - (is (= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License"))) ; Listed license missing clause info - (is (= #{"CECILL-2.1"} (name->expressions "CeCILL License"))) ; Listed license missing version - we assume the latest - (is (= #{"CPL-1.0"} (name->expressions "Common Public License - v 1.0"))) - (is (= #{"CPL-1.0"} (name->expressions "Common Public License Version 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "EPL 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "EPL-1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "EPL-v1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License (EPL) - v 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - Version 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0 (EPL-1.0)"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License v 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License v1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License version 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public License, version 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "Eclipse Public Licese - v 1.0"))) - (is (= #{"EPL-1.0"} (name->expressions "https://github.com/cmiles74/uio/blob/master/LICENSE"))) - (is (= #{"EPL-2.0 AND LGPL-3.0-or-later"} (name->expressions "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0 OR Apache-2.0"} (name->expressions "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} 
(name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest - (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! - (is (= #{"EPL-2.0 OR GPL-2.0-or-later"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later"))) - (is (= #{"EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{"EPL-2.0 OR GPL-3.0-or-later"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later"))) - (is (= #{"EPL-2.0" "MIT"} (name->expressions "Eclipse Public MIT"))) ; Listed license missing version - we assume the latest ; Missing conjunction, so return 2 (singleton) expressions - (is (= #{"EPL-2.0"} (name->expressions "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) - (is (= #{"EPL-2.0"} (name->expressions "Copyright (C) 2014 Mathieu Gauthron. 
Distributed under the Eclipse Public License."))) - (is (= #{"EPL-2.0"} (name->expressions "Distributed under the Eclipse Public License, the same as Clojure."))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "ECLIPSE PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "EPL"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "EPL-2.0"))) - (is (= #{"EPL-2.0"} (name->expressions "EPLv2"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse License"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public Licence"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License - v 2.0"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0,"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License v2.0"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2.0"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License, v. 
2.0"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Public License, v2"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse Pulic License"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Eclipse public license, the same as Clojure"))) - (is (= #{"EPL-2.0"} (name->expressions "Eclipse"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->expressions "Some Eclipse Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"EUPL-1.1"} (name->expressions "European Union Public Licence (EUPL v.1.1)"))) - (is (= #{"EUPL-1.1"} (name->expressions "The European Union Public License, Version 1.1"))) - (is (= #{"EUPL-1.2"} (name->expressions "European Union Public Licence v. 1.2"))) - (is (= #{"EUPL-1.2"} (name->expressions "European Union Public License 1.2 or later"))) - (is (= #{"EUPL-1.2"} (name->expressions "European Union Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, Version 2, with the Classpath Exception"))) - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2 with Classpath exception"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU General Public License 2"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License v2"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2.0"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GNU Public License, v2"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GPL v2"))) - (is (= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) - (is (= 
#{"GPL-2.0-only"} (name->expressions "GPLv2"))) - (is (= #{"GPL-2.0-only"} (name->expressions "The GNU General Public License, Version 2"))) - (is (= #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{"GPL-2.0-or-later"} (name->expressions "GNU GPL V2+"))) - (is (= #{"GPL-2.0-or-later"} (name->expressions "GPL 2.0+"))) - (is (= #{"GPL-3.0-only"} (name->expressions " GNU GENERAL PUBLIC LICENSE Version 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v 3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v. 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU GPL, version 3, 29 June 2007"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License V3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License Version 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, Version 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3 (GPLv3)"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU Public License V. 
3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU Public License V3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNU public licence V3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GNUv3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL 3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL V3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL v3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL version 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL-3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL-3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL-3.0-only"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPL3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "GPLv3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "General Public License 3"))) - (is (= #{"GPL-3.0-only"} (name->expressions "General Public License v3.0"))) - (is (= #{"GPL-3.0-only"} (name->expressions "The GNU General Public License v3.0"))) - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GPL v3+"))) - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU GPLv3+"))) - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License (GPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License v3.0 or later"))) - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License, Version 3 (or later)"))) - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public 
License,version 2.0 or (at your option) any later version"))) - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->expressions "GNU"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->expressions "GPL V3+"))) - (is (= #{"GPL-3.0-or-later"} (name->expressions "GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->expressions "The GNU General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"Hippocratic-2.1"} (name->expressions "Hippocratic License"))) - (is (= #{"ISC WITH Classpath-exception-2.0"} (name->expressions "ISC WITH Classpath-exception-2.0"))) - (is (= #{"ISC"} (name->expressions "ISC Licence"))) - (is (= #{"ISC"} (name->expressions "ISC License"))) - (is (= #{"ISC"} (name->expressions "ISC"))) - (is (= #{"ISC"} (name->expressions "MIT/ISC License"))) - (is (= #{"ISC"} (name->expressions "MIT/ISC"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU LGPL v2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License v2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License, Version 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Pulic License v2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) V2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "LGPL 2.1"))) - (is 
(= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1-only"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "LGPLv2.1"))) - (is (= #{"LGPL-2.1-only"} (name->expressions "lgpl_v2_1"))) - (is (= #{"LGPL-2.1-or-later"} (name->expressions "GNU Lesser General Public License, version 2.1 or newer"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU General Lesser Public License (LGPL) version 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL 3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL v3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL version 3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL-3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU LGPLv3 "))) ; Note trailing space - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL) Version 3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser 
General Public License v3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License, Version 3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Genereal Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "Gnu Lesser Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "L GPL 3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0 (GNU Lesser General Public License)"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL Open Source license"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL v3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0-only"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "LGPLv3"))) - (is (= #{"LGPL-3.0-only"} (name->expressions "Lesser GPL"))) ; Listed license missing version - we assume the latest - (is (= 
#{"LGPL-3.0-only"} (name->expressions "Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->expressions "Lesser General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, Version 3 or later"))) - (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, v. 3 or later"))) - (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3 or later"))) - (is (= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) - (is (= #{"LGPL-3.0-or-later"} (name->expressions "LGPL-3.0-or-later"))) - (is (= #{"LGPL-3.0-or-later"} (name->expressions "LGPLv3+"))) - (is (= #{"LGPL-3.0-or-later"} (name->expressions "Licensed under GNU Lesser General Public License Version 3 or later (the "))) ; Note trailing space - (is (= #{"Libpng"} (name->expressions "zlib/libpng License"))) - (is (= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (name->expressions "MIT/Apache-2.0/BSD-3-Clause"))) - (is (= #{"MIT"} (name->expressions " MIT License"))) - (is (= #{"MIT"} (name->expressions "Distributed under an MIT-style license (see LICENSE for details)."))) - (is (= #{"MIT"} (name->expressions "Expat (MIT) license"))) - (is (= #{"MIT"} (name->expressions "MIT LICENSE"))) - (is (= #{"MIT"} (name->expressions "MIT Licence"))) - (is (= #{"MIT"} (name->expressions "MIT Licens"))) - (is (= #{"MIT"} (name->expressions "MIT License (MIT)"))) - (is (= #{"MIT"} (name->expressions "MIT License"))) - (is (= #{"MIT"} (name->expressions "MIT Public License"))) - (is (= #{"MIT"} (name->expressions "MIT license"))) - (is (= #{"MIT"} (name->expressions "MIT public License"))) - (is (= #{"MIT"} (name->expressions "MIT public license"))) - (is (= #{"MIT"} (name->expressions "MIT"))) - (is (= #{"MIT"} 
(name->expressions "MIT-style license (see LICENSE for details)."))) - (is (= #{"MIT"} (name->expressions "THE MIT LICENSE"))) - (is (= #{"MIT"} (name->expressions "The MIT Licence"))) - (is (= #{"MIT"} (name->expressions "The MIT License (MIT) "))) ; Note trailing space - (is (= #{"MIT"} (name->expressions "The MIT License (MIT) | Open Source Initiative"))) - (is (= #{"MIT"} (name->expressions "The MIT License (MIT)"))) - (is (= #{"MIT"} (name->expressions "The MIT License"))) - (is (= #{"MIT"} (name->expressions "The MIT License."))) - (is (= #{"MIT"} (name->expressions "http://opensource.org/licenses/MIT"))) -; (is (= #{"MIT"} (name->expressions "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 - (is (= #{"MPL-1.0"} (name->expressions "Mozilla Public License Version 1.0"))) - (is (= #{"MPL-1.1"} (name->expressions "Mozilla Public License Version 1.1"))) - (is (= #{"MPL-2.0"} (name->expressions "MPL 2"))) - (is (= #{"MPL-2.0"} (name->expressions "MPL 2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "MPL v2"))) - (is (= #{"MPL-2.0"} (name->expressions "MPL"))) ; Listed license missing version - we assume the latest - (is (= #{"MPL-2.0"} (name->expressions "MPL-2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "MPL-v2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "MPL2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public Licence 2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License (Version 2.0)"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License 2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License Version 2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0+"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License version 2"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License version 
2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License, v. 2.0"))) - (is (= #{"MPL-2.0"} (name->expressions "Mozilla Public License, version 2.0"))) - (is (= #{"NASA-1.3"} (name->expressions "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) - (is (= #{"NASA-1.3"} (name->expressions "NASA Open Source Agreement, Version 1.3"))) - (is (= #{"NCSA"} (name->expressions "University of Illinois/NCSA Open Source License"))) - (is (= #{"Ruby"} (name->expressions "Ruby License"))) - (is (= #{"SGI-B-2.0"} (name->expressions "SGI"))) ; Listed license missing version - we assume the latest - (is (= #{"SMPPL"} (name->expressions "SMPPL"))) - (is (= #{"Unlicense"} (name->expressions "The UnLicense"))) - (is (= #{"Unlicense"} (name->expressions "The Unlicence"))) - (is (= #{"Unlicense"} (name->expressions "The Unlicense"))) - (is (= #{"Unlicense"} (name->expressions "UnLicense"))) - (is (= #{"Unlicense"} (name->expressions "Unlicense License"))) - (is (= #{"Unlicense"} (name->expressions "Unlicense"))) - (is (= #{"Unlicense"} (name->expressions "unlicense"))) - (is (= #{"W3C"} (name->expressions "W3C Software license"))) - (is (= #{"WTFPL"} (name->expressions "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) - (is (= #{"WTFPL"} (name->expressions "DO-WTF-U-WANT-2"))) - (is (= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License"))) - (is (= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License, Version 2"))) - (is (= #{"WTFPL"} (name->expressions "WTFPL v2"))) - (is (= #{"WTFPL"} (name->expressions "WTFPL – Do What the Fuck You Want to Public License"))) - (is (= #{"WTFPL"} (name->expressions "WTFPL"))) - (is (= #{"X11"} (name->expressions "MIT X11 License"))) - (is (= #{"X11"} (name->expressions "MIT/X11"))) - (is (= #{"Zlib"} (name->expressions "Zlib License"))) - (is (= #{"Zlib"} (name->expressions 
"zlib License"))) - (is (= #{"Zlib"} (name->expressions "zlib license"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "All Rights Reserved"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "All rights reserved"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Copyright & all rights reserved Lean Pixel"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Copyright 2013 The Fresh Diet. All rights reserved."))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Copyright 2017 All Rights Reserved"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Not fit for public use so formally proprietary software - this is not open-source"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Private License"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Private"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietary License"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Proprietory. Copyright Jayaraj Poroor. 
All Rights Reserved."))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Tulos Commercial License"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "Wildbit Proprietary License"))) - (is (= #{(proprietary-or-commercial)} (name->expressions "proprietary"))) - (is (= #{(public-domain)} (name->expressions "Public Domain"))) - (is (= #{(str "GPL-2.0-or-later OR " (name->unlisted "Swiss Ephemeris"))} (name->expressions "GPL v2+ or Swiss Ephemeris"))) - (is (= #{(str "MIT AND " (proprietary-or-commercial))} (name->expressions "Dual MIT & Proprietary"))) + (is (valid= #{"AFL-3.0"} (name->expressions "Academic Free License 3.0"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL v3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPLv3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AGPLv3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License 3.0 (AGPL-3.0)"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3; Other commercial licenses available."))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! 
+ (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, version 3"))) + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "AGPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License v3 or later (at your option)"))) + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License version 3 or lator"))) ; Typo in "lator" + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License"))) + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License,"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU AGPL-V3 or later"))) + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Apache-2.0 WITH LLVM-exception"} (name->expressions "Apache 2.0 with LLVM Exception"))) + (is (valid= #{"Apache-2.0"} (name->expressions " Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) + (is (valid= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "APACHE"))) ; Listed license missing 
version - we assume the latest + (is (valid= #{"Apache-2.0"} (name->expressions "ASL 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "ASL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2 Public License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2, see LICENSE"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2.0 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License - Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License - v 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License - v2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License V2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License V2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0, January 2004"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License v 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License v2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License version 2.0"))) + (is (valid= 
#{"Apache-2.0"} (name->expressions "Apache License"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0."))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, version 2."))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License, version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License - v 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software Licesne"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Sofware Licencse 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Sofware License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache V2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache V2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache license version 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache license, Version 2.0"))) + (is (valid= #{"Apache-2.0"} 
(name->expressions "Apache v2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache v2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache-2.0 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache 2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "apache"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Apache-2.0"} (name->expressions "apache-2.0"))) + (is (valid= #{"Artistic-2.0" "GPL-3.0-only"} (name->expressions "Artistic License/GPL"))) ; Missing conjunction, so return 2 (singleton) expressions + (is (valid= #{"Artistic-2.0"} (name->expressions "Artistic License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Artistic-2.0"} (name->expressions "Artistic-2.0"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD (2 Clause)"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD (2-Clause)"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD (Type 2) Public License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2 Clause"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2 clause license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause Licence"))) + 
(is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause \"Simplified\" License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-clause \"Simplified\" License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD C2"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD-2-Clause"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "New BSD 2-clause license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "Simplified BSD License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "Simplified BSD license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "The BSD 2-Clause License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "Two clause BSD license"))) + (is (valid= #{"BSD-2-Clause-FreeBSD"} (name->expressions "FreeBSD License"))) + (is (valid= #{"BSD-3-Clause" "MIT"} (name->expressions "New-BSD / MIT"))) ; Missing conjunction, so return 2 (singleton) expressions + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-clause BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-clause license (New BSD License or Modified BSD License)"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Aduna BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3 Clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause 'New' or 'Revised' License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause 
\"New\" or \"Revised\" License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD New, Version 3.0"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD-3"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD-3-Clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Modified BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "New BSD License or Modified BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "New BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "New BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Revised BSD"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The 3-Clause BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The New BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The New BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Three Clause BSD-like License"))) +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/clafka/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} 
(name->expressions "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/party/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/radix/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/datagrep/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 + (is (valid= #{"BSD-3-Clause"} (name->expressions "https://opensource.org/licenses/BSD-3-Clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "new BSD License"))) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} 
(name->expressions "BSD Standard License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD license"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD-style"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "The BSD License"))) + (is (valid= #{"BSL-1.0"} (name->expressions "Boost Software License - Version 1.0"))) + (is (valid= #{"Beerware"} (name->expressions "Beerware 42"))) + (is (valid= #{"Beerware"} (name->expressions "THE BEER-WARE LICENSE"))) + (is (valid= #{"CC-BY-2.5"} (name->expressions "Creative Commons Attribution 2.5 License"))) + (is (valid= #{"CC-BY-3.0"} (name->expressions "Creative Commons 3.0"))) + (is (valid= #{"CC-BY-4.0"} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) + (is (valid= #{"CC-BY-4.0"} (name->expressions "CC-BY-4.0"))) + (is (valid= #{"CC-BY-4.0"} (name->expressions "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"CC-BY-NC-3.0"} (name->expressions "Creative Commons Attribution-NonCommercial 3.0"))) + (is (valid= #{"CC-BY-NC-4.0"} (name->expressions "CC BY-NC"))) ; Listed license missing version - we assume the latest + (is (valid= #{"CC-BY-NC-ND-3.0"} (name->expressions "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: the US suffix here is 
meaningless, as there is no CC-BY-SA-3.0-US license id + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported"))) + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0"))) + (is (valid= #{"CC-BY-SA-4.0"} (name->expressions "CC BY-SA 4.0"))) + (is (valid= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) + (is (valid= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal"))) + (is (valid= #{"CC0-1.0"} (name->expressions "CC0"))) + (is (valid= #{"CC0-1.0"} (name->expressions "Public domain (CC0)"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License (CDDL)"))) ; Listed license missing clause info + (is (valid= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License"))) ; Listed license missing clause info + (is (valid= #{"CECILL-2.1"} (name->expressions "CeCILL License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"CPL-1.0"} (name->expressions "Common Public License - v 1.0"))) + (is (valid= #{"CPL-1.0"} (name->expressions "Common Public License Version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "EPL 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "EPL-1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "EPL-v1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License (EPL) - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License - Version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0 (EPL-1.0)"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) + 
(is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License v1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License, version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public Licese - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "https://github.com/cmiles74/uio/blob/master/LICENSE"))) + (is (valid= #{"EPL-2.0 AND LGPL-3.0-or-later"} (name->expressions "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0 OR Apache-2.0"} (name->expressions "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! 
+ (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later"))) + (is (valid= #{"EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"EPL-2.0 OR GPL-3.0-or-later"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later"))) + (is (valid= #{"EPL-2.0" "MIT"} (name->expressions "Eclipse Public MIT"))) ; Listed license missing version - we assume the latest ; Missing conjunction, so return 2 (singleton) expressions + (is (valid= #{"EPL-2.0"} (name->expressions "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) + (is (valid= #{"EPL-2.0"} (name->expressions "Copyright (C) 2014 Mathieu Gauthron. Distributed under the Eclipse Public License."))) + (is (valid= #{"EPL-2.0"} (name->expressions "Distributed under the Eclipse Public License, the same as Clojure."))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "ECLIPSE PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "EPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "EPL-2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "EPLv2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public Licence"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License - v 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) + (is (valid= #{"EPL-2.0"} 
(name->expressions "Eclipse Public License 2.0,"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License v2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License, v. 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License, v2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Pulic License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse public license, the same as Clojure"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Some Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EUPL-1.1"} (name->expressions "European Union Public Licence (EUPL v.1.1)"))) + (is (valid= #{"EUPL-1.1"} (name->expressions "The European Union Public License, Version 1.1"))) + (is (valid= #{"EUPL-1.2"} (name->expressions "European Union Public Licence v. 
1.2"))) + (is (valid= #{"EUPL-1.2"} (name->expressions "European Union Public License 1.2 or later"))) + (is (valid= #{"EUPL-1.2"} (name->expressions "European Union Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, Version 2, with the Classpath Exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2 with Classpath exception"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License 2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License v2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2.0"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License, v2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPL v2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPLv2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "The GNU General Public License, Version 2"))) + (is (valid= #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU GPL V2+"))) + (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GPL 2.0+"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions " GNU GENERAL PUBLIC LICENSE Version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL 3"))) + (is (valid= 
#{"GPL-3.0-only"} (name->expressions "GNU GPL v 3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL v. 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL, version 3, 29 June 2007"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License (GPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License V3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License Version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, Version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3 (GPLv3)"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU Public License V. 
3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU Public License V3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU public licence V3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNUv3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL 3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL V3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL v3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL-3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL-3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL-3.0-only"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPLv3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "General Public License 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "General Public License v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "The GNU General Public License v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "The GNU General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU GPL v3+"))) + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU GPLv3+"))) + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License v3.0 or later"))) + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License, Version 3 (or later)"))) + (is (valid= 
#{"GPL-3.0-or-later"} (name->expressions "GNU General Public License,version 2.0 or (at your option) any later version"))) + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GPL V3+"))) + (is (valid= #{"Hippocratic-2.1"} (name->expressions "Hippocratic License"))) + (is (valid= #{"ISC WITH Classpath-exception-2.0"} (name->expressions "ISC WITH Classpath-exception-2.0"))) + (is (valid= #{"ISC"} (name->expressions "ISC Licence"))) + (is (valid= #{"ISC"} (name->expressions "ISC License"))) + (is (valid= #{"ISC"} (name->expressions "ISC"))) + (is (valid= #{"ISC"} (name->expressions "MIT/ISC License"))) + (is (valid= #{"ISC"} (name->expressions "MIT/ISC"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU LGPL v2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License v2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License, Version 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Pulic License v2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) V2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPL 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1-only"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPLv2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "lgpl_v2_1"))) + (is (valid= #{"LGPL-2.1-or-later"} (name->expressions "GNU Lesser General Public 
License, version 2.1 or newer"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU General Lesser Public License (LGPL) version 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL v3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL-3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPLv3 "))) ; Note trailing space + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL) Version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License v3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3.0"))) + (is 
(valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License, Version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Genereal Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "Gnu Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "L GPL 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0 (GNU Lesser General Public License)"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL Open Source license"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL v3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0-only"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPLv3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is 
(valid= #{"LGPL-3.0-only"} (name->expressions "Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, Version 3 or later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, v. 3 or later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3 or later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "LGPL-3.0-or-later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "LGPLv3+"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "Licensed under GNU Lesser General Public License Version 3 or later (the "))) ; Note trailing space + (is (valid= #{"Libpng"} (name->expressions "zlib/libpng License"))) + (is (valid= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (name->expressions "MIT/Apache-2.0/BSD-3-Clause"))) + (is (valid= #{"MIT"} (name->expressions " MIT License"))) + (is (valid= #{"MIT"} (name->expressions "Distributed under an MIT-style license (see LICENSE for details)."))) + (is (valid= #{"MIT"} (name->expressions "Expat (MIT) license"))) + (is (valid= #{"MIT"} (name->expressions "MIT LICENSE"))) + (is (valid= #{"MIT"} (name->expressions "MIT Licence"))) + (is (valid= #{"MIT"} (name->expressions "MIT Licens"))) + (is (valid= #{"MIT"} (name->expressions "MIT License (MIT)"))) + (is (valid= #{"MIT"} (name->expressions "MIT License"))) + (is (valid= #{"MIT"} (name->expressions "MIT Public License"))) + (is (valid= #{"MIT"} (name->expressions "MIT license"))) + (is (valid= #{"MIT"} (name->expressions "MIT public License"))) + (is (valid= #{"MIT"} (name->expressions "MIT public license"))) + (is (valid= #{"MIT"} (name->expressions "MIT"))) + (is (valid= #{"MIT"} (name->expressions 
"MIT-style license (see LICENSE for details)."))) + (is (valid= #{"MIT"} (name->expressions "THE MIT LICENSE"))) + (is (valid= #{"MIT"} (name->expressions "The MIT Licence"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License (MIT) "))) ; Note trailing space + (is (valid= #{"MIT"} (name->expressions "The MIT License (MIT) | Open Source Initiative"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License (MIT)"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License."))) + (is (valid= #{"MIT"} (name->expressions "http://opensource.org/licenses/MIT"))) +; (is (valid= #{"MIT"} (name->expressions "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 + (is (valid= #{"MPL-1.0"} (name->expressions "Mozilla Public License Version 1.0"))) + (is (valid= #{"MPL-1.1"} (name->expressions "Mozilla Public License Version 1.1"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL 2"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL v2"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"MPL-2.0"} (name->expressions "MPL-2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL-v2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public Licence 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License (Version 2.0)"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License Version 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0+"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla 
Public License version 2"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License version 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License, v. 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License, version 2.0"))) + (is (valid= #{"NASA-1.3"} (name->expressions "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) + (is (valid= #{"NASA-1.3"} (name->expressions "NASA Open Source Agreement, Version 1.3"))) + (is (valid= #{"NCSA"} (name->expressions "University of Illinois/NCSA Open Source License"))) + (is (valid= #{"Ruby"} (name->expressions "Ruby License"))) + (is (valid= #{"SGI-B-2.0"} (name->expressions "SGI"))) ; Listed license missing version - we assume the latest + (is (valid= #{"SMPPL"} (name->expressions "SMPPL"))) + (is (valid= #{"Unlicense"} (name->expressions "The UnLicense"))) + (is (valid= #{"Unlicense"} (name->expressions "The Unlicence"))) + (is (valid= #{"Unlicense"} (name->expressions "The Unlicense"))) + (is (valid= #{"Unlicense"} (name->expressions "UnLicense"))) + (is (valid= #{"Unlicense"} (name->expressions "Unlicense License"))) + (is (valid= #{"Unlicense"} (name->expressions "Unlicense"))) + (is (valid= #{"Unlicense"} (name->expressions "unlicense"))) + (is (valid= #{"W3C"} (name->expressions "W3C Software license"))) + (is (valid= #{"WTFPL"} (name->expressions "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) + (is (valid= #{"WTFPL"} (name->expressions "DO-WTF-U-WANT-2"))) + (is (valid= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License"))) + (is (valid= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License, Version 2"))) + (is (valid= #{"WTFPL"} (name->expressions "WTFPL v2"))) + (is (valid= #{"WTFPL"} (name->expressions "WTFPL – Do What the Fuck You Want to Public License"))) + (is (valid= #{"WTFPL"} 
(name->expressions "WTFPL"))) + (is (valid= #{"X11"} (name->expressions "MIT X11 License"))) + (is (valid= #{"X11"} (name->expressions "MIT/X11"))) + (is (valid= #{"Zlib"} (name->expressions "Zlib License"))) + (is (valid= #{"Zlib"} (name->expressions "zlib License"))) + (is (valid= #{"Zlib"} (name->expressions "zlib license"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "All Rights Reserved"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "All rights reserved"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Copyright & all rights reserved Lean Pixel"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Copyright 2013 The Fresh Diet. All rights reserved."))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Copyright 2017 All Rights Reserved"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Not fit for public use so formally proprietary software - this is not open-source"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Private License"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Private"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietary License"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietory. Copyright Jayaraj Poroor. 
All Rights Reserved."))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Tulos Commercial License"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "Wildbit Proprietary License"))) + (is (valid= #{(proprietary-or-commercial)} (name->expressions "proprietary"))) + (is (valid= #{(public-domain)} (name->expressions "Public Domain"))) + (is (valid= #{(str "GPL-2.0-or-later OR " (name->unlisted "Swiss Ephemeris"))} (name->expressions "GPL v2+ or Swiss Ephemeris"))) + (is (valid= #{(str "MIT AND " (proprietary-or-commercial))} (name->expressions "Dual MIT & Proprietary"))) (is (unlisted-only? (name->expressions "${license.id}"))) (is (unlisted-only? (name->expressions "A Clojure library for Google Cloud Pub/Sub."))) (is (unlisted-only? (name->expressions "APGL"))) ; Probable typo @@ -695,7 +707,6 @@ (is (unlisted-only? (name->expressions "url"))) (is (unlisted-only? (name->expressions "wisdragon"))) (is (unlisted-only? (name->expressions "wiseloong"))))) -) (comment ; Note: these tests should be extended indefinitely, as it exercises the most-utilised part of the library (matching license names found in POMs) From eb41e9297989d7f2a18424ae5f47781bb827ecd0 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 1 Sep 2023 16:12:26 -0700 Subject: [PATCH 14/34] :construction: Ongoing work on issue #3 --- .../ISSUE_TEMPLATE/Invalid_id_constructed.md | 13 + README.md | 7 +- deps.edn | 2 +- src/lice_comb/deps.clj | 45 +- src/lice_comb/files.clj | 49 +- src/lice_comb/impl/matching.clj | 308 ++++++ src/lice_comb/impl/regex_matching.clj | 346 +++++++ src/lice_comb/impl/spdx.clj | 137 +++ src/lice_comb/impl/utils.clj | 12 + src/lice_comb/matching.clj | 692 +------------- src/lice_comb/maven.clj | 35 +- test/lice_comb/deps_test.clj | 235 ++--- test/lice_comb/files_test.clj | 72 +- test/lice_comb/impl_regex_matching_test.clj | 255 +++++ test/lice_comb/matching_test.clj | 873 +++--------------- test/lice_comb/maven_test.clj | 48 +- 16 files changed, 
1503 insertions(+), 1626 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/Invalid_id_constructed.md create mode 100644 src/lice_comb/impl/matching.clj create mode 100644 src/lice_comb/impl/regex_matching.clj create mode 100644 src/lice_comb/impl/spdx.clj create mode 100644 test/lice_comb/impl_regex_matching_test.clj diff --git a/.github/ISSUE_TEMPLATE/Invalid_id_constructed.md b/.github/ISSUE_TEMPLATE/Invalid_id_constructed.md new file mode 100644 index 0000000..1dfbd7a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/Invalid_id_constructed.md @@ -0,0 +1,13 @@ +--- +name: 🐛 Invalid SPDX identifier constructed +about: When the library constructs an invalid SPDX identifier. 😢 + +--- + +## `lice-comb` API(s) you were calling, if known: + +_e.g. `lice-comb.deps/deps-licenses`_ + +## Input data that you provided to that API: + +_e.g. a license name, or the URI of a file containing the license text, or the `tools.deps` coordinate of the dependency, etc._ diff --git a/README.md b/README.md index 6631808..aba851c 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ This library leverages, and is inspired by, the *excellent* [SPDX project](https * `lice-comb` (all versions) requires an internet connection. +* `lice-comb` (all versions) assumes Maven is installed and in the `PATH` (but has fallback logic if it isn't). + * `lice-comb` (v2.0+) requires JDK 11 or higher. ## Installation @@ -54,9 +56,10 @@ $ deps-try com.github.pmonks/lice-comb ### 1.x -> 2.x -Implementing [issue #3](https://github.com/pmonks/lice-comb/issues/3) resulted in the creation of a [new SPDX-specific library (`clj-spdx`)](https://github.com/pmonks/clj-spdx) that leverages [the official SPDX Java library](https://github.com/spdx/Spdx-Java-Library). Because of irreconcilable differences in how that Java library represents license data compared to `lice-comb` v1.x, as well as the addition of support for SPDX license exceptions, it was not possible to retain backwards compatibility. 
+The implementation of [issue #3](https://github.com/pmonks/lice-comb/issues/3) resulted in a number of unavoidable breaking changes, including: -The backwards compatibility breaking changes are limited to the (removed) `lice-comb.spdx` namespace however, so if you're not using that namespace you should be unaffected. If you are using that namespace, migration involves migrating to [`clj-spdx`](https://github.com/pmonks/clj-spdx), and (possibly) the `lice-comb.matching` namespace. +* A wholesale change from returning sets of SPDX identifiers to returning sets of SPDX expressions +* The creation of [a dedicated SPDX-specific library (`clj-spdx`)](https://github.com/pmonks/clj-spdx) that leverages [the official SPDX Java library](https://github.com/spdx/Spdx-Java-Library) ## Contributor Information diff --git a/deps.edn b/deps.edn index a4007e6..550d82b 100644 --- a/deps.edn +++ b/deps.edn @@ -25,7 +25,7 @@ tolitius/xml-in {:mvn/version "0.1.1"} hato/hato {:mvn/version "0.9.0"} miikka/clj-base62 {:mvn/version "0.1.1"} - com.github.pmonks/clj-spdx {:mvn/version "1.0.88"} + com.github.pmonks/clj-spdx {:mvn/version "1.0.91"} com.github.pmonks/rencg {:mvn/version "1.0.34"}} :aliases {:build {:deps {com.github.pmonks/pbr {:mvn/version "RELEASE"}} diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 8bee5c3..d9be4b7 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -26,6 +26,7 @@ [lice-comb.impl.data :as lcd] [lice-comb.impl.utils :as lcu])) +;####TODO: FIGURE OUT HOW TO HANDLE METADATA FOR OVERRIDES / FALLBACKS!!!! (def ^:private overrides-d (delay (lcd/load-edn-resource "lice_comb/deps/overrides.edn"))) (def ^:private fallbacks-d (delay (lcd/load-edn-resource "lice_comb/deps/fallbacks.edn"))) @@ -52,13 +53,17 @@ (when ga [(symbol (first (s/split (str ga) #"\$"))) info])) -(defmulti dep->ids - "Attempt to detect the license(s) in a tools.deps style dep (a MapEntry or - two-element sequence of [groupId/artifactId dep-info])." 
+(defmulti dep->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a tools.deps + style dep (a MapEntry or two-element sequence of + `[groupId/artifactId dep-info]`). + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." {:arglists '([[ga info]])} (fn [[_ info]] (:deps/manifest info))) -(defmethod dep->ids :mvn +(defmethod dep->expressions :mvn [dep] (when dep (let [[ga info] (normalise-dep dep) @@ -67,35 +72,39 @@ (if-let [override (check-overrides ga version)] override (let [pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) - license-ids (check-fallbacks ga - (if-let [license-ids (lcmvn/pom->ids pom-uri)] - license-ids - (lcu/nset (mapcat lcf/zip->ids (:paths info)))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too - license-ids))))) + expressions (check-fallbacks ga + (if-let [expressions (lcmvn/pom->expressions pom-uri)] + expressions +;####TODO: MERGE METADATA MAPS!!!! 
+ (lcu/nset (mapcat lcf/zip->expressions (:paths info)))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too + expressions))))) -(defmethod dep->ids :deps +(defmethod dep->expressions :deps [dep] (when dep (let [[ga info] (normalise-dep dep) version (:git/sha info)] (if-let [override (check-overrides ga version)] override - (check-fallbacks ga (lcf/dir->ids (:deps/root info))))))) + (check-fallbacks ga (lcf/dir->expressions (:deps/root info))))))) -(defmethod dep->ids nil +(defmethod dep->expressions nil [_]) -(defmethod dep->ids :default +(defmethod dep->expressions :default [dep] - (throw (ex-info (str "Unexpected manifest type '" (:deps/manifest (second dep)) "' for dependency " dep) {:dep dep}))) + (throw (ex-info (str "Unexpected manifest type '" (:deps/manifest (second dep)) "' for dependency " dep) + {:dep dep}))) -(defn deps-licenses - "Attempt to detect the license(s) in a tools.deps 'lib map', returning a new - lib map with the licenses assoc'ed in (in key :lice-comb/licenses)" +(defn deps-expressions + "Attempt to detect the SPDX license expression(s) in a tools.deps 'lib map', + returning a new lib map with the licenses assoc'ed in (in key + `:lice-comb/license-expressions`)" [deps] (when deps (into {} - (pmap #(let [[k v] %] [k (assoc v :lice-comb/licenses (dep->ids [k v]))]) deps)))) +;####TODO: CHECK WHETHER METADATA MAPS NEED TO BE MERGED!!!! + (pmap #(let [[k v] %] [k (assoc v :lice-comb/license-expressions (dep->expressions [k v]))]) deps)))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 0935a81..7938c78 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -51,38 +51,51 @@ (throw (java.nio.file.NotDirectoryException. (str dir)))) (throw (java.io.FileNotFoundException. 
(str dir))))))) -(defn file->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given +(defn file->expressions + "Attempts to determine the SPDX license expression(s) (a set) from the given file (an InputStream or something that can have an io/input-stream opened on it). If an InputStream is provided, the associated filename should also be - provided as the second parameter (it is unnecessary in other cases)." - ([f] (file->ids f (lcu/filename f))) + provided as the second parameter (it is unnecessary in other cases). + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + ([f] (file->expressions f (lcu/filename f))) ([f fname] (when (and f fname) (let [fname (s/lower-case fname)] - (cond (= fname "pom.xml") (lcmvn/pom->ids f) - (s/ends-with? fname ".pom") (lcmvn/pom->ids f) + (cond (= fname "pom.xml") (lcmvn/pom->expressions f) + (s/ends-with? fname ".pom") (lcmvn/pom->expressions f) :else (lcmtch/text->ids (io/input-stream f))))))) ; Default is to assume it's a plain text file containing license text(s) -(defn dir->ids - "Attempt to detect the license(s) in a directory. dir may be a String or a - java.io.File, both of which must refer to a directory." +(defn dir->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a directory. dir + may be a String or a java.io.File, both of which must refer to a + readable directory. + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." [dir] (when dir - (lcu/nset (mapcat file->ids (probable-license-files dir))))) +;####TODO: MERGE METADATA MAPS AND EMBELLISH :source!!!! + (lcu/nset (mapcat file->expressions (probable-license-files dir))))) + +(defn zip->expressions + "Attempt to detect the SPDX license expression(s) in a ZIP file. zip may be a + String or a java.io.File, both of which must refer to a ZIP-format compressed + file. 
-(defn zip->ids - "Attempt to detect the license(s) in a ZIP file. zip may be a String or a - java.io.File, both of which must refer to a ZIP-format compressed file." + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." [zip] (when zip (let [zip-file (io/file zip)] (java.util.zip.ZipFile. zip-file) ; This no-op forces validation of the zip file - ZipInputStream does not reliably perform validation (with-open [zip-is (java.util.zip.ZipInputStream. (io/input-stream zip-file))] - (loop [licenses nil - entry (.getNextEntry zip-is)] + (loop [result #{} + entry (.getNextEntry zip-is)] (if entry (if (probable-license-file? entry) - (recur (set/union licenses (file->ids zip-is (lcu/filename entry))) (.getNextEntry zip-is)) - (recur licenses (.getNextEntry zip-is))) - (doall (some-> (seq licenses) set)))))))) ; Realise the result before we exit the `with-open` scope +;####TODO: MERGE METADATA MAPS AND EMBELLISH :source!!!! + (recur (set/union result (file->expressions zip-is (lcu/filename entry))) (.getNextEntry zip-is)) + (recur result (.getNextEntry zip-is))) + (doall (some-> (seq result) set)))))))) ; De-lazy the result before we exit the with-open scope diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj new file mode 100644 index 0000000..e668146 --- /dev/null +++ b/src/lice_comb/impl/matching.clj @@ -0,0 +1,308 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.matching + "Matching helper functionality. Note: this namespace is not part of + the public API of lice-comb and may change without notice." + (:require [clojure.string :as s] + [clojure.set :as set] + [clojure.java.io :as io] + [hato.client :as hc] + [spdx.exceptions :as se] + [spdx.matching :as sm] + [lice-comb.impl.spdx :as lcis] + [lice-comb.impl.regex-matching :as lcirm] + [lice-comb.impl.3rd-party :as lc3] + [lice-comb.impl.utils :as lcu])) + +(def ^:private http-client-d (delay (hc/build-http-client {:connect-timeout 1000 + :redirect-policy :always + :cookie-policy :none}))) + +(defn- fix-public-domain-cc0 + "If the set of ids includes both CC0-1.0 and lice-comb's public domain + LicenseRef, remove the LicenseRef as it's redundant." + [ids] + (if (and (contains? ids (lcis/public-domain)) + (contains? ids "CC0-1.0")) + (disj ids (lcis/public-domain)) + ids)) + +(defn manual-fixes + "Manually fix certain invalid combinations of license identifiers in a set, preserving any metadata attached to ids." + [ids] + (when ids + (some-> ids + fix-public-domain-cc0 + set (with-meta (meta ids))))) ; clojure.core/set returns an existing set with its metadata removed, so re-attach the caller's metadata + +(defmulti text->ids + "Attempts to determine the SPDX license and/or exception identifier(s) (a set) + within the given license text (a String, Reader, InputStream, or something + that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). + The result has metadata attached that describes how the identifiers were + determined. + + Notes: + * this function implements the SPDX matching guidelines (via clj-spdx). + See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ + * the caller is expected to open & close a Reader or InputStream passed to + this function (e.g.
using clojure.core/with-open) + * you cannot pass a String representation of a filename to this method - you + should pass filenames through clojure.java.io/file first" + {:arglists '([text])} + type) + +(defmethod text->ids java.lang.String + [s] + ; These clj-spdx APIs are *expensive*, so we parallelise them + (let [f-lic (future (sm/licenses-within-text s @lcis/license-ids-d)) + f-exc (future (sm/exceptions-within-text s @lcis/exception-ids-d)) + ids (manual-fixes (set/union @f-lic @f-exc))] + (when ids + (with-meta ids (into {} (map #(vec [% {:type :concluded :confidence :high :strategy :spdx-matching}]) ids)))))) + +(defmethod text->ids java.io.Reader + [r] + (let [sw (java.io.StringWriter.)] + (io/copy r sw) + (text->ids (str sw)))) + +(defmethod text->ids java.io.InputStream + [is] + (text->ids (io/reader is))) + +(defmethod text->ids :default + [src] + (when src + (with-open [r (io/reader src)] + (text->ids r)))) + +(defn- cdn-uri + "Converts raw URIs into CDN URIs, for these 'known' hosts: + + * github.com e.g. https://github.com/pmonks/lice-comb/blob/main/LICENSE -> https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE + + If the given URI is not known, returns the input unchanged." + [uri] + (if-let [^java.net.URL uri-obj (try (io/as-url uri) (catch Exception _ nil))] + (case (s/lower-case (.getHost uri-obj)) + "github.com" (-> uri + (s/replace #"(?i)github\.com" "raw.githubusercontent.com") + (s/replace "/blob/" "/")) + uri) ; Default case + uri)) + +(defn- attempt-text-http-get + "Attempts to get plain text as a String from the given URI, returning nil if + unable to do so (including for error conditions - there is no way to + disambiguate errors from non-text content, for example)." + [uri] + (when (lcu/valid-http-uri? uri) + (try + (when-let [response (hc/get (cdn-uri uri) + {:http-client @http-client-d + :accept "text/plain;q=1,*/*;q=0" ; Kindly request that the server only return text/plain...
...even though this gets ignored a lot of the time 🙄 + :headers {"user-agent" "com.github.pmonks/lice-comb"}})] ; The request option is :headers (plural), and HTTP header names cannot contain spaces + (when (= :text/plain (:content-type response)) + (:body response))) + (catch Exception _ + nil)))) + +; TODO: THIS MAY BE UNNECESSARY AND IF SO SHOULD BE REMOVED +(comment +(defn listed-name->ids + "Returns the SPDX license and/or exception identifier(s) (a set) for + the given license name (matched case insensitively), or nil if there + aren't any. + + Note that SPDX license names are not guaranteed to be unique - see + https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" + [name] + (when-not (s/blank? name) + (get @lcis/index-name-to-id-d (s/trim (s/lower-case name))))) +) + +(defn uri->ids + "Returns the SPDX license and/or exception identifiers (a set) for the given + uri, or nil if there aren't any. It does this via two steps: + 1. Seeing if the given URI is in the license or exception list, and returning + the ids of the associated licenses and/or exceptions if so + 2. Attempting to retrieve the plain text content of the given URI and + performing full SPDX license matching on the result if there was one + + Notes on step 1: + 1. this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + See lice-comb.impl.utils/simplify-uri for exact details. + 2. URIs in the SPDX license and exception lists are not unique - the same URI + may represent multiple licenses and/or exceptions. + + The result has metadata attached that describes how the identifiers were + determined." + [uri] + (when-not (s/blank?
uri) + (manual-fixes + (let [suri (lcu/simplify-uri uri)] + ; First, see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) + (if-let [ids (get @lcis/index-uri-to-id-d suri)] + (with-meta ids (into {} (map #(vec [% {:type :concluded :confidence :high :strategy :spdx-listed-uri :source (list uri)}]) ids))) + ; Second, attempt to retrieve the text/plain contents of the uri and perform full license matching on it + (when-let [license-text (attempt-text-http-get uri)] + (when-let [ids (text->ids license-text)] + (let [metadata (lcu/mapfonv #(assoc % :source (conj (:source %) (str uri ""))) (meta ids))] ; Append to existing metadata returned from text->ids + (with-meta ids metadata))))))))) + +(defn- string->ids-info + "Converts the given String into a sequence of singleton maps, each of which + has a key that is an SPDX identifier (either a listed SPDX license or + exception id if the value is recognised, or a lice-comb specific 'unlisted' + LicenseRef if not), and whose value is meta-information about how that + identifier was found. The result sequence is ordered in the same order of + appearance as the source values in s. + + This involves: + 1. Seeing if it's a listed license or exception id + 2. Seeing if it's a listed license or exception name + 3. Checking if the value is a URI, and if so performing URI matching on it + 4. Using regexes to attempt to identify the license(s) and/or + exception(s) + 5. Returning a lice-comb specific 'unlisted' LicenseRef" + [s] + (when-not (s/blank? s) + ; 1. Is it an SPDX license or exception id? + (let [s (s/trim s)] + (if-let [id (get @lcis/spdx-ids-d (s/lower-case s))] + (if (= id s) + (list {id {:type :declared :strategy :spdx-listed-identifier-exact-match :source (list s)}}) + (list {id {:type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)}})) + ; 2. Is it an SPDX license or exception name?
+ (if-let [ids (get @lcis/index-name-to-id-d (s/trim (s/lower-case s)))] + (map #(hash-map % {:type :concluded :confidence :low :strategy :spdx-listed-name :source (list s)}) ids) + ; 3. Is it a URI? If so, perform URI matching on it (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central) + (if-let [ids (uri->ids s)] + (mapcat #(list {(key %) (val %)}) (meta ids)) + ; 4. Attempt regex name matching + (if-let [ids (lcirm/match-regexes s)] + (map #(hash-map % {:type :concluded :confidence :low :strategy :regex-matching :source (list s)}) ids) + ; 5. Give up and return a lice-comb "unlisted" LicenseRef + (list {(lcis/name->unlisted s) {:type :concluded :confidence :low :strategy :unlisted :source (list s)}})))))))) + +(defn- filter-blanks + "Filter blank strings out of coll" + [coll] + (when (seq coll) + (seq (filter #(or (not (string? %)) (not (s/blank? %))) coll)))) + +(defn- map-split-and-interpose + "Maps over the given sequence, splitting strings using the given regex + and interposing the given value, returning a (flattened) sequence." + [re int coll] + (mapcat #(if-not (string? %) + [%] + (let [splits (s/split % re)] + (if (nil? int) + splits + (interpose int splits)))) + coll)) + +(defn split-on-operators + "Case insensitively splits a string based on license operators (and, + or, with), but only if they're not also part of a license name (e.g. + 'Common Development and Distribution License', 'GNU General Public + License version 2.0 or (at your option) any later version', etc.)." + [s] + (when-not (s/blank?
s) + (->> (s/split (s/trim s) #"(?i)\band[/\\-]+or\b") ; '-' is placed last in the class so that it is a literal rather than forming the range '/' to '\' + (map-split-and-interpose #"(?i)(\band\b|\&)(?!\s+(distribution|all\s+rights\s+reserved))" :and) ; Trailing \b stops operators matching the start of longer words (e.g. 'Android') + (map-split-and-interpose #"(?i)\bor\b(?!\s*(-?later|lator|newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?|([\"']?(Revised|Modified)[\"']?)))" :or) + (map-split-and-interpose #"(?i)\b(with\b|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)" :with) + filter-blanks + (map #(if (string? %) (s/trim %) %))))) + +(def ^:private push conj) ; With lists-as-stacks conj == push + +(defn- process-expression-element + "Processes a single new expression element e (either a keyword representing + an SPDX operator, or an SPDX identifier) in the context of stack (list) s." + [s e] + (if (keyword? e) + ; e is a keyword (SPDX operator): only push a keyword if the prior element was an id, or it's different to the prior keyword + (if (= (peek s) e) + s + (push s e)) + ; e is an SPDX identifier: depending on how many keywords are currently at the top of s... + (case (count (take-while keyword? s)) + ; No keywords? Push e onto s + 0 (push s e) + ; One keyword? See if we should "collapse" the prior value, the keyword and e into an SPDX expression fragment and push the result onto s + 1 (let [kw (peek s) + operator (s/upper-case (name kw)) + s-minus-1 (pop s) + prior (peek s-minus-1) + s-minus-2 (if (seq s-minus-1) (pop s-minus-1) s-minus-1)] ; pop throws on an empty list, so only pop when there is a prior element + (if (nil? prior) + (push s-minus-2 e) ; s had one keyword on it (which is invalid), so drop it and push e on + (if (or (not= :with kw) ; If the prior keyword was :and or :or, or :with and the current element is a listed exception id, build an SPDX expression fragment and push the result onto s + (se/listed-id? e)) + (push s-minus-2 (s/join " " [prior operator e])) + (push s-minus-1 e)))) ; We had a :with operator without a valid exception id following it, so simply drop the :with keyword from the stack and push the current element on + ; Many keywords?
That's invalid (since we dedupe them when they get pushed on, so this means they're different), so drop all of them and push e onto s + (push (drop-while keyword? s) e)))) + +(defn- build-spdx-expressions + "Builds a set of SPDX expressions from the given list of strings & keywords." + [l] + (loop [result '() + f (first l) + r (rest l)] + (if f + (recur (process-expression-element result f) (first r) (rest r)) + (some-> (seq (reverse result)) ; Remember to reverse the expressions, since lists-as-stacks grow at the front, not the end + set + manual-fixes)))) + +(defn attempt-to-build-expressions + "Attempts to build SPDX expression(s) (a set of strings) from the + given name. The result has metadata attached that describes how the + identifiers were determined." + [name] + (when-let [partial-expressions (some->> (split-on-operators name) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + (map #(if (keyword? %) % (string->ids-info %))) + flatten + seq)] + (let [spdx-expressions (build-spdx-expressions (map #(if (keyword? %) % (first (keys %))) partial-expressions)) + metadata (into {} (filter (complement keyword?) partial-expressions))] + (with-meta spdx-expressions metadata)))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it. + + Note: this method has a substantial performance cost." + [] + (lcis/init!) + (lcirm/init!) 
+ @http-client-d + nil) diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj new file mode 100644 index 0000000..8e4b6f8 --- /dev/null +++ b/src/lice_comb/impl/regex_matching.clj @@ -0,0 +1,346 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.regex-matching + "Helper functionality focused on regex matching. Note: this namespace is not + part of the public API of lice-comb and may change without notice." + (:require [clojure.string :as s] + [rencg.api :as rencg] + [lice-comb.impl.spdx :as lcis] + [lice-comb.impl.utils :as lcu])) + +(defn- get-rencgs + "Get a value for an re-ncg, potentially looking at multiple ncgs in order + until a non-blank value is found. Also trims and lower-cases the value, and + replaces all whitespace with a single space." + ([m names] (get-rencgs m names nil)) + ([m names default] + (loop [f (first names) + r (rest names)] + (if f + (let [value (get m f)] + (if (s/blank? value) + (recur (first r) (rest r)) + (-> value + (s/trim) + (s/lower-case) + (s/replace #"\s+" " ")))) + default)))) + +(defn- assert-listed-id + "Checks that the id is a listed SPDX identifier (license or exception) and + throws if not. Returns the id." + [id] + (if (or (contains? @lcis/license-ids-d id) + (contains? 
@lcis/exception-ids-d id)) + id + (throw (ex-info (str "Invalid SPDX id constructed: '" id + "'' - please raise an issue at " + "https://github.com/pmonks/lice-comb/issues/new?assignees=pmonks&labels=bug&template=Invalid_id_constructed.md&title=Invalid+SPDX+identifer+constructed:+" id) + {:id id})))) + +(defn- generic-id-constructor + "A generic SPDX id constructor which works for many simple regexes." + [m] + (when m + (let [id (str (:id m) + (when-let [ver (get-rencgs m ["version"] (:latest-ver m))] + (str "-" + ver + (when (and (:pad-ver? m) + (not (s/includes? ver "."))) + ".0"))))] + (assert-listed-id id)))) + +(defn- number-name-to-number + "Converts the name of a number to that number (as a string). e.g. + \"two\" -> \"2\". Returns s unchanged if it's not a number name." + [^String s] + (when s + (case s + "two" "2" + "three" "3" + "four" "4" + s))) + +(defn- bsd-id-constructor + "An SPDX id constructor specific to the BSD family of licenses." + [m] + (let [clause-count1 (number-name-to-number (get-rencgs m ["clausecount1"])) + clause-count2 (number-name-to-number (get-rencgs m ["clausecount2"])) + preferred-clause-count (case [(lcu/is-digits? clause-count1) (lcu/is-digits? clause-count2)] + [true true] clause-count1 + [true false] clause-count1 + [false true] clause-count2 + (if (contains? 
#{"simplified" "new" "revised" "modified" "aduna"} clause-count1) + clause-count1 + clause-count2)) + clause-count (case preferred-clause-count + ("2" "simplified") "2" + ("3" "new" "revised" "modified" "aduna") "3" + "4") ; Note: we default to 4 clause, since it was the original form of the BSD license + suffix (case (get-rencgs m ["suffix"]) + "patent" "Patent" + "views" "Views" + "attribution" "Attribution" + "clear" "Clear" + "lbnl" "LBNL" + "modification" "Modification" + ("no military license" "no military licence") "No-Military-License" + ("no nuclear license" "no nuclear licence") "No-Nuclear-License" + ("no nuclear license 2014" "no nuclear licence 2014") "No-Nuclear-License-2014" + "no nuclear warranty" "No-Nuclear-Warranty" + "open mpi" "Open-MPI" + "shortened" "Shortened" + "uc" "UC" + nil) + base-id (str (:id m) "-" clause-count "-Clause") + id-with-suffix (str base-id "-" suffix)] + (if (contains? @lcis/license-ids-d id-with-suffix) ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it + id-with-suffix + (assert-listed-id base-id)))) + +(defn- cc-id-constructor + "An SPDX id constructor specific to the Creative Commons family of licenses." + [m] + (let [nc? (not (s/blank? (get-rencgs m ["noncommercial"]))) + nd? (not (s/blank? (get-rencgs m ["noderivatives"]))) + sa? (not (s/blank? (get-rencgs m ["sharealike"]))) + version (let [ver (s/replace (get-rencgs m ["version"] (:latest-ver m)) #"\p{Punct}+" ".")] + (if (s/includes? ver ".") + ver + (str ver ".0"))) + base-id (str "CC-BY-" + (when nc? "NC-") + (when nd? "ND-") + (when (and (not nd?) sa?) 
"SA-") ; SA and ND are incompatible (and have no SPDX id as a result), and if both are (erroneously) specified we conservatively choose ND + version) + region (case (get-rencgs m ["region"]) + "australia" "AU" + "austria" "AT" + ("england" "england and wales" "england & wales" "uk") "UK" + "france" "FR" + "germany" "DE" + "igo" "IGO" + "japan" "JP" + "netherlands" "NL" + ("united states" "usa" "us") "US" + nil) + id-with-region (str base-id (when-not (s/blank? region) (str "-" region)))] + (if (contains? @lcis/license-ids-d id-with-region) ; Not all license variants and versions have a region specific identifier, so check that it's valid before returning it + id-with-region + (assert-listed-id base-id)))) + +(defn- gpl-id-constructor + "An SPDX id constructor specific to the GNU family of licenses." + [m] + (let [variant (cond (contains? m "agpl") "AGPL" + (contains? m "lgpl") "LGPL" + (contains? m "gpl") "GPL") + version (let [ver (s/replace (get-rencgs m ["version"] (:latest-ver m)) #"\p{Punct}+" ".")] + (if (s/includes? ver ".") + ver + (str ver ".0"))) + suffix (if (contains? 
m "orLater") + "or-later" + "only") ; Note: we (conservatively) default to "only" when we don't have an explicit suffix + id (str variant "-" version "-" suffix)] + (assert-listed-id id))) + +(defn- simple-regex-match + "Constructs a 'simple' name match structure" + [s] + {:id s + :regex (re-pattern (str "(?i)\\b" s "\\b")) + :fn (constantly s)}) + +; The regex for the GNU family is a nightmare, so we build it up (and test it) in pieces +(def agpl-re #"(?AGPL|Affero)(\s+GNU)?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?AGPL\)?)?") +(def lgpl-re #"(?L\s?GPL|GNU\s+(Library|Lesser)|(Library|Lesser)\s+(L?GPL|General\s+Public\s+Licen[cs]e))(\s+or\s+Lesser)?(\s+General)?(\s+Pub?lic)?(\s+Licen[cs]e)?(\s+\(?LGPL\)?)?") +(def gpl-re #"(?GNU(?!\s+Classpath)|(?\d+([\._]\d+)?)?") +(def only-or-later-re #"[\s-]*((?only)|(\(?or(\s+\(?at\s+your\s+(option|discretion)\)?)?(\s+any)?)?([\s-]*(?later|lator|newer|\+)))?") +(def gnu-re (lcu/re-concat "(?x)(?i)\\b(\n# Alternative 1: AGPL\n" + agpl-re + "\n# Alternative 2: LGPL\n|" + lgpl-re + "\n# Alternative 3: GPL\n|" + gpl-re + "\n)\n# Version\n" + version-re + "\n# Only/or-Later suffix\n" + only-or-later-re)) + +; Regexes used for license name matching, along with functions for constructing an SPDX id from them +(def ^:private license-name-matching-d (delay + (concat + ; By default we add most SPDX ids as "simple" regex matches + (map simple-regex-match (disj @lcis/license-ids-d "MIT" "Zlib")) + (map simple-regex-match (disj @lcis/exception-ids-d "Classpath-exception-2.0")) + [ + {:id "AFL" + :regex #"(?i)\bAcademic(\s+Free)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "3.0"} + {:id "Apache" + :regex #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?!.*acknowledgment\s+clause\s+removed)\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "2.0"} + {:id "Artistic" + :regex #"(?i)\bArtistic\s+Licen[cs]e(\s*V(ersion)?)?[\s,-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "Beerware" + :regex #"(?i)\bBeer-?ware\b" + :fn (constantly "Beerware")} + {:id "BSL" + :regex #"(?i)\bBoost(\s+Software)?(\s+Licen[cs]e)?[\s,-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.0"} + {:id "BSD" + :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?" + :fn bsd-id-constructor} + {:id "CC0" + :regex #"(?i)\bCC\s*0" + :fn (constantly "CC0-1.0")} + {:id "CECILL" + :regex #"(?i)\bCeCILL(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?(\s+Agreement)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.1"} + {:id "Classpath-exception" + :regex #"(?i)\bClasspath[\s-]+exception(\s*V(ersion)?)?[\s-]*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "CDDL" + :regex #"(?i)(CDDL|Common\s+Development\s+(and|\&)?\s+Distribution\s+Licen[cs]e)(\s+\(?CDDL\)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.1"} + {:id "CPL" + :regex #"(?i)Common\s+Public\s+Licen[cs]e[\s,-]*(\s*V(ersion)?)?(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "1.0"} + {:id "Creative commons family" + :regex #"(?i)(\bCC\sBY|Creative[\s-]+Commons(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?" + :fn cc-id-constructor + :pad-ver? true + :latest-ver "4.0"} + {:id "EPL" ; Eclipse Public License (EPL) - v 1.0 + :regex #"(?i)\b(EPL|Eclipse(\s+Public)?(\s+Licen?[cs]e)?)(\s*\(EPL\))?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" ; Note: optional "n" in "license" is because of a known typo + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "EUPL" + :regex #"(?i)\bEuropean\s+Union(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(\(?EUPL\)?)?[\s,-]*(V(ersion)?)?(\.)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.2"} + {:id "FreeBSD" + :regex #"(?i)\bFreeBSD\b" + :fn (constantly "BSD-2-Clause-FreeBSD")} + {:id "GNU license family" + :regex gnu-re + :fn gpl-id-constructor + :pad-ver? true + :latest-ver 3.0} + {:id "Hippocratic" + :regex #"(?i)\bHippocratic\b" + :fn (constantly "Hippocratic-2.1")} ; There are no other listed versions of this license + {:id "LLVM-exception" + :regex #"(?i)\bLLVM[\s-]+Exception\b" + :fn (constantly "LLVM-exception")} + {:id "MIT" + :regex #"(?i)\b(MIT|Bouncy\s+Castle)(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" + :fn (constantly "MIT")} + {:id "MPL" + :regex #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? 
true + :latest-ver "2.0"} + {:id "MX4J" + :regex #"(?i)\bMX4J\s+Licen[cs]e(,?\s+v(ersion)?\s*1\.0)?\b" + :fn (constantly "Apache-1.1")} ; See https://github.com/spdx/license-list-XML/pull/594 - the MX4J license *is* the Apache-1.1 license, according to SPDX + {:id "NASA" + :regex #"(?i)\bNASA(\s+Open)?(\s+Source)?(\s+Agreement)?[\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "1.3"} + {:id "Plexus" + :regex #"(?i)\bApache\s+Licen[cs]e(\s+but)?(\s+with)?(\s+the)?\s+acknowledgment\s+clause\s+removed\b" + :fn (constantly "Plexus")} + {:id "Proprietary or commercial" + :regex #"(?i)\b(Propriet[ao]ry|Commercial|All\s+Rights\s+Reserved|Private)\b" + :fn lcis/proprietary-commercial} + {:id "Public Domain" + :regex #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)" + :fn lcis/public-domain} + {:id "Ruby" + :regex #"(?i)\bRuby(\s+Licen[cs]e)?\b" + :fn (constantly "Ruby")} + {:id "SGI-B" + :regex #"(?i)\bSGI(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?)?\b" + :fn generic-id-constructor + :pad-ver? true + :latest-ver "2.0"} + {:id "Unlicense" + :regex #"(?i)\bUnlicen[cs]e\b" + :fn (constantly "Unlicense")} + {:id "WTFPL" + :regex #"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b" + :fn (constantly "WTFPL")} + {:id "Zlib" + :regex #"\b(?i)zlib(?![\s/]+libpng)\b" + :fn (constantly "Zlib")} + ]))) + +(defn- match-regex + "Returns a map containing the SPDX :id and :start index of the given + regex in the string if a match occurred, or nil if there was no match." + [s elem] + (when-let [match (rencg/re-find-ncg (:regex elem) s)] + {:id ((:fn elem) (merge {:name s} elem match)) + :start (:start match)})) + +(defn match-regexes + "Returns a sequence (NOT A SET!) of the matched SPDX license or + exception ids for the given string, or nil if there were no matches. + Results are in the order in which they appear in the string." 
+ [s] + (some->> (seq (filter identity (pmap (partial match-regex s) @license-name-matching-d))) + (sort-by :start) + (map :id) + distinct)) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it. + + Note: this method has a substantial performance cost." + [] + (lcis/init!) + @license-name-matching-d + nil) diff --git a/src/lice_comb/impl/spdx.clj b/src/lice_comb/impl/spdx.clj new file mode 100644 index 0000000..20fbe96 --- /dev/null +++ b/src/lice_comb/impl/spdx.clj @@ -0,0 +1,137 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.spdx + "SPDX-related functionality. Note: this namespace is not part of the public + API of lice-comb and may change without notice." + (:require [clojure.string :as s] + [spdx.licenses :as sl] + [spdx.exceptions :as se] + [lice-comb.impl.utils :as lcu])) + +; The subset of SPDX license identifiers that we use; specifically excludes the superceded deprecated GPL family identifiers +(def license-ids-d + (delay + (disj (set (filter #(not (s/ends-with? 
% "+")) (sl/ids))) + "AGPL-1.0" "AGPL-3.0" "GPL-1.0" "GPL-2.0" "GPL-3.0" "LGPL-2.0" "LGPL-2.1" "LGPL-3.0" + "GPL-2.0-with-autoconf-exception" "GPL-2.0-with-bison-exception" "GPL-2.0-with-classpath-exception" + "GPL-2.0-with-font-exception" "GPL-2.0-with-GCC-exception" "GPL-3.0-with-autoconf-exception" + "GPL-3.0-with-GCC-exception"))) + +; The subset of SPDX exception identifiers that we use; right now this is all of them (placeholder in case we need to use a subset in future) +(def exception-ids-d (delay (se/ids))) + +; The license and exception lists +(def license-list-d (delay (map sl/id->info @license-ids-d))) +(def exception-list-d (delay (map se/id->info @exception-ids-d))) + +; The unlisted license refs lice-comb uses (note: the unlisted one usually has a hyphen then a base62 suffix appended) +(def ^:private public-domain-license-ref "LicenseRef-lice-comb-PUBLIC-DOMAIN") +(def ^:private proprietary-commercial-license-ref "LicenseRef-lice-comb-PROPRIETARY-COMMERCIAL") +(def ^:private unlisted-license-ref-prefix "LicenseRef-lice-comb-UNLISTED") + +; Lower case id map +(def spdx-ids-d (delay (merge (into {} (map #(vec [(s/lower-case %) %]) @license-ids-d)) + (into {} (map #(vec [(s/lower-case %) %]) @exception-ids-d))))) + +(defn- name-to-id-tuple + [list-entry] + [(s/lower-case (s/trim (:name list-entry))) (:id list-entry)]) + +(def index-name-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @license-list-d))) + (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @exception-list-d)))))) + +(defn- urls-to-id-tuples + "Extracts all urls for a given list (license or exception) entry." + [list-entry] + (let [id (:id list-entry) + simplified-uris (map lcu/simplify-uri (filter (complement s/blank?) 
(concat (:see-also list-entry) (get-in list-entry [:cross-refs :url]))))] + (map #(vec [% id]) simplified-uris))) + +(def index-uri-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @license-list-d))) + (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @exception-list-d)))))) + +(defn public-domain? + "Is the given id lice-comb's custom 'public domain' LicenseRef?" + [id] + (= (s/lower-case id) (s/lower-case public-domain-license-ref))) + +(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) + representing public domain." + :arglists '([])} + public-domain + (constantly public-domain-license-ref)) + +(defn proprietary-commercial? + "Is the given id lice-comb's custom 'proprietary / commercial' LicenseRef?" + [id] + (= (s/lower-case id) (s/lower-case proprietary-commercial-license-ref))) + +(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) + representing a proprietary / commercial license." + :arglists '([])} + proprietary-commercial + (constantly proprietary-commercial-license-ref)) + +(defn unlisted? + "Is the given id a lice-comb custom 'unlisted' LicenseRef?" + [id] + (when id + (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-ref-prefix)))) + +(defn name->unlisted + "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) for an + unlisted license, with the given name appended as Base62 (since clj-spdx + identifiers are basically constrained to [A-Z][a-z][0-9] ie. Base62)." + [name] + (str unlisted-license-ref-prefix (when-not (s/blank? name) (str "-" (lcu/base62-encode name))))) + +(defn unlisted->name + "Get the original name of the given unlisted license. Returns nil if id is nil + or is not a lice-comb's unlisted LicenseRef." + [id] + (when (unlisted? 
id) + (str "Unlisted (" + (if (> (count id) (count unlisted-license-ref-prefix)) + (lcu/base62-decode (subs id (+ 2 (count unlisted-license-ref-prefix)))) + "-original name not available-") + ")"))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it. + + Note: this method has a substantial performance cost." + [] + ; Parallelise initialisation of the spdx.licenses and spdx.exceptions namespaces, as they're both sloooooooow (~1.5 mins total) + (let [sl-init (future (sl/init!)) + se-init (future (se/init!))] + @sl-init + @se-init) + + ; Serially initialise this namespace's dependent state - they're all pretty fast (< 1s) + @license-ids-d + @exception-ids-d + @license-list-d + @exception-list-d + @spdx-ids-d + @index-uri-to-id-d + @index-name-to-id-d + nil) diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index 79feb7c..6190d95 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -50,6 +50,13 @@ [^String s] (when s (s/trim s))) +(defn is-digits? + "Does the given string contains digits only?" + [^String s] + (boolean ; Eliminate nil-punning + (when-not (s/blank? s) + (every? #(Character/isDigit ^Character %) s)))) + (defn nset "nil preserving version of clojure.core/set" [coll] @@ -81,6 +88,11 @@ \> "\\>" }))) +(defn re-concat + "Concatenate all of the given regexes or strings into a single regex." + [& res] + (re-pattern (apply str res))) + (defn base62-encode "Encodes the given string to Base62/UTF-8." 
[^String s] diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index 751a2ff..a43a572 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -19,86 +19,28 @@ (ns lice-comb.matching "Matching functionality, some of which is provided by https://github.com/pmonks/clj-spdx" - (:require [clojure.string :as s] - [clojure.set :as set] - [clojure.java.io :as io] - [clojure.pprint :as pp] - [hato.client :as hc] - [spdx.licenses :as sl] - [spdx.exceptions :as se] - [spdx.matching :as sm] - [spdx.expressions :as sexp] - [rencg.api :as rencg] - [lice-comb.impl.3rd-party :as lc3] - [lice-comb.impl.utils :as lcu])) - -; The subset of SPDX license identifiers that we use; specifically excludes the deprecated 'historical oddity' GPL family identifiers -(def ^:private license-ids-d - (delay - (disj (set (filter #(not (s/ends-with? % "+")) (sl/ids))) - "AGPL-1.0" "AGPL-3.0" "GPL-1.0" "GPL-2.0" "GPL-3.0" "LGPL-2.0" "LGPL-2.1" "LGPL-3.0"))) - -; The subset of SPDX exception identifiers that we use; right now this is all of them (this is a placeholder) -(def ^:private exception-ids-d (delay (se/ids))) - -; The license and exception lists -(def ^:private license-list-d (delay (map sl/id->info @license-ids-d))) -(def ^:private exception-list-d (delay (map se/id->info @exception-ids-d))) - -; The unlisted license refs lice-comb uses (note: the unlisted one usually has a base62 suffix appended) -(def ^:private public-domain-license-ref "LicenseRef-lice-comb-PUBLIC-DOMAIN") -(def ^:private proprietary-commercial-license-ref "LicenseRef-lice-comb-PROPRIETARY-OR-COMMERCIAL") -(def ^:private unlisted-license-ref-prefix "LicenseRef-lice-comb-UNLISTED") - -; Lower case id map -(def ^:private spdx-ids-d (delay (merge (into {} (map #(vec [(s/lower-case %) %]) @license-ids-d)) - (into {} (map #(vec [(s/lower-case %) %]) @exception-ids-d))))) + (:require [clojure.string :as s] + [spdx.licenses :as sl] + [spdx.exceptions :as se] + [spdx.expressions :as sexp] + 
[lice-comb.impl.spdx :as lcis] + [lice-comb.impl.matching :as lcim] + [lice-comb.impl.utils :as lcu])) (defn public-domain? "Is the given id lice-comb's custom 'public domain' LicenseRef?" [id] - (= (s/lower-case id) (s/lower-case public-domain-license-ref))) - -(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) - representing public domain." - :arglists '([])} - public-domain - (constantly public-domain-license-ref)) + (lcis/public-domain? id)) -(defn proprietary-or-commercial? - "Is the given id lice-comb's custom 'proprietary or commercial' LicenseRef?" +(defn proprietary-commercial? + "Is the given id lice-comb's custom 'proprietary / commercial' LicenseRef?" [id] - (= (s/lower-case id) (s/lower-case proprietary-commercial-license-ref))) - -(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) - representing a proprietary or commercial license." - :arglists '([])} - proprietary-or-commercial - (constantly proprietary-commercial-license-ref)) + (lcis/proprietary-commercial? id)) (defn unlisted? "Is the given id a lice-comb custom 'unlisted' LicenseRef?" [id] - (when id - (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-ref-prefix)))) - -(defn name->unlisted - "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) for an - unlisted license, with the given name appended as Base62 (since clj-spdx - identifiers are basically constrained to [A-Z][a-z][0-9] ie. Base62)." - [name] - (str unlisted-license-ref-prefix (when-not (s/blank? name) (str "-" (lcu/base62-encode name))))) - -(defn unlisted->name - "Get the original name of the given unlisted license. Returns nil if id is nil - or is not a lice-comb's unlisted LicenseRef." - [id] - (when (unlisted? id) - (str "Unlisted (" - (if (> (count id) (count unlisted-license-ref-prefix)) - (lcu/base62-decode (subs id (+ 2 (count unlisted-license-ref-prefix)))) - "-original name not available-") - ")"))) + (lcis/unlisted? 
id)) (defn id->name "Returns the human readable name of the given license or exception identifier; @@ -107,41 +49,14 @@ verbatim if unable to determine a name. Returns nil if the id is blank." [id] (when-not (s/blank? id) - (cond (sl/listed-id? id) (:name (sl/id->info id)) - (se/listed-id? id) (:name (se/id->info id)) - (public-domain? id) "Public domain" - (proprietary-or-commercial? id) "Proprietary or commercial" - (unlisted? id) (unlisted->name id) - :else id))) - -(defn- fix-public-domain-cc0 - [ids] - (if (and (contains? ids public-domain-license-ref) - (contains? ids "CC0-1.0")) - (disj ids public-domain-license-ref) - ids)) - -(defn- fix-ids-that-end-with-plus - [ids] - (some-> (seq (map #(s/replace % #"\+\z" "-or-later") ids)) ; Note: assumes that all SPDX license identifiers that end in '+' also have a variant that ends in '-or-later' (which is known to be true up to 2023-07-01, and I expect to remain true going forward thanks to SPDX expressions) - set)) - -(defn- fix-classpath-exception - [ids] - (if (contains? ids "GPL-2.0-with-classpath-exception") - (conj (disj ids "GPL-2.0-with-classpath-exception") "GPL-2.0-only" "Classpath-exception-2.0") - ids)) - -(defn- manual-fixes - "Manually fix certain combinations of license identifiers." - [ids] - (when ids - (-> ids - fix-public-domain-cc0 - fix-ids-that-end-with-plus - fix-classpath-exception))) - -(defmulti text->ids + (cond (sl/listed-id? id) (:name (sl/id->info id)) + (se/listed-id? id) (:name (se/id->info id)) + (public-domain? id) "Public domain" + (proprietary-commercial? id) "Proprietary/commercial" + (unlisted? id) (lcis/unlisted->name id) + :else id))) + +(defn text->ids "Attempts to determine the SPDX license and/or exception identifier(s) (a set) within the given license text (a String, Reader, InputStream, or something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). 
@@ -152,76 +67,12 @@ * the caller is expected to open & close a Reader or InputStream passed to this function (e.g. using clojure.core/with-open) * you cannot pass a String representation of a filename to this method - you - should pass filenames through clojure.java.io/file first" - {:arglists '([text])} - type) + should pass filenames through clojure.java.io/file first -(defmethod text->ids java.lang.String - [s] - ; These clj-spdx APIs are *expensive*, so we paralellise them - (let [f-lic (future (sm/licenses-within-text s @license-ids-d)) - f-exc (future (sm/exceptions-within-text s))] - (manual-fixes (set/union @f-lic @f-exc)))) - -(defmethod text->ids java.io.Reader - [r] - (let [sw (java.io.StringWriter.)] - (io/copy r sw) - (text->ids (str sw)))) - -(defmethod text->ids java.io.InputStream - [is] - (text->ids (io/reader is))) - -(defmethod text->ids :default - [src] - (when src - (with-open [r (io/reader src)] - (text->ids r)))) - -(defn- urls-to-id-tuples - "Extracts all urls for a given list (license or exception) entry." - [list-entry] - (let [id (:id list-entry) - simplified-uris (map lcu/simplify-uri (filter (complement s/blank?) (concat (:see-also list-entry) (get-in list-entry [:cross-refs :url]))))] - (map #(vec [% id]) simplified-uris))) - -(def ^:private index-uri-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @license-list-d))) - (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @exception-list-d)))))) - -(def ^:private http-client-d (delay (hc/build-http-client {:connect-timeout 1000 - :redirect-policy :always - :cookie-policy :none}))) - -(defn- github-raw-uri - "Converts a GitHub UI URI into a GitHub CDN URI. - e.g. https://github.com/pmonks/lice-comb/blob/main/LICENSE -> https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE - - If the given URI is not a GitHub UI URI, returns the input unchanged." 
- [uri] - (if-let [^java.net.URL uri-obj (try (io/as-url uri) (catch Exception _ nil))] - (if (= "github.com" (s/lower-case (.getHost uri-obj))) - (-> uri - (s/replace #"(?i)github\.com" "raw.githubusercontent.com") - (s/replace "/blob/" "/")) - uri) - uri)) - -(defn- attempt-text-http-get - "Attempts to get plain text as a String from the given URI, returning nil if - unable to do so (including for error conditions - there is no way to - disambiguate errors from non-text content, for example)." - [uri] - (when (lcu/valid-http-uri? uri) - (try - (when-let [response (hc/get (github-raw-uri uri) - {:http-client @http-client-d - :accept "text/plain;q=1,*/*;q=0" ; Kindly request that the server only return text/plain... ...even though this gets ignored a lot of the time 🙄 - :header {"user agent" "com.github.pmonks/lice-comb"}})] - (when (= :text/plain (:content-type response)) - (:body response))) - (catch Exception _ - nil)))) + The result has metadata attached that describes how the identifiers were + determined." + [text] + (lcim/text->ids text)) (defn uri->ids "Returns the SPDX license and/or exception identifiers (a set) for the given @@ -238,477 +89,37 @@ ignoring extensions representing MIME types (.txt vs .html, etc.), etc. See lice-comb.impl.utils/simplify-uri for exact details. 2. URIs in the SPDX license and exception lists are not unique - the same URI - may represent multiple licenses and/or exceptions." - [uri] - (when-not (s/blank? 
uri) - (manual-fixes - (let [suri (lcu/simplify-uri uri)] - ; First, see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) - (if-let [result (get @index-uri-to-id-d suri)] - result - ; Second, attempt to retrieve the text/plain contents of the uri and perform full license matching on it - (when-let [license-text (attempt-text-http-get uri)] - (text->ids license-text))))))) - -(defn- name-to-id-tuple - [list-entry] - [(s/lower-case (s/trim (:name list-entry))) (:id list-entry)]) - -(def ^:private index-name-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @license-list-d))) - (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @exception-list-d)))))) - -(defn- listed-name->ids - "Returns the SPDX license and/or exception identifier(s) (a set) for the given license name - (matched case insensitively), or nil if there aren't any. + may represent multiple licenses and/or exceptions. - Note that SPDX license names are not guaranteed to be unique - see - https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" - [name] - (when-not (s/blank? name) - (get @index-name-to-id-d (s/trim (s/lower-case name))))) - -(defn- parse-expression-and-extract-ids - "Parse s as if it were an SPDX expression, and if it is, extract all ids - (for licenses and exceptions) out of it." - [s] - (when-let [expression (sexp/parse s)] - (sexp/extract-ids expression))) - -(defn- get-rencgs - "Get a value for an re-ncg, potentially looking at multiple ncgs in order until a non-blank value is found. Also trims and lower-cases the value, and replaces all whitespace with a single space." - ([m names] (get-rencgs m names nil)) - ([m names default] - (loop [f (first names) - r (rest names)] - (if f - (let [value (get m f)] - (if (s/blank? 
value) - (recur (first r) (rest r)) - (-> value - (s/trim) - (s/lower-case) - (s/replace #"\s+" " ")))) - default)))) - -(defn- assert-valid-id - [id] - (if (or (contains? @license-ids-d id) - (contains? (se/ids) id)) - id - (throw (ex-info "Invalid SPDX id constructed" {:id id})))) - -(defn- generic-id-constructor - [m] - (when m - (let [id (str (:id m) - (when-let [ver (get-rencgs m ["version"] (:latest-ver m))] - (str "-" - ver - (when (and (:pad-ver? m) - (not (s/includes? ver "."))) - ".0"))))] - (assert-valid-id id)))) - -(defn- number-name-to-number - "Converts the name of a number to that number (as a string). e.g. \"two\" -> \"2\". Returns s unchanged if it's not a number name." - [^String s] - (when s - (case s - "two" "2" - "three" "3" - "four" "4" - s))) - -(defn- is-digits? - "Does the given string contains digits only?" - [^String s] - (boolean ; Eliminate nil-punning, since we use the output of this method in case - (when s - (every? #(Character/isDigit ^Character %) s)))) - -(defn- bsd-id-constructor - [m] - (let [clause-count1 (number-name-to-number (get-rencgs m ["clausecount1"])) - clause-count2 (number-name-to-number (get-rencgs m ["clausecount2"])) - preferred-clause-count (case [(is-digits? clause-count1) (is-digits? clause-count2)] - [true true] clause-count1 - [true false] clause-count1 - [false true] clause-count2 - (if (contains? 
#{"simplified" "new" "revised" "modified" "aduna"} clause-count1) - clause-count1 - clause-count2)) - clause-count (case preferred-clause-count - ("2" "simplified") "2" - ("3" "new" "revised" "modified" "aduna") "3" - "4") ; Note: we default to 4 clause, since it was the original form of the BSD license - suffix (case (get-rencgs m ["suffix"]) - "patent" "Patent" - "views" "Views" - "attribution" "Attribution" - "clear" "Clear" - "lbnl" "LBNL" - "modification" "Modification" - ("no military license" "no military licence") "No-Military-License" - ("no nuclear license" "no nuclear licence") "No-Nuclear-License" - ("no nuclear license 2014" "no nuclear licence 2014") "No-Nuclear-License-2014" - "no nuclear warranty" "No-Nuclear-Warranty" - "open mpi" "Open-MPI" - "shortened" "Shortened" - "uc" "UC" - nil) - base-id (str (:id m) "-" clause-count "-Clause") - id-with-suffix (str base-id "-" suffix)] - (if (contains? @license-ids-d id-with-suffix) ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it - id-with-suffix - (assert-valid-id base-id)))) - -(defn- cc-id-constructor - [m] - (let [nc? (not (s/blank? (get-rencgs m ["noncommercial"]))) - nd? (not (s/blank? (get-rencgs m ["noderivatives"]))) - sa? (not (s/blank? (get-rencgs m ["sharealike"]))) - version (get-rencgs m ["version1" "version2"] (:latest-ver m)) - base-id (str "CC-BY-" - (when nc? "NC-") - (when nd? "ND-") - (when (and (not nd?) sa?) "SA-") ; SA and ND are incompatible (and have no SPDX id as a result), and if both are (erroneously) specified we conservatively choose ND - version) - region (case (get-rencgs m ["region"]) - "australia" "AU" - "austria" "AT" - ("england" "england and wales" "england & wales" "uk") "UK" - "france" "FR" - "germany" "DE" - "igo" "IGO" - "japan" "JP" - "netherlands" "NL" - ("united states" "usa" "us") "US" - nil) - id-with-region (str base-id (when-not (s/blank? region) (str "-" region)))] - (if (contains? 
@license-ids-d id-with-region) ; Not all license variants and versions have a region specific identifier, so check that it's valid before returning it - id-with-region - (assert-valid-id base-id)))) - -(defn- gpl-id-constructor - [m] - (let [variant (case (get-rencgs m ["edition1" "edition2"]) - ("affero" "agpl") "AGPL" - ("lesser" "library" "lgpl") "LGPL" - "GPL") - version (let [ver (get-rencgs m ["version"] (:latest-ver m))] - (if (s/includes? ver ".") - ver - (str ver ".0"))) - suffix (case (get-rencgs m ["suffix1" "suffix2"]) - ("later" "newer" "+") "or-later" - ("only") "only" - "only") ; Note: we (conservatively) default to "only" when we don't have an explicit suffix - id (str variant "-" version "-" suffix)] - (assert-valid-id id))) - -(defn- simple-regex-match - "Constructs a 'simple' name match structure" - [s] - {:id s - :regex (re-pattern (str "(?i)\\b" s "\\b")) - :fn (constantly s)}) - -; Regexes used for license name matching, along with functions for constructing an SPDX id -(def ^:private license-name-matching-d (delay - (concat - ; By default we add most SPDX ids as "simple" regex matches - (map simple-regex-match (disj @license-ids-d "MIT" "Zlib")) ; We remove MIT and Zlib as they're special-cased below - (map simple-regex-match (se/ids)) - [ - {:id "AFL" - :regex #"(?i)\bAcademic(\s+Free)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "3.0"} - {:id "Apache" - :regex #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?!.*acknowledgment\s+clause\s+removed)\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "Artistic" - :regex #"(?i)\bArtistic\s+Licen[cs]e(\s*V(ersion)?)?[\s,-]*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? 
true - :latest-ver "2.0"} - {:id "Beerware" - :regex #"(?i)\bBeer-?ware\b" - :fn (constantly "Beerware")} - {:id "BSL" - :regex #"(?i)\bBoost(\s+Software)?(\s+Licen[cs]e)?[\s,-]*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.0"} - {:id "BSD" - :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?" - :fn bsd-id-constructor} - {:id "CC0" - :regex #"(?i)\bCC\s*0" - :fn (constantly "CC0-1.0")} - {:id "CECILL" - :regex #"(?i)\bCeCILL(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?(\s+Agreement)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.1"} - {:id "Classpath-exception" - :regex #"(?i)\bClasspath[\s-]+exception(\s*V(ersion)?)?[\s-]*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "CDDL" - :regex #"(?i)(CDDL|Common\s+Development\s+(and|\&)?\s+Distribution\s+Licen[cs]e)(\s+\(?CDDL\)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.1"} - {:id "CPL" - :regex #"(?i)Common\s+Public\s+Licen[cs]e[\s,-]*(\s*V(ersion)?)?(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.0"} - {:id "Creative commons family" - :regex #"(?i)\b(CC([\s-]+BY)?\b|(Creative\s+Commons(\s+Legal\s+Code)?(\s+Attribution)?|Attribution\s+(?\d(.\d)?)))([\s,-]*((?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(\s+Unported|International|Generic)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?\b" - :fn cc-id-constructor - :pad-ver? 
true - :latest-ver "4.0"} - {:id "EPL" ; Eclipse Public License (EPL) - v 1.0 - :regex #"(?i)\b(EPL|Eclipse(\s+Public)?(\s+Licen?[cs]e)?)(\s*\(EPL\))?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" ; Note: optional "n" in "license" is because of a known typo - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "EUPL" - :regex #"(?i)\bEuropean\s+Union(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(\(?EUPL\)?)?[\s,-]*(V(ersion)?)?(\.)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "1.2"} - {:id "FreeBSD" - :regex #"(?i)\bFreeBSD\b" - :fn (constantly "BSD-2-Clause-FreeBSD")} - {:id "GNU license family" - :regex #"(?i)\b(?(Affero|Lesser|Library|LGPL|AGPL)\s+)?(GPL|GNU(?!\s*Classpath)|General\s+Pub?lic\s+Licen[cs]e)(?\s+(Affero|Lesser|Library))?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?(A|L)?GPL\)?)?([\s,-]*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?\+)?\s*(or(\s+\(?at\s+your\s+(option|discretion)\)?)?)?(\s+any)?(\s*(?later|newer|only))?" - :fn gpl-id-constructor - :pad-ver? true - :latest-ver 3.0} - {:id "Hippocratic" - :regex #"(?i)\bHippocratic\b" - :fn (constantly "Hippocratic-2.1")} ; There are no other listed versions of this license - {:id "LLVM-exception" - :regex #"(?i)\bLLVM[\s-]+Exception\b" - :fn (constantly "LLVM-exception")} - {:id "MIT" - :regex #"(?i)\b(MIT|Bouncy\s+Castle)(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" - :fn (constantly "MIT")} - {:id "MPL" - :regex #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "MX4J" - :regex #"(?i)\bMX4J\s+Licen[cs]e(,?\s+v(ersion)?\s*1\.0)?\b" - :fn (constantly "Apache-1.1")} ; See https://github.com/spdx/license-list-XML/pull/594 - the MX4J license *is* the Apache-1.1 license, according to SPDX - {:id "NASA" - :regex #"(?i)\bNASA(\s+Open)?(\s+Source)?(\s+Agreement)?[\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" - :fn generic-id-constructor - :pad-ver? 
true - :latest-ver "1.3"} - {:id "Plexus" - :regex #"(?i)\bApache\s+Licen[cs]e(\s+but)?(\s+with)?(\s+the)?\s+acknowledgment\s+clause\s+removed\b" - :fn (constantly "Plexus")} - {:id "Proprietary or commercial" - :regex #"(?i)\b(Propriet[ao]ry|Commercial|All\s+Rights\s+Reserved|Private)\b" - :fn proprietary-or-commercial} - {:id "Public Domain" - :regex #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)" - :fn public-domain} - {:id "Ruby" - :regex #"(?i)\bRuby(\s+Licen[cs]e)?\b" - :fn (constantly "Ruby")} - {:id "SGI-B" - :regex #"(?i)\bSGI(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?)?\b" - :fn generic-id-constructor - :pad-ver? true - :latest-ver "2.0"} - {:id "Unlicense" - :regex #"(?i)\bUnlicen[cs]e\b" - :fn (constantly "Unlicense")} - {:id "WTFPL" - :regex #"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b" - :fn (constantly "WTFPL")} - {:id "Zlib" - :regex #"\b(?i)zlib(?![\s/]+libpng)\b" - :fn (constantly "Zlib")} - ]))) - -(defn- match-regex - "Returns a map containing the SPDX :id and :start index of the given - regex in the string if a match occurred, or nil if there was no match." - [s elem] - (when-let [match (rencg/re-find-ncg (:regex elem) s)] - {:id ((:fn elem) (merge {:name s} elem match)) - :start (:start match)})) - -(defn- match-regexes - "Returns a sequence (NOT A SET!) of the matched SPDX license or - exception ids for the given string, or nil if there were no matches. - Results are in the order in which they appear in the string." - [s] - (some->> (seq (filter identity (pmap (partial match-regex s) @license-name-matching-d))) - (sort-by :start) - (map :id))) - -(defn- filter-blanks - "Filter blank strings out of coll" - [coll] - (when (seq coll) - (seq (filter #(or (not (string? %)) (not (s/blank? 
%))) coll)))) - -(defn- map-split-and-interpose - "Maps over the given sequence, splitting strings using the given regex - and interposing the given value, returning a (flattened) sequence." - [re int coll] - (mapcat #(if-not (string? %) - [%] - (let [splits (s/split % re)] - (if (nil? int) - splits - (interpose int splits)))) - coll)) - -(defn- split-on-operators - "Case insensitively splits a string based on license operators (and, - or, with), but only if they're not also part of a license name (e.g. - 'Common Development and Distribution License', 'GNU General Public - License version 2.0 or (at your option) any later version', etc.)." - [s] - (when-not (s/blank? s) - (->> (s/split (s/trim s) #"(?i)\band[/-\\]+or\b") - (map-split-and-interpose #"(?i)(\band|\&)(?!\s+(distribution|all\s+rights\s+reserved))" :and) - (map-split-and-interpose #"(?i)\bor(?!\s*(-?later|lator|newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?))" :or) - (map-split-and-interpose #"(?i)\b(with|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)" :with) - filter-blanks))) - -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AS WELL AS SOURCE!!!! -(defn- string->ids - "Converts the given String into a sequence (NOT A SET!) of SPDX - identifier(s), each of which is a listed SPDX license or exception id - if the value is recognised, or a lice-comb specific 'unlisted' - LicenseRef if not. This involves: - 1. Seeing if it's a listed license or exception id - 2. Looking up the value in the names in the SPDX license and exception - lists - 3. If the value is a URI, performing URI matching with it - 4. Using regexes to attempt to identify the license(s) and/or - exception(s) - 5. Returning a lice-comb specific 'unlisted' LicenseRef" - [s] - (when-not (s/blank? s) - ; 1. Is it an SPDX license or exception id? - (let [s (s/trim s)] - (if-let [spdx-id (get @spdx-ids-d (s/lower-case s))] - [spdx-id] - ; 2. Is it an SPDX license or exception name? 
- (if-let [name-matches (listed-name->ids s)] - (vec name-matches) - ; 3. If it's a URI, perform URI matching on it (this is to handle some dumb corner cases that do exist in the real world) - (if-let [uri-matches (uri->ids s)] - (vec uri-matches) - ; 4. Attempt regex name matching - (if-let [re-name-matches (match-regexes s)] - (vec re-name-matches) - ; 5. Give up and return a lice-comb "unlisted" LicenseRef - [(name->unlisted s)]))))))) - -(def ^:private push conj) ; Because I won't remember in X years when I come back to this code that with lists-as-stacks conj == push - -(defn- process-expression-element - "Processes a single new expression element e (either a keyword representing - an SPDX operator, or an SPDX identifier) in the context of stack (list) s." - [s e] - (if (keyword? e) - ; e is a keyword (SPDX operator): only push a keyword if the prior element was an id, or it's different to the prior keyword - (if (= (peek s) e) - s - (push s e)) - ; e is a string (SPDX identifier): depending on how many keywords are currently at the top of s... - (case (count (take-while keyword? s)) - ; No keywords? Push e onto s - 0 (push s e) - ; One keyword? See if we should "collapse" the prior value, the keyword and e into an SPDX expression fragment and push the result onto s - 1 (let [kw (peek s) - operator (s/upper-case (name kw)) - s-minus-1 (pop s) - prior (peek s-minus-1) - s-minus-2 (pop s-minus-1)] - (if (nil? prior) - (push s-minus-2 e) ; s had one keyword on it (which is invalid), so drop it and push e on - (if (or (not= :with kw) ; If the prior keyword was :and or :or, or :with and the current element is a listed exception id, build an SPDX expression fragment and push the result onto s - (se/listed-id? e)) - (push s-minus-2 (s/join " " [prior operator e])) - (push s-minus-1 e)))) ; We had a :with operator without a valid exception id following it, so simply drop the :with keyword from the stack and push the current element on - ; Many keywords? 
That's invalid, so drop all of them and push e onto s - (push (drop-while keyword? s) e)))) ; Multiple keywords were found sequentially, so drop all of them and push the current element on - -(defn- build-spdx-expressions - "Builds a list of SPDX expression(s) from the given list containing strings and keywords." - [l] - (loop [result '() - f (first l) - r (rest l)] - (if f - (recur (process-expression-element result f) (first r) (rest r)) - (seq (reverse result))))) ; Remember to reverse the result, since lists-as-stacks grow at the front, not the end + The result has metadata attached that describes how the identifiers were + determined." + [uri] + (lcim/uri->ids uri)) -;####TODO: MAKE THIS FUNCTION RETURN METADATA ABOUT :concluded VS :declared AND SOURCE!!!! (defn name->expressions "Attempts to determine the SPDX license expression(s) (a set of Strings) from the given 'license name' (a String), or nil if there aren't any. This involves: 1. Determining whether the name is a valid SPDX license expression, and if so normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it - 2. constructing one or more SPDX license expressions by " + 2. attempting to construct one or more SPDX license expressions from the + name + + The result has metadata attached that describes how the identifiers were + determined." [name] (when-not (s/blank? name) (let [name (s/trim name)] ; 1. If it's a valid SPDX expression, return the normalised rendition of it in a set (if-let [normalised-expression (sexp/normalise name)] - #{normalised-expression} - ; 2. Is it an SPDX license or exception name? - (if-let [name-matches (listed-name->ids name)] - name-matches - ; 3. If it's a URI, perform URI matching on it (this is to handle some dumb corner cases that do exist in the real world) - (if-let [uri-matches (uri->ids name)] - uri-matches - ; 4. Attempt to build SPDX expression(s) from the name - (some->> (split-on-operators name) - (drop-while keyword?) 
- (lc3/rdrop-while keyword?) - (map #(if (keyword? %) % (string->ids %))) - flatten - build-spdx-expressions - set))))))) - -(defn name->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given - name (a string), or nil if there aren't any. This involves: - 1. checking if the name is actually an SPDX expression (this is rare, but - sometimes an SPDX identifier or expression appears in a pom.xml file) - 2. looking up the name case insensitively in the SPDX license list - 3. matching lice-comb specific 'name matching' regexes against the name - 4. if the name is actually a URI, running it through uri->ids - - If those steps all fail, a lice-comb custom 'unlisted' LicenseRef is returned - instead (which can be checked using the unlisted? fn)." - [name] - (when-not (s/blank? name) - (manual-fixes - (let [name (s/trim name)] - ; 1. Parse the name as an SPDX exception, and if that succeeds, return all ids in the expression - (if-let [ids-in-expression (parse-expression-and-extract-ids name)] - ids-in-expression - (string->ids name)))))) + (with-meta #{normalised-expression} {:type :declared :strategy :spdx-expression :source (list name)}) + ; 2. If it's a URI, use URI matching (this is to handle messed up real world cases where license names in POMs contain a URI) + (if (lcu/valid-http-uri? name) + (if-let [ids (uri->ids name)] + ids + (with-meta #{(lcis/name->unlisted name)} {:type :concluded :confidence :low :strategy :unresolvable-uri :source (list name)})) + ; 3. Attempt to build SPDX expression(s) from the name + (lcim/attempt-to-build-expressions name)))))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent @@ -718,20 +129,5 @@ Note: this method has a substantial performance cost." 
[] - ; Parallelise initialisation of the spdx.licenses and spdx.exceptions namespaces, as they're both sloooooooow (~1.5 mins total) - (let [sl-init (future (sl/init!)) - se-init (future (se/init!))] - @sl-init - @se-init) - - ; Serially initialise this namespace's dependent state - they're all pretty fast (< 1s) - @license-ids-d - @exception-ids-d - @license-list-d - @exception-list-d - @spdx-ids-d - @index-uri-to-id-d - @index-name-to-id-d - @http-client-d - @license-name-matching-d + (lcim/init!) nil) diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index 845b0c4..07bbc28 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -31,6 +31,9 @@ (def ^:private local-maven-repo-d (delay (try + ; The command: + ; mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout + ; determines where the local repository is located. (let [sh-result (sh/sh "mvn" "help:evaluate" "-Dexpression=settings.localRepository" "-q" "-DforceStdout")] (if (zero? (:exit sh-result)) (s/trim (:out sh-result)) @@ -41,6 +44,7 @@ ; TODO: make this configurable (def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"}) +;####TODO: MOVE THIS TO UTILS AND REIMPLEMENT ON HATO?? (defn- uri-resolves? "Does the given URI resolve (i.e. does the resource it points to exist)?" [^java.net.URI uri] @@ -49,6 +53,7 @@ (.setRequestMethod "HEAD"))] (= 200 (.getResponseCode http))))) +;####TODO: MOVE THIS TO AN IMPL NS?? (defn pom-uri-for-gav "Attempts to locate the POM for the given GAV, which is a URI that may point to a file in the local Maven repository or a remote Maven repository (e.g. on @@ -67,27 +72,34 @@ ;####TODO: Check both URI and name and merge the results! (defn- licenses-from-pair - "Attempts to determine the license(s) (a set) from a POM license name/URL pair." + "Attempts to determine the license(s) (a set) from a POM license name/URL pair. 
+ + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." [{:keys [name url]}] ; Attempt to find a match by URL first (if-let [licenses (lcmtch/uri->ids url)] licenses ; Then match by name - (lcmtch/name->ids name))) + (lcmtch/name->expressions name))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") -(defmulti pom->ids - "Attempt to detect the license(s) reported in a pom.xml file. pom may be a - java.io.InputStream, or anything that can be opened by clojure.java.io/input-stream. +(defmulti pom->expressions + "Attempt to detect the license expression(s) (a set) reported in a pom.xml + file. pom may be a java.io.InputStream, or anything that can be opened by + clojure.java.io/input-stream. Note: if an InputStream is provided, it's the caller's responsibility to open - and close it." + and close it. + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." {:arglists '([pom])} type) ; Note: a few rare pom.xml files are missing the xmlns declation (e.g. software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags here -(defmethod pom->ids java.io.InputStream +(defmethod pom->expressions java.io.InputStream [pom-is] (let [pom-xml (xml/parse pom-is) licenses (seq (xi/find-all pom-xml [::pom/project ::pom/licenses ::pom/license])) @@ -96,6 +108,7 @@ ; Licenses block exists - process it (let [name-uri-pairs (lcu/nset (concat (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url]))))] +;####TODO: MERGE METADATA MAPS AND EMBELLISH :source!!!! 
(lcu/nset (mapcat licenses-from-pair name-uri-pairs))) ; License block doesn't exist, so attempt to lookup the parent pom and get it from there (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) @@ -108,14 +121,14 @@ :artifact-id (lcu/strim (first (xi/find-first parent-no-ns [:artifactId]))) :version (lcu/strim (first (xi/find-first parent-no-ns [:version])))}))] (when-not (empty? parent-gav) - (pom->ids (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep + (pom->expressions (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep -(defmethod pom->ids :default +(defmethod pom->expressions :default [pom] (when pom (with-open [pom-is (io/input-stream pom)] - (if-let [pom-licenses (pom->ids pom-is)] - pom-licenses + (if-let [expressions (pom->expressions pom-is)] + expressions (log/info (str "'" pom "'") "contains no license information"))))) (defn init! diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index 23665ee..14102b9 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -19,7 +19,8 @@ (ns lice-comb.deps-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.deps :refer [dep->ids deps-licenses]])) + [lice-comb.impl.spdx :as lcis] + [lice-comb.deps :refer [dep->expressions deps-expressions]])) (use-fixtures :once fixture) @@ -27,129 +28,129 @@ (deftest dep->ids-tests (testing "Nil deps" - (is (nil? (dep->ids nil)))) + (is (nil? (dep->expressions nil)))) (testing "Unknown dep types" - (is (thrown? clojure.lang.ExceptionInfo (dep->ids ['com.github.pmonks/lice-comb {:deps/manifest :invalid :mvn/version "1.0.0"}])))) + (is (thrown? 
clojure.lang.ExceptionInfo (dep->expressions ['com.github.pmonks/lice-comb {:deps/manifest :invalid :mvn/version "1.0.0"}])))) (testing "Invalid deps" - (is (nil? (dep->ids ['com.github.pmonks/invalid-project {:deps/manifest :mvn :mvn/version "0.0.1"}]))) ; Invalid GA - (is (nil? (dep->ids ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.0.0-SNAPSHOT"}])))) ; Invalid V + (is (nil? (dep->expressions ['com.github.pmonks/invalid-project {:deps/manifest :mvn :mvn/version "0.0.1"}]))) ; Invalid GA + (is (nil? (dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.0.0-SNAPSHOT"}])))) ; Invalid V (testing "Valid deps - single license" - (is (= #{"Apache-2.0"} (dep->ids ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) - (is (= #{"LicenseRef-lice-comb-public-domain"} (dep->ids ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile 
{:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.google.guava/guava {:deps/manifest :mvn :mvn/version "31.0.1-jre"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"CDDL-1.0"} (dep->ids ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) - (is (= #{"CDDL-1.0"} (dep->ids ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['javax.enterprise/cdi-api {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) - (is (= #{"CC0-1.0"} (dep->ids ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version 
"4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version 
"4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.codec {:deps/manifest :mvn :mvn/version "0.1.1"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.json {:deps/manifest :mvn :mvn/version "2.4.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= 
#{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer {:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) - (is (= #{"MIT"} (dep->ids ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn :mvn/version "2.6.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-component-annotations {:deps/manifest :mvn :mvn/version "2.1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) - (is (= #{"Plexus"} (dep->ids ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids 
['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version "2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "1.7"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) - (is (= #{"LicenseRef-lice-comb-public-domain"} (dep->ids ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") 
:lice-comb/licenses #{"EPL-1.0"}}]))) + (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) + (is (= #{(lcis/public-domain)} (dep->expressions ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['com.google.guava/guava {:deps/manifest :mvn :mvn/version "31.0.1-jre"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (= #{"CDDL-1.0"} (dep->expressions ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) + (is (= #{"CDDL-1.0"} (dep->expressions ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['javax.enterprise/cdi-api 
{:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) + (is (= #{"CC0-1.0"} (dep->expressions ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn 
:mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= 
#{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.codec {:deps/manifest :mvn :mvn/version "0.1.1"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.json {:deps/manifest :mvn :mvn/version "2.4.0"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) + 
(is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) + (is (= #{"MIT"} (dep->expressions ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn :mvn/version "2.6.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-component-annotations {:deps/manifest :mvn :mvn/version "2.1.0"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn :mvn/version "0.3.5"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) + (is (= #{"BSD-3-Clause"} (dep->expressions ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) + (is (= #{"Plexus"} (dep->expressions ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) + (is (= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= 
#{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (= #{"MIT"} (dep->expressions ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "1.7"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) + (is (= #{(lcis/public-domain)} (dep->expressions ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) + (is (= #{"Apache-2.0"} (dep->expressions ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) (testing "Valid deps - no licenses in deployed artifacts -> leverage fallbacks" - (is (= #{"EPL-1.0"} (dep->ids ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) + (is (= #{"EPL-1.0"} (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) + (is (= #{"EPL-1.0"} (dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) (testing "Valid deps - multi license" - (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->ids ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->ids 
['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"CDDL-1.1" "GPL-2.0-with-classpath-exception"} (dep->ids ['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) - (is (= #{"Apache-2.0" "LGPL-2.1-or-later"} (dep->ids ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) - (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (dep->ids ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) + (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (= #{"CDDL-1.1" "GPL-2.0-with-classpath-exception"} (dep->expressions ['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) + (is (= #{"Apache-2.0" "LGPL-2.1-or-later"} (dep->expressions ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) + (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (dep->expressions ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) (testing "Valid deps - Maven classifiers" -; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->ids ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 - (is (= #{"Apache-2.0"} (dep->ids ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) +; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 + (is (= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) -(deftest deps-licenses-test +(deftest deps-expressions-test (testing "Nil and empty deps" - (is (nil? 
(deps-licenses nil))) - (is (= {} (deps-licenses {})))) + (is (nil? (deps-expressions nil))) + (is (= {} (deps-expressions {})))) (testing "Single deps" - (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-licenses {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}}) 'org.clojure/clojure)))) - (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-licenses {'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}) 'com.github.athos/clj-check)))) ; Note: we use this git dep, as it's used earlier in the build, so we can be sure it's been downloaded before this test is run -; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}})))))) ; Blocked on https://github.com/jnr/jffi/issues/141 - (is (= #{"Apache-2.0"} (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native)))) - (is (= (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi)) - (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native))))) + (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}}) 'org.clojure/clojure)))) + (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}) 'com.github.athos/clj-check)))) ; Note: we use this git dep, as it's used earlier in the build, so we can be sure it's been downloaded before this test is run +; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}})))))) ; 
Blocked on https://github.com/jnr/jffi/issues/141 + (is (= #{"Apache-2.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native)))) + (is (= (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi)) + (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native))))) (testing "Multiple deps" (is (= {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3" :lice-comb/licenses #{"EPL-1.0"}} 'org.clojure/spec.alpha {:deps/manifest :mvn :mvn/version "0.2.194" :lice-comb/licenses #{"EPL-1.0"}} @@ -166,7 +167,7 @@ 'clj-xml-validation/clj-xml-validation {:deps/manifest :mvn :mvn/version "1.0.2" :lice-comb/licenses #{"EPL-1.0"}} 'camel-snake-kebab/camel-snake-kebab {:deps/manifest :mvn :mvn/version "0.4.2" :lice-comb/licenses #{"EPL-1.0"}} 'tolitius/xml-in {:deps/manifest :mvn :mvn/version "0.1.1" :lice-comb/licenses #{"EPL-1.0"}}} - (deps-licenses {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"} + (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"} 'org.clojure/spec.alpha {:deps/manifest :mvn :mvn/version "0.2.194"} 'org.clojure/core.specs.alpha {:deps/manifest :mvn :mvn/version "0.2.56"} 'org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.2.0-alpha6"} @@ -191,7 +192,7 @@ 'camel-snake-kebab/camel-snake-kebab {:deps/manifest :mvn :mvn/version "0.4.2" :lice-comb/licenses #{"EPL-1.0"}} 'tolitius/xml-in {:deps/manifest :mvn :mvn/version "0.1.1" :lice-comb/licenses #{"EPL-1.0"}} 'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}} - (deps-licenses {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"} + (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn 
:mvn/version "1.10.3"} 'org.clojure/spec.alpha {:deps/manifest :mvn :mvn/version "0.2.194"} 'org.clojure/core.specs.alpha {:deps/manifest :mvn :mvn/version "0.2.56"} 'org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.2.0-alpha6"} diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index 8db6d83..046cf2b 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -20,7 +20,7 @@ (:require [clojure.test :refer [deftest testing is use-fixtures]] [clojure.java.io :as io] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.files :refer [probable-license-file? probable-license-files file->ids dir->ids zip->ids]])) + [lice-comb.files :refer [probable-license-file? probable-license-files file->expressions dir->expressions zip->expressions]])) (use-fixtures :once fixture) @@ -72,54 +72,54 @@ (io/file (str test-data-path "/MPL-2.0/LICENSE"))} (probable-license-files test-data-path))))) -(deftest file->ids-tests +(deftest file->expressions-tests (testing "Nil, empty, or blank filename" - (is (nil? (file->ids nil))) - (is (thrown? java.io.FileNotFoundException (file->ids ""))) - (is (thrown? java.io.FileNotFoundException (file->ids " "))) - (is (thrown? java.io.FileNotFoundException (file->ids "\n"))) - (is (thrown? java.io.FileNotFoundException (file->ids "\t")))) + (is (nil? (file->expressions nil))) + (is (thrown? java.io.FileNotFoundException (file->expressions ""))) + (is (thrown? java.io.FileNotFoundException (file->expressions " "))) + (is (thrown? java.io.FileNotFoundException (file->expressions "\n"))) + (is (thrown? java.io.FileNotFoundException (file->expressions "\t")))) (testing "Non-existent files" - (is (thrown? java.io.FileNotFoundException (file->ids "this_file_does_not_exist")))) + (is (thrown? 
java.io.FileNotFoundException (file->expressions "this_file_does_not_exist")))) (testing "Files on disk" - (is (= #{"CC-BY-4.0"} (file->ids (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 - (is (= #{"MPL-2.0" "MPL-2.0-no-copyleft-exception"} (file->ids (str test-data-path "/MPL-2.0/LICENSE"))))) + (is (= #{"CC-BY-4.0"} (file->expressions (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (= #{"MPL-2.0" "MPL-2.0-no-copyleft-exception"} (file->expressions (str test-data-path "/MPL-2.0/LICENSE"))))) (testing "URLs" - (is (= #{"Apache-2.0"} (file->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"Apache-2.0"} (file->ids (io/as-url "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (is (= #{"Apache-2.0"} (file->expressions "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (file->expressions (io/as-url "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) (testing "InputStreams" - (is (thrown? clojure.lang.ExceptionInfo (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->ids is)))) - (is (= #{"Apache-2.0"} (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->ids is "LICENSE_2.0.txt"))))) + (is (thrown? 
clojure.lang.ExceptionInfo (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->expressions is)))) + (is (= #{"Apache-2.0"} (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->expressions is "LICENSE_2.0.txt"))))) (testing "POM files" - (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/simple.pom")))) - (is (= #{"BSD-3-Clause"} (file->ids (str test-data-path "/no-xml-ns.pom")))) - (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/asf-cat-1.0.12.pom")))) - (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/with-parent.pom")))))) + (is (= #{"Apache-2.0"} (file->expressions (str test-data-path "/simple.pom")))) + (is (= #{"BSD-3-Clause"} (file->expressions (str test-data-path "/no-xml-ns.pom")))) + (is (= #{"Apache-2.0"} (file->expressions (str test-data-path "/asf-cat-1.0.12.pom")))) + (is (= #{"Apache-2.0"} (file->expressions (str test-data-path "/with-parent.pom")))))) -(deftest dir->ids-tests +(deftest dir->expressions-tests (testing "Nil, empty, or blank directory name" - (is (nil? (dir->ids nil))) - (is (thrown? java.io.FileNotFoundException (dir->ids ""))) - (is (thrown? java.io.FileNotFoundException (dir->ids " "))) - (is (thrown? java.io.FileNotFoundException (dir->ids "\n"))) - (is (thrown? java.io.FileNotFoundException (dir->ids "\t")))) + (is (nil? (dir->expressions nil))) + (is (thrown? java.io.FileNotFoundException (dir->expressions ""))) + (is (thrown? java.io.FileNotFoundException (dir->expressions " "))) + (is (thrown? java.io.FileNotFoundException (dir->expressions "\n"))) + (is (thrown? java.io.FileNotFoundException (dir->expressions "\t")))) (testing "Non-existent or invalid directory" - (is (thrown? java.io.FileNotFoundException (dir->ids "this_directory_does_not_exist"))) - (is (thrown? java.nio.file.NotDirectoryException (dir->ids "deps.edn")))) + (is (thrown? 
java.io.FileNotFoundException (dir->expressions "this_directory_does_not_exist"))) + (is (thrown? java.nio.file.NotDirectoryException (dir->expressions "deps.edn")))) (testing "Valid directory" - (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "MPL-2.0-no-copyleft-exception" "CC-BY-4.0"} (dir->ids "."))))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "MPL-2.0-no-copyleft-exception" "CC-BY-4.0"} (dir->expressions "."))))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 -(deftest zip->ids-tests +(deftest zip->expressions-tests (testing "Nil, empty, or blank zip file name" - (is (nil? (zip->ids nil))) - (is (thrown? java.io.FileNotFoundException (zip->ids ""))) ; Note the hodgepodge of different thrown exception types here - java.util.zip is a mess! - (is (thrown? java.nio.file.NoSuchFileException (zip->ids " "))) - (is (thrown? java.nio.file.NoSuchFileException (zip->ids "\n"))) - (is (thrown? java.nio.file.NoSuchFileException (zip->ids "\t")))) + (is (nil? (zip->expressions nil))) + (is (thrown? java.io.FileNotFoundException (zip->expressions ""))) ; Note the hodgepodge of different thrown exception types here - java.util.zip is a mess! + (is (thrown? java.nio.file.NoSuchFileException (zip->expressions " "))) + (is (thrown? java.nio.file.NoSuchFileException (zip->expressions "\n"))) + (is (thrown? java.nio.file.NoSuchFileException (zip->expressions "\t")))) (testing "Non-existent zip file" - (is (thrown? java.nio.file.NoSuchFileException (zip->ids "this_zip_file_does_not_exist")))) + (is (thrown? java.nio.file.NoSuchFileException (zip->expressions "this_zip_file_does_not_exist")))) (testing "Invalid zip file" - (is (thrown? java.util.zip.ZipException (zip->ids (str test-data-path "/bad.zip"))))) + (is (thrown? 
java.util.zip.ZipException (zip->expressions (str test-data-path "/bad.zip"))))) (testing "Valid zip file" - (is (= #{"Apache-2.0"} (zip->ids (str test-data-path "/good.zip")))))) + (is (= #{"Apache-2.0"} (zip->expressions (str test-data-path "/good.zip")))))) diff --git a/test/lice_comb/impl_regex_matching_test.clj b/test/lice_comb/impl_regex_matching_test.clj new file mode 100644 index 0000000..7da3986 --- /dev/null +++ b/test/lice_comb/impl_regex_matching_test.clj @@ -0,0 +1,255 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl-regex-matching-test + (:require [clojure.test :refer [deftest testing is are use-fixtures]] + [clojure.set :as set] + [rencg.api :as rencg] + [lice-comb.impl.utils :as lcu] + [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.impl.regex-matching :refer [init! version-re only-or-later-re agpl-re lgpl-re gpl-re gnu-re match-regexes]])) + +(use-fixtures :once fixture) + +(deftest init!-tests + (testing "Nil response" + (is (nil? 
(init!))))) + +(def agpl-licenses-and-ids { + "AGPL" '("AGPL-3.0-only") + "AGPL v3" '("AGPL-3.0-only") + "AGPLv3" '("AGPL-3.0-only") + "Affero GNU Public License v3" '("AGPL-3.0-only") + "Affero General Public License" '("AGPL-3.0-only") + "Affero General Public License v3 or later (at your option)" '("AGPL-3.0-or-later") + "Affero General Public License version 3 or lator" '("AGPL-3.0-or-later") + "Affero General Public License," '("AGPL-3.0-only") + "GNU AFFERO GENERAL PUBLIC LICENSE Version 3" '("AGPL-3.0-only") + "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3" '("AGPL-3.0-only") + "GNU AGPL-V3 or later" '("AGPL-3.0-or-later") + "GNU AGPLv3" '("AGPL-3.0-only") + "GNU Affero General Public Licence" '("AGPL-3.0-only") + "GNU Affero General Public License (AGPL)" '("AGPL-3.0-only") + "GNU Affero General Public License (AGPL) version 3.0" '("AGPL-3.0-only") + "GNU Affero General Public License 3.0 (AGPL-3.0)" '("AGPL-3.0-only") + "GNU Affero General Public License Version 3" '("AGPL-3.0-only") + "GNU Affero General Public License v3" '("AGPL-3.0-only") + "GNU Affero General Public License v3.0" '("AGPL-3.0-only") + "GNU Affero General Public License v3.0 only" '("AGPL-3.0-only") + "GNU Affero General Public License, version 3" '("AGPL-3.0-only") + }) + +(def lgpl-licenses-and-ids { + "GNU General Lesser Public License (LGPL) version 3.0" '("LGPL-3.0-only") + "GNU LESSER GENERAL PUBLIC LICENSE" '("LGPL-3.0-only") + "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1" '("LGPL-2.1-only") + "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999" '("LGPL-2.1-only") + "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0" '("LGPL-3.0-only") + "GNU LGPL 3" '("LGPL-3.0-only") + "GNU LGPL v2.1" '("LGPL-2.1-only") + "GNU LGPL v3" '("LGPL-3.0-only") + "GNU LGPL version 3" '("LGPL-3.0-only") + "GNU LGPL-3.0" '("LGPL-3.0-only") + "GNU LGPLv3 " '("LGPL-3.0-only") + "GNU Lesser GPL" '("LGPL-3.0-only") + "GNU Lesser General Public Licence" '("LGPL-3.0-only") + "GNU Lesser General 
Public Licence 3.0" '("LGPL-3.0-only") + "GNU Lesser General Public License" '("LGPL-3.0-only") + "GNU Lesser General Public License (LGPL)" '("LGPL-3.0-only") + "GNU Lesser General Public License (LGPL) Version 3" '("LGPL-3.0-only") + "GNU Lesser General Public License - v 3" '("LGPL-3.0-only") + "GNU Lesser General Public License - v 3.0" '("LGPL-3.0-only") + "GNU Lesser General Public License - v3" '("LGPL-3.0-only") + "GNU Lesser General Public License 2.1" '("LGPL-2.1-only") + "GNU Lesser General Public License v2.1" '("LGPL-2.1-only") + "GNU Lesser General Public License v3.0" '("LGPL-3.0-only") + "GNU Lesser General Public License version 3" '("LGPL-3.0-only") + "GNU Lesser General Public License version 3.0" '("LGPL-3.0-only") + "GNU Lesser General Public License, Version 2.1" '("LGPL-2.1-only") + "GNU Lesser General Public License, Version 3" '("LGPL-3.0-only") + "GNU Lesser General Public License, Version 3 or later" '("LGPL-3.0-or-later") + "GNU Lesser General Public License, v. 
3 or later" '("LGPL-3.0-or-later") + "GNU Lesser General Public License, version 2.1 or newer" '("LGPL-2.1-or-later") + "GNU Lesser General Public License, version 3 or later" '("LGPL-3.0-or-later") + "GNU Lesser General Public License, version 3.0 or (at your option) any later version" '("LGPL-3.0-or-later") + "GNU Lesser General Pulic License v2.1" '("LGPL-2.1-only") + "GNU Lesser Genereal Public License" '("LGPL-3.0-only") + "GNU Lesser Public License" '("LGPL-3.0-only") + "GNU Library General Public License" '("LGPL-3.0-only") + "GNU Library or Lesser General Public License (LGPL)" '("LGPL-3.0-only") + "GNU Library or Lesser General Public License (LGPL) 2.1" '("LGPL-2.1-only") + "GNU Library or Lesser General Public License (LGPL) V2.1" '("LGPL-2.1-only") + "Gnu Lesser Public License" '("LGPL-3.0-only") + "L GPL 3" '("LGPL-3.0-only") + "LGPL" '("LGPL-3.0-only") + "LGPL 2.1" '("LGPL-2.1-only") + "LGPL 3.0" '("LGPL-3.0-only") + "LGPL 3.0 (GNU Lesser General Public License)" '("LGPL-3.0-only") + "LGPL License" '("LGPL-3.0-only") + "LGPL Open Source license" '("LGPL-3.0-only") + "LGPL v3" '("LGPL-3.0-only") + "LGPLv2.1" '("LGPL-2.1-only") + "LGPLv3" '("LGPL-3.0-only") + "LGPLv3+" '("LGPL-3.0-or-later") + "Lesser GPL" '("LGPL-3.0-only") + "Lesser General Public License" '("LGPL-3.0-only") + "Lesser General Public License (LGPL)" '("LGPL-3.0-only") + "Licensed under GNU Lesser General Public License Version 3 or later (the " '("LGPL-3.0-or-later") + "lgpl_v2_1" '("LGPL-2.1-only") + }) + +(def gpl-licenses-and-ids { + " GNU GENERAL PUBLIC LICENSE Version 3" '("GPL-3.0-only") + "GNU" '("GPL-3.0-only") + "GNU GENERAL PUBLIC LICENSE" '("GPL-3.0-only") + "GNU GENERAL PUBLIC LICENSE Version 2, June 1991" '("GPL-2.0-only") + "GNU GPL" '("GPL-3.0-only") + "GNU GPL 3" '("GPL-3.0-only") + "GNU GPL V2+" '("GPL-2.0-or-later") + "GNU GPL v 3.0" '("GPL-3.0-only") + "GNU GPL v. 
3" '("GPL-3.0-only") + "GNU GPL v3" '("GPL-3.0-only") + "GNU GPL v3+" '("GPL-3.0-or-later") + "GNU GPL v3.0" '("GPL-3.0-only") + "GNU GPL, version 3, 29 June 2007" '("GPL-3.0-only") + "GNU GPLv3+" '("GPL-3.0-or-later") + "GNU General Public License" '("GPL-3.0-only") + "GNU General Public License (GPL)" '("GPL-3.0-only") + "GNU General Public License 2" '("GPL-2.0-only") + "GNU General Public License V3" '("GPL-3.0-only") + "GNU General Public License Version 3" '("GPL-3.0-only") + "GNU General Public License v2.0" '("GPL-2.0-only") + "GNU General Public License v3" '("GPL-3.0-only") + "GNU General Public License v3.0" '("GPL-3.0-only") + "GNU General Public License v3.0 or later" '("GPL-3.0-or-later") + "GNU General Public License, Version 2" '("GPL-2.0-only") + "GNU General Public License, Version 3" '("GPL-3.0-only") + "GNU General Public License, Version 3 (or later)" '("GPL-3.0-or-later") + "GNU General Public License, version 2" '("GPL-2.0-only") + "GNU General Public License, version 2 (GPL2)" '("GPL-2.0-only") + "GNU General Public License, version 3" '("GPL-3.0-only") + "GNU General Public License, version 3 (GPLv3)" '("GPL-3.0-only") + "GNU General Public License,version 2.0 or (at your option) any later version" '("GPL-2.0-or-later") + "GNU Public License" '("GPL-3.0-only") + "GNU Public License V. 
3.0" '("GPL-3.0-only") + "GNU Public License V3" '("GPL-3.0-only") + "GNU Public License v2" '("GPL-2.0-only") + "GNU Public License, Version 2" '("GPL-2.0-only") + "GNU Public License, Version 2.0" '("GPL-2.0-only") + "GNU Public License, v2" '("GPL-2.0-only") + "GNU public licence V3.0" '("GPL-3.0-only") + "GNUv3" '("GPL-3.0-only") + "GPL" '("GPL-3.0-only") + "GPL 2.0+" '("GPL-2.0-or-later") + "GPL 3" '("GPL-3.0-only") + "GPL 3.0" '("GPL-3.0-only") + "GPL V3" '("GPL-3.0-only") + "GPL V3+" '("GPL-3.0-or-later") + "GPL v2" '("GPL-2.0-only") + "GPL v2+" '("GPL-2.0-or-later") + "GPL v3" '("GPL-3.0-only") + "GPL version 3" '("GPL-3.0-only") + "GPL-3" '("GPL-3.0-only") + "GPL3" '("GPL-3.0-only") + "GPLv2" '("GPL-2.0-only") + "GPLv3" '("GPL-3.0-only") + "General Public License 3" '("GPL-3.0-only") + "General Public License v3.0" '("GPL-3.0-only") + "The GNU General Public License" '("GPL-3.0-only") + "The GNU General Public License v3.0" '("GPL-3.0-only") + "The GNU General Public License, Version 2 " '("GPL-2.0-only") + }) + +(def cc-by-licenses-and-ids { + "Attribution 3.0 Unported" '("CC-BY-3.0") + "Attribution 4.0 International" '("CC-BY-4.0") + "Attribution-NonCommercial-NoDerivs 3.0 Unported" '("CC-BY-NC-ND-3.0") + "CC Attribution 4.0 International with exception for binary distribution" '("CC-BY-4.0") + "CC BY-NC" '("CC-BY-NC-4.0") + "Creative Commons 3.0" '("CC-BY-3.0") + "Creative Commons Attribution 2.5 License" '("CC-BY-2.5") + "Creative Commons Attribution License" '("CC-BY-4.0") + "Creative Commons Attribution Share Alike 4.0 International" '("CC-BY-SA-4.0") + "Creative Commons Attribution-NonCommercial 3.0" '("CC-BY-NC-3.0") + "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license" '("CC-BY-SA-3.0") + "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)" '("CC-BY-SA-3.0") + "Creative Commons Attribution-ShareAlike 3.0 Unported License" '("CC-BY-SA-3.0") + "Creative Commons Attribution-ShareAlike 3.0 Unported" '("CC-BY-SA-3.0") + "Creative 
Commons Attribution-ShareAlike 3.0" '("CC-BY-SA-3.0") + "Creative Commons Legal Code Attribution 3.0 Unported" '("CC-BY-3.0") + }) + +(def gnu-licenses-and-ids (merge agpl-licenses-and-ids lgpl-licenses-and-ids gpl-licenses-and-ids)) + +(def agpl-licenses (set (keys agpl-licenses-and-ids))) +(def lgpl-licenses (set (keys lgpl-licenses-and-ids))) +(def gpl-licenses (set (keys gpl-licenses-and-ids))) + +(def gnu-licenses (set/union agpl-licenses lgpl-licenses gpl-licenses)) + +; For testing individual GNU family regex components in isolation +(def agpl-only-re (lcu/re-concat #"(?i)\b" "(" agpl-re ")" version-re only-or-later-re)) +(def lgpl-only-re (lcu/re-concat #"(?i)\b" "(" lgpl-re ")" version-re only-or-later-re)) +(def gpl-only-re (lcu/re-concat #"(?i)\b" "(" gpl-re ")" version-re only-or-later-re)) + +(def not-nil? (complement nil?)) + +(defn when-pred + [val pred then] + (if (pred val) + (then val) + val)) + +(defmacro testing-with-data + "A form of `clojure.test/testing` that generates multiple `clojure.test/is` + clauses, based on applying f to the keys in m, and comparing to the associated + value in m." + [name f m] + `(testing ~name + ~@(map #(list `is `(= (~f ~(key %)) ~(when-pred (val %) list? (partial list 'quote)))) + (if (isa? (type m) clojure.lang.Symbol) + @(resolve m) + m)))) + +; Add input to result to make troubleshooting test failures easier +(defn test-regex + [re s] + (when-let [result (rencg/re-find-ncg re s)] + (assoc result :input s))) + +(deftest gnu-regex-components-tests + (testing "GNU Family Regexes - correct matching and non-matching - AGPL component" + (is (every? not-nil? (map (partial test-regex agpl-only-re) agpl-licenses))) + (is (every? nil? (map (partial test-regex agpl-only-re) lgpl-licenses))) + (is (every? nil? (map (partial test-regex agpl-only-re) gpl-licenses)))) + (testing "GNU Family Regexes - correct matching and non-matching - LGPL component" + (is (every? nil? 
(map (partial test-regex lgpl-only-re) agpl-licenses))) + (is (every? not-nil? (map (partial test-regex lgpl-only-re) lgpl-licenses))) + (is (every? nil? (map (partial test-regex lgpl-only-re) gpl-licenses)))) + (testing "GNU Family Regexes - correct matching and non-matching - GPL component" + (is (every? nil? (map (partial test-regex gpl-only-re) agpl-licenses))) + (is (every? nil? (map (partial test-regex gpl-only-re) lgpl-licenses))) + (is (every? not-nil? (map (partial test-regex gpl-only-re) gpl-licenses))))) + +(deftest combined-regex-components-tests + (testing "GNU Family Regexes - correct matching - combined GNU family regex" + (is (every? not-nil? (map (partial test-regex gnu-re) gnu-licenses))))) + +(deftest match-regexes-tests + (testing-with-data "GNU Family Regexes - correct identifier results" match-regexes gnu-licenses-and-ids) + (testing-with-data "CC Family Regexes - correct identifier results" match-regexes cc-by-licenses-and-ids)) diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 56ee5f0..ee157a3 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -19,7 +19,8 @@ (ns lice-comb.matching-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.matching :refer [unlisted? proprietary-or-commercial? name->unlisted public-domain proprietary-or-commercial text->ids name->expressions name->ids uri->ids]] + [lice-comb.impl.spdx :as lcis] + [lice-comb.matching :refer [init! unlisted? proprietary-commercial? text->ids name->expressions uri->ids]] [spdx.licenses :as sl] [spdx.exceptions :as se] [spdx.expressions :as sexp])) @@ -32,6 +33,10 @@ (and (= 1 (count ids)) (unlisted? (first ids)))) +(deftest init!-tests + (testing "Nil response" + (is (nil? (init!))))) + (deftest unlisted?-tests (testing "Nil, empty or blank ids" (is (nil? (unlisted? nil))) @@ -40,7 +45,7 @@ (is (false? (unlisted? "\n"))) (is (false? 
(unlisted? "\t")))) (testing "Unlisted ids" - (is (true? (unlisted? (name->unlisted "foo"))))) + (is (true? (unlisted? (lcis/name->unlisted "foo"))))) (testing "Listed ids" (is (true? (every? false? (map unlisted? (sl/ids))))) (is (true? (every? false? (map unlisted? (se/ids))))))) @@ -67,14 +72,14 @@ (is (valid= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) (is (valid= #{"CC-BY-SA-4.0"} (name->expressions "CC-BY-SA-4.0"))) (is (valid= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) - (is (valid= #{"GPL-2.0-with-classpath-exception"} (name->expressions "GPL-2.0-with-classpath-exception")))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0-with-classpath-exception")))) (testing "Public domain and proprietary/commercial" - (is (valid= #{(public-domain)} (name->expressions "Public Domain"))) - (is (valid= #{(public-domain)} (name->expressions "Public domain"))) ; Test lower case - (is (valid= #{(public-domain)} (name->expressions " Public domain "))) ; Test whitespace - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Commercial"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "All rights reserved")))) + (is (valid= #{(lcis/public-domain)} (name->expressions "Public Domain"))) + (is (valid= #{(lcis/public-domain)} (name->expressions "Public domain"))) ; Test lower case + (is (valid= #{(lcis/public-domain)} (name->expressions " Public domain "))) ; Test whitespace + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Commercial"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All rights reserved")))) (testing "Expressions that are valid SPDX" (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0 WITH Classpath-exception-2.0"))) (is (valid= #{"Apache-2.0 OR 
GPL-3.0-only"} (name->expressions "Apache-2.0 OR GPL-3.0"))) @@ -170,28 +175,28 @@ (is (valid= #{"Plexus"} (name->expressions "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html (testing "All names seen in POMs on Clojars as of 2023-07-13" (is (valid= #{"AFL-3.0"} (name->expressions "Academic Free License 3.0"))) + (is (valid= #{"AGPL-3.0-only" (lcis/proprietary-commercial)} (name->expressions "GNU Affero General Public License Version 3; Other commercial licenses available."))) (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL v3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL"))) ; Listed license missing version - we assume the latest (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPLv3"))) (is (valid= #{"AGPL-3.0-only"} (name->expressions "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "Affero General Public License"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "Affero General Public License,"))) ; Listed license missing version - we assume the latest (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AGPLv3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License 3.0 (AGPL-3.0)"))) (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3"))) - (is (valid= 
#{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3; Other commercial licenses available."))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3"))) (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, Version 3"))) (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, version 3"))) - (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "AGPL"))) ; Listed license missing version - we assume the latest (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License v3 or later (at your option)"))) (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License version 3 or lator"))) ; Typo in "lator" - (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License"))) - (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License,"))) ; Listed license missing version - we assume the latest (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU AGPL-V3 or later"))) - (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest - (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest - (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest (is (valid= #{"Apache-2.0 WITH LLVM-exception"} (name->expressions "Apache 2.0 with LLVM Exception"))) (is (valid= #{"Apache-2.0"} (name->expressions " 
Apache License, Version 2.0"))) (is (valid= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) @@ -337,7 +342,7 @@ (is (valid= #{"Beerware"} (name->expressions "THE BEER-WARE LICENSE"))) (is (valid= #{"CC-BY-2.5"} (name->expressions "Creative Commons Attribution 2.5 License"))) (is (valid= #{"CC-BY-3.0"} (name->expressions "Creative Commons 3.0"))) - (is (valid= #{"CC-BY-4.0"} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) + (is (valid= #{"CC-BY-4.0" (lcis/name->unlisted "exception for binary distribution")} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) ; The exception in this case doesn't map to any listed SPDX identifier (including CC-BY variants) (is (valid= #{"CC-BY-4.0"} (name->expressions "CC-BY-4.0"))) (is (valid= #{"CC-BY-4.0"} (name->expressions "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest (is (valid= #{"CC-BY-NC-3.0"} (name->expressions "Creative Commons Attribution-NonCommercial 3.0"))) @@ -373,7 +378,7 @@ (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License, version 1.0"))) (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public Licese - v 1.0"))) (is (valid= #{"EPL-1.0"} (name->expressions "https://github.com/cmiles74/uio/blob/master/LICENSE"))) - (is (valid= #{"EPL-2.0 AND LGPL-3.0-or-later"} (name->expressions "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0 AND LGPL-3.0-only"} (name->expressions "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest (is (valid= #{"EPL-2.0 OR Apache-2.0"} (name->expressions "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR 
GPL-2.0-or-later WITH Classpath-exception-2.0"))) (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest @@ -426,6 +431,7 @@ (is (valid= #{"GPL-2.0-only"} (name->expressions "GPLv2"))) (is (valid= #{"GPL-2.0-only"} (name->expressions "The GNU General Public License, Version 2"))) (is (valid= #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU General Public License,version 2.0 or (at your option) any later version"))) (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU GPL V2+"))) (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GPL 2.0+"))) (is (valid= #{"GPL-3.0-only"} (name->expressions " GNU GENERAL PUBLIC LICENSE Version 3"))) @@ -471,7 +477,6 @@ (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU GPLv3+"))) (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License v3.0 or later"))) (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License, Version 3 (or later)"))) - (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License,version 2.0 or (at your option) any later version"))) (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GPL V3+"))) (is (valid= #{"Hippocratic-2.1"} (name->expressions "Hippocratic License"))) (is (valid= #{"ISC WITH Classpath-exception-2.0"} (name->expressions "ISC WITH Classpath-exception-2.0"))) @@ -611,737 +616,105 @@ (is (valid= #{"Zlib"} (name->expressions "Zlib License"))) (is (valid= #{"Zlib"} (name->expressions "zlib License"))) (is (valid= #{"Zlib"} (name->expressions "zlib license"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "All Rights Reserved"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "All rights reserved"))) - 
(is (valid= #{(proprietary-or-commercial)} (name->expressions "Copyright & all rights reserved Lean Pixel"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Copyright 2013 The Fresh Diet. All rights reserved."))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Copyright 2017 All Rights Reserved"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Not fit for public use so formally proprietary software - this is not open-source"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Private License"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Private"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietary License"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietary"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Tulos Commercial License"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "Wildbit Proprietary License"))) - (is (valid= #{(proprietary-or-commercial)} (name->expressions "proprietary"))) - (is (valid= #{(public-domain)} (name->expressions "Public Domain"))) - (is (valid= #{(str "GPL-2.0-or-later OR " (name->unlisted "Swiss Ephemeris"))} (name->expressions "GPL v2+ or Swiss Ephemeris"))) - (is (valid= #{(str "MIT AND " (proprietary-or-commercial))} (name->expressions "Dual MIT & Proprietary"))) - (is (unlisted-only? (name->expressions "${license.id}"))) - (is (unlisted-only? (name->expressions "A Clojure library for Google Cloud Pub/Sub."))) - (is (unlisted-only? (name->expressions "APGL"))) ; Probable typo - (is (unlisted-only? (name->expressions "Amazon Software License"))) - (is (unlisted-only? (name->expressions "BankersBox License"))) - (is (unlisted-only? (name->expressions "Bespoke"))) - (is (unlisted-only? 
(name->expressions "Bloomberg Open API"))) - (is (unlisted-only? (name->expressions "Bostock"))) - (is (unlisted-only? (name->expressions "Built In Project License"))) - (is (unlisted-only? (name->expressions "CRAPL License"))) - (is (unlisted-only? (name->expressions "Contact JMonkeyEngine forums for license details"))) - (is (unlisted-only? (name->expressions "Copyright (C) 2015 by Glowbox LLC"))) - (is (unlisted-only? (name->expressions "Copyright (c) 2011 Drew Colthorp"))) - (is (unlisted-only? (name->expressions "Copyright (c) 2017, Lingchao Xin"))) - (is (unlisted-only? (name->expressions "Copyright 2016, klaraHealth, Inc."))) - (is (unlisted-only? (name->expressions "Copyright 2017 Zensight"))) - (is (unlisted-only? (name->expressions "Copyright 4A Volcano. 2015."))) - (is (unlisted-only? (name->expressions "Copyright Ona Systems Inc."))) - (is (unlisted-only? (name->expressions "Copyright meissa GmbH"))) - (is (unlisted-only? (name->expressions "Copyright © SparX 2014"))) - (is (unlisted-only? (name->expressions "Copyright"))) - (is (unlisted-only? (name->expressions "Custom"))) - (is (unlisted-only? (name->expressions "Cydeas Public License"))) - (is (unlisted-only? (name->expressions "Don't steal my stuff"))) - (is (unlisted-only? (name->expressions "Dropbox ToS"))) - (is (unlisted-only? (name->expressions "FIXME: choose"))) - (is (unlisted-only? (name->expressions "Firebase ToS"))) - (is (unlisted-only? (name->expressions "GG Public License"))) - (is (unlisted-only? (name->expressions "Google Maps ToS"))) - (is (unlisted-only? (name->expressions "GraphiQL license"))) - (is (unlisted-only? (name->expressions "Hackthorn Innovation Ltd"))) - (is (unlisted-only? (name->expressions "Hackthorn Innovation copyright"))) - (is (unlisted-only? (name->expressions "Heap ToS"))) - (is (unlisted-only? (name->expressions "Interel"))) - (is (unlisted-only? (name->expressions "JLGL Backend"))) - (is (unlisted-only? 
(name->expressions "Jedis License"))) - (is (unlisted-only? (name->expressions "Jiegao Owned"))) - (is (unlisted-only? (name->expressions "LICENSE"))) - (is (unlisted-only? (name->expressions "Libre Uso MX"))) - (is (unlisted-only? (name->expressions "License of respective package"))) - (is (unlisted-only? (name->expressions "License"))) - (is (unlisted-only? (name->expressions "Like Clojure."))) - (is (unlisted-only? (name->expressions "Mixed"))) - (is (unlisted-only? (name->expressions "Multiple"))) - (is (unlisted-only? (name->expressions "OTN License Agreement"))) - (is (unlisted-only? (name->expressions "Open Source Community License - Type C version 1.0"))) - (is (unlisted-only? (name->expressions "Other License"))) - (is (unlisted-only? (name->expressions "Provisdom"))) - (is (unlisted-only? (name->expressions "Research License 1.0"))) - (is (unlisted-only? (name->expressions "Restricted Distribution."))) - (is (unlisted-only? (name->expressions "SYNNEX China Owned"))) - (is (unlisted-only? (name->expressions "See the LICENSE file"))) - (is (unlisted-only? (name->expressions "Shen License"))) - (is (unlisted-only? (name->expressions "Slick2D License"))) - (is (unlisted-only? (name->expressions "Stripe ToS"))) - (is (unlisted-only? (name->expressions "TODO"))) - (is (unlisted-only? (name->expressions "TODO: Choose a license"))) - (is (unlisted-only? (name->expressions "The I Haven't Got Around To This Yet License"))) - (is (unlisted-only? (name->expressions "To ill!"))) - (is (unlisted-only? (name->expressions "UNLICENSED"))) - (is (unlisted-only? (name->expressions "University of Buffalo Public License"))) - (is (unlisted-only? (name->expressions "Unknown"))) - (is (unlisted-only? (name->expressions "VNETLPL - Limited Public License"))) - (is (unlisted-only? (name->expressions "VNet PL"))) - (is (unlisted-only? (name->expressions "Various"))) - (is (unlisted-only? (name->expressions "Vimeo License"))) - (is (unlisted-only? 
(name->expressions "WIP"))) - (is (unlisted-only? (name->expressions "YouTube ToS"))) - (is (unlisted-only? (name->expressions "avi license"))) - (is (unlisted-only? (name->expressions "esl-sdk-external-signer-verification"))) - (is (unlisted-only? (name->expressions "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet - (is (unlisted-only? (name->expressions "jank license"))) - (is (unlisted-only? (name->expressions "name"))) - (is (unlisted-only? (name->expressions "none"))) - (is (unlisted-only? (name->expressions "state-node license"))) - (is (unlisted-only? (name->expressions "trove"))) - (is (unlisted-only? (name->expressions "url"))) - (is (unlisted-only? (name->expressions "wisdragon"))) - (is (unlisted-only? (name->expressions "wiseloong"))))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All Rights Reserved"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All rights reserved"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Copyright & all rights reserved Lean Pixel"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Copyright 2013 The Fresh Diet. All rights reserved."))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Copyright 2017 All Rights Reserved"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Not fit for public use so formally proprietary software - this is not open-source"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Private License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Private"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietory. Copyright Jayaraj Poroor. 
All Rights Reserved."))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Tulos Commercial License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Wildbit Proprietary License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "proprietary"))) + (is (valid= #{(lcis/public-domain)} (name->expressions "Public Domain"))) + (is (valid= #{(str "GPL-2.0-or-later OR " (lcis/name->unlisted "Swiss Ephemeris"))} (name->expressions "GPL v2+ or Swiss Ephemeris"))) + (is (valid= #{(str "MIT AND " (lcis/proprietary-commercial))} (name->expressions "Dual MIT & Proprietary"))) + (is (unlisted-only? (name->expressions "${license.id}"))) + (is (unlisted-only? (name->expressions "A Clojure library for Google Cloud Pub/Sub."))) + (is (unlisted-only? (name->expressions "APGL"))) ; Probable typo + (is (unlisted-only? (name->expressions "Amazon Software License"))) + (is (unlisted-only? (name->expressions "BankersBox License"))) + (is (unlisted-only? (name->expressions "Bespoke"))) + (is (unlisted-only? (name->expressions "Bloomberg Open API"))) + (is (unlisted-only? (name->expressions "Bostock"))) + (is (unlisted-only? (name->expressions "Built In Project License"))) + (is (unlisted-only? (name->expressions "CRAPL License"))) + (is (unlisted-only? (name->expressions "Contact JMonkeyEngine forums for license details"))) + (is (unlisted-only? (name->expressions "Copyright (C) 2015 by Glowbox LLC"))) + (is (unlisted-only? (name->expressions "Copyright (c) 2011 Drew Colthorp"))) + (is (unlisted-only? (name->expressions "Copyright (c) 2017, Lingchao Xin"))) + (is (unlisted-only? (name->expressions "Copyright 2016, klaraHealth, Inc."))) + (is (unlisted-only? (name->expressions "Copyright 2017 Zensight"))) + (is (unlisted-only? (name->expressions "Copyright 4A Volcano. 2015."))) + (is (unlisted-only? (name->expressions "Copyright Ona Systems Inc."))) + (is (unlisted-only? 
(name->expressions "Copyright meissa GmbH"))) + (is (unlisted-only? (name->expressions "Copyright © SparX 2014"))) + (is (unlisted-only? (name->expressions "Copyright"))) + (is (unlisted-only? (name->expressions "Custom"))) + (is (unlisted-only? (name->expressions "Cydeas Public License"))) + (is (unlisted-only? (name->expressions "Don't steal my stuff"))) + (is (unlisted-only? (name->expressions "Dropbox ToS"))) + (is (unlisted-only? (name->expressions "FIXME: choose"))) + (is (unlisted-only? (name->expressions "Firebase ToS"))) + (is (unlisted-only? (name->expressions "GG Public License"))) + (is (unlisted-only? (name->expressions "Google Maps ToS"))) + (is (unlisted-only? (name->expressions "GraphiQL license"))) + (is (unlisted-only? (name->expressions "Hackthorn Innovation Ltd"))) + (is (unlisted-only? (name->expressions "Hackthorn Innovation copyright"))) + (is (unlisted-only? (name->expressions "Heap ToS"))) + (is (unlisted-only? (name->expressions "Interel"))) + (is (unlisted-only? (name->expressions "JLGL Backend"))) + (is (unlisted-only? (name->expressions "Jedis License"))) + (is (unlisted-only? (name->expressions "Jiegao Owned"))) + (is (unlisted-only? (name->expressions "LICENSE"))) + (is (unlisted-only? (name->expressions "Libre Uso MX"))) + (is (unlisted-only? (name->expressions "License of respective package"))) + (is (unlisted-only? (name->expressions "License"))) + (is (unlisted-only? (name->expressions "Like Clojure."))) + (is (unlisted-only? (name->expressions "Mixed"))) + (is (unlisted-only? (name->expressions "Multiple"))) + (is (unlisted-only? (name->expressions "OTN License Agreement"))) + (is (unlisted-only? (name->expressions "Open Source Community License - Type C version 1.0"))) + (is (unlisted-only? (name->expressions "Other License"))) + (is (unlisted-only? (name->expressions "Provisdom"))) + (is (unlisted-only? (name->expressions "Research License 1.0"))) + (is (unlisted-only? 
(name->expressions "Restricted Distribution."))) + (is (unlisted-only? (name->expressions "SYNNEX China Owned"))) + (is (unlisted-only? (name->expressions "See the LICENSE file"))) + (is (unlisted-only? (name->expressions "Shen License"))) + (is (unlisted-only? (name->expressions "Slick2D License"))) + (is (unlisted-only? (name->expressions "Stripe ToS"))) + (is (unlisted-only? (name->expressions "TODO"))) + (is (unlisted-only? (name->expressions "TODO: Choose a license"))) + (is (unlisted-only? (name->expressions "The I Haven't Got Around To This Yet License"))) + (is (unlisted-only? (name->expressions "To ill!"))) + (is (unlisted-only? (name->expressions "UNLICENSED"))) + (is (unlisted-only? (name->expressions "University of Buffalo Public License"))) + (is (unlisted-only? (name->expressions "Unknown"))) + (is (unlisted-only? (name->expressions "VNETLPL - Limited Public License"))) + (is (unlisted-only? (name->expressions "VNet PL"))) + (is (unlisted-only? (name->expressions "Various"))) + (is (unlisted-only? (name->expressions "Vimeo License"))) + (is (unlisted-only? (name->expressions "WIP"))) + (is (unlisted-only? (name->expressions "YouTube ToS"))) + (is (unlisted-only? (name->expressions "avi license"))) + (is (unlisted-only? (name->expressions "esl-sdk-external-signer-verification"))) + (is (unlisted-only? (name->expressions "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet + (is (unlisted-only? (name->expressions "jank license"))) + (is (unlisted-only? (name->expressions "name"))) + (is (unlisted-only? (name->expressions "none"))) + (is (unlisted-only? (name->expressions "state-node license"))) + (is (unlisted-only? (name->expressions "trove"))) + (is (unlisted-only? (name->expressions "url"))) + (is (unlisted-only? (name->expressions "wisdragon"))) + (is (unlisted-only? (name->expressions "wiseloong"))))) +;####TEST!!!! 
(comment -; Note: these tests should be extended indefinitely, as it exercises the most-utilised part of the library (matching license names found in POMs) -(deftest name->ids-tests - (testing "Nil, empty or blank names" - (is (nil? (name->ids nil))) - (is (nil? (name->ids ""))) - (is (nil? (name->ids " "))) - (is (nil? (name->ids "\n"))) - (is (nil? (name->ids "\t")))) - (testing "Names that are SPDX license ids" - (is (= #{"AGPL-3.0-only"} (name->ids "AGPL-3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "AGPL-3.0-only"))) - (is (= #{"Apache-2.0"} (name->ids " Apache-2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->ids "Apache-2.0"))) - (is (= #{"CC-BY-SA-4.0"} (name->ids "CC-BY-SA-4.0"))) - (is (= #{"GPL-2.0-only"} (name->ids "GPL-2.0"))) - (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GPL-2.0-with-classpath-exception")))) - (testing "Names that are SPDX expressions" - (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GPL-2.0 WITH Classpath-exception-2.0"))) - (is (= #{"Apache-2.0" "GPL-3.0-only"} (name->ids "Apache-2.0 OR GPL-3.0"))) - (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0" "MIT" "BSD-3-Clause" "Apache-2.0"} - (name->ids "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) - (testing "Names, with an emphasis on those seen in POMs on Maven Central" - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License (AGPL) version 3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0 only"))) - (is (= #{"Apache-1.0"} (name->ids "Apache Software License"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License 1"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License Version 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License, Version 1.0"))) - (is (= #{"Apache-1.0"} 
(name->ids "Apache Software License - Version 1.0"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License, Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache Software License - Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "The MX4J License, version 1.0"))) - (is (= #{"Apache-2.0"} (name->ids " Apache Software License, Version 2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->ids "Apache 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License - Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache v2"))) - (is (= #{"Apache-2.0"} (name->ids "The Apache Software License, Version 2.0"))) - (is (= #{"MIT"} (name->ids "Bouncy Castle Licence"))) ; Note spelling of "licence" - (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->ids "The BSD 3-Clause License (BSD3)"))) - (is (= #{"BSD-3-Clause-Attribution"} (name->ids "BSD 3-Clause Attribution"))) - (is (= #{"CC-BY-3.0"} (name->ids "Attribution 3.0 Unported"))) - (is (= #{"CC-BY-3.0"} (name->ids "Creative Commons 
Legal Code Attribution 3.0 Unported"))) - (is (= #{"CC-BY-4.0"} (name->ids "Attribution 4.0 International"))) - (is (= #{"CC-BY-SA-4.0"} (name->ids "Creative Commons Attribution Share Alike 4.0 International"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) - (is (= #{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) - (is (= #{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License (EPL)"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License, Version 1.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) - (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License v2.0 w/Classpath exception"))) - (is (= #{"JSON"} (name->ids "JSON License"))) - (is (= #{"LGPL-2.0-only"} (name->ids "GNU Library General Public License"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License (LGPL)"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public 
License"))) - (is (= #{"MIT"} (name->ids "MIT License"))) - (is (= #{"MIT"} (name->ids "MIT license"))) ; Test capitalisation - (is (= #{"MIT"} (name->ids "The MIT License"))) - (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License Version 2.0"))) - (is (= #{"Plexus"} (name->ids "Similar to Apache License but with the acknowledgment clause removed")))) ; This is used by JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (testing "Names that appear in licensey things, but are ambiguous" - (is (nil? (name->ids "BSD")))) - (testing "Names that appear in licensey things, but aren't in the SPDX license list" - (is (= #{(public-domain)} (name->ids "Public Domain"))) - (is (= #{(public-domain)} (name->ids "Public domain")))) - (testing "Distinct license names that appear in POMs on Clojars" ; synced from Clojars 2023-07-13 - (is (= #{"AFL-3.0"} (name->ids "Academic Free License 3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "AGPL v3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "AGPLv3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-only"} (name->ids "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU AGPLv3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License 3.0 (AGPL-3.0)"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License Version 3; Other commercial licenses available."))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! 
- (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License, Version 3"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License, version 3"))) - (is (= #{"AGPL-3.0-or-later"} (name->ids "AGPL"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License v3 or later (at your option)"))) - (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License version 3 or lator"))) ; Typo in "lator" - (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License"))) - (is (= #{"AGPL-3.0-or-later"} (name->ids "Affero General Public License,"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU AGPL-V3 or later"))) - (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"AGPL-3.0-or-later"} (name->ids "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0" "EPL-2.0"} (name->ids "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0" "LLVM-exception"} (name->ids "Apache 2.0 with LLVM Exception"))) - (is (= #{"Apache-2.0"} (name->ids " Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) - (is (= #{"Apache-2.0"} (name->ids "APACHE LICENSE, VERSION 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "APACHE"))) ; Listed license missing version - we assume the latest 
- (is (= #{"Apache-2.0"} (name->ids "ASL 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "ASL"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0"} (name->ids "Apache 2 License"))) - (is (= #{"Apache-2.0"} (name->ids "Apache 2 Public License"))) - (is (= #{"Apache-2.0"} (name->ids "Apache 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache 2, see LICENSE"))) - (is (= #{"Apache-2.0"} (name->ids "Apache 2.0 License"))) - (is (= #{"Apache-2.0"} (name->ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Licence 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Licence"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->ids "Apache Licence, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License - Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License - v 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License - v2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License V2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License V2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0, January 2004"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License v 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License v2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License v2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->ids "Apache License, 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0."))) - (is (= #{"Apache-2.0"} (name->ids "Apache License, version 2."))) - (is (= 
#{"Apache-2.0"} (name->ids "Apache License, version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Public License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Public License v2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Public License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->ids "Apache Public License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Public License, version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License - v 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->ids "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software Licesne"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->ids "Apache Sofware Licencse 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Sofware License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache V2 License"))) - (is (= #{"Apache-2.0"} (name->ids "Apache V2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache license version 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache license, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache v2 License"))) - (is (= #{"Apache-2.0"} (name->ids "Apache v2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache v2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache"))) ; Listed license missing clause info - (is (= #{"Apache-2.0"} (name->ids "Apache, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache-2.0 License"))) - (is (= #{"Apache-2.0"} (name->ids "Apache-2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache2 License"))) - (is (= #{"Apache-2.0"} (name->ids "The Apache 2 License"))) - (is (= #{"Apache-2.0"} (name->ids "The Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "The Apache Software License, Version 
2.0"))) - (is (= #{"Apache-2.0"} (name->ids "apache"))) ; Listed license missing version - we assume the latest - (is (= #{"Apache-2.0"} (name->ids "apache-2.0"))) - (is (= #{"Artistic-2.0" "GPL-3.0-only"} (name->ids "Artistic License/GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"Artistic-2.0"} (name->ids "Artistic License"))) ; Listed license missing version - we assume the latest - (is (= #{"Artistic-2.0"} (name->ids "Artistic-2.0"))) - (is (= #{"BSD-2-Clause"} (name->ids "2-Clause BSD License"))) - (is (= #{"BSD-2-Clause"} (name->ids "2-Clause BSD"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD (2 Clause)"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD (2-Clause)"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD (Type 2) Public License"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2 Clause"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2 clause license"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause Licence"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause License"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause \"Simplified\" License"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause license"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-Clause"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD 2-clause \"Simplified\" License"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD C2"))) - (is (= #{"BSD-2-Clause"} (name->ids "BSD-2-Clause"))) - (is (= #{"BSD-2-Clause"} (name->ids "New BSD 2-clause license"))) - (is (= #{"BSD-2-Clause"} (name->ids "Simplified BSD License"))) - (is (= #{"BSD-2-Clause"} (name->ids "Simplified BSD license"))) - (is (= #{"BSD-2-Clause"} (name->ids "The BSD 2-Clause License"))) - (is (= #{"BSD-2-Clause"} (name->ids "Two clause BSD license"))) - (is (= #{"BSD-2-Clause-FreeBSD"} (name->ids "FreeBSD License"))) - (is (= #{"BSD-3-Clause" "MIT"} (name->ids "New-BSD / MIT"))) - (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD License"))) - (is (= 
#{"BSD-3-Clause"} (name->ids "3-Clause BSD"))) - (is (= #{"BSD-3-Clause"} (name->ids "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) - (is (= #{"BSD-3-Clause"} (name->ids "3-clause BSD license"))) - (is (= #{"BSD-3-Clause"} (name->ids "3-clause license (New BSD License or Modified BSD License)"))) - (is (= #{"BSD-3-Clause"} (name->ids "Aduna BSD license"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3 Clause"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause 'New' or 'Revised' License"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause \"New\" or \"Revised\" License"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause license"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-clause License"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-clause license"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-clause"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD New, Version 3.0"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD-3"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD-3-Clause"))) - (is (= #{"BSD-3-Clause"} (name->ids "Modified BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "New BSD License or Modified BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "New BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "New BSD license"))) - (is (= #{"BSD-3-Clause"} (name->ids "Revised BSD"))) - (is (= #{"BSD-3-Clause"} (name->ids "The 3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "The BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->ids "The New BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "The New BSD license"))) - (is (= #{"BSD-3-Clause"} (name->ids "Three Clause BSD-like License"))) -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/clafka/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= 
#{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/party/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/mixradio/radix/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/riverford/datagrep/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 -; (is (= #{"BSD-3-Clause"} (name->ids "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 - (is (= #{"BSD-3-Clause"} (name->ids 
"https://opensource.org/licenses/BSD-3-Clause"))) - (is (= #{"BSD-3-Clause"} (name->ids "new BSD License"))) - (is (= #{"BSD-4-Clause"} (name->ids "BSD License"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->ids "BSD Standard License"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->ids "BSD license"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->ids "BSD"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->ids "BSD-style"))) ; Listed license missing clause info - we assume original (4 clause) - (is (= #{"BSD-4-Clause"} (name->ids "The BSD License"))) - (is (= #{"BSL-1.0"} (name->ids "Boost Software License - Version 1.0"))) - (is (= #{"Beerware"} (name->ids "Beerware 42"))) - (is (= #{"Beerware"} (name->ids "THE BEER-WARE LICENSE"))) - (is (= #{"CC-BY-2.5"} (name->ids "Creative Commons Attribution 2.5 License"))) - (is (= #{"CC-BY-3.0"} (name->ids "Creative Commons 3.0"))) - (is (= #{"CC-BY-4.0"} (name->ids "CC Attribution 4.0 International with exception for binary distribution"))) - (is (= #{"CC-BY-4.0"} (name->ids "CC-BY-4.0"))) - (is (= #{"CC-BY-4.0"} (name->ids "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest - (is (= #{"CC-BY-NC-3.0"} (name->ids "Creative Commons Attribution-NonCommercial 3.0"))) - (is (= #{"CC-BY-NC-4.0"} (name->ids "CC BY-NC"))) ; Listed license missing version - we assume the latest - (is (= #{"CC-BY-NC-ND-3.0"} (name->ids "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: 
the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0 Unported"))) - (is (= #{"CC-BY-SA-3.0"} (name->ids "Creative Commons Attribution-ShareAlike 3.0"))) - (is (= #{"CC-BY-SA-4.0"} (name->ids "CC BY-SA 4.0"))) - (is (= #{"CC0-1.0"} (name->ids "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) - (is (= #{"CC0-1.0"} (name->ids "CC0 1.0 Universal"))) - (is (= #{"CC0-1.0"} (name->ids "CC0"))) - (is (= #{"CC0-1.0"} (name->ids "Public domain (CC0)"))) - (is (= #{"CDDL-1.1"} (name->ids "Common Development and Distribution License (CDDL)"))) ; Listed license missing clause info - (is (= #{"CDDL-1.1"} (name->ids "Common Development and Distribution License"))) ; Listed license missing clause info - (is (= #{"CECILL-2.1"} (name->ids "CeCILL License"))) ; Listed license missing version - we assume the latest - (is (= #{"CPL-1.0"} (name->ids "Common Public License - v 1.0"))) - (is (= #{"CPL-1.0"} (name->ids "Common Public License Version 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "EPL 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "EPL-1.0"))) - (is (= #{"EPL-1.0"} (name->ids "EPL-v1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License (EPL) - v 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - Version 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License 1.0 (EPL-1.0)"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License v 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License v1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License version 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License, 
version 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public Licese - v 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "https://github.com/cmiles74/uio/blob/master/LICENSE"))) - (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest - (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{"EPL-2.0" "GPL-2.0-or-later" "Classpath-exception-2.0"} (name->ids "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! - (is (= #{"EPL-2.0" "GPL-2.0-or-later"} (name->ids "EPL-2.0 OR GPL-2.0-or-later"))) - (is (= #{"EPL-2.0" "GPL-3.0-or-later" "Classpath-exception-2.0"} (name->ids "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{"EPL-2.0" "GPL-3.0-or-later"} (name->ids "EPL-2.0 OR GPL-3.0-or-later"))) - (is (= #{"EPL-2.0" "LGPL-3.0-or-later"} (name->ids "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0" "MIT"} (name->ids "Eclipse Public MIT"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) - (is (= #{"EPL-2.0"} (name->ids "Copyright (C) 2014 Mathieu Gauthron. 
Distributed under the Eclipse Public License."))) - (is (= #{"EPL-2.0"} (name->ids "Distributed under the Eclipse Public License, the same as Clojure."))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "ECLIPSE PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "EPL"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "EPL-2.0"))) - (is (= #{"EPL-2.0"} (name->ids "EPLv2"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse License"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public Licence"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License - v 2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0,"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License v2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License, v. 
2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License, v2"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Pulic License"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "Eclipse public license, the same as Clojure"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse"))) ; Listed license missing version - we assume the latest - (is (= #{"EPL-2.0"} (name->ids "Some Eclipse Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"EUPL-1.1"} (name->ids "European Union Public Licence (EUPL v.1.1)"))) - (is (= #{"EUPL-1.1"} (name->ids "The European Union Public License, Version 1.1"))) - (is (= #{"EUPL-1.2"} (name->ids "European Union Public Licence v. 1.2"))) - (is (= #{"EUPL-1.2"} (name->ids "European Union Public License 1.2 or later"))) - (is (= #{"EUPL-1.2"} (name->ids "European Union Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GNU General Public License, Version 2, with the Classpath Exception"))) - (is (= #{"GPL-2.0-only" "Classpath-exception-2.0"} (name->ids "GPLv2 with Classpath exception"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU General Public License 2"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License v2"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License, Version 2"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License, Version 2.0"))) - (is (= #{"GPL-2.0-only"} (name->ids "GNU Public License, v2"))) - (is (= #{"GPL-2.0-only"} (name->ids "GPL v2"))) - (is (= #{"GPL-2.0-only"} (name->ids "GPL-2.0"))) - (is (= #{"GPL-2.0-only"} (name->ids "GPLv2"))) - (is (= #{"GPL-2.0-only"} (name->ids "The GNU General Public License, Version 2"))) - (is (= #{"GPL-2.0-or-later" "Classpath-exception-2.0"} 
(name->ids "GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (= #{"GPL-2.0-or-later"} (name->ids "GNU GPL V2+"))) - (is (= #{"GPL-2.0-or-later"} (name->ids "GPL 2.0+"))) - (is (= #{"GPL-2.0-or-later"} (name->ids "GPL v2+ or Swiss Ephemeris"))) ; ####TODO: THINK MORE ABOUT THIS - (is (= #{"GPL-3.0-only"} (name->ids " GNU GENERAL PUBLIC LICENSE Version 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v 3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v. 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL v3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU GPL, version 3, 29 June 2007"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License V3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License Version 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License v3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License v3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License, Version 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License, version 3 (GPLv3)"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU General Public License, version 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU Public License V. 
3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU Public License V3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNU public licence V3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "GNUv3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL 3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL V3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL v3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL version 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL-3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL-3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL-3.0-only"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPL3"))) - (is (= #{"GPL-3.0-only"} (name->ids "GPLv3"))) - (is (= #{"GPL-3.0-only"} (name->ids "General Public License 3"))) - (is (= #{"GPL-3.0-only"} (name->ids "General Public License v3.0"))) - (is (= #{"GPL-3.0-only"} (name->ids "The GNU General Public License v3.0"))) - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GPL v3+"))) - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU GPLv3+"))) - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License (GPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License v3.0 or later"))) - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License, Version 3 (or later)"))) - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU General Public License,version 2.0 or (at your option) any later version"))) - (is (= #{"GPL-3.0-or-later"} (name->ids "GNU Public License"))) ; Listed license missing version - we assume the latest - (is (= 
#{"GPL-3.0-or-later"} (name->ids "GNU"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->ids "GPL V3+"))) - (is (= #{"GPL-3.0-or-later"} (name->ids "GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"GPL-3.0-or-later"} (name->ids "The GNU General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"Hippocratic-2.1"} (name->ids "Hippocratic License"))) - (is (= #{"ISC" "Classpath-exception-2.0"} (name->ids "ISC WITH Classpath-exception-2.0"))) - (is (= #{"ISC"} (name->ids "ISC Licence"))) - (is (= #{"ISC"} (name->ids "ISC License"))) - (is (= #{"ISC"} (name->ids "ISC"))) - (is (= #{"ISC"} (name->ids "MIT/ISC License"))) - (is (= #{"ISC"} (name->ids "MIT/ISC"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU LGPL v2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License v2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Public License, Version 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Lesser General Pulic License v2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Library or Lesser General Public License (LGPL) 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "GNU Library or Lesser General Public License (LGPL) V2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "LGPL 2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "LGPL-2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "LGPL-2.1-only"))) - (is (= #{"LGPL-2.1-only"} (name->ids "LGPLv2.1"))) - (is (= #{"LGPL-2.1-only"} (name->ids "lgpl_v2_1"))) - (is (= #{"LGPL-2.1-or-later"} (name->ids "GNU Lesser General Public License, version 2.1 or newer"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU General 
Lesser Public License (LGPL) version 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL 3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL v3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL version 3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPL-3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU LGPLv3 "))) ; Note trailing space - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public Licence 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public Licence"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License (LGPL) Version 3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License - v 3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License - v 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License - v3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License v3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License version 3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License version 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser General Public License, Version 3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser Genereal Public License"))) ; Listed license missing version - we 
assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Lesser Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "GNU Library or Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "Gnu Lesser Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "L GPL 3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL 3.0 (GNU Lesser General Public License)"))) - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL 3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL Open Source license"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL v3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL-3.0"))) - (is (= #{"LGPL-3.0-only"} (name->ids "LGPL-3.0-only"))) - (is (= #{"LGPL-3.0-only"} (name->ids "LGPLv3"))) - (is (= #{"LGPL-3.0-only"} (name->ids "Lesser GPL"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-only"} (name->ids "Lesser General Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, Version 3 or later"))) - (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, v. 
3 or later"))) - (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, version 3 or later"))) - (is (= #{"LGPL-3.0-or-later"} (name->ids "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) - (is (= #{"LGPL-3.0-or-later"} (name->ids "LGPL-3.0-or-later"))) - (is (= #{"LGPL-3.0-or-later"} (name->ids "LGPLv3+"))) - (is (= #{"LGPL-3.0-or-later"} (name->ids "Licensed under GNU Lesser General Public License Version 3 or later (the "))) ; Note trailing space - (is (= #{"Libpng"} (name->ids "zlib/libpng License"))) - (is (= #{(public-domain)} (name->ids "Public Domain"))) - (is (= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (name->ids "MIT/Apache-2.0/BSD-3-Clause"))) - (is (= #{"MIT"} (name->ids " MIT License"))) - (is (= #{"MIT"} (name->ids "Distributed under an MIT-style license (see LICENSE for details)."))) - (is (= #{"MIT" (proprietary-or-commercial)} (name->ids "Dual MIT & Proprietary"))) - (is (= #{"MIT"} (name->ids "Expat (MIT) license"))) - (is (= #{"MIT"} (name->ids "MIT LICENSE"))) - (is (= #{"MIT"} (name->ids "MIT Licence"))) - (is (= #{"MIT"} (name->ids "MIT Licens"))) - (is (= #{"MIT"} (name->ids "MIT License (MIT)"))) - (is (= #{"MIT"} (name->ids "MIT License"))) - (is (= #{"MIT"} (name->ids "MIT Public License"))) - (is (= #{"MIT"} (name->ids "MIT license"))) - (is (= #{"MIT"} (name->ids "MIT public License"))) - (is (= #{"MIT"} (name->ids "MIT public license"))) - (is (= #{"MIT"} (name->ids "MIT"))) - (is (= #{"MIT"} (name->ids "MIT-style license (see LICENSE for details)."))) - (is (= #{"MIT"} (name->ids "THE MIT LICENSE"))) - (is (= #{"MIT"} (name->ids "The MIT Licence"))) - (is (= #{"MIT"} (name->ids "The MIT License (MIT) "))) ; Note trailing space - (is (= #{"MIT"} (name->ids "The MIT License (MIT) | Open Source Initiative"))) - (is (= #{"MIT"} (name->ids "The MIT License (MIT)"))) - (is (= #{"MIT"} (name->ids "The MIT License"))) - (is (= #{"MIT"} (name->ids "The MIT License."))) - (is (= #{"MIT"} 
(name->ids "http://opensource.org/licenses/MIT"))) -; (is (= #{"MIT"} (name->ids "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 - (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License Version 1.0"))) - (is (= #{"MPL-1.1"} (name->ids "Mozilla Public License Version 1.1"))) - (is (= #{"MPL-2.0"} (name->ids "MPL 2"))) - (is (= #{"MPL-2.0"} (name->ids "MPL 2.0"))) - (is (= #{"MPL-2.0"} (name->ids "MPL v2"))) - (is (= #{"MPL-2.0"} (name->ids "MPL"))) ; Listed license missing version - we assume the latest - (is (= #{"MPL-2.0"} (name->ids "MPL-2.0"))) - (is (= #{"MPL-2.0"} (name->ids "MPL-v2.0"))) - (is (= #{"MPL-2.0"} (name->ids "MPL2.0"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public Licence 2.0"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License (Version 2.0)"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License 2.0"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License Version 2.0"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License v2.0"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License v2.0+"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License version 2"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License version 2.0"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License"))) ; Listed license missing version - we assume the latest - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License, v. 
2.0"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License, version 2.0"))) - (is (= #{"NASA-1.3"} (name->ids "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) - (is (= #{"NASA-1.3"} (name->ids "NASA Open Source Agreement, Version 1.3"))) - (is (= #{"NCSA"} (name->ids "University of Illinois/NCSA Open Source License"))) - (is (= #{"Ruby"} (name->ids "Ruby License"))) - (is (= #{"SGI-B-2.0"} (name->ids "SGI"))) ; Listed license missing version - we assume the latest - (is (= #{"SMPPL"} (name->ids "SMPPL"))) - (is (= #{"Unlicense"} (name->ids "The UnLicense"))) - (is (= #{"Unlicense"} (name->ids "The Unlicence"))) - (is (= #{"Unlicense"} (name->ids "The Unlicense"))) - (is (= #{"Unlicense"} (name->ids "UnLicense"))) - (is (= #{"Unlicense"} (name->ids "Unlicense License"))) - (is (= #{"Unlicense"} (name->ids "Unlicense"))) - (is (= #{"Unlicense"} (name->ids "unlicense"))) - (is (= #{"W3C"} (name->ids "W3C Software license"))) - (is (= #{"WTFPL"} (name->ids "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) - (is (= #{"WTFPL"} (name->ids "DO-WTF-U-WANT-2"))) - (is (= #{"WTFPL"} (name->ids "Do What The Fuck You Want To Public License"))) - (is (= #{"WTFPL"} (name->ids "Do What The Fuck You Want To Public License, Version 2"))) - (is (= #{"WTFPL"} (name->ids "WTFPL v2"))) - (is (= #{"WTFPL"} (name->ids "WTFPL – Do What the Fuck You Want to Public License"))) - (is (= #{"WTFPL"} (name->ids "WTFPL"))) - (is (= #{"X11"} (name->ids "MIT X11 License"))) - (is (= #{"X11"} (name->ids "MIT/X11"))) - (is (= #{"Zlib"} (name->ids "Zlib License"))) - (is (= #{"Zlib"} (name->ids "zlib License"))) - (is (= #{"Zlib"} (name->ids "zlib license"))) - (is (unlisted-only? (name->ids "${license.id}"))) - (is (unlisted-only? (name->ids "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (unlisted-only? (name->ids "A Clojure library for Google Cloud Pub/Sub."))) - (is (unlisted-only? 
(name->ids "APGL"))) ; Probable typo - (is (= #{(proprietary-or-commercial)} (name->ids "All Rights Reserved"))) - (is (= #{(proprietary-or-commercial)} (name->ids "All rights reserved"))) - (is (unlisted-only? (name->ids "Amazon Software License"))) - (is (unlisted-only? (name->ids "BankersBox License"))) - (is (unlisted-only? (name->ids "Bespoke"))) - (is (unlisted-only? (name->ids "Bloomberg Open API"))) - (is (unlisted-only? (name->ids "Bostock"))) - (is (unlisted-only? (name->ids "Built In Project License"))) - (is (unlisted-only? (name->ids "CRAPL License"))) - (is (unlisted-only? (name->ids "Contact JMonkeyEngine forums for license details"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Copyright & all rights reserved Lean Pixel"))) - (is (unlisted-only? (name->ids "Copyright (C) 2015 by Glowbox LLC"))) - (is (unlisted-only? (name->ids "Copyright (c) 2011 Drew Colthorp"))) - (is (unlisted-only? (name->ids "Copyright (c) 2017, Lingchao Xin"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Copyright 2013 The Fresh Diet. All rights reserved."))) - (is (unlisted-only? (name->ids "Copyright 2016, klaraHealth, Inc."))) - (is (= #{(proprietary-or-commercial)} (name->ids "Copyright 2017 All Rights Reserved"))) - (is (unlisted-only? (name->ids "Copyright 2017 Zensight"))) - (is (unlisted-only? (name->ids "Copyright 4A Volcano. 2015."))) - (is (unlisted-only? (name->ids "Copyright Ona Systems Inc."))) - (is (unlisted-only? (name->ids "Copyright meissa GmbH"))) - (is (unlisted-only? (name->ids "Copyright © SparX 2014"))) - (is (unlisted-only? (name->ids "Copyright"))) - (is (unlisted-only? (name->ids "Custom"))) - (is (unlisted-only? (name->ids "Cydeas Public License"))) - (is (unlisted-only? (name->ids "Don't steal my stuff"))) - (is (unlisted-only? (name->ids "Dropbox ToS"))) - (is (unlisted-only? (name->ids "FIXME: choose"))) - (is (unlisted-only? (name->ids "Firebase ToS"))) - (is (unlisted-only? (name->ids "GG Public License"))) - (is (unlisted-only? 
(name->ids "Google Maps ToS"))) - (is (unlisted-only? (name->ids "GraphiQL license"))) - (is (unlisted-only? (name->ids "Hackthorn Innovation Ltd"))) - (is (unlisted-only? (name->ids "Hackthorn Innovation copyright"))) - (is (unlisted-only? (name->ids "Heap ToS"))) - (is (unlisted-only? (name->ids "Interel"))) - (is (unlisted-only? (name->ids "JLGL Backend"))) - (is (unlisted-only? (name->ids "Jedis License"))) - (is (unlisted-only? (name->ids "Jiegao Owned"))) - (is (unlisted-only? (name->ids "LICENSE"))) - (is (unlisted-only? (name->ids "Libre Uso MX"))) - (is (unlisted-only? (name->ids "License of respective package"))) - (is (unlisted-only? (name->ids "License"))) - (is (unlisted-only? (name->ids "Like Clojure."))) - (is (unlisted-only? (name->ids "Mixed"))) - (is (unlisted-only? (name->ids "Multiple"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Not fit for public use so formally proprietary software - this is not open-source"))) - (is (unlisted-only? (name->ids "OTN License Agreement"))) - (is (unlisted-only? (name->ids "Open Source Community License - Type C version 1.0"))) - (is (unlisted-only? (name->ids "Other License"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Private License"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Private"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Proprietary License"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Proprietary"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) - (is (unlisted-only? (name->ids "Provisdom"))) - (is (unlisted-only? (name->ids "Research License 1.0"))) - (is (unlisted-only? (name->ids "Restricted Distribution."))) - (is (unlisted-only? (name->ids "SYNNEX China Owned"))) - (is (unlisted-only? (name->ids "See the LICENSE file"))) - (is (unlisted-only? (name->ids "Shen License"))) - (is (unlisted-only? (name->ids "Slick2D License"))) - (is (unlisted-only? 
(name->ids "Stripe ToS"))) - (is (unlisted-only? (name->ids "TODO"))) - (is (unlisted-only? (name->ids "TODO: Choose a license"))) - (is (unlisted-only? (name->ids "The I Haven't Got Around To This Yet License"))) - (is (unlisted-only? (name->ids "To ill!"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Tulos Commercial License"))) - (is (unlisted-only? (name->ids "UNLICENSED"))) - (is (unlisted-only? (name->ids "University of Buffalo Public License"))) - (is (unlisted-only? (name->ids "Unknown"))) - (is (unlisted-only? (name->ids "VNETLPL - Limited Public License"))) - (is (unlisted-only? (name->ids "VNet PL"))) - (is (unlisted-only? (name->ids "Various"))) - (is (unlisted-only? (name->ids "Vimeo License"))) - (is (unlisted-only? (name->ids "WIP"))) - (is (= #{(proprietary-or-commercial)} (name->ids "Wildbit Proprietary License"))) - (is (unlisted-only? (name->ids "YouTube ToS"))) - (is (unlisted-only? (name->ids "avi license"))) - (is (unlisted-only? (name->ids "esl-sdk-external-signer-verification"))) - (is (unlisted-only? (name->ids "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet - (is (unlisted-only? (name->ids "jank license"))) - (is (unlisted-only? (name->ids "name"))) - (is (unlisted-only? (name->ids "none"))) - (is (= #{(proprietary-or-commercial)} (name->ids "proprietary"))) - (is (unlisted-only? (name->ids "state-node license"))) - (is (unlisted-only? (name->ids "trove"))) - (is (unlisted-only? (name->ids "url"))) - (is (unlisted-only? (name->ids "wisdragon"))) - (is (unlisted-only? (name->ids "wiseloong"))))) - (deftest uri->ids-tests (testing "Nil, empty or blank uri" (is (nil? 
(uri->ids nil))) @@ -1367,6 +740,4 @@ (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))) (is (= #{"Apache-2.0"} (uri->ids "HTTPS://GITHUB.COM/pmonks/lice-comb/blob/main/LICENSE"))))) - - -) +) \ No newline at end of file diff --git a/test/lice_comb/maven_test.clj b/test/lice_comb/maven_test.clj index f78ca49..900e6d4 100644 --- a/test/lice_comb/maven_test.clj +++ b/test/lice_comb/maven_test.clj @@ -19,40 +19,40 @@ (ns lice-comb.maven-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.maven :refer [pom->ids]])) + [lice-comb.maven :refer [pom->expressions]])) (use-fixtures :once fixture) (def test-data-path "./test/lice_comb/data") -(deftest pom->ids-tests +(deftest pom->expressions-tests (testing "Nil pom" - (is (nil? (pom->ids nil)))) + (is (nil? (pom->expressions nil)))) (testing "Invalid filenames" - (is (thrown? java.io.FileNotFoundException (pom->ids ""))) - (is (thrown? java.io.FileNotFoundException (pom->ids " "))) - (is (thrown? java.io.FileNotFoundException (pom->ids "\t"))) - (is (thrown? java.io.FileNotFoundException (pom->ids "\n"))) - (is (thrown? java.io.FileNotFoundException (pom->ids "this-file-doesnt-exist.pom"))) - (is (thrown? java.io.FileNotFoundException (pom->ids "./this/path/and/file/doesnt/exist.pom")))) + (is (thrown? java.io.FileNotFoundException (pom->expressions ""))) + (is (thrown? java.io.FileNotFoundException (pom->expressions " "))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "\t"))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "\n"))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "this-file-doesnt-exist.pom"))) + (is (thrown? 
java.io.FileNotFoundException (pom->expressions "./this/path/and/file/doesnt/exist.pom")))) (testing "Synthetic pom files" - (is (= #{"Apache-2.0"} (pom->ids (str test-data-path "/simple.pom")))) - (is (= #{"BSD-3-Clause"} (pom->ids (str test-data-path "/no-xml-ns.pom"))))) + (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/simple.pom")))) + (is (= #{"BSD-3-Clause"} (pom->expressions (str test-data-path "/no-xml-ns.pom"))))) (testing "Real pom files - local" - (is (= #{"Apache-2.0"} (pom->ids (str test-data-path "/asf-cat-1.0.12.pom"))))) + (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/asf-cat-1.0.12.pom"))))) (testing "Real pom files - remote" - (is (= #{"Apache-2.0"} (pom->ids "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) - (is (= #{"LicenseRef-lice-comb-public-domain"} (pom->ids "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX - (is (= #{"EPL-1.0"} (pom->ids "https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) - (is (= #{"Apache-2.0"} (pom->ids "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) - (is (= #{"Apache-2.0"} (pom->ids "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) - (is (nil? 
(pom->ids "https://repo.clojars.org/borkdude/sci.impl.reflector/0.0.1/sci.impl.reflector-0.0.1.pom"))) ; This project has no license information in its pom - (is (= #{"CDDL-1.0"} (pom->ids "https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.pom"))) - (is (= #{"Plexus"} (pom->ids "https://repo1.maven.org/maven2/org/jdom/jdom2/2.0.6.1/jdom2-2.0.6.1.pom"))) ; See https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (is (= #{"GPL-3.0"} (pom->ids "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) + (is (= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) + (is (= #{"LicenseRef-lice-comb-public-domain"} (pom->expressions "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX + (is (= #{"EPL-1.0"} (pom->expressions "https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) + (is (= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) + (is (= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) + (is (nil? 
(pom->expressions "https://repo.clojars.org/borkdude/sci.impl.reflector/0.0.1/sci.impl.reflector-0.0.1.pom"))) ; This project has no license information in its pom + (is (= #{"CDDL-1.0"} (pom->expressions "https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.pom"))) + (is (= #{"Plexus"} (pom->expressions "https://repo1.maven.org/maven2/org/jdom/jdom2/2.0.6.1/jdom2-2.0.6.1.pom"))) ; See https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (is (= #{"GPL-3.0"} (pom->expressions "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) (testing "Real pom files - remote - dual-licensed" - (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (pom->ids "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) + (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (pom->expressions "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) (testing "Synthetic pom files with licenses in parent - local" - (is (= #{"Apache-2.0"} (pom->ids (str test-data-path "/with-parent.pom"))))) + (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/with-parent.pom"))))) (testing "Real pom files with licenses in parent - remote" - (is (= #{"Apache-2.0"} (pom->ids "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.12.69/aws-java-sdk-core-1.12.69.pom"))))) + (is (= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.12.69/aws-java-sdk-core-1.12.69.pom"))))) From 5e5aba823ce97872af7fa49ded86aad9c3fed84a Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 1 Sep 2023 17:06:30 -0700 Subject: [PATCH 15/34] :construction: Ongoing work on issue #3 --- src/lice_comb/impl/matching.clj | 39 ++++++++++++++++++++ src/lice_comb/maven.clj | 10 +++--- test/lice_comb/deps_test.clj | 62 
++++++++++++++++---------------- test/lice_comb/files_test.clj | 6 ++-- test/lice_comb/matching_test.clj | 5 ++- test/lice_comb/maven_test.clj | 7 ++-- 6 files changed, 86 insertions(+), 43 deletions(-) diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index e668146..bcda9f4 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -34,6 +34,34 @@ :redirect-policy :always :cookie-policy :none}))) +(def ^:private gpl-ids-with-only-or-later #{"AGPL-1.0" + "AGPL-3.0" + "GFDL-1.1" + "GFDL-1.2" + "GFDL-1.3" + "GPL-1.0" + "GPL-2.0" + "GPL-3.0" + "LGPL-2.0" + "LGPL-2.1" + "LGPL-3.0"}) + +(defn- fix-gpl-only-or-later + "If the set of ids includes both an 'only' and an 'or-later' variant of the + same underlying GNU family identifier, remove the 'only' variant." + [ids] + (loop [result ids + f (first gpl-ids-with-only-or-later) + r (rest gpl-ids-with-only-or-later)] + (if f + (recur (if (and (contains? result (str f "-only")) + (contains? result (str f "-or-later"))) + (disj result (str f "-only")) + result) + (first r) + (rest r)) + result))) + (defn- fix-public-domain-cc0 "If the set of ids includes both CC0-1.0 and lice-comb's public domain LicenseRef, remove the LicenseRef as it's redundant." @@ -43,12 +71,23 @@ (disj ids (lcis/public-domain)) ids)) +(defn- fix-mpl-2 + "If the set of ids includes both MPL-2.0 and MPL-2.0-no-copyleft-exception, + remove the MPL-2.0-no-copyleft-exception as it's redundant." + [ids] + (if (and (contains? ids "MPL-2.0") + (contains? ids "MPL-2.0-no-copyleft-exception")) + (disj ids "MPL-2.0-no-copyleft-exception") + ids)) + (defn manual-fixes "Manually fix certain invalid combinations of license identifiers in a set." 
[ids] (when ids (some-> ids + fix-gpl-only-or-later fix-public-domain-cc0 + fix-mpl-2 set))) (defmulti text->ids diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index 07bbc28..78eec06 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -77,11 +77,11 @@ The result has metadata attached that describes how the identifiers in the expression(s) were determined." [{:keys [name url]}] - ; Attempt to find a match by URL first - (if-let [licenses (lcmtch/uri->ids url)] - licenses - ; Then match by name - (lcmtch/name->expressions name))) + ; Attempt to find a match from the name first + (if-let [expressions (lcmtch/name->expressions name)] + expressions + ; Then match by url + (lcmtch/uri->ids url))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index 14102b9..bc213cb 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -58,33 +58,33 @@ (is (= #{"EPL-1.0"} (dep->expressions ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) (is (= #{"CC0-1.0"} (dep->expressions ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) (is (= #{"Apache-2.0"} (dep->expressions ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) 
- (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} 
(dep->expressions ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->expressions ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} 
(dep->expressions ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn 
:mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) @@ -131,11 +131,11 @@ (is (= #{"EPL-1.0"} (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) (is (= #{"EPL-1.0"} (dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) (testing "Valid deps - multi license" - (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"CDDL-1.1" "GPL-2.0-with-classpath-exception"} (dep->expressions 
['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) + (is (= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (= #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} (dep->expressions ['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) (is (= #{"Apache-2.0" "LGPL-2.1-or-later"} (dep->expressions ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) - (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (dep->expressions ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (dep->expressions ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) (testing "Valid deps - Maven classifiers" ; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 (is (= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index 046cf2b..1f06280 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -82,8 +82,8 @@ (testing "Non-existent files" (is (thrown? 
java.io.FileNotFoundException (file->expressions "this_file_does_not_exist")))) (testing "Files on disk" - (is (= #{"CC-BY-4.0"} (file->expressions (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 - (is (= #{"MPL-2.0" "MPL-2.0-no-copyleft-exception"} (file->expressions (str test-data-path "/MPL-2.0/LICENSE"))))) + (is (= #{"CC-BY-4.0"} (file->expressions (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (= #{"MPL-2.0"} (file->expressions (str test-data-path "/MPL-2.0/LICENSE"))))) (testing "URLs" (is (= #{"Apache-2.0"} (file->expressions "https://www.apache.org/licenses/LICENSE-2.0.txt"))) (is (= #{"Apache-2.0"} (file->expressions (io/as-url "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) @@ -107,7 +107,7 @@ (is (thrown? java.io.FileNotFoundException (dir->expressions "this_directory_does_not_exist"))) (is (thrown? java.nio.file.NotDirectoryException (dir->expressions "deps.edn")))) (testing "Valid directory" - (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "MPL-2.0-no-copyleft-exception" "CC-BY-4.0"} (dir->expressions "."))))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "CC-BY-4.0"} (dir->expressions "."))))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 (deftest zip->expressions-tests (testing "Nil, empty, or blank zip file name" diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index ee157a3..4a6fbf3 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -50,11 +50,14 @@ (is (true? (every? false? (map unlisted? (sl/ids))))) (is (true? (every? false? (map unlisted? (se/ids))))))) +(def not-nil? (complement nil?)) + (defn valid= "Returns true if all of the SPDX exceptions in s2 are valid, and also that s1 equals s2." [s1 s2] - (and (set? s2) + (and ;(not-nil? 
(meta s2)) ;####TODO: THIS MAY CONFUSE THINGS!!!! + (set? s2) (= s1 s2) (every? true? (map sexp/valid? s2)))) diff --git a/test/lice_comb/maven_test.clj b/test/lice_comb/maven_test.clj index 900e6d4..526b458 100644 --- a/test/lice_comb/maven_test.clj +++ b/test/lice_comb/maven_test.clj @@ -19,6 +19,7 @@ (ns lice-comb.maven-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.impl.spdx :as lcis] [lice-comb.maven :refer [pom->expressions]])) (use-fixtures :once fixture) @@ -42,16 +43,16 @@ (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/asf-cat-1.0.12.pom"))))) (testing "Real pom files - remote" (is (= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) - (is (= #{"LicenseRef-lice-comb-public-domain"} (pom->expressions "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX + (is (= #{(lcis/public-domain)} (pom->expressions "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX (is (= #{"EPL-1.0"} (pom->expressions "https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) (is (= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) (is (= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) (is (nil? 
(pom->expressions "https://repo.clojars.org/borkdude/sci.impl.reflector/0.0.1/sci.impl.reflector-0.0.1.pom"))) ; This project has no license information in its pom (is (= #{"CDDL-1.0"} (pom->expressions "https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.pom"))) (is (= #{"Plexus"} (pom->expressions "https://repo1.maven.org/maven2/org/jdom/jdom2/2.0.6.1/jdom2-2.0.6.1.pom"))) ; See https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (is (= #{"GPL-3.0"} (pom->expressions "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) + (is (= #{"GPL-3.0-only"} (pom->expressions "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) (testing "Real pom files - remote - dual-licensed" - (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (pom->expressions "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) + (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (pom->expressions "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) (testing "Synthetic pom files with licenses in parent - local" (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/with-parent.pom"))))) (testing "Real pom files with licenses in parent - remote" From a7aa436656dbc718da49af6db96b832aa70028d0 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 1 Sep 2023 18:04:58 -0700 Subject: [PATCH 16/34] :construction: Ongoing work on issue #3 --- resources/lice_comb/deps/fallbacks.edn | 6 +- resources/lice_comb/deps/overrides.edn | 2 +- src/lice_comb/impl/matching.clj | 13 ++ test/lice_comb/deps_test.clj | 208 ++++++++++++------------- test/lice_comb/files_test.clj | 25 +-- test/lice_comb/matching_test.clj | 16 +- test/lice_comb/maven_test.clj | 44 +++--- 
test/lice_comb/test_boilerplate.clj | 23 ++- 8 files changed, 180 insertions(+), 157 deletions(-) diff --git a/resources/lice_comb/deps/fallbacks.edn b/resources/lice_comb/deps/fallbacks.edn index 2f61dea..e4f612a 100644 --- a/resources/lice_comb/deps/fallbacks.edn +++ b/resources/lice_comb/deps/fallbacks.edn @@ -1,5 +1,5 @@ { - borkdude/sci.impl.reflector {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/borkdude/sci/blob/master/LICENSE"} - org.ow2.asm/asm {:spdx true :licenses #{"BSD-3-Clause"} :evidence "https://asm.ow2.io/license.html"} - slipset/deps-deploy {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/slipset/deps-deploy/blob/master/LICENSE"} +; borkdude/sci.impl.reflector {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/borkdude/sci/blob/master/LICENSE"} +; org.ow2.asm/asm {:spdx true :licenses #{"BSD-3-Clause"} :evidence "https://asm.ow2.io/license.html"} +; slipset/deps-deploy {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/slipset/deps-deploy/blob/master/LICENSE"} } \ No newline at end of file diff --git a/resources/lice_comb/deps/overrides.edn b/resources/lice_comb/deps/overrides.edn index f7052bc..6784832 100644 --- a/resources/lice_comb/deps/overrides.edn +++ b/resources/lice_comb/deps/overrides.edn @@ -1,3 +1,3 @@ { - javax.mail/mail {:spdx true :licenses #{"CDDL-1.1" "GPL-2.0-with-classpath-exception"} :evidence "https://javaee.github.io/javamail/JavaMail-License"} +; javax.mail/mail {:spdx true :licenses #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} :evidence "https://javaee.github.io/javamail/JavaMail-License"} } \ No newline at end of file diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index bcda9f4..9ac7720 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -34,6 +34,18 @@ :redirect-policy :always :cookie-policy :none}))) +(def ^:private direct-replacements-map { + #{"GPL-2.0-only" 
"Classpath-exception-2.0"} #{"GPL-2.0-only WITH Classpath-exception-2.0"} + #{"GPL-2.0-or-later" "Classpath-exception-2.0"} #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} + #{"GPL-3.0-only" "Classpath-exception-2.0"} #{"GPL-3.0-only WITH Classpath-exception-2.0"} + #{"GPL-3.0-or-later" "Classpath-exception-2.0"} #{"GPL-3.0-or-later WITH Classpath-exception-2.0"} + }) + +(defn- direct-replacements + "Self-evident direct replacements." + [ids] + (get direct-replacements-map ids ids)) + (def ^:private gpl-ids-with-only-or-later #{"AGPL-1.0" "AGPL-3.0" "GFDL-1.1" @@ -85,6 +97,7 @@ [ids] (when ids (some-> ids + direct-replacements fix-gpl-only-or-later fix-public-domain-cc0 fix-mpl-2 diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index bc213cb..dbf27bf 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -18,7 +18,7 @@ (ns lice-comb.deps-test (:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.test-boilerplate :refer [fixture valid=]] [lice-comb.impl.spdx :as lcis] [lice-comb.deps :refer [dep->expressions deps-expressions]])) @@ -35,120 +35,120 @@ (is (nil? (dep->expressions ['com.github.pmonks/invalid-project {:deps/manifest :mvn :mvn/version "0.0.1"}]))) ; Invalid GA (is (nil? 
(dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.0.0-SNAPSHOT"}])))) ; Invalid V (testing "Valid deps - single license" - (is (= #{"Apache-2.0"} (dep->expressions ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) - (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) - (is (= #{(lcis/public-domain)} (dep->expressions ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile {:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['com.google.guava/guava {:deps/manifest :mvn :mvn/version "31.0.1-jre"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} 
(dep->expressions ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"CDDL-1.0"} (dep->expressions ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) - (is (= #{"CDDL-1.0"} (dep->expressions ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['javax.enterprise/cdi-api {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) - (is (= #{"CC0-1.0"} (dep->expressions ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn 
:mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions 
['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->expressions ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.codec {:deps/manifest :mvn :mvn/version "0.1.1"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.json {:deps/manifest :mvn :mvn/version "2.4.0"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= 
#{"EPL-1.0"} (dep->expressions ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer {:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) - (is (= #{"MIT"} (dep->expressions ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn :mvn/version "2.6.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-component-annotations {:deps/manifest :mvn :mvn/version "2.1.0"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= 
#{"BSD-3-Clause"} (dep->expressions ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) - (is (= #{"Plexus"} (dep->expressions ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version "2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"MIT"} (dep->expressions ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "1.7"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) - (is (= #{(lcis/public-domain)} (dep->expressions ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) - (is (= #{"Apache-2.0"} (dep->expressions ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) + (is (valid= #{"Apache-2.0"} 
(dep->expressions ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) + (is (valid= #{(lcis/public-domain)} (dep->expressions ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.google.guava/guava {:deps/manifest :mvn :mvn/version "31.0.1-jre"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is 
(valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (valid= #{"CDDL-1.0"} (dep->expressions ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) + (is (valid= #{"CDDL-1.0"} (dep->expressions ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['javax.enterprise/cdi-api {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) + (is (valid= #{"CC0-1.0"} (dep->expressions ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is 
(valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} 
(dep->expressions ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.codec {:deps/manifest :mvn :mvn/version "0.1.1"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.json {:deps/manifest :mvn :mvn/version "2.4.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) + (is (valid= #{"EPL-1.0"} 
(dep->expressions ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn :mvn/version "2.6.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-component-annotations {:deps/manifest :mvn :mvn/version "2.1.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn 
:mvn/version "0.3.5"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) + (is (valid= #{"Plexus"} (dep->expressions ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "1.7"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) + (is (valid= #{(lcis/public-domain)} (dep->expressions ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) + (is (valid= #{"Apache-2.0"} 
(dep->expressions ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) (testing "Valid deps - no licenses in deployed artifacts -> leverage fallbacks" - (is (= #{"EPL-1.0"} (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) - (is (= #{"EPL-1.0"} (dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) (testing "Valid deps - multi license" - (is (= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} (dep->expressions ['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) - (is (= #{"Apache-2.0" "LGPL-2.1-or-later"} (dep->expressions ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (dep->expressions ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) + (is (valid= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (valid= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (valid= #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} (dep->expressions ['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) + (is (valid= 
#{"Apache-2.0" "LGPL-2.1-or-later"} (dep->expressions ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (dep->expressions ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) (testing "Valid deps - Maven classifiers" ; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 - (is (= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) (deftest deps-expressions-test (testing "Nil and empty deps" (is (nil? (deps-expressions nil))) (is (= {} (deps-expressions {})))) (testing "Single deps" - (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}}) 'org.clojure/clojure)))) - (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}) 'com.github.athos/clj-check)))) ; Note: we use this git dep, as it's used earlier in the build, so we can be sure it's been downloaded before this test is run + (is (valid= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}}) 'org.clojure/clojure)))) + (is (valid= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}) 'com.github.athos/clj-check)))) ; Note: we use this git dep, as it's used earlier in the build, so we can be sure it's been downloaded before this test is run ; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} 
(:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}})))))) ; Blocked on https://github.com/jnr/jffi/issues/141 - (is (= #{"Apache-2.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native)))) + (is (valid= #{"Apache-2.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native)))) (is (= (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi)) (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native))))) (testing "Multiple deps" diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index 1f06280..aa51f50 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -19,7 +19,7 @@ (ns lice-comb.files-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [clojure.java.io :as io] - [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.test-boilerplate :refer [fixture valid=]] [lice-comb.files :refer [probable-license-file? probable-license-files file->expressions dir->expressions zip->expressions]])) (use-fixtures :once fixture) @@ -82,19 +82,19 @@ (testing "Non-existent files" (is (thrown? 
java.io.FileNotFoundException (file->expressions "this_file_does_not_exist")))) (testing "Files on disk" - (is (= #{"CC-BY-4.0"} (file->expressions (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 - (is (= #{"MPL-2.0"} (file->expressions (str test-data-path "/MPL-2.0/LICENSE"))))) +; (is (= #{"CC-BY-4.0"} (file->expressions (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (valid= #{"MPL-2.0"} (file->expressions (str test-data-path "/MPL-2.0/LICENSE"))))) (testing "URLs" - (is (= #{"Apache-2.0"} (file->expressions "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"Apache-2.0"} (file->expressions (io/as-url "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (is (valid= #{"Apache-2.0"} (file->expressions "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (valid= #{"Apache-2.0"} (file->expressions (io/as-url "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) (testing "InputStreams" (is (thrown? 
clojure.lang.ExceptionInfo (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->expressions is)))) - (is (= #{"Apache-2.0"} (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->expressions is "LICENSE_2.0.txt"))))) + (is (valid= #{"Apache-2.0"} (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->expressions is "LICENSE_2.0.txt"))))) (testing "POM files" - (is (= #{"Apache-2.0"} (file->expressions (str test-data-path "/simple.pom")))) - (is (= #{"BSD-3-Clause"} (file->expressions (str test-data-path "/no-xml-ns.pom")))) - (is (= #{"Apache-2.0"} (file->expressions (str test-data-path "/asf-cat-1.0.12.pom")))) - (is (= #{"Apache-2.0"} (file->expressions (str test-data-path "/with-parent.pom")))))) + (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/simple.pom")))) + (is (valid= #{"BSD-3-Clause"} (file->expressions (str test-data-path "/no-xml-ns.pom")))) + (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/asf-cat-1.0.12.pom")))) + (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/with-parent.pom")))))) (deftest dir->expressions-tests (testing "Nil, empty, or blank directory name" @@ -107,7 +107,8 @@ (is (thrown? java.io.FileNotFoundException (dir->expressions "this_directory_does_not_exist"))) (is (thrown? java.nio.file.NotDirectoryException (dir->expressions "deps.edn")))) (testing "Valid directory" - (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "CC-BY-4.0"} (dir->expressions "."))))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 +; (is (valid= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "CC-BY-4.0"} (dir->expressions "."))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 +)) (deftest zip->expressions-tests (testing "Nil, empty, or blank zip file name" @@ -121,5 +122,5 @@ (testing "Invalid zip file" (is (thrown? 
java.util.zip.ZipException (zip->expressions (str test-data-path "/bad.zip"))))) (testing "Valid zip file" - (is (= #{"Apache-2.0"} (zip->expressions (str test-data-path "/good.zip")))))) + (is (valid= #{"Apache-2.0"} (zip->expressions (str test-data-path "/good.zip")))))) diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 4a6fbf3..d13c102 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -18,12 +18,11 @@ (ns lice-comb.matching-test (:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.test-boilerplate :refer [fixture valid=]] [lice-comb.impl.spdx :as lcis] [lice-comb.matching :refer [init! unlisted? proprietary-commercial? text->ids name->expressions uri->ids]] [spdx.licenses :as sl] - [spdx.exceptions :as se] - [spdx.expressions :as sexp])) + [spdx.exceptions :as se])) (use-fixtures :once fixture) @@ -50,17 +49,6 @@ (is (true? (every? false? (map unlisted? (sl/ids))))) (is (true? (every? false? (map unlisted? (se/ids))))))) -(def not-nil? (complement nil?)) - -(defn valid= - "Returns true if all of the SPDX exceptions in s2 are valid, and also - that s1 equals s2." - [s1 s2] - (and ;(not-nil? (meta s2)) ;####TODO: THIS MAY CONFUSE THINGS!!!! - (set? s2) - (= s1 s2) - (every? true? (map sexp/valid? s2)))) - (deftest name->expressions-tests (testing "Nil, empty or blank" (is (nil? (name->expressions nil))) diff --git a/test/lice_comb/maven_test.clj b/test/lice_comb/maven_test.clj index 526b458..7b5d2cc 100644 --- a/test/lice_comb/maven_test.clj +++ b/test/lice_comb/maven_test.clj @@ -18,7 +18,7 @@ (ns lice-comb.maven-test (:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.test-boilerplate :refer [fixture valid=]] [lice-comb.impl.spdx :as lcis] [lice-comb.maven :refer [pom->expressions]])) @@ -30,30 +30,30 @@ (testing "Nil pom" (is (nil? 
(pom->expressions nil)))) (testing "Invalid filenames" - (is (thrown? java.io.FileNotFoundException (pom->expressions ""))) - (is (thrown? java.io.FileNotFoundException (pom->expressions " "))) - (is (thrown? java.io.FileNotFoundException (pom->expressions "\t"))) - (is (thrown? java.io.FileNotFoundException (pom->expressions "\n"))) - (is (thrown? java.io.FileNotFoundException (pom->expressions "this-file-doesnt-exist.pom"))) - (is (thrown? java.io.FileNotFoundException (pom->expressions "./this/path/and/file/doesnt/exist.pom")))) + (is (thrown? java.io.FileNotFoundException (pom->expressions ""))) + (is (thrown? java.io.FileNotFoundException (pom->expressions " "))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "\t"))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "\n"))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "this-file-doesnt-exist.pom"))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "./this/path/and/file/doesnt/exist.pom")))) (testing "Synthetic pom files" - (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/simple.pom")))) - (is (= #{"BSD-3-Clause"} (pom->expressions (str test-data-path "/no-xml-ns.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/simple.pom")))) + (is (valid= #{"BSD-3-Clause"} (pom->expressions (str test-data-path "/no-xml-ns.pom"))))) (testing "Real pom files - local" - (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/asf-cat-1.0.12.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/asf-cat-1.0.12.pom"))))) (testing "Real pom files - remote" - (is (= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) - (is (= #{(lcis/public-domain)} (pom->expressions "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX - (is (= #{"EPL-1.0"} (pom->expressions 
"https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) - (is (= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) - (is (= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) - (is (nil? (pom->expressions "https://repo.clojars.org/borkdude/sci.impl.reflector/0.0.1/sci.impl.reflector-0.0.1.pom"))) ; This project has no license information in its pom - (is (= #{"CDDL-1.0"} (pom->expressions "https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.pom"))) - (is (= #{"Plexus"} (pom->expressions "https://repo1.maven.org/maven2/org/jdom/jdom2/2.0.6.1/jdom2-2.0.6.1.pom"))) ; See https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (is (= #{"GPL-3.0-only"} (pom->expressions "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) + (is (valid= #{(lcis/public-domain)} (pom->expressions "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX + (is (valid= #{"EPL-1.0"} (pom->expressions "https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) + (is (nil? 
(pom->expressions "https://repo.clojars.org/borkdude/sci.impl.reflector/0.0.1/sci.impl.reflector-0.0.1.pom"))) ; This project has no license information in its pom + (is (valid= #{"CDDL-1.0"} (pom->expressions "https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.pom"))) + (is (valid= #{"Plexus"} (pom->expressions "https://repo1.maven.org/maven2/org/jdom/jdom2/2.0.6.1/jdom2-2.0.6.1.pom"))) ; See https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (is (valid= #{"GPL-3.0-only"} (pom->expressions "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) (testing "Real pom files - remote - dual-licensed" - (is (= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (pom->expressions "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (pom->expressions "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) (testing "Synthetic pom files with licenses in parent - local" - (is (= #{"Apache-2.0"} (pom->expressions (str test-data-path "/with-parent.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/with-parent.pom"))))) (testing "Real pom files with licenses in parent - remote" - (is (= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.12.69/aws-java-sdk-core-1.12.69.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.12.69/aws-java-sdk-core-1.12.69.pom"))))) diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index 4bb5e82..3dc5df5 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -17,7 +17,8 @@ ; (ns lice-comb.test-boilerplate - 
(:require [clojure.spec.alpha :as spec])) + (:require [clojure.spec.alpha :as spec] + [spdx.expressions :as sexp])) ; Here we hack up a "global once" function (def ^:private global-setup (memoize (fn [] @@ -35,3 +36,23 @@ [f] (global-setup) (f)) + +(def not-nil? (complement nil?)) + +(defn valid= + "Returns true if all of the SPDX exceptions in s2 are valid, and also + that s1 equals s2." + [s1 s2] + (let [metadata? (not-nil? (meta s2)) + is-a-set? (set? s2) + is-equal? (= s1 s2) + all-valid-expressions? (every? true? (map sexp/valid? s2))] + (when-not metadata? (println "☔️ Missing metadata")) + (when-not is-a-set? (println "☔️ Not a set")) + (when-not is-equal? (println "☔️ Not equal to expected value")) + (when-not all-valid-expressions? (println "☔️ Not all valid SPDX expressions")) + (and metadata? + is-a-set? + is-equal? + all-valid-expressions?))) + From 5d28afc915e24f520afef840821e1356c00aa2f4 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 1 Sep 2023 18:34:02 -0700 Subject: [PATCH 17/34] :construction: Ongoing work on issue #3 --- src/lice_comb/impl/matching.clj | 14 ----------- src/lice_comb/maven.clj | 15 ++++++++---- test/lice_comb/test_boilerplate.clj | 36 ++++++++++++++++++----------- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index 9ac7720..d8e7eaa 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -176,20 +176,6 @@ (catch Exception _ nil)))) -; TODO: THIS MAY BE UNNECESSARY AND IF SO SHOULD BE REMOVED -(comment -(defn listed-name->ids - "Returns the SPDX license and/or exception identifier(s) (a set) for - the given license name (matched case insensitively), or nil if there - aren't any. - - Note that SPDX license names are not guaranteed to be unique - see - https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md" - [name] - (when-not (s/blank? 
name) - (get @lcis/index-name-to-id-d (s/trim (s/lower-case name))))) -) - (defn uri->ids "Returns the SPDX license and/or exception identifiers (a set) for the given uri, or nil if there aren't any. It does this via two steps: diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index 78eec06..d482c23 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -20,6 +20,7 @@ "Functionality related to finding and determining license information from Maven POMs." (:require [clojure.string :as s] + [clojure.set :as set] [clojure.java.io :as io] [clojure.data.xml :as xml] [clojure.java.shell :as sh] @@ -70,7 +71,6 @@ (.toURI local-pom) (first (filter uri-resolves? (map #(java.net.URI. (str % "/" gav-path)) remote-maven-repos)))))))) -;####TODO: Check both URI and name and merge the results! (defn- licenses-from-pair "Attempts to determine the license(s) (a set) from a POM license name/URL pair. @@ -78,10 +78,15 @@ expression(s) were determined." [{:keys [name url]}] ; Attempt to find a match from the name first - (if-let [expressions (lcmtch/name->expressions name)] - expressions - ; Then match by url - (lcmtch/uri->ids url))) + (let [name-expressions (lcmtch/name->expressions name)] + (if (every? lcmtch/unlisted? name-expressions) + ; If all we got were unlisted expressions from the name, try the URI + (let [uri-expressions (lcmtch/uri->ids url)] + (if (every? lcmtch/unlisted? uri-expressions) + ; Neither worked, so just return all of the unlisted placeholders + (set/union name-expressions uri-expressions) ;####TODO: MERGE METADATA!!!! + uri-expressions)) + name-expressions))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index 3dc5df5..ab60a84 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -40,19 +40,27 @@ (def not-nil? 
(complement nil?)) (defn valid= - "Returns true if all of the SPDX exceptions in s2 are valid, and also - that s1 equals s2." + "Returns true if all of the following are true: + * s2 has metadata + * s2 is a set + * s2 is equal to s1 + * every entry in s2 is a valid SPDX license expression + + Also prints (to stdout) which of the above is not true, in the event that any + of them are not true." [s1 s2] - (let [metadata? (not-nil? (meta s2)) - is-a-set? (set? s2) + (let [metadata? (or (nil? s2) (not-nil? (meta s2))) + is-a-set? (or (nil? s2) (set? s2)) is-equal? (= s1 s2) - all-valid-expressions? (every? true? (map sexp/valid? s2))] - (when-not metadata? (println "☔️ Missing metadata")) - (when-not is-a-set? (println "☔️ Not a set")) - (when-not is-equal? (println "☔️ Not equal to expected value")) - (when-not all-valid-expressions? (println "☔️ Not all valid SPDX expressions")) - (and metadata? - is-a-set? - is-equal? - all-valid-expressions?))) - + all-valid-expressions? (every? true? (map sexp/valid? s2)) + result (and metadata? + is-a-set? + is-equal? + all-valid-expressions?)] + ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message + (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) + (when-not metadata? (print "\n* Missing metadata")) + (when-not is-a-set? (print "\n* Not a set")) + (when-not is-equal? (print "\n* Not equal to expected value")) + (when-not all-valid-expressions? 
(print "\n* Not all valid SPDX expressions")) + result)) From b4180734df5a615f448e7fbae7ec9175e68886aa Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Sat, 2 Sep 2023 23:32:01 -0700 Subject: [PATCH 18/34] :construction: Ongoing work on issue #3 --- src/lice_comb/deps.clj | 58 ++-- src/lice_comb/files.clj | 113 +++++--- src/lice_comb/impl/http.clj | 83 ++++++ src/lice_comb/impl/matching.clj | 98 +++---- src/lice_comb/impl/metadata.clj | 122 ++++++++ src/lice_comb/impl/regex_matching.clj | 263 +++++++++++------- src/lice_comb/impl/spdx.clj | 2 + src/lice_comb/impl/utils.clj | 2 +- src/lice_comb/matching.clj | 10 +- src/lice_comb/maven.clj | 79 +++--- test/lice_comb/data/complex.pom | 21 ++ test/lice_comb/data/pom-in-a-zip.zip | Bin 0 -> 384 bytes test/lice_comb/files_test.clj | 40 +-- test/lice_comb/impl/matching_test.clj | 69 +++++ test/lice_comb/impl/metadata_test.clj | 78 ++++++ .../regex_matching_test.clj} | 23 +- test/lice_comb/matching_test.clj | 2 +- test/lice_comb/maven_test.clj | 9 +- test/lice_comb/test_boilerplate.clj | 37 ++- 19 files changed, 779 insertions(+), 330 deletions(-) create mode 100644 src/lice_comb/impl/http.clj create mode 100644 src/lice_comb/impl/metadata.clj create mode 100644 test/lice_comb/data/complex.pom create mode 100644 test/lice_comb/data/pom-in-a-zip.zip create mode 100644 test/lice_comb/impl/matching_test.clj create mode 100644 test/lice_comb/impl/metadata_test.clj rename test/lice_comb/{impl_regex_matching_test.clj => impl/regex_matching_test.clj} (96%) diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index d9be4b7..148e154 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -19,32 +19,32 @@ (ns lice-comb.deps "Functionality related to finding and determining license information from deps in tools.deps lib-map format." 
- (:require [clojure.string :as s] - [spdx.licenses :as sl] - [lice-comb.maven :as lcmvn] - [lice-comb.files :as lcf] - [lice-comb.impl.data :as lcd] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [spdx.licenses :as sl] + [lice-comb.maven :as lcmvn] + [lice-comb.files :as lcf] + [lice-comb.impl.data :as lcd] + [lice-comb.impl.metadata :as lcimd])) ;####TODO: FIGURE OUT HOW TO HANDLE METADATA FOR OVERRIDES / FALLBACKS!!!! (def ^:private overrides-d (delay (lcd/load-edn-resource "lice_comb/deps/overrides.edn"))) (def ^:private fallbacks-d (delay (lcd/load-edn-resource "lice_comb/deps/fallbacks.edn"))) -(defn- check-overrides - "Checks if an override should be used for the given dep" - ([ga] (check-overrides ga nil)) - ([ga v] - (let [gav (symbol (str ga (when v (str "@" v))))] - (:licenses (get @overrides-d gav (get @overrides-d ga)))))) ; Lookup overrides both with and without the version +;(defn- check-overrides +; "Checks if an override should be used for the given dep" +; ([ga] (check-overrides ga nil)) +; ([ga v] +; (let [gav (symbol (str ga (when v (str "@" v))))] +; (:licenses (get @overrides-d gav (get @overrides-d ga)))))) ; Lookup overrides both with and without the version -(defn- check-fallbacks - "Checks if a fallback should be used for the given dep, given the set of - detected ids" - [ga ids] - (if (or (empty? ids) - (every? #(not (sl/listed-id? %)) ids)) - (:licenses (get @fallbacks-d ga {:licenses ids})) - ids)) +;(defn- check-fallbacks +; "Checks if a fallback should be used for the given dep, given the set of +; detected ids" +; [ga ids] +; (if (or (empty? ids) +; (every? #(not (sl/listed-id? 
%)) ids)) +; (:licenses (get @fallbacks-d ga {:licenses ids})) +; ids)) (defn- normalise-dep "Normalises a dep, by removing any classifier suffixes from the artifact-id @@ -69,24 +69,24 @@ (let [[ga info] (normalise-dep dep) [group-id artifact-id] (s/split (str ga) #"/") version (:mvn/version info)] - (if-let [override (check-overrides ga version)] - override +; (if-let [override (check-overrides ga version)] +; override (let [pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) - expressions (check-fallbacks ga + expressions ;(check-fallbacks ga (if-let [expressions (lcmvn/pom->expressions pom-uri)] expressions -;####TODO: MERGE METADATA MAPS!!!! - (lcu/nset (mapcat lcf/zip->expressions (:paths info)))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too - expressions))))) + (apply lcimd/union (mapcat lcf/zip->expressions (:paths info))))];)] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too + expressions))));) (defmethod dep->expressions :deps [dep] (when dep (let [[ga info] (normalise-dep dep) version (:git/sha info)] - (if-let [override (check-overrides ga version)] - override - (check-fallbacks ga (lcf/dir->expressions (:deps/root info))))))) +; (if-let [override (check-overrides ga version)] +; override +; (check-fallbacks ga + (lcf/dir->expressions (:deps/root info)))));)) (defmethod dep->expressions nil [_]) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 7938c78..cbfbf19 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -19,15 +19,27 @@ (ns lice-comb.files "Functionality related to finding and determining license information from files and directories." 
- (:require [clojure.string :as s] - [clojure.set :as set] - [clojure.java.io :as io] - [lice-comb.matching :as lcmtch] - [lice-comb.maven :as lcmvn] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [clojure.java.io :as io] + [lice-comb.matching :as lcmtch] + [lice-comb.maven :as lcmvn] + [lice-comb.impl.metadata :as lcimd] + [lice-comb.impl.utils :as lcu])) (def ^:private probable-license-filenames #{"pom.xml" "license" "license.txt" "copying" "unlicense"}) ;TODO: consider "license.md" and #".+\.spdx" (see https://github.com/spdx/spdx-maven-plugin for why the latter is important)... +(defn- ensure-readable-dir + "Ensures dir (a String or File) refers to a readable directory, and returns it + as a File." + [dir] + (when dir + (let [dir (io/file dir)] + (if (.exists dir) + (if (.isDirectory dir) + dir + (throw (java.nio.file.NotDirectoryException. (str dir)))) + (throw (java.io.FileNotFoundException. (str dir))))))) + (defn probable-license-file? "Returns true if the given file-like thing (String, File, ZipEntry) is a probable license file, false otherwise." @@ -40,50 +52,38 @@ (defn probable-license-files "Returns all probable license files in the given directory, recursively, as a - set of java.io.File objects. dir may be a String or a java.io.File, both of - which must refer to a directory." + set of java.io.File objects. dir may be a String or a java.io.File, either of + which must refer to a readable directory." [dir] - (when dir - (let [dir (io/file dir)] - (if (.exists dir) ; Note: we have to do this, because file-seq does weird things when handed a file that doesn't exist - (if (.isDirectory dir) - (lcu/nset (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq (io/file dir)))) - (throw (java.nio.file.NotDirectoryException. (str dir)))) - (throw (java.io.FileNotFoundException. (str dir))))))) + (when-let [dir (ensure-readable-dir dir)] + (lcu/nset (filter #(and (.isFile ^java.io.File %) (probable-license-file? 
%)) (file-seq dir))))) (defn file->expressions "Attempts to determine the SPDX license expression(s) (a set) from the given file (an InputStream or something that can have an io/input-stream opened on - it). If an InputStream is provided, the associated filename should also be - provided as the second parameter (it is unnecessary in other cases). + it). If an InputStream is provided, it must already be open and the associated + filename should also be provided as the second parameter (it is optional in + other cases). The result has metadata attached that describes how the identifiers in the expression(s) were determined." ([f] (file->expressions f (lcu/filename f))) ([f fname] (when (and f fname) - (let [fname (s/lower-case fname)] - (cond (= fname "pom.xml") (lcmvn/pom->expressions f) - (s/ends-with? fname ".pom") (lcmvn/pom->expressions f) - :else (lcmtch/text->ids (io/input-stream f))))))) ; Default is to assume it's a plain text file containing license text(s) - -(defn dir->expressions - "Attempt to detect the SPDX license expression(s) (a set) in a directory. dir - may be a String or a java.io.File, both of which must refer to a - readable directory. - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." - [dir] - (when dir -;####TODO: MERGE METADATA MAPS AND EMBELLISH :source!!!! - (lcu/nset (mapcat file->expressions (probable-license-files dir))))) + (let [lfname (s/lower-case fname)] + (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions f) + (s/ends-with? lfname ".pom") (lcmvn/pom->expressions f) + (instance? java.io.InputStream f) (lcmtch/text->ids f) + :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids is)))) ; Default is to assume it's a plain text file containing license text(s) + fname))))) (defn zip->expressions "Attempt to detect the SPDX license expression(s) in a ZIP file. 
zip may be a String or a java.io.File, both of which must refer to a ZIP-format compressed file. + Throws on invalid zip file (doesn't exist, not readable, not ZIP format, etc.). + The result has metadata attached that describes how the identifiers in the expression(s) were determined." [zip] @@ -95,7 +95,48 @@ entry (.getNextEntry zip-is)] (if entry (if (probable-license-file? entry) -;####TODO: MERGE METADATA MAPS AND EMBELLISH :source!!!! - (recur (set/union result (file->expressions zip-is (lcu/filename entry))) (.getNextEntry zip-is)) - (recur result (.getNextEntry zip-is))) + (recur (lcimd/union result (lcimd/prepend-source (file->expressions zip-is (lcu/filename entry)) (lcu/filename zip-file))) + (.getNextEntry zip-is)) + (recur result (.getNextEntry zip-is))) (doall (some-> (seq result) set)))))))) ; De-lazy the result before we exit the with-open scope + +(defn- zip-compressed-files + "Returns all probable ZIP compressed files in the given directory, + recursively, as a set of java.io.File objects. dir may be a String or a + java.io.File, either of which must refer to a readable directory." + [dir] + (when-let [dir (ensure-readable-dir dir)] + (lcu/nset (filter #(and (.isFile ^java.io.File %) + (or (s/ends-with? (str %) ".zip") + (s/ends-with? (str %) ".jar"))) + (file-seq dir))))) + +(defn dir->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a directory. dir + may be a String or a java.io.File, both of which must refer to a + readable directory. + + The optional `opts` map has these keys: + * `include-zips?` (boolean, default false) - controls whether zip compressed + files found in the directory are included in the scan or not + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + ([dir] (dir->expressions dir nil)) + ([dir {:keys [include-zips?] :or {include-zips? 
false}}] + (when dir + (let [file-expressions (apply lcimd/union (map file->expressions (probable-license-files dir)))] + (if include-zips? + (let [zip-expressions (apply lcimd/union (map #(try (zip->expressions %) (catch Exception _ nil)) (zip-compressed-files dir)))] + (lcimd/union file-expressions zip-expressions)) + file-expressions))))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." + [] + (lcmtch/init!) + (lcmvn/init!) + nil) diff --git a/src/lice_comb/impl/http.clj b/src/lice_comb/impl/http.clj new file mode 100644 index 0000000..7b7eabd --- /dev/null +++ b/src/lice_comb/impl/http.clj @@ -0,0 +1,83 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.http + "HTTP helper functionality. Note: this namespace is not part of + the public API of lice-comb and may change without notice." + (:require [clojure.string :as s] + [clojure.java.io :as io] + [hato.client :as hc] + [lice-comb.impl.utils :as lcu])) + +(def ^:private http-client-d (delay (hc/build-http-client {:connect-timeout 1000 + :redirect-policy :always + :cookie-policy :none}))) + +(defn uri-resolves? 
+ "Does the given URI resolve (i.e. does the resource it points to exist)? + + Note: does not throw - returns false on errors." + [uri] + (when (lcu/valid-http-uri? (str uri)) + (try + (when-let [response (hc/head (str uri) + {:http-client @http-client-d + :header {"user agent" "com.github.pmonks/lice-comb"}})] + (= 200 (:status response))) + (catch Exception _ + false)))) + +(defn- cdn-uri + "Converts raw URIs into CDN URIs, for these 'known' hosts: + + * github.com e.g. https://github.com/pmonks/lice-comb/blob/main/LICENSE -> https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE + + If the given URI is not known, returns the input unchanged." + [uri] + (if-let [^java.net.URL uri-obj (try (io/as-url uri) (catch Exception _ nil))] + (case (s/lower-case (.getHost uri-obj)) + "github.com" (-> uri + (s/replace #"(?i)github\.com" "raw.githubusercontent.com") + (s/replace "/blob/" "/")) + uri) ; Default case + uri)) + +(defn get-text + "Attempts to get plain text as a String from the given URI, returning nil if + unable to do so (including for error conditions - there is no way to + disambiguate errors from non-text content, for example)." + [uri] + (when (lcu/valid-http-uri? uri) + (try + (when-let [response (hc/get (cdn-uri uri) + {:http-client @http-client-d + :accept "text/plain;q=1,*/*;q=0" ; Kindly request that the server only return text/plain... ...even though this gets ignored a lot of the time 🙄 + :header {"user agent" "com.github.pmonks/lice-comb"}})] + (when (= :text/plain (:content-type response)) + (:body response))) + (catch Exception _ + nil)))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." 
+ [] + @http-client-d + nil) diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index d8e7eaa..4f4b7d9 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -22,18 +22,15 @@ (:require [clojure.string :as s] [clojure.set :as set] [clojure.java.io :as io] - [hato.client :as hc] [spdx.exceptions :as se] [spdx.matching :as sm] [lice-comb.impl.spdx :as lcis] [lice-comb.impl.regex-matching :as lcirm] + [lice-comb.impl.metadata :as lcimd] [lice-comb.impl.3rd-party :as lc3] + [lice-comb.impl.http :as lcihttp] [lice-comb.impl.utils :as lcu])) -(def ^:private http-client-d (delay (hc/build-http-client {:connect-timeout 1000 - :redirect-policy :always - :cookie-policy :none}))) - (def ^:private direct-replacements-map { #{"GPL-2.0-only" "Classpath-exception-2.0"} #{"GPL-2.0-only WITH Classpath-exception-2.0"} #{"GPL-2.0-or-later" "Classpath-exception-2.0"} #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} @@ -96,12 +93,16 @@ "Manually fix certain invalid combinations of license identifiers in a set." 
[ids] (when ids - (some-> ids - direct-replacements - fix-gpl-only-or-later - fix-public-domain-cc0 - fix-mpl-2 - set))) + (let [m (meta ids) + result (some-> ids + direct-replacements + fix-gpl-only-or-later + fix-public-domain-cc0 + fix-mpl-2 + set) + removed-ids (apply disj (set (keys m)) result) + m (apply dissoc m removed-ids)] + (with-meta result m)))) (defmulti text->ids "Attempts to determine the SPDX license and/or exception identifier(s) (a set) @@ -127,7 +128,7 @@ f-exc (future (sm/exceptions-within-text s @lcis/exception-ids-d)) ids (manual-fixes (set/union @f-lic @f-exc))] (when ids - (with-meta ids (into {} (map #(vec [% {:type :concluded :confidence :high :strategy :spdx-matching}]) ids)))))) + (with-meta ids (into {} (map #(vec [% {:type :concluded :confidence :high :strategy :spdx-text-matching}]) ids)))))) (defmethod text->ids java.io.Reader [r] @@ -145,37 +146,6 @@ (with-open [r (io/reader src)] (text->ids r)))) -(defn- cdn-uri - "Converts raw URIs into CDN URIs, for these 'known' hosts: - - * github.com e.g. https://github.com/pmonks/lice-comb/blob/main/LICENSE -> https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE - - If the given URI is not known, returns the input unchanged." - [uri] - (if-let [^java.net.URL uri-obj (try (io/as-url uri) (catch Exception _ nil))] - (case (s/lower-case (.getHost uri-obj)) - "github.com" (-> uri - (s/replace #"(?i)github\.com" "raw.githubusercontent.com") - (s/replace "/blob/" "/")) - uri) ; Default case - uri)) - -(defn- attempt-text-http-get - "Attempts to get plain text as a String from the given URI, returning nil if - unable to do so (including for error conditions - there is no way to - disambiguate errors from non-text content, for example)." - [uri] - (when (lcu/valid-http-uri? uri) - (try - (when-let [response (hc/get (cdn-uri uri) - {:http-client @http-client-d - :accept "text/plain;q=1,*/*;q=0" ; Kindly request that the server only return text/plain... 
...even though this gets ignored a lot of the time 🙄 - :header {"user agent" "com.github.pmonks/lice-comb"}})] - (when (= :text/plain (:content-type response)) - (:body response))) - (catch Exception _ - nil)))) - (defn uri->ids "Returns the SPDX license and/or exception identifiers (a set) for the given uri, or nil if there aren't any. It does this via two steps: @@ -199,22 +169,25 @@ (when-not (s/blank? uri) (manual-fixes (let [suri (lcu/simplify-uri uri)] - ; First, see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) + ; 1. see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) (if-let [ids (get @lcis/index-uri-to-id-d suri)] - (with-meta ids (into {} (map #(vec [% {:type :concluded :confidence :high :strategy :spdx-listed-uri :source (list uri)}]) ids))) - ; Second, attempt to retrieve the text/plain contents of the uri and perform full license matching on it - (when-let [license-text (attempt-text-http-get uri)] + (let [metadata (into {} (map #(vec [% {:type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)}]) ids))] + (with-meta ids metadata)) + ; 2. attempt to retrieve the text/plain contents of the uri and perform full license matching on it + (when-let [license-text (lcihttp/get-text uri)] (when-let [ids (text->ids license-text)] - (let [metadata (lcu/mapfonv #(assoc % :source (conj (:source %) (str uri ""))) (meta ids))] ; Append to existing metadata returned from text->ids - (with-meta ids metadata))))))))) + (lcimd/prepend-source ids (str uri " (retrieved text)"))))))))) (defn- string->ids-info "Converts the given String into a sequence of singleton maps, each of which has a key is that is an SPDX identifier (either a listed SPDX license or - exception id if the value is recognised, or a lice-comb specific 'unlisted' - LicenseRef if not), and whose value is meta-information about how that - identifier was found. 
The result sequence is ordered in the same order of - appearance as the source values in s. + exception id), and whose value is meta-information about how that identifier + was found. The result sequence is ordered in the same order of appearance as + the source values in s. + + If no listed SPDX license or exception identifiers are found, returns a + singleton sequence containing a map with a lice-comb specific 'unlisted' + LicenseRef. This involves: 1. Seeing if it's a listed license or exception id @@ -233,14 +206,14 @@ (list {id {:type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)}})) ; 2. Is it an SPDX license or exception name? (if-let [ids (get @lcis/index-name-to-id-d (s/trim (s/lower-case s)))] - (map #(hash-map % {:type :concluded :confidence :low :strategy :spdx-listed-name :source (list s)}) ids) + (map #(hash-map % {:type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)}) ids) ; 3. Is it a URI? If so, perform URI matching on it (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central) (if-let [ids (uri->ids s)] - (mapcat #(list {(key %) (val %)}) (meta ids)) + (let [metadata (meta ids)] + (map #(hash-map % (get metadata %)) ids)) ; Convert metadata from uri->ids back into a regular map (so that it survives expression building) ; 4. Attempt regex name matching (if-let [ids (lcirm/match-regexes s)] - (map #(hash-map % {:type :concluded :confidence :low :strategy :regex-matching :source (list s)}) ids) - ; 5. Give up and return a lice-comb "unlisted" LicenseRef + (map #(hash-map % (get (meta ids) %)) ids) ; Convert metadata from match-regexes back into a regular map (so that it survives expression building) (list {(lcis/name->unlisted s) {:type :concluded :confidence :low :strategy :unlisted :source (list s)}})))))))) (defn- filter-blanks @@ -269,9 +242,9 @@ [s] (when-not (s/blank? 
s) (->> (s/split (s/trim s) #"(?i)\band[/-\\]+or\b") - (map-split-and-interpose #"(?i)(\band|\&)(?!\s+(distribution|all\s+rights\s+reserved))" :and) - (map-split-and-interpose #"(?i)\bor(?!\s*(-?later|lator|newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?|([\"']?(Revised|Modified)[\"']?)))" :or) - (map-split-and-interpose #"(?i)\b(with|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)" :with) + (map-split-and-interpose #"(?i)(\band\b|\&)(?!\s+(distribution|all\s+rights\s+reserved))" :and) + (map-split-and-interpose #"(?i)\bor\b(?!\s*(-?later|lator|newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?|([\"']?(Revised|Modified)[\"']?)))" :or) + (map-split-and-interpose #"(?i)\b(with\b|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)" :with) filter-blanks (map #(if (string? %) (s/trim %) %))))) @@ -327,6 +300,9 @@ (lc3/rdrop-while keyword?) (map #(if (keyword? %) % (string->ids-info %))) flatten + (filter identity) + (drop-while keyword?) + (lc3/rdrop-while keyword?) seq)] (let [spdx-expressions (build-spdx-expressions (map #(if (keyword? %) % (first (keys %))) partial-expressions)) metadata (into {} (filter (complement keyword?) partial-expressions))] @@ -342,5 +318,5 @@ [] (lcis/init!) (lcirm/init!) - @http-client-d + (lcihttp/init!) nil) diff --git a/src/lice_comb/impl/metadata.clj b/src/lice_comb/impl/metadata.clj new file mode 100644 index 0000000..a627109 --- /dev/null +++ b/src/lice_comb/impl/metadata.clj @@ -0,0 +1,122 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.metadata + "Metadata helper functionality. Note: this namespace is not part of + the public API of lice-comb and may change without notice." + (:require [clojure.string :as s] + [clojure.set :as set] + [lice-comb.impl.utils :as lcu])) + +(defn prepend-source + "Prepends the given source (a string) onto the list of sources for all of + the entries of the metadata for object o. Returns o with the new metadata." + [o s] + (if (and o (not (s/blank? s))) + (if-let [m (meta o)] + (with-meta o (lcu/mapfonv #(if (map? %) (assoc % :source (conj (seq (:source %)) s)) %) m)) + o) + o)) + +(defn- merge-conflicting-key + "Merges the metadata values for a single key that exists in both m1 and m2." + [m1 m2 k] +;####TODO: IMPROVE THIS SIMPLISTIC "PICK A WINNER" IMPLEMENTATION!!!!! + (let [m1v (get m1 k) + m2v (get m2 k)] + ; If both values are maps, perhaps lice-comb specific metadata merging + (if (and (map? m1v) (map? m2v)) + (if (= :declared (:type m1v)) + m1v + (if (= :declared (:type m2v)) + m2v + (case [(:confidence m1v) (:confidence m2v)] + ([:high :high] [:high :medium] [:high :low] [:high nil]) m1v + ([:medium :medium] [:medium :low] [:medium nil]) m1v + ([:low :low] [:low nil]) m1v + m2v))) + (throw (ex-info "Attempt to merge non-lice-comb metadata maps" {}))))) + + +(defn merge-metadata + "Merges lice-comb metadata maps." 
+ ([] {}) + ([m] m) + ([m1 m2] + (if (and m1 m2) + (let [keys-in-both (set/intersection (set (keys m1)) (set (keys m2))) + keys-in-m1-only (apply disj (set (keys m1)) keys-in-both) + keys-in-m2-only (apply disj (set (keys m2)) keys-in-both)] + (merge {} + (into {} (map #(vec [% (merge-conflicting-key m1 m2 %)]) keys-in-both)) + (into {} (map #(vec [% (get m1 %)]) keys-in-m1-only)) + (into {} (map #(vec [% (get m2 %)]) keys-in-m2-only)))) + (if m1 + m1 + m2))) + ([m1 m2 & maps] + (loop [result (merge-metadata m1 m2) + f (first maps) + r (rest maps)] + (if f + (recur (merge-metadata result f) (first r) (rest r)) + result)))) + +(defn union + "Equivalent to set/union, but preserves lice-comb metadata from the sets using + merge-metadata." + ([] #{}) + ([s] s) + ([s1 s2] + (with-meta (set/union s1 s2) + (merge-metadata (meta s1) (meta s2)))) + ([s1 s2 & sets] + (let [data (apply set/union (concat [s1 s2] sets)) + metadata (apply merge-metadata (concat [(meta s1) (meta s2)] (filter identity (map meta sets))))] + (with-meta data metadata)))) + +(def ^:private strategies { + :spdx-expression "SPDX expression" + :spdx-listed-identifier-exact-match "SPDX identifier" + :spdx-listed-identifier-case-insensitive-match "SPDX identifier (case insensitive match)" + :spdx-text-matching "SPDX license text matching" + :spdx-listed-name "SPDX listed name (case insensitive match)" + :spdx-listed-uri "SPDX listed URI (relaxed matching)" + :regex-name-matching "Regular expression name matching" + :unlisted "Unlisted"}) + +(defn- metadata-element->string + "Converts a single element in a lice-comb metadata map (identified by id) + into a human-readable string." 
+ [m id] + (when-let [metadata (get m id)] + (str id ": " + (name (:type metadata)) + (when-let [confidence (:confidence metadata)] + (str "\n Confidence: " (name confidence))) + (when-let [strategy (:strategy metadata)] + (str "\n Strategy: " (get strategies strategy (str "#### MISSING VALUE: " strategy " ####")))) + (when-let [source (seq (:source metadata))] + (str "\n Source: " (s/join " > " source)))))) + +(defn metadata->string + "Converts a lice-comb metadata map m into a human-readable string." + [m] + (when m + (let [ids (sort (keys m))] + (s/join "\n\n" (map (partial metadata-element->string m) ids))))) diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj index 8e4b6f8..94f1c3f 100644 --- a/src/lice_comb/impl/regex_matching.clj +++ b/src/lice_comb/impl/regex_matching.clj @@ -58,14 +58,22 @@ "A generic SPDX id constructor which works for many simple regexes." [m] (when m - (let [id (str (:id m) - (when-let [ver (get-rencgs m ["version"] (:latest-ver m))] - (str "-" - ver - (when (and (:pad-ver? m) - (not (s/includes? ver "."))) - ".0"))))] - (assert-listed-id id)))) + (let [version (get-rencgs m ["version"]) + confidence (if (or (and (s/blank? version) + (not (s/blank? (:latest-ver m)))) + (and (:pad-ver? m) + (not (s/includes? version ".")))) + :low ; We required a version but either didn't get one or it was incomplete + :medium) ; We didn't require a version, or it was complete + version (if (s/blank? version) + (:latest-ver m) + version) + version (if (and (:pad-ver? m) + (not (s/includes? version "."))) + (str version ".0") + version) + id (str (:id m) (when-not (s/blank? version) (str "-" version)))] + [(assert-listed-id id) confidence]))) (defn- number-name-to-number "Converts the name of a number to that number (as a string). e.g. @@ -81,93 +89,111 @@ (defn- bsd-id-constructor "An SPDX id constructor specific to the BSD family of licenses." 
[m] - (let [clause-count1 (number-name-to-number (get-rencgs m ["clausecount1"])) - clause-count2 (number-name-to-number (get-rencgs m ["clausecount2"])) - preferred-clause-count (case [(lcu/is-digits? clause-count1) (lcu/is-digits? clause-count2)] - [true true] clause-count1 - [true false] clause-count1 - [false true] clause-count2 - (if (contains? #{"simplified" "new" "revised" "modified" "aduna"} clause-count1) - clause-count1 - clause-count2)) - clause-count (case preferred-clause-count - ("2" "simplified") "2" - ("3" "new" "revised" "modified" "aduna") "3" - "4") ; Note: we default to 4 clause, since it was the original form of the BSD license - suffix (case (get-rencgs m ["suffix"]) - "patent" "Patent" - "views" "Views" - "attribution" "Attribution" - "clear" "Clear" - "lbnl" "LBNL" - "modification" "Modification" - ("no military license" "no military licence") "No-Military-License" - ("no nuclear license" "no nuclear licence") "No-Nuclear-License" - ("no nuclear license 2014" "no nuclear licence 2014") "No-Nuclear-License-2014" - "no nuclear warranty" "No-Nuclear-Warranty" - "open mpi" "Open-MPI" - "shortened" "Shortened" - "uc" "UC" - nil) - base-id (str (:id m) "-" clause-count "-Clause") - id-with-suffix (str base-id "-" suffix)] + (let [clause-count1 (number-name-to-number (get-rencgs m ["clausecount1"])) + clause-count2 (number-name-to-number (get-rencgs m ["clausecount2"])) + preferred-clause-count (case [(lcu/is-digits? clause-count1) (lcu/is-digits? clause-count2)] + [true true] clause-count1 + [true false] clause-count1 + [false true] clause-count2 + (if (contains? 
#{"simplified" "new" "revised" "modified" "aduna"} clause-count1) + clause-count1 + clause-count2)) + [clause-count confidence] (case preferred-clause-count + ("2" "simplified") ["2" :medium] + ("3" "new" "revised" "modified" "aduna") ["3" :medium] + ("4" "original") ["4" :medium] + ["4" :low]) ; Note: we default to 4 clause, since it was the original form of the BSD license + suffix (case (get-rencgs m ["suffix"]) + "patent" "Patent" + "views" "Views" + "attribution" "Attribution" + "clear" "Clear" + "lbnl" "LBNL" + "modification" "Modification" + ("no military license" "no military licence") "No-Military-License" + ("no nuclear license" "no nuclear licence") "No-Nuclear-License" + ("no nuclear license 2014" "no nuclear licence 2014") "No-Nuclear-License-2014" + "no nuclear warranty" "No-Nuclear-Warranty" + "open mpi" "Open-MPI" + "shortened" "Shortened" + "uc" "UC" + nil) + base-id (str (:id m) "-" clause-count "-Clause") + id-with-suffix (str base-id "-" suffix)] (if (contains? @lcis/license-ids-d id-with-suffix) ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it - id-with-suffix - (assert-listed-id base-id)))) + [id-with-suffix confidence] + [(assert-listed-id base-id) confidence]))) (defn- cc-id-constructor "An SPDX id constructor specific to the Creative Commons family of licenses." [m] - (let [nc? (not (s/blank? (get-rencgs m ["noncommercial"]))) - nd? (not (s/blank? (get-rencgs m ["noderivatives"]))) - sa? (not (s/blank? (get-rencgs m ["sharealike"]))) - version (let [ver (s/replace (get-rencgs m ["version"] (:latest-ver m)) #"\p{Punct}+" ".")] - (if (s/includes? ver ".") - ver - (str ver ".0"))) - base-id (str "CC-BY-" - (when nc? "NC-") - (when nd? "ND-") - (when (and (not nd?) sa?) 
"SA-") ; SA and ND are incompatible (and have no SPDX id as a result), and if both are (erroneously) specified we conservatively choose ND - version) - region (case (get-rencgs m ["region"]) - "australia" "AU" - "austria" "AT" - ("england" "england and wales" "england & wales" "uk") "UK" - "france" "FR" - "germany" "DE" - "igo" "IGO" - "japan" "JP" - "netherlands" "NL" - ("united states" "usa" "us") "US" - nil) + (let [nc? (not (s/blank? (get-rencgs m ["noncommercial"]))) + nd? (not (s/blank? (get-rencgs m ["noderivatives"]))) + sa? (not (s/blank? (get-rencgs m ["sharealike"]))) + version (get-rencgs m ["version"] "") + version (s/replace version #"\p{Punct}+" ".") + confidence (if (or (s/blank? version) + (not (s/includes? version "."))) + :low + :medium) + version (if (s/blank? version) + (:latest-ver m) + version) + version (if (s/includes? version ".") + version + (str version ".0")) + base-id (str "CC-BY-" + (when nc? "NC-") + (when nd? "ND-") + (when (and (not nd?) sa?) "SA-") ; SA and ND are incompatible (and have no SPDX id as a result), and if both are (erroneously) specified we conservatively choose ND + version) + region (case (get-rencgs m ["region"]) + "australia" "AU" + "austria" "AT" + ("england" "england and wales" "england & wales" "uk") "UK" + "france" "FR" + "germany" "DE" + "igo" "IGO" + "japan" "JP" + "netherlands" "NL" + ("united states" "usa" "us") "US" + nil) id-with-region (str base-id (when-not (s/blank? region) (str "-" region)))] (if (contains? @lcis/license-ids-d id-with-region) ; Not all license variants and versions have a region specific identifier, so check that it's valid before returning it - id-with-region - (assert-listed-id base-id)))) + [id-with-region confidence] + [(assert-listed-id base-id) confidence]))) (defn- gpl-id-constructor "An SPDX id constructor specific to the GNU family of licenses." [m] - (let [variant (cond (contains? m "agpl") "AGPL" - (contains? m "lgpl") "LGPL" - (contains? 
m "gpl") "GPL") - version (let [ver (s/replace (get-rencgs m ["version"] (:latest-ver m)) #"\p{Punct}+" ".")] - (if (s/includes? ver ".") - ver - (str ver ".0"))) - suffix (if (contains? m "orLater") - "or-later" - "only") ; Note: we (conservatively) default to "only" when we don't have an explicit suffix - id (str variant "-" version "-" suffix)] - (assert-listed-id id))) + (let [variant (cond (contains? m "agpl") "AGPL" + (contains? m "lgpl") "LGPL" + (contains? m "gpl") "GPL") + version (get-rencgs m ["version"] "") + version (s/replace version #"\p{Punct}+" ".") + confidence (if (or (s/blank? version) + (not (s/includes? version "."))) + :low + :medium) + version (if (s/blank? version) + (:latest-ver m) + version) + version (if (s/includes? version ".") + version + (str version ".0")) + suffix (if (contains? m "orLater") + "or-later" + "only") ; Note: we (conservatively) default to "only" when we don't have an explicit suffix + id (str variant "-" version "-" suffix)] + [(assert-listed-id id) confidence])) (defn- simple-regex-match - "Constructs a 'simple' name match structure" + "Constructs a 'simple' name match structure that's a case-insensitive match + for s." 
[s] {:id s - :regex (re-pattern (str "(?i)\\b" s "\\b")) - :fn (constantly s)}) + :regex (re-pattern (str "(?i)\\b" (lcu/escape-re s) "\\b")) + :fn (constantly [s :medium])}) ; The regex for the GNU family is a nightmare, so we build it up (and test it) in pieces (def agpl-re #"(?<agpl>AGPL|Affero)(\s+GNU)?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?AGPL\)?)?") @@ -186,7 +212,7 @@ "\n# Only/or-Later suffix\n" only-or-later-re)) -; Regexes used for license name matching, along with functions for constructing an SPDX id from them +; Regexes used for license name matching, along with functions for constructing an SPDX id and confidence metric from them (def ^:private license-name-matching-d (delay (concat ; By default we add most SPDX ids as "simple" regex matches @@ -210,7 +236,7 @@ :latest-ver "2.0"} {:id "Beerware" :regex #"(?i)\bBeer-?ware\b" - :fn (constantly "Beerware")} + :fn (constantly ["Beerware" :medium])} {:id "BSL" :regex #"(?i)\bBoost(\s+Software)?(\s+Licen[cs]e)?[\s,-]*(?<version>\d+(\.\d+)?)?\b" :fn generic-id-constructor @@ -221,7 +247,7 @@ :fn bsd-id-constructor} {:id "CC0" :regex #"(?i)\bCC\s*0" - :fn (constantly "CC0-1.0")} + :fn (constantly ["CC0-1.0" :medium])} {:id "CECILL" :regex #"(?i)\bCeCILL(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?(\s+Agreement)?[\s,-]*(\s*V(ersion)?)?\s*(?<version>\d+(\.\d+)?)?\b" :fn generic-id-constructor @@ -259,7 +285,7 @@ :latest-ver "1.2"} {:id "FreeBSD" :regex #"(?i)\bFreeBSD\b" - :fn (constantly "BSD-2-Clause-FreeBSD")} + :fn (constantly ["BSD-2-Clause-FreeBSD" :medium])} {:id "GNU license family" :regex gnu-re :fn gpl-id-constructor @@ -267,13 +293,13 @@ :latest-ver 3.0} {:id "Hippocratic" :regex #"(?i)\bHippocratic\b" - :fn (constantly "Hippocratic-2.1")} ; There are no other listed versions of this license + :fn (constantly ["Hippocratic-2.1" :medium])} ; There are no other listed versions of this license {:id "LLVM-exception" :regex #"(?i)\bLLVM[\s-]+Exception\b" - :fn (constantly "LLVM-exception")} + :fn (constantly 
["LLVM-exception" :medium])} {:id "MIT" :regex #"(?i)\b(MIT|Bouncy\s+Castle)(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b" - :fn (constantly "MIT")} + :fn (constantly ["MIT" :medium])} {:id "MPL" :regex #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" :fn generic-id-constructor @@ -281,7 +307,7 @@ :latest-ver "2.0"} {:id "MX4J" :regex #"(?i)\bMX4J\s+Licen[cs]e(,?\s+v(ersion)?\s*1\.0)?\b" - :fn (constantly "Apache-1.1")} ; See https://github.com/spdx/license-list-XML/pull/594 - the MX4J license *is* the Apache-1.1 license, according to SPDX + :fn (constantly ["Apache-1.1" :medium])} ; See https://github.com/spdx/license-list-XML/pull/594 - the MX4J license *is* the Apache-1.1 license, according to SPDX {:id "NASA" :regex #"(?i)\bNASA(\s+Open)?(\s+Source)?(\s+Agreement)?[\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b" :fn generic-id-constructor @@ -289,16 +315,16 @@ :latest-ver "1.3"} {:id "Plexus" :regex #"(?i)\bApache\s+Licen[cs]e(\s+but)?(\s+with)?(\s+the)?\s+acknowledgment\s+clause\s+removed\b" - :fn (constantly "Plexus")} + :fn (constantly ["Plexus" :medium])} {:id "Proprietary or commercial" - :regex #"(?i)\b(Propriet[ao]ry|Commercial|All\s+Rights\s+Reserved|Private)\b" - :fn lcis/proprietary-commercial} + :regex #"(?i)\b(Propriet[aoe]ry|Commercial|All\s+Rights\s+Reserved|Private)\b" + :fn (constantly [(lcis/proprietary-commercial) :medium])} {:id "Public Domain" :regex #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)" - :fn lcis/public-domain} + :fn (constantly [(lcis/public-domain) :medium])} {:id "Ruby" :regex #"(?i)\bRuby(\s+Licen[cs]e)?\b" - :fn (constantly "Ruby")} + :fn (constantly ["Ruby" :medium])} {:id "SGI-B" :regex #"(?i)\bSGI(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?)?\b" :fn generic-id-constructor @@ -306,32 +332,55 @@ :latest-ver "2.0"} {:id "Unlicense" :regex #"(?i)\bUnlicen[cs]e\b" - :fn (constantly "Unlicense")} + :fn (constantly ["Unlicense" :medium])} {:id "WTFPL" :regex 
#"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b" - :fn (constantly "WTFPL")} + :fn (constantly ["WTFPL" :medium])} {:id "Zlib" :regex #"\b(?i)zlib(?![\s/]+libpng)\b" - :fn (constantly "Zlib")} + :fn (constantly ["Zlib" :medium])} ]))) (defn- match-regex - "Returns a map containing the SPDX :id and :start index of the given - regex in the string if a match occurred, or nil if there was no match." + "If a match occured for the given regex element when tested against string s, + returns a map containing the following keys, or nil if there was no match: + * :id The SPDX identifier of the found license or exception + * :type The 'type' of match - will always have the value :concluded + * :confidence The confidence of the match: either :high, :medium, or :low + * :strategy The matching strategy - will always have the value :regex-name-matching + * :source A list of strings containing source information (specifically + the portion of the string s that matched this regex element) + *: start The start index of the given match within s" [s elem] (when-let [match (rencg/re-find-ncg (:regex elem) s)] - {:id ((:fn elem) (merge {:name s} elem match)) - :start (:start match)})) + (let [[id confidence] ((:fn elem) (merge {:name s} elem match)) + source (s/trim (subs s (:start match) (:end match)))] + {:id id + :type :concluded + :confidence (if (= source id) :high confidence) + :strategy :regex-name-matching + :source (list source) + :start (:start match)}))) (defn match-regexes - "Returns a sequence (NOT A SET!) of the matched SPDX license or - exception ids for the given string, or nil if there were no matches. - Results are in the order in which they appear in the string." + "Returns a sequence (NOT A SET!) of the SPDX license or exception ids that + were found in the string s, or nil if there were no matches. Results are in + the order in which they appear in the string. 
The result also has metadata + attached, which is a map whose keys are each of the SPDX license or exception + ids, and whose values are a map containing these keys: + * :type The 'type' of match - will always have the value :concluded + * :confidence The confidence of the match: either :high, :medium, or :low + * :strategy The matching strategy - will always have the value :regex-name-matching + * :source A list of strings containing source information (specifically + the portion of the string s that matched this identifier" [s] - (some->> (seq (filter identity (pmap (partial match-regex s) @license-name-matching-d))) - (sort-by :start) - (map :id) - distinct)) + (when-let [matches (seq (distinct (filter identity (pmap (partial match-regex s) @license-name-matching-d))))] + (let [ids (some->> matches + (sort-by :start) + (map :id) + (distinct)) + metadata (into {} (map #(vec [% (dissoc (first (filter (fn [x] (= % (:id x))) matches)) :start :id)]) ids))] + (with-meta ids metadata)))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/impl/spdx.clj b/src/lice_comb/impl/spdx.clj index 20fbe96..c46dfb6 100644 --- a/src/lice_comb/impl/spdx.clj +++ b/src/lice_comb/impl/spdx.clj @@ -22,6 +22,7 @@ (:require [clojure.string :as s] [spdx.licenses :as sl] [spdx.exceptions :as se] + [spdx.expressions :as sexp] [lice-comb.impl.utils :as lcu])) ; The subset of SPDX license identifiers that we use; specifically excludes the superceded deprecated GPL family identifiers @@ -125,6 +126,7 @@ se-init (future (se/init!))] @sl-init @se-init) + (sexp/init!) ; Serially initialise this namespace's dependent state - they're all pretty fast (< 1s) @license-ids-d diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index 6190d95..1d1d143 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -107,7 +107,7 @@ (defn valid-http-uri? 
"Returns true if given string is a valid HTTP or HTTPS URI." - [^String s] + [s] ; Note: no nil check needed since the isValid method handles null sanely (.isValid (org.apache.commons.validator.routines.UrlValidator. ^"[Ljava.lang.String;" (into-array String ["http" "https"])) s)) diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index a43a572..7ee7601 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -111,13 +111,16 @@ (when-not (s/blank? name) (let [name (s/trim name)] ; 1. If it's a valid SPDX expression, return the normalised rendition of it in a set - (if-let [normalised-expression (sexp/normalise name)] - (with-meta #{normalised-expression} {:type :declared :strategy :spdx-expression :source (list name)}) + (if-let [parsed-expression (sexp/parse name)] + (let [ids (sexp/extract-ids parsed-expression) + normalised-expression (sexp/unparse parsed-expression) + metadata (into {} (map #(vec [% {:type :declared :strategy :spdx-expression :source (list normalised-expression)}]) ids))] + (with-meta #{normalised-expression} metadata)) ; 2. If it's a URI, use URI matching (this is to handle messed up real world cases where license names in POMs contain a URI) (if (lcu/valid-http-uri? name) (if-let [ids (uri->ids name)] ids - (with-meta #{(lcis/name->unlisted name)} {:type :concluded :confidence :low :strategy :unresolvable-uri :source (list name)})) + (with-meta #{(lcis/name->unlisted name)} {(lcis/name->unlisted name) {:type :concluded :confidence :low :strategy :unlisted :source (list name)}})) ; 3. Attempt to build SPDX expression(s) from the name (lcim/attempt-to-build-expressions name)))))) @@ -129,5 +132,6 @@ Note: this method has a substantial performance cost." [] + (lcis/init!) (lcim/init!) 
nil) diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index d482c23..c45fc4c 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -19,15 +19,18 @@ (ns lice-comb.maven "Functionality related to finding and determining license information from Maven POMs." - (:require [clojure.string :as s] - [clojure.set :as set] - [clojure.java.io :as io] - [clojure.data.xml :as xml] - [clojure.java.shell :as sh] - [clojure.tools.logging :as log] - [xml-in.core :as xi] - [lice-comb.matching :as lcmtch] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [clojure.java.io :as io] + [clojure.data.xml :as xml] + [clojure.java.shell :as sh] + [clojure.tools.logging :as log] + [xml-in.core :as xi] + [spdx.expressions :as sexp] + [lice-comb.matching :as lcmtch] + [lice-comb.impl.matching :as lcim] + [lice-comb.impl.metadata :as lcimd] + [lice-comb.impl.http :as lcihttp] + [lice-comb.impl.utils :as lcu])) (def ^:private local-maven-repo-d (delay @@ -45,15 +48,6 @@ ; TODO: make this configurable (def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"}) -;####TODO: MOVE THIS TO UTILS AND REIMPLEMENT ON HATO?? -(defn- uri-resolves? - "Does the given URI resolve (i.e. does the resource it points to exist)?" - [^java.net.URI uri] - (and uri - (let [http (doto ^java.net.HttpURLConnection (.openConnection (.toURL uri)) - (.setRequestMethod "HEAD"))] - (= 200 (.getResponseCode http))))) - ;####TODO: MOVE THIS TO AN IMPL NS?? (defn pom-uri-for-gav "Attempts to locate the POM for the given GAV, which is a URI that may point @@ -69,7 +63,7 @@ (if (and (.exists local-pom) (.isFile local-pom)) (.toURI local-pom) - (first (filter uri-resolves? (map #(java.net.URI. (str % "/" gav-path)) remote-maven-repos)))))))) + (first (filter lcihttp/uri-resolves? (map #(str % "/" gav-path) remote-maven-repos)))))))) (defn- licenses-from-pair "Attempts to determine the license(s) (a set) from a POM license name/URL pair. 
@@ -77,16 +71,10 @@ The result has metadata attached that describes how the identifiers in the expression(s) were determined." [{:keys [name url]}] - ; Attempt to find a match from the name first - (let [name-expressions (lcmtch/name->expressions name)] - (if (every? lcmtch/unlisted? name-expressions) - ; If all we got were unlisted expressions from the name, try the URI - (let [uri-expressions (lcmtch/uri->ids url)] - (if (every? lcmtch/unlisted? uri-expressions) - ; Neither worked, so just return all of the unlisted placeholders - (set/union name-expressions uri-expressions) ;####TODO: MERGE METADATA!!!! - uri-expressions)) - name-expressions))) + (let [name-expressions (when-not (s/blank? name) (lcmtch/name->expressions name)) + name-ids (some-> (seq (mapcat #(sexp/extract-ids (sexp/parse %)) name-expressions)) set) + uri-ids (when-not (s/blank? url) (apply disj (lcmtch/uri->ids url) name-ids))] ; Only include ids detected from the URL that weren't already detected in the name + (lcimd/union name-expressions uri-ids))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") @@ -95,26 +83,27 @@ file. pom may be a java.io.InputStream, or anything that can be opened by clojure.java.io/input-stream. - Note: if an InputStream is provided, it's the caller's responsibility to open - and close it. + Note that if an InputStream is provided: + 1. it's the caller's responsibility to open and close it + 2. a filename *must* be provided along with the stream (2nd arg) The result has metadata attached that describes how the identifiers in the expression(s) were determined." - {:arglists '([pom])} - type) + {:arglists '([pom] [pom file-name])} + (fn [& args] (type (first args)))) ; Note: a few rare pom.xml files are missing the xmlns declation (e.g. 
software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags here (defmethod pom->expressions java.io.InputStream - [pom-is] + [pom-is fname] (let [pom-xml (xml/parse pom-is) licenses (seq (xi/find-all pom-xml [::pom/project ::pom/licenses ::pom/license])) licenses-no-ns (seq (xi/find-all pom-xml [:project :licenses :license]))] (if (or licenses licenses-no-ns) - ; Licenses block exists - process it + ; block exists - process it (let [name-uri-pairs (lcu/nset (concat (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) - (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url]))))] -;####TODO: MERGE METADATA MAPS AND EMBELLISH :source!!!! - (lcu/nset (mapcat licenses-from-pair name-uri-pairs))) + (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url])))) + licenses (map #(lcimd/prepend-source (licenses-from-pair %) fname) name-uri-pairs)] + (lcim/manual-fixes (apply lcimd/union licenses))) ; License block doesn't exist, so attempt to lookup the parent pom and get it from there (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) @@ -129,12 +118,13 @@ (pom->expressions (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep (defmethod pom->expressions :default - [pom] - (when pom - (with-open [pom-is (io/input-stream pom)] - (if-let [expressions (pom->expressions pom-is)] - expressions - (log/info (str "'" pom "'") "contains no license information"))))) + ([pom] (pom->expressions pom (lcu/filename pom))) + ([pom fname] + (when pom + (with-open [pom-is (io/input-stream pom)] + (if-let [expressions (pom->expressions pom-is 
fname)] + expressions + (log/info (str "'" pom "'") "contains no license information")))))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent @@ -142,5 +132,6 @@ this fn, as initialisation will occur implicitly anyway; it is provided to allow explicit control of the cost of initialisation to callers who need it." [] + (lcmtch/init!) @local-maven-repo-d nil) diff --git a/test/lice_comb/data/complex.pom b/test/lice_comb/data/complex.pom new file mode 100644 index 0000000..a317375 --- /dev/null +++ b/test/lice_comb/data/complex.pom @@ -0,0 +1,21 @@ + + + + + Apache License 2.0 + http://www.apache.org/licenses/LICENSE-2.0.html + + + mit license + + + gpl 2.0 with classpath exception + + + https://opensource.org/licenses/BSD-3-Clause + + + Unlicense AND CC0-1.0 + + + diff --git a/test/lice_comb/data/pom-in-a-zip.zip b/test/lice_comb/data/pom-in-a-zip.zip new file mode 100644 index 0000000000000000000000000000000000000000..2ed5086f9384810cef3eebc4d6674b390db3a656 GIT binary patch literal 384 zcmWIWW@Zs#U}E542->X_ZX&rX?F<70LkuGW13!ZdLvdzqK~Ab}VtPT2UO|3tXb2|* zv$Xn`6c8@0;AUWC`3h7CG=PDjb%Jg7VFQ7-^8Z{5W92iAM3k*dGR523ZaIW6l&9OGVa?ho3`jKIcZyG;GT+ z=uDMf`SAIaV^J#)#xX3FU2EOU^Kr?y_tsk{{xVxSexpressions dir->expressions zip->expressions]])) + [lice-comb.files :refer [init! probable-license-file? probable-license-files file->expressions dir->expressions zip->expressions]])) (use-fixtures :once fixture) (def test-data-path "./test/lice_comb/data") +(deftest init!-tests + (testing "Nil response" + (is (nil? (init!))))) + (deftest probable-license-file?-tests (testing "Nil, empty or blank names" (is (= false (probable-license-file? nil))) @@ -96,20 +100,6 @@ (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/asf-cat-1.0.12.pom")))) (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/with-parent.pom")))))) -(deftest dir->expressions-tests - (testing "Nil, empty, or blank directory name" - (is (nil? 
(dir->expressions nil))) - (is (thrown? java.io.FileNotFoundException (dir->expressions ""))) - (is (thrown? java.io.FileNotFoundException (dir->expressions " "))) - (is (thrown? java.io.FileNotFoundException (dir->expressions "\n"))) - (is (thrown? java.io.FileNotFoundException (dir->expressions "\t")))) - (testing "Non-existent or invalid directory" - (is (thrown? java.io.FileNotFoundException (dir->expressions "this_directory_does_not_exist"))) - (is (thrown? java.nio.file.NotDirectoryException (dir->expressions "deps.edn")))) - (testing "Valid directory" -; (is (valid= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "CC-BY-4.0"} (dir->expressions "."))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 -)) - (deftest zip->expressions-tests (testing "Nil, empty, or blank zip file name" (is (nil? (zip->expressions nil))) @@ -122,5 +112,23 @@ (testing "Invalid zip file" (is (thrown? java.util.zip.ZipException (zip->expressions (str test-data-path "/bad.zip"))))) (testing "Valid zip file" - (is (valid= #{"Apache-2.0"} (zip->expressions (str test-data-path "/good.zip")))))) + (is (valid= #{"Apache-2.0"} (zip->expressions (str test-data-path "/good.zip")))) + (is (valid= #{"AGPL-3.0-or-later"} (zip->expressions (str test-data-path "/pom-in-a-zip.zip")))))) +(deftest dir->expressions-tests + (testing "Nil, empty, or blank directory name" + (is (nil? (dir->expressions nil))) + (is (thrown? java.io.FileNotFoundException (dir->expressions ""))) + (is (thrown? java.io.FileNotFoundException (dir->expressions " "))) + (is (thrown? java.io.FileNotFoundException (dir->expressions "\n"))) + (is (thrown? java.io.FileNotFoundException (dir->expressions "\t")))) + (testing "Non-existent or invalid directory" + (is (thrown? java.io.FileNotFoundException (dir->expressions "this_directory_does_not_exist"))) + (is (thrown? 
java.nio.file.NotDirectoryException (dir->expressions "deps.edn")))) + (testing "Valid directory" + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0"} + (dir->expressions ".")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (testing "Valid directory - include ZIP compressed files" + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0" "AGPL-3.0-or-later"} + (dir->expressions "." {:include-zips? true})))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 +) diff --git a/test/lice_comb/impl/matching_test.clj b/test/lice_comb/impl/matching_test.clj new file mode 100644 index 0000000..59a7d0a --- /dev/null +++ b/test/lice_comb/impl/matching_test.clj @@ -0,0 +1,69 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.matching-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] + [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.impl.matching :refer [split-on-operators]])) + +(use-fixtures :once fixture) + +(deftest split-on-operators-tests + (testing "nil/empty/blank" + (is (nil? (split-on-operators nil))) + (is (nil? (split-on-operators ""))) + (is (nil? 
(split-on-operators " ")))) + (testing "Simple non-splits" + (is (= '("foo") (split-on-operators "foo"))) + (is (= '("Apache") (split-on-operators "Apache"))) + (is (= '("Apache MIT BSD") (split-on-operators "Apache MIT BSD"))) + (is (= '("ApacheandMIT") (split-on-operators "ApacheandMIT"))) + (is (= '("Apacheand MIT") (split-on-operators "Apacheand MIT"))) + (is (= '("Apache andMIT") (split-on-operators "Apache andMIT"))) + (is (= '("ApacheorMIT") (split-on-operators "ApacheorMIT"))) + (is (= '("Apacheor MIT") (split-on-operators "Apacheor MIT"))) + (is (= '("Apache orMIT") (split-on-operators "Apache orMIT"))) + (is (= '("ApachewithMIT") (split-on-operators "ApachewithMIT"))) + (is (= '("Apachewith MIT") (split-on-operators "Apachewith MIT"))) + (is (= '("Apache withMIT") (split-on-operators "Apache withMIT"))) + (is (= '("Apachew/MIT") (split-on-operators "Apachew/MIT"))) + (is (= '("Apachew/ MIT") (split-on-operators "Apachew/ MIT")))) + (testing "Simple and splits" + (is (= '("Apache" :and "MIT") (split-on-operators "Apache and MIT"))) + (is (= '("Apache" :and "MIT") (split-on-operators "Apache AND MIT"))) + (is (= '("Apache" :and "MIT") (split-on-operators "Apache aNd MIT"))) + (is (= '("Apache" :and "MIT") (split-on-operators "Apache & MIT"))) + (is (= '("Apache" :and "MIT") (split-on-operators "Apache &MIT"))) + (is (= '("Apache" :and "MIT") (split-on-operators "Apache&MIT")))) + (testing "Simple or splits" + (is (= '("Apache" :or "MIT") (split-on-operators "Apache or MIT"))) + (is (= '("Apache" :or "MIT") (split-on-operators "Apache OR MIT"))) + (is (= '("Apache" :or "MIT") (split-on-operators "Apache oR MIT")))) + (testing "Simple with splits" + (is (= '("Apache" :with "MIT") (split-on-operators "Apache with MIT"))) + (is (= '("Apache" :with "MIT") (split-on-operators "Apache WITH MIT"))) + (is (= '("Apache" :with "MIT") (split-on-operators "Apache wItH MIT"))) + (is (= '("Apache" :with "MIT") (split-on-operators "Apache w/ MIT"))) + (is (= '("Apache" 
:with "MIT") (split-on-operators "Apache w/MIT")))) + (testing "Complex non-splits" + (is (= '("COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0") (split-on-operators "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (= '("Copyright & all rights reserved Lean Pixel") (split-on-operators "Copyright & all rights reserved Lean Pixel"))) + (is (= '("GNU General Public License v3.0 or later") (split-on-operators "GNU General Public License v3.0 or later"))) + (is (= '("GNU General Public License, Version 3 (or later)") (split-on-operators "GNU General Public License, Version 3 (or later)"))) + (is (= '("GNU Lesser General Public License, version 2.1 or newer") (split-on-operators "GNU Lesser General Public License, version 2.1 or newer"))) + (is (= '("LGPL-3.0-or-later") (split-on-operators "LGPL-3.0-or-later"))))) diff --git a/test/lice_comb/impl/metadata_test.clj b/test/lice_comb/impl/metadata_test.clj new file mode 100644 index 0000000..8d69bec --- /dev/null +++ b/test/lice_comb/impl/metadata_test.clj @@ -0,0 +1,78 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.metadata-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] + [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.impl.metadata :refer [prepend-source union]])) + +(use-fixtures :once fixture) + +(def md1 { + "Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Apache Software Licence v2.0")} + "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("MIT")} +}) + +(def md2 { + "Apache-2.0" {:type :concluded :confidence :low :strategy :regex-matching :source '("Apache style license")} + "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")} + }) + +(deftest prepend-source-tests + (testing "nil/empty/blank" + (is (nil? (prepend-source nil nil))) + (is (nil? (prepend-source nil ""))) + (is (= #{} (prepend-source #{} nil))) + (is (nil? (meta (prepend-source #{} nil)))) + (is (= #{} (prepend-source #{} ""))) + (is (nil? 
(meta (prepend-source #{} ""))))) + (testing "non-nil metadata that isn't lice-comb specific" + (is (= {} (meta (prepend-source (with-meta #{:a} {}) "foo")))) + (is (= {:foo "foo"} (meta (prepend-source (with-meta #{:a} {:foo "foo"}) "bar")))) + (testing "non-nil metadata that is lice-comb specific" + (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("pom.xml" "Apache Software Licence v2.0")} + "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("pom.xml" "MIT")}} + (meta (prepend-source (with-meta #{:a} md1) "pom.xml")))) + (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("library.jar" "pom.xml" "Apache Software Licence v2.0")} + "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("library.jar" "pom.xml" "MIT")}} + (meta (prepend-source (prepend-source (with-meta #{:a} md1) "pom.xml") "library.jar"))))))) + +(deftest union-tests + (testing "zero arg" + (is (= #{} (union)))) + (testing "one arg" + (is (nil? (union nil))) + (is (= #{} (union #{}))) + (is (= #{:foo} (union #{:foo})))) + (testing "two arg" + (is (= #{:foo :bar} (union #{:foo} #{:bar})))) + (testing "multi-arg" + (is (= #{:a :b :c} (union #{:a} #{:b} #{:c}))) + (is (= #{:a :b :c :d} (union #{:a} #{:b} #{:c} #{:d}))) + (is (= #{:a :b :c :d :e :f :g :h :i :j :k :l :m :n :o} (union #{:a :b} #{:c :d :e} #{:f :g :h :i} #{:j :k :l :m :n :o})))) + (testing "metadata" + (is (= {:foo "foo"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}))))) + (is (= {:foo "foo" :bar "bar"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:bar "bar"}))))) + (is (= {:foo "foo" :bar "bar" :blah "blah"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:bar "bar"}) (with-meta #{:g :h :i} {:blah "blah"}))))) + (is (thrown? 
clojure.lang.ExceptionInfo (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:foo "bar"}))))) ; Non lice-comb conflicting key in metadata maps = exception + (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Apache Software Licence v2.0")} + "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("MIT")} + "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")} + } + (meta (union (with-meta #{:a :b :c} md1) (with-meta #{:d :e :f} md2))))))) + diff --git a/test/lice_comb/impl_regex_matching_test.clj b/test/lice_comb/impl/regex_matching_test.clj similarity index 96% rename from test/lice_comb/impl_regex_matching_test.clj rename to test/lice_comb/impl/regex_matching_test.clj index 7da3986..44116f9 100644 --- a/test/lice_comb/impl_regex_matching_test.clj +++ b/test/lice_comb/impl/regex_matching_test.clj @@ -16,12 +16,12 @@ ; SPDX-License-Identifier: Apache-2.0 ; -(ns lice-comb.impl-regex-matching-test - (:require [clojure.test :refer [deftest testing is are use-fixtures]] +(ns lice-comb.impl.regex-matching-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] [clojure.set :as set] [rencg.api :as rencg] [lice-comb.impl.utils :as lcu] - [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.test-boilerplate :refer [fixture testing-with-data]] [lice-comb.impl.regex-matching :refer [init! version-re only-or-later-re agpl-re lgpl-re gpl-re gnu-re match-regexes]])) (use-fixtures :once fixture) @@ -209,23 +209,6 @@ (def not-nil? (complement nil?)) -(defn when-pred - [val pred then] - (if (pred val) - (then val) - val)) - -(defmacro testing-with-data - "A form of `clojure.test/testing` that generates multiple `clojure.test/is` - clauses, based on applying f to the keys in m, and comparing to the associated - value in m." 
- [name f m] - `(testing ~name - ~@(map #(list `is `(= (~f ~(key %)) ~(when-pred (val %) list? (partial list 'quote)))) - (if (isa? (type m) clojure.lang.Symbol) - @(resolve m) - m)))) - ; Add input to result to make troubleshooting test failures easier (defn test-regex [re s] diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index d13c102..2da63c3 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -374,7 +374,7 @@ (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) - (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) ; ####TODO: THINK MORE ABOUT THIS ONE!!! 
+ (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later"))) (is (valid= #{"EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) (is (valid= #{"EPL-2.0 OR GPL-3.0-or-later"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later"))) diff --git a/test/lice_comb/maven_test.clj b/test/lice_comb/maven_test.clj index 7b5d2cc..1478dfa 100644 --- a/test/lice_comb/maven_test.clj +++ b/test/lice_comb/maven_test.clj @@ -20,12 +20,16 @@ (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture valid=]] [lice-comb.impl.spdx :as lcis] - [lice-comb.maven :refer [pom->expressions]])) + [lice-comb.maven :refer [init! pom->expressions]])) (use-fixtures :once fixture) (def test-data-path "./test/lice_comb/data") +(deftest init!-tests + (testing "Nil response" + (is (nil? (init!))))) + (deftest pom->expressions-tests (testing "Nil pom" (is (nil? (pom->expressions nil)))) @@ -38,7 +42,8 @@ (is (thrown? 
java.io.FileNotFoundException (pom->expressions "./this/path/and/file/doesnt/exist.pom")))) (testing "Synthetic pom files" (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/simple.pom")))) - (is (valid= #{"BSD-3-Clause"} (pom->expressions (str test-data-path "/no-xml-ns.pom"))))) + (is (valid= #{"BSD-3-Clause"} (pom->expressions (str test-data-path "/no-xml-ns.pom")))) + (is (valid= #{"Apache-2.0" "MIT" "GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Unlicense AND CC0-1.0"} (pom->expressions (str test-data-path "/complex.pom"))))) (testing "Real pom files - local" (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/asf-cat-1.0.12.pom"))))) (testing "Real pom files - remote" diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index ab60a84..3afaef7 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -21,24 +21,41 @@ [spdx.expressions :as sexp])) ; Here we hack up a "global once" function -(def ^:private global-setup (memoize (fn [] - ; Because java.util.logging is a hot mess - (org.slf4j.bridge.SLF4JBridgeHandler/removeHandlersForRootLogger) - (org.slf4j.bridge.SLF4JBridgeHandler/install) +(def ^:private global-setup (delay + ; Because java.util.logging is a hot mess + (org.slf4j.bridge.SLF4JBridgeHandler/removeHandlersForRootLogger) + (org.slf4j.bridge.SLF4JBridgeHandler/install) - ; Enable spec validation - (spec/check-asserts true) + ; Enable spec validation + (spec/check-asserts true) - (println "\n☔️ Running tests on Clojure" (clojure-version) "/ JVM" (System/getProperty "java.version") (str "(" (System/getProperty "java.vm.name") " v" (System/getProperty "java.vm.version") ")\n")) - ))) + (println "\n☔️ Running tests on Clojure" (clojure-version) "/ JVM" (System/getProperty "java.version") (str "(" (System/getProperty "java.vm.name") " v" (System/getProperty "java.vm.version") ")\n")) + nil)) (defn fixture [f] - (global-setup) + 
@global-setup (f)) (def not-nil? (complement nil?)) +(defn when-pred + [val pred then] + (if (pred val) + (then val) + val)) + +(defmacro testing-with-data + "A form of `clojure.test/testing` that generates multiple `clojure.test/is` + clauses, based on applying f to the keys in m, and comparing to the associated + value in m." + [name f m] + `(clojure.test/testing ~name + ~@(map #(list `clojure.test/is `(= (~f ~(key %)) ~(when-pred (val %) list? (partial list 'quote)))) + (if (isa? (type m) clojure.lang.Symbol) + @(resolve m) + m)))) + (defn valid= "Returns true if all of the following are true: * s2 has metadata @@ -52,7 +69,7 @@ (let [metadata? (or (nil? s2) (not-nil? (meta s2))) is-a-set? (or (nil? s2) (set? s2)) is-equal? (= s1 s2) - all-valid-expressions? (every? true? (map sexp/valid? s2)) + all-valid-expressions? (and (set? s2) (every? true? (map sexp/valid? s2))) result (and metadata? is-a-set? is-equal? From fce6fa2d86b29485d64c3d096a6427bac38a140a Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Sat, 2 Sep 2023 23:47:17 -0700 Subject: [PATCH 19/34] :construction: Ongoing work on issue #3 --- src/lice_comb/files.clj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index cbfbf19..b3360a0 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -71,8 +71,8 @@ ([f fname] (when (and f fname) (let [lfname (s/lower-case fname)] - (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions f) - (s/ends-with? lfname ".pom") (lcmvn/pom->expressions f) + (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions f fname) + (s/ends-with? lfname ".pom") (lcmvn/pom->expressions f fname) (instance? 
java.io.InputStream f) (lcmtch/text->ids f) :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids is)))) ; Default is to assume it's a plain text file containing license text(s) fname))))) From 8885937e7db96b2870dafb578e6e25f54aa13b49 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Mon, 4 Sep 2023 12:39:28 -0700 Subject: [PATCH 20/34] :construction: Ongoing work on issue #3 --- deps.edn | 1 + src/lice_comb/deps.clj | 38 +++-- src/lice_comb/files.clj | 103 ++++++++++---- src/lice_comb/impl/http.clj | 17 +-- src/lice_comb/impl/matching.clj | 125 +++++++++-------- src/lice_comb/impl/metadata.clj | 145 +++++++++----------- src/lice_comb/impl/regex_matching.clj | 63 +++++---- src/lice_comb/impl/utils.clj | 48 ++++++- src/lice_comb/matching.clj | 97 ++++++++++--- src/lice_comb/maven.clj | 105 +++++++++----- test/lice_comb/impl/metadata_test.clj | 100 ++++++++------ test/lice_comb/impl/regex_matching_test.clj | 6 +- test/lice_comb/{ => impl}/utils_test.clj | 62 ++++++++- test/lice_comb/matching_test.clj | 75 ++++++---- test/lice_comb/test_boilerplate.clj | 48 +++++-- 15 files changed, 672 insertions(+), 361 deletions(-) rename test/lice_comb/{ => impl}/utils_test.clj (50%) diff --git a/deps.edn b/deps.edn index 550d82b..81252fd 100644 --- a/deps.edn +++ b/deps.edn @@ -24,6 +24,7 @@ clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} tolitius/xml-in {:mvn/version "0.1.1"} hato/hato {:mvn/version "0.9.0"} + dev.weavejester/medley {:mvn/version "1.7.0"} miikka/clj-base62 {:mvn/version "0.1.1"} com.github.pmonks/clj-spdx {:mvn/version "1.0.91"} com.github.pmonks/rencg {:mvn/version "1.0.34"}} diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 148e154..b91600f 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -53,8 +53,8 @@ (when ga [(symbol (first (s/split (str ga) #"\$"))) info])) -(defmulti dep->expressions - "Attempt to detect the SPDX license expression(s) (a set) in a tools.deps +(defmulti dep->expressions-info + 
"Attempt to detect the SPDX license expression(s) (a map) in a tools.deps style dep (a MapEntry or two-element sequence of `[groupId/artifactId dep-info]`). @@ -63,7 +63,7 @@ {:arglists '([[ga info]])} (fn [[_ info]] (:deps/manifest info))) -(defmethod dep->expressions :mvn +(defmethod dep->expressions-info :mvn [dep] (when dep (let [[ga info] (normalise-dep dep) @@ -73,38 +73,48 @@ ; override (let [pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) expressions ;(check-fallbacks ga - (if-let [expressions (lcmvn/pom->expressions pom-uri)] + (if-let [expressions (lcmvn/pom->expressions-info pom-uri)] expressions - (apply lcimd/union (mapcat lcf/zip->expressions (:paths info))))];)] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too + (into {} (pmap #(lcimd/prepend-source (lcf/zip->expressions-info %) dep) (:paths info))))];)] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too expressions))));) -(defmethod dep->expressions :deps +(defmethod dep->expressions-info :deps [dep] (when dep (let [[ga info] (normalise-dep dep) version (:git/sha info)] ; (if-let [override (check-overrides ga version)] ; override -; (check-fallbacks ga - (lcf/dir->expressions (:deps/root info)))));)) +; (check-fallbacks ga + (lcf/dir->expressions-info (:deps/root info)))));)) -(defmethod dep->expressions nil +(defmethod dep->expressions-info nil [_]) -(defmethod dep->expressions :default +(defmethod dep->expressions-info :default [dep] (throw (ex-info (str "Unexpected manifest type '" (:deps/manifest (second dep)) "' for dependency " dep) {:dep dep}))) +(defn dep->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a tools.deps + style dep (a MapEntry or two-element sequence of + `[groupId/artifactId dep-info]`). + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." 
+ [dep] + (some-> (dep->expressions-info dep) + keys + set)) + (defn deps-expressions "Attempt to detect the SPDX license expression(s) in a tools.deps 'lib map', returning a new lib map with the licenses assoc'ed in (in key - `:lice-comb/license-expressions`)" + `:lice-comb/license-info`)" [deps] (when deps - (into {} -;####TODO: CHECK WHETHER METADATA MAPS NEED TO BE MERGED!!!! - (pmap #(let [[k v] %] [k (assoc v :lice-comb/license-expressions (dep->expressions [k v]))]) deps)))) + (into {} (pmap #(let [[k v] %] [k (assoc v :lice-comb/license-info (dep->expressions-info [k v]))]) deps)))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index b3360a0..c8c4fa3 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -40,7 +40,7 @@ (throw (java.nio.file.NotDirectoryException. (str dir)))) (throw (java.io.FileNotFoundException. (str dir))))))) -(defn probable-license-file? +(defn- probable-license-file? "Returns true if the given file-like thing (String, File, ZipEntry) is a probable license file, false otherwise." [f] @@ -50,35 +50,52 @@ (or (contains? probable-license-filenames fname) (s/ends-with? fname ".pom")))))) -(defn probable-license-files +(defn- probable-license-files "Returns all probable license files in the given directory, recursively, as a set of java.io.File objects. dir may be a String or a java.io.File, either of which must refer to a readable directory." [dir] (when-let [dir (ensure-readable-dir dir)] - (lcu/nset (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq dir))))) + (some-> (seq (filter #(and (.isFile ^java.io.File %) (probable-license-file? 
%)) (file-seq dir))) + set))) -(defn file->expressions - "Attempts to determine the SPDX license expression(s) (a set) from the given +(defn file->expressions-info + "Attempts to determine the SPDX license expression(s) (a map) from the given file (an InputStream or something that can have an io/input-stream opened on it). If an InputStream is provided, it must already be open and the associated - filename should also be provided as the second parameter (it is optional in + filepath should also be provided as the second parameter (it is optional in other cases). The result has metadata attached that describes how the identifiers in the expression(s) were determined." - ([f] (file->expressions f (lcu/filename f))) - ([f fname] - (when (and f fname) - (let [lfname (s/lower-case fname)] - (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions f fname) - (s/ends-with? lfname ".pom") (lcmvn/pom->expressions f fname) + ([f] (file->expressions-info f (lcu/filepath f))) + ([f filepath] + (when (and f (not (s/blank? filepath))) + (let [fname (lcu/filename filepath) + lfname (s/lower-case fname)] + (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname) + (s/ends-with? lfname ".pom") (lcmvn/pom->expressions-info f fname) (instance? java.io.InputStream f) (lcmtch/text->ids f) :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids is)))) ; Default is to assume it's a plain text file containing license text(s) - fname))))) + filepath))))) -(defn zip->expressions - "Attempt to detect the SPDX license expression(s) in a ZIP file. zip may be a +(defn file->expressions + "Attempts to determine the SPDX license expression(s) (a set) from the given + file (an InputStream or something that can have an io/input-stream opened on + it). If an InputStream is provided, it must already be open and the associated + filepath should also be provided as the second parameter (it is optional in + other cases). 
+ + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + ([f] (file->expressions f (lcu/filepath f))) + ([f filepath] + (some-> (file->expressions-info f filepath) + keys + set))) + +(defn zip->expressions-info + "Attempt to detect the SPDX license expression(s) (a map) in a ZIP file. zip may be a String or a java.io.File, both of which must refer to a ZIP-format compressed file. @@ -91,14 +108,28 @@ (let [zip-file (io/file zip)] (java.util.zip.ZipFile. zip-file) ; This no-op forces validation of the zip file - ZipInputStream does not reliably perform validation (with-open [zip-is (java.util.zip.ZipInputStream. (io/input-stream zip-file))] - (loop [result #{} + (loop [result {} entry (.getNextEntry zip-is)] (if entry (if (probable-license-file? entry) - (recur (lcimd/union result (lcimd/prepend-source (file->expressions zip-is (lcu/filename entry)) (lcu/filename zip-file))) + (recur (merge result (lcimd/prepend-source (file->expressions-info zip-is (lcu/filename entry)) (lcu/filepath zip-file))) (.getNextEntry zip-is)) (recur result (.getNextEntry zip-is))) - (doall (some-> (seq result) set)))))))) ; De-lazy the result before we exit the with-open scope + (when-not (empty? result) result))))))) + +(defn zip->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a ZIP file. zip may be a + String or a java.io.File, both of which must refer to a ZIP-format compressed + file. + + Throws on invalid zip file (doesn't exist, not readable, not ZIP format, etc.). + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + [zip] + (some-> (zip->expressions-info zip) + keys + set)) (defn- zip-compressed-files "Returns all probable ZIP compressed files in the given directory, @@ -106,12 +137,13 @@ java.io.File, either of which must refer to a readable directory." 
[dir] (when-let [dir (ensure-readable-dir dir)] - (lcu/nset (filter #(and (.isFile ^java.io.File %) - (or (s/ends-with? (str %) ".zip") - (s/ends-with? (str %) ".jar"))) - (file-seq dir))))) + (some-> (seq (filter #(and (.isFile ^java.io.File %) + (or (s/ends-with? (str %) ".zip") + (s/ends-with? (str %) ".jar"))) + (file-seq dir))) + set))) -(defn dir->expressions +(defn dir->expressions-info "Attempt to detect the SPDX license expression(s) (a set) in a directory. dir may be a String or a java.io.File, both of which must refer to a readable directory. @@ -122,15 +154,32 @@ The result has metadata attached that describes how the identifiers in the expression(s) were determined." - ([dir] (dir->expressions dir nil)) + ([dir] (dir->expressions-info dir nil)) ([dir {:keys [include-zips?] :or {include-zips? false}}] (when dir - (let [file-expressions (apply lcimd/union (map file->expressions (probable-license-files dir)))] + (let [file-expressions (into {} (map file->expressions-info (probable-license-files dir)))] (if include-zips? - (let [zip-expressions (apply lcimd/union (map #(try (zip->expressions %) (catch Exception _ nil)) (zip-compressed-files dir)))] - (lcimd/union file-expressions zip-expressions)) + (let [zip-expressions (into {} (map #(try (zip->expressions-info %) (catch Exception _ nil)) (zip-compressed-files dir)))] + (merge file-expressions zip-expressions)) file-expressions))))) +(defn dir->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a directory. dir + may be a String or a java.io.File, both of which must refer to a + readable directory. + + The optional `opts` map has these keys: + * `include-zips?` (boolean, default false) - controls whether zip compressed + files found in the directory are included in the scan or not + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined."
+ ([dir] (dir->expressions dir nil)) + ([dir opts] + (some-> (dir->expressions-info dir opts) + keys + set))) + (defn init! "Initialises this namespace upon first call (and does nothing on subsequent calls), returning nil. Consumers of this namespace are not required to call diff --git a/src/lice_comb/impl/http.clj b/src/lice_comb/impl/http.clj index 7b7eabd..9d27bfc 100644 --- a/src/lice_comb/impl/http.clj +++ b/src/lice_comb/impl/http.clj @@ -33,14 +33,15 @@ Note: does not throw - returns false on errors." [uri] - (when (lcu/valid-http-uri? (str uri)) - (try - (when-let [response (hc/head (str uri) - {:http-client @http-client-d - :header {"user agent" "com.github.pmonks/lice-comb"}})] - (= 200 (:status response))) - (catch Exception _ - false)))) + (boolean + (when (lcu/valid-http-uri? (str uri)) + (try + (when-let [response (hc/head (str uri) + {:http-client @http-client-d + :header {"user agent" "com.github.pmonks/lice-comb"}})] + (= 200 (:status response))) + (catch Exception _ + false))))) (defn- cdn-uri "Converts raw URIs into CDN URIs, for these 'known' hosts: diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index 4f4b7d9..21209e4 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -55,8 +55,14 @@ "LGPL-2.1" "LGPL-3.0"}) +(defn- dis + "Remove the given key(s) from the associative collection (set or map)." + [associative & ks] + (cond (set? associative) (apply disj associative ks) + (map? associative) (apply dissoc associative ks))) + (defn- fix-gpl-only-or-later - "If the set of ids includes both an 'only' and an 'or-later' variant of the + "If the keys of ids includes both an 'only' and an 'or-later' variant of the same underlying GNU family identifier, remove the 'only' variant." [ids] (loop [result ids @@ -65,47 +71,42 @@ (if f (recur (if (and (contains? result (str f "-only")) (contains? 
result (str f "-or-later"))) - (disj result (str f "-only")) + (dis result (str f "-only")) result) (first r) (rest r)) result))) (defn- fix-public-domain-cc0 - "If the set of ids includes both CC0-1.0 and lice-comb's public domain + "If the keys of ids includes both CC0-1.0 and lice-comb's public domain LicenseRef, remove the LicenseRef as it's redundant." [ids] (if (and (contains? ids (lcis/public-domain)) (contains? ids "CC0-1.0")) - (disj ids (lcis/public-domain)) + (dis ids (lcis/public-domain)) ids)) (defn- fix-mpl-2 - "If the set of ids includes both MPL-2.0 and MPL-2.0-no-copyleft-exception, + "If the keys of ids includes both MPL-2.0 and MPL-2.0-no-copyleft-exception, remove the MPL-2.0-no-copyleft-exception as it's redundant." [ids] (if (and (contains? ids "MPL-2.0") (contains? ids "MPL-2.0-no-copyleft-exception")) - (disj ids "MPL-2.0-no-copyleft-exception") + (dis ids "MPL-2.0-no-copyleft-exception") ids)) (defn manual-fixes - "Manually fix certain invalid combinations of license identifiers in a set." + "Manually fix certain invalid combinations of license identifiers in a set or + map." [ids] - (when ids - (let [m (meta ids) - result (some-> ids - direct-replacements - fix-gpl-only-or-later - fix-public-domain-cc0 - fix-mpl-2 - set) - removed-ids (apply disj (set (keys m)) result) - m (apply dissoc m removed-ids)] - (with-meta result m)))) + (some-> ids + direct-replacements + fix-gpl-only-or-later + fix-public-domain-cc0 + fix-mpl-2)) (defmulti text->ids - "Attempts to determine the SPDX license and/or exception identifier(s) (a set) + "Attempts to determine the SPDX license and/or exception identifier(s) (a map) within the given license text (a String, Reader, InputStream, or something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). 
The result has metadata attached that describes how the identifiers were @@ -126,9 +127,9 @@ ; These clj-spdx APIs are *expensive*, so we paralellise them (let [f-lic (future (sm/licenses-within-text s @lcis/license-ids-d)) f-exc (future (sm/exceptions-within-text s @lcis/exception-ids-d)) - ids (manual-fixes (set/union @f-lic @f-exc))] + ids (set/union @f-lic @f-exc)] (when ids - (with-meta ids (into {} (map #(vec [% {:type :concluded :confidence :high :strategy :spdx-text-matching}]) ids)))))) + (manual-fixes (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-text-matching})) ids)))))) (defmethod text->ids java.io.Reader [r] @@ -147,7 +148,7 @@ (text->ids r)))) (defn uri->ids - "Returns the SPDX license and/or exception identifiers (a set) for the given + "Returns the SPDX license and/or exception identifiers (a map) for the given uri, or nil if there aren't any. It does this via two steps: 1. Seeing if the given URI is in the license or exception list, and returning the ids of the associated licenses and/or exceptions if so @@ -171,19 +172,18 @@ (let [suri (lcu/simplify-uri uri)] ; 1. see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) (if-let [ids (get @lcis/index-uri-to-id-d suri)] - (let [metadata (into {} (map #(vec [% {:type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)}]) ids))] - (with-meta ids metadata)) + (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids)) ; 2. 
attempt to retrieve the text/plain contents of the uri and perform full license matching on it (when-let [license-text (lcihttp/get-text uri)] (when-let [ids (text->ids license-text)] - (lcimd/prepend-source ids (str uri " (retrieved text)"))))))))) + (lcimd/prepend-source ids (str "Text retrieved from " uri))))))))) (defn- string->ids-info - "Converts the given String into a sequence of singleton maps, each of which - has a key is that is an SPDX identifier (either a listed SPDX license or - exception id), and whose value is meta-information about how that identifier - was found. The result sequence is ordered in the same order of appearance as - the source values in s. + "Converts the given String into a sequence of singleton maps (NOT A SINGLE + MAP!), each of which has a key that is an SPDX identifier (either a listed + SPDX license or exception id), and whose value is a list of meta-information + about how that identifier was found. The result sequence is ordered in the + same order of appearance as the source values in s. If no listed SPDX license or exception identifiers are found, returns a singleton sequence containing a map with a lice-comb specific 'unlisted' @@ -202,19 +202,20 @@ (let [s (s/trim s)] (if-let [id (get @lcis/spdx-ids-d (s/lower-case s))] (if (= id s) - (list {id {:type :declared :strategy :spdx-listed-identifier-exact-match :source (list s)}}) - (list {id {:type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)}})) + (list {id (list {:id id :type :declared :strategy :spdx-listed-identifier-exact-match :source (list s)})}) + (list {id (list {:id id :type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)})})) ; 2. Is it an SPDX license or exception name?
(if-let [ids (get @lcis/index-name-to-id-d (s/trim (s/lower-case s)))] - (map #(hash-map % {:type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)}) ids) + (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)})) ids) ; 3. Is it a URI? If so, perform URI matching on it (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central) (if-let [ids (uri->ids s)] - (let [metadata (meta ids)] - (map #(hash-map % (get metadata %)) ids)) ; Convert metadata from uri->ids back into a regular map (so that it survives expression building) + ids ; 4. Attempt regex name matching - (if-let [ids (lcirm/match-regexes s)] - (map #(hash-map % (get (meta ids) %)) ids) ; Convert metadata from match-regexes back into a regular map (so that it survives expression building) - (list {(lcis/name->unlisted s) {:type :concluded :confidence :low :strategy :unlisted :source (list s)}})))))))) + (if-let [ids (lcirm/matches s)] + ids + ; 5. No clue, so return a single unlisted SPDX LicenseRef + (let [id (lcis/name->unlisted s)] + (list {id (list {:id id :type :concluded :confidence :low :strategy :unlisted :source (list s)})}))))))))) (defn- filter-blanks "Filter blank strings out of coll" @@ -252,7 +253,8 @@ (defn- process-expression-element "Processes a single new expression element e (either a keyword representing - an SPDX operator, or an SPDX identifier) in the context of stack (list) s." + an SPDX operator, or a map representing an SPDX identifier) in the context of + stack (list) s." [s e] (if (keyword? e) ; e is a keyword (SPDX operator): only push a keyword if the prior element was an id, or it's different to the prior keyword @@ -272,41 +274,42 @@ (if (nil? 
prior) (push s-minus-2 e) ; s had one keyword on it (which is invalid), so drop it and push e on (if (or (not= :with kw) ; If the prior keyword was :and or :or, or :with and the current element is a listed exception id, build an SPDX expression fragment and push the result onto s - (se/listed-id? e)) - (push s-minus-2 (s/join " " [prior operator e])) + (se/listed-id? (first (keys e)))) + (let [k (s/join " " [(first (keys prior)) operator (first (keys e))]) + v (distinct (concat (list {:type :concluded :confidence :low :strategy :expression-inference}) + (first (vals prior)) + (first (vals e))))] + (push s-minus-2 {k v})) (push s-minus-1 e)))) ; We had a :with operator without a valid exception id following it, so simply drop the :with keyword from the stack and push the current element on ; Many keywords? That's invalid (since we dedupe them when they get pushed on, so this means they're different), so drop all of them and push e onto s (push (drop-while keyword? s) e)))) -(defn- build-spdx-expressions - "Builds a set of SPDX expressions from the given list of strings & keywords." +(defn- build-spdx-expressions-map + "Builds a single SPDX expressions map from the given list of keywords and SPDX expression maps." [l] (loop [result '() f (first l) r (rest l)] (if f (recur (process-expression-element result f) (first r) (rest r)) - (some-> (seq (reverse result)) ; Remember to reverse the expressions, since lists-as-stacks grow at the front, not the end - set - manual-fixes)))) + (manual-fixes (into {} result))))) (defn attempt-to-build-expressions - "Attempts to build SPDX expression(s) (a set of strings) from the - given name. The result has metadata attached that describes how the - identifiers were determined." + "Attempts to build SPDX expression(s) (a map) from the given name. + + The keys in the map are the detected SPDX license and exception identifiers, + and each value contains information about how that identifier was determined."
[name] - (when-let [partial-expressions (some->> (split-on-operators name) - (drop-while keyword?) - (lc3/rdrop-while keyword?) - (map #(if (keyword? %) % (string->ids-info %))) - flatten - (filter identity) - (drop-while keyword?) - (lc3/rdrop-while keyword?) - seq)] - (let [spdx-expressions (build-spdx-expressions (map #(if (keyword? %) % (first (keys %))) partial-expressions)) - metadata (into {} (filter (complement keyword?) partial-expressions))] - (with-meta spdx-expressions metadata)))) + (some->> (split-on-operators name) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + (map #(if (keyword? %) % (string->ids-info %))) + flatten + (filter identity) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + seq + build-spdx-expressions-map)) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/impl/metadata.clj b/src/lice_comb/impl/metadata.clj index a627109..6c530eb 100644 --- a/src/lice_comb/impl/metadata.clj +++ b/src/lice_comb/impl/metadata.clj @@ -19,76 +19,30 @@ (ns lice-comb.impl.metadata "Metadata helper functionality. Note: this namespace is not part of the public API of lice-comb and may change without notice." - (:require [clojure.string :as s] - [clojure.set :as set] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s])) (defn prepend-source - "Prepends the given source (a string) onto the list of sources for all of - the entries of the metadata for object o. Returns o with the new metadata." - [o s] - (if (and o (not (s/blank? s))) - (if-let [m (meta o)] - (with-meta o (lcu/mapfonv #(if (map? %) (assoc % :source (conj (seq (:source %)) s)) %) m)) - o) - o)) + "Prepends the given source s (a String) onto all metadata sub-maps in m (a + lice-comb id+metadata-list map)." + [m s] + (if (or (empty? m) (s/blank? s)) + m + (into {} (map #(if (sequential? 
(val %)) + (let [id (key %) + metadata-list (val %)] + (hash-map id (map (fn [x] (assoc x :source (conj (seq (:source x)) s))) metadata-list))) + %) + m)))) -(defn- merge-conflicting-key - "Merges the metadata values for a single key that exists in both m1 and m2." - [m1 m2 k] -;####TODO: IMPROVE THIS SIMPLISTIC "PICK A WINNER" IMPLEMENTATION!!!!! - (let [m1v (get m1 k) - m2v (get m2 k)] - ; If both values are maps, perhaps lice-comb specific metadata merging - (if (and (map? m1v) (map? m2v)) - (if (= :declared (:type m1v)) - m1v - (if (= :declared (:type m2v)) - m2v - (case [(:confidence m1v) (:confidence m2v)] - ([:high :high] [:high :medium] [:high :low] [:high nil]) m1v - ([:medium :medium] [:medium :low] [:medium nil]) m1v - ([:low :low] [:low nil]) m1v - m2v))) - (throw (ex-info "Attempt to merge non-lice-comb metadata maps" {}))))) - - -(defn merge-metadata - "Merges lice-comb metadata maps." - ([] {}) - ([m] m) - ([m1 m2] - (if (and m1 m2) - (let [keys-in-both (set/intersection (set (keys m1)) (set (keys m2))) - keys-in-m1-only (apply disj (set (keys m1)) keys-in-both) - keys-in-m2-only (apply disj (set (keys m2)) keys-in-both)] - (merge {} - (into {} (map #(vec [% (merge-conflicting-key m1 m2 %)]) keys-in-both)) - (into {} (map #(vec [% (get m1 %)]) keys-in-m1-only)) - (into {} (map #(vec [% (get m2 %)]) keys-in-m2-only)))) - (if m1 - m1 - m2))) - ([m1 m2 & maps] - (loop [result (merge-metadata m1 m2) - f (first maps) - r (rest maps)] - (if f - (recur (merge-metadata result f) (first r) (rest r)) - result)))) - -(defn union - "Equivalent to set/union, but preserves lice-comb metadata from the sets using - merge-metadata." 
- ([] #{}) - ([s] s) - ([s1 s2] - (with-meta (set/union s1 s2) - (merge-metadata (meta s1) (meta s2)))) - ([s1 s2 & sets] - (let [data (apply set/union (concat [s1 s2] sets)) - metadata (apply merge-metadata (concat [(meta s1) (meta s2)] (filter identity (map meta sets))))] - (with-meta data metadata)))) +(defn merge-maps + "Merges any number of lice-comb maps, by concatenating and de-duping values + for the same key (expression)." + [& maps] + (let [maps (filter identity maps)] + (when-not (empty? maps) + (let [grouped-maps (group-by first (mapcat identity maps))] + (into {} (map #(vec [% (seq (distinct (mapcat second (get grouped-maps %))))]) + (keys grouped-maps))))))) (def ^:private strategies { :spdx-expression "SPDX expression" @@ -97,25 +51,56 @@ :spdx-text-matching "SPDX license text matching" :spdx-listed-name "SPDX listed name (case insensitive match)" :spdx-listed-uri "SPDX listed URI (relaxed matching)" - :regex-name-matching "Regular expression name matching" + :expression-inference "Inferred SPDX expression" + :regex-matching "Regular expression matching" :unlisted "Unlisted"}) +(defn- metadata-keyfn + "sort-by keyfn for lice-comb metadata maps" + [metadata] + (str (case (:id metadata) + nil "0" + "1") + "-" + (case (:type metadata) + :declared "0" + :concluded "1") + "-" + (case (:confidence metadata) + nil "0" + :high "1" + :medium "2" + :low "3") + "-" + (case (:strategy metadata) + :spdx-expression "0" + :spdx-listed-identifier-exact-match "1" + :spdx-listed-identifier-case-insensitive-match "2" + :spdx-text-matching "3" + :spdx-listed-name "4" + :spdx-listed-uri "5" + :expression-inference "6" + :regex-matching "7" + :unlisted "8"))) + (defn- metadata-element->string - "Converts a single element in a lice-comb metadata map (identified by id) - into a human-readable string." + "Converts the metadata list for the given identifier into a human-readable + string." 
[m id] - (when-let [metadata (get m id)] - (str id ": " - (name (:type metadata)) - (when-let [confidence (:confidence metadata)] - (str "\n Confidence: " (name confidence))) - (when-let [strategy (:strategy metadata)] - (str "\n Strategy: " (get strategies strategy (str "#### MISSING VALUE: " strategy " ####")))) - (when-let [source (seq (:source metadata))] - (str "\n Source: " (s/join " > " source)))))) + (str id ":\n" + (when-let [metadata-list (sort-by metadata-keyfn (seq (get m id)))] + (s/join "\n" (map #(str " " + (when-let [md-id (:id %)] (when (not= id md-id) (str md-id " "))) + (case (:type %) + :declared "Declared" + :concluded "Concluded") + (when-let [confidence (:confidence %)] (str "\n Confidence: " (name confidence))) + (when-let [strategy (:strategy %)] (str "\n Strategy: " (get strategies strategy (name strategy)))) + (when-let [source (seq (:source %))] (str "\n Source:\n > " (s/join "\n > " source)))) + metadata-list))))) (defn metadata->string - "Converts a lice-comb metadata map m into a human-readable string." + "Converts lice-comb map m into a human-readable string." [m] (when m (let [ids (sort (keys m))] diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj index 94f1c3f..7d3fab4 100644 --- a/src/lice_comb/impl/regex_matching.clj +++ b/src/lice_comb/impl/regex_matching.clj @@ -19,10 +19,11 @@ (ns lice-comb.impl.regex-matching "Helper functionality focused on regex matching. Note: this namespace is not part of the public API of lice-comb and may change without notice." - (:require [clojure.string :as s] - [rencg.api :as rencg] - [lice-comb.impl.spdx :as lcis] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [medley.core :as med] + [rencg.api :as rencg] + [lice-comb.impl.spdx :as lcis] + [lice-comb.impl.utils :as lcu])) (defn- get-rencgs "Get a value for an re-ncg, potentially looking at multiple ncgs in order @@ -50,7 +51,7 @@ (contains? 
@lcis/exception-ids-d id)) id (throw (ex-info (str "Invalid SPDX id constructed: '" id - "'' - please raise an issue at " + "' - please raise an issue at " "https://github.com/pmonks/lice-comb/issues/new?assignees=pmonks&labels=bug&template=Invalid_id_constructed.md&title=Invalid+SPDX+identifer+constructed:+" id) {:id id})))) @@ -269,7 +270,7 @@ :pad-ver? true :latest-ver "1.0"} {:id "Creative commons family" - :regex #"(?i)(\bCC\sBY|Creative[\s-]+Commons(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?" + :regex #"(?i)(\bCC[\s-]BY|Creative[\s-]+Commons(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?" :fn cc-id-constructor :pad-ver? 
true :latest-ver "4.0"} @@ -341,16 +342,18 @@ :fn (constantly ["Zlib" :medium])} ]))) -(defn- match-regex +(defn- match "If a match occured for the given regex element when tested against string s, - returns a map containing the following keys, or nil if there was no match: - * :id The SPDX identifier of the found license or exception + returns a map containing the following keys: + * :id The SPDX license or exception identifier that was determined * :type The 'type' of match - will always have the value :concluded * :confidence The confidence of the match: either :high, :medium, or :low - * :strategy The matching strategy - will always have the value :regex-name-matching + * :strategy The matching strategy - will always have the value :regex-matching * :source A list of strings containing source information (specifically the portion of the string s that matched this regex element) - *: start The start index of the given match within s" + *: start The start index of the given match within s + + Returns nil if there was no match." [s elem] (when-let [match (rencg/re-find-ncg (:regex elem) s)] (let [[id confidence] ((:fn elem) (merge {:name s} elem match)) @@ -358,29 +361,33 @@ {:id id :type :concluded :confidence (if (= source id) :high confidence) - :strategy :regex-name-matching + :strategy :regex-matching :source (list source) :start (:start match)}))) -(defn match-regexes - "Returns a sequence (NOT A SET!) of the SPDX license or exception ids that - were found in the string s, or nil if there were no matches. Results are in - the order in which they appear in the string. The result also has metadata - attached, which is a map whose keys are each of the SPDX license or exception - ids, and whose values are a map containing these keys: +(defn matches + "Returns a sequence (NOT A SET!) 
of maps where each key is a SPDX license or + exception identifier (a String) that was found in s, and the value is a + sequence containing a single map describing how the identifier was determined. + The map contains these keys: * :type The 'type' of match - will always have the value :concluded * :confidence The confidence of the match: either :high, :medium, or :low - * :strategy The matching strategy - will always have the value :regex-name-matching - * :source A list of strings containing source information (specifically - the portion of the string s that matched this identifier" + * :strategy The matching strategy - will always have the value :regex-matching + * :source A sequence of strings containing source information + (specifically the substring of s that matched this identifier) + + Results are in the order in which they appear in the string, and the function + returns nil if there were no matches." [s] - (when-let [matches (seq (distinct (filter identity (pmap (partial match-regex s) @license-name-matching-d))))] - (let [ids (some->> matches - (sort-by :start) - (map :id) - (distinct)) - metadata (into {} (map #(vec [% (dissoc (first (filter (fn [x] (= % (:id x))) matches)) :start :id)]) ids))] - (with-meta ids metadata)))) + (when-let [matches (seq (filter identity (map (partial match s) @license-name-matching-d)))] + (some->> matches + (med/distinct-by :id) ;####TODO: THINK ABOUT MERGING INSTEAD OF DROPPING + (sort-by :start) + (map #(hash-map (:id %) (list {:id (:id %) ; We duplicate this here in case the result gets merged into an expression + :type (:type %) + :confidence (:confidence %) + :strategy (:strategy %) + :source (:source %)})))))) (defn init! 
"Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index 1d1d143..d635402 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -124,9 +124,45 @@ (s/replace #"\.[\p{Alnum}]{3,}\z" "")) ; Strip file type extension (if any) luri))))) +(defmulti filepath + "Returns the full path and name of the given file-like thing (String, File, + ZipEntry, URI, URL)." + type) + +(defmethod filepath nil + [_]) + +(defmethod filepath java.io.File + [^java.io.File f] + (.getPath f)) + +(defmethod filepath java.lang.String + [s] + (when s + (let [s (s/trim s)] + (if (valid-http-uri? s) + (filepath (io/as-url s)) + (filepath (io/file s)))))) + +(defmethod filepath java.util.zip.ZipEntry + [^java.util.zip.ZipEntry ze] + (.getName ze)) + +(defmethod filepath java.net.URI + [^java.net.URI uri] + (str uri)) + +(defmethod filepath java.net.URL + [^java.net.URL url] + (str url)) + +(defmethod filepath java.io.InputStream + [_] + (throw (ex-info "Cannot determine filepath of an InputStream - did you forget to provide it separately?" {}))) + (defmulti filename - "Returns just the name component of the given file or path string, excluding - any parents." + "Returns just the name component of the given file-like thing (String, File, + ZipEntry, URI, URL), excluding any parents." type) (defmethod filename nil @@ -138,11 +174,15 @@ (defmethod filename java.lang.String [s] - (filename (io/file s))) + (when s + (let [s (s/trim s)] + (if (valid-http-uri? 
s) + (filename (io/as-url s)) + (filename (io/file s)))))) (defmethod filename java.util.zip.ZipEntry [^java.util.zip.ZipEntry ze] - (filename (.getName ze))) ; Note that Zip Entry names include the entire path + (filename (.getName ze))) (defmethod filename java.net.URI [^java.net.URI uri] diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index 7ee7601..7d12888 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -56,11 +56,14 @@ (unlisted? id) (lcis/unlisted->name id) :else id))) -(defn text->ids - "Attempts to determine the SPDX license and/or exception identifier(s) (a set) +(defn text->ids-info + "Attempts to determine the SPDX license and/or exception identifier(s) (a map) within the given license text (a String, Reader, InputStream, or something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined. + Notes: * this function implements the SPDX matching guidelines (via clj-spdx). See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ @@ -74,8 +77,29 @@ [text] (lcim/text->ids text)) -(defn uri->ids - "Returns the SPDX license and/or exception identifiers (a set) for the given +(defn text->ids + "Attempts to determine the SPDX license and/or exception identifier(s) (a set + of Strings) within the given license text (a String, Reader, InputStream, or + something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, + etc.). + + Notes: + * this function implements the SPDX matching guidelines (via clj-spdx). + See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ + * the caller is expected to open & close a Reader or InputStream passed to + this function (e.g. 
using clojure.core/with-open) + * you cannot pass a String representation of a filename to this method - you + should pass filenames through clojure.java.io/file first + + The result has metadata attached that describes how the identifiers were + determined." + [text] + (some-> (text->ids-info text) + keys + set)) + +(defn uri->ids-info + "Returns the SPDX license and/or exception identifiers (a map) for the given uri, or nil if there aren't any. It does this via two steps: 1. Seeing if the given URI is in the license or exception list, and returning the ids of the associated licenses and/or exceptions if so @@ -91,39 +115,72 @@ 2. URIs in the SPDX license and exception lists are not unique - the same URI may represent multiple licenses and/or exceptions. - The result has metadata attached that describes how the identifiers were - determined." + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined." [uri] (lcim/uri->ids uri)) -(defn name->expressions - "Attempts to determine the SPDX license expression(s) (a set of Strings) - from the given 'license name' (a String), or nil if there aren't any. - This involves: +(defn uri->ids + "Returns the SPDX license and/or exception identifiers (a set of Strings) for + the given uri, or nil if there aren't any. It does this via two steps: + 1. Seeing if the given URI is in the license or exception list, and returning + the ids of the associated licenses and/or exceptions if so + 2. Attempting to retrieve the plain text content of the given URI and + performing full SPDX license matching on the result if there was one + + Notes on step 1: + 1. 
this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + See lice-comb.impl.utils/simplify-uri for exact details. + 2. URIs in the SPDX license and exception lists are not unique - the same URI + may represent multiple licenses and/or exceptions." + [uri] + (some-> (uri->ids-info uri) + keys + set)) + +(defn name->expressions-info + "Attempts to determine the SPDX license expression(s) (a map) from the given + 'license name' (a String), or nil if there aren't any. This involves: 1. Determining whether the name is a valid SPDX license expression, and if so normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it - 2. attempting to construct one or more SPDX license expressions from the + 2. Checking if the name is actually a URI, and if so performing URL matching + on it (as per url->ids-info) + 3. attempting to construct one or more SPDX license expressions from the name - The result has metadata attached that describes how the identifiers were - determined." + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined." [name] (when-not (s/blank? name) (let [name (s/trim name)] ; 1. 
If it's a valid SPDX expression, return the normalised rendition of it in a set - (if-let [parsed-expression (sexp/parse name)] - (let [ids (sexp/extract-ids parsed-expression) - normalised-expression (sexp/unparse parsed-expression) - metadata (into {} (map #(vec [% {:type :declared :strategy :spdx-expression :source (list normalised-expression)}]) ids))] - (with-meta #{normalised-expression} metadata)) + (if-let [normalised-expression (sexp/normalise name)] + {normalised-expression (list {:type :declared :strategy :spdx-expression :source (list name)})} ; 2. If it's a URI, use URI matching (this is to handle messed up real world cases where license names in POMs contain a URI) (if (lcu/valid-http-uri? name) - (if-let [ids (uri->ids name)] + (if-let [ids (uri->ids-info name)] ids - (with-meta #{(lcis/name->unlisted name)} {(lcis/name->unlisted name) {:type :concluded :confidence :low :strategy :unlisted :source (list name)}})) + {(lcis/name->unlisted name) (list {:type :concluded :confidence :low :strategy :unlisted :source (list name)})}) ; It was a URL, but we weren't able to resolve it to any ids, so return it as unlisted ; 3. Attempt to build SPDX expression(s) from the name (lcim/attempt-to-build-expressions name)))))) +(defn name->expressions + "Attempts to determine the SPDX license expression(s) (a set of Strings) from + the given 'license name' (a String), or nil if there aren't any. This involves: + 1. Determining whether the name is a valid SPDX license expression, and if so + normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it + 2. Checking if the name is actually a URI, and if so performing URL matching + on it (as per url->ids) + 3. attempting to construct one or more SPDX license expressions from the + name" + [name] + (some-> (name->expressions-info name) + keys + set)) + (defn init! "Initialises this namespace upon first call (and does nothing on subsequent calls), returning nil. 
Consumers of this namespace are not required to call diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index c45fc4c..d452023 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -25,7 +25,6 @@ [clojure.java.shell :as sh] [clojure.tools.logging :as log] [xml-in.core :as xi] - [spdx.expressions :as sexp] [lice-comb.matching :as lcmtch] [lice-comb.impl.matching :as lcim] [lice-comb.impl.metadata :as lcimd] @@ -48,7 +47,6 @@ ; TODO: make this configurable (def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"}) -;####TODO: MOVE THIS TO AN IMPL NS?? (defn pom-uri-for-gav "Attempts to locate the POM for the given GAV, which is a URI that may point to a file in the local Maven repository or a remote Maven repository (e.g. on @@ -66,44 +64,72 @@ (first (filter lcihttp/uri-resolves? (map #(str % "/" gav-path) remote-maven-repos)))))))) (defn- licenses-from-pair - "Attempts to determine the license(s) (a set) from a POM license name/URL pair. - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + "Attempts to determine the license(s) (a map) from a POM license name/URL + pair. Returns nil if no matches were found." [{:keys [name url]}] - (let [name-expressions (when-not (s/blank? name) (lcmtch/name->expressions name)) - name-ids (some-> (seq (mapcat #(sexp/extract-ids (sexp/parse %)) name-expressions)) set) - uri-ids (when-not (s/blank? url) (apply disj (lcmtch/uri->ids url) name-ids))] ; Only include ids detected from the URL that weren't already detected in the name - (lcimd/union name-expressions uri-ids))) + ; 1. Look in the name field(s) + (if-let [name-expressions (lcimd/prepend-source (lcmtch/name->expressions-info name) " tag")] + name-expressions + ; 2. 
If the names didn't give us any licenses, look in the url field(s) (this tends to be slower and less accurate) + (when-let [uri-ids (lcimd/prepend-source (lcmtch/uri->ids-info url) " tag")] + uri-ids))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") -(defmulti pom->expressions - "Attempt to detect the license expression(s) (a set) reported in a pom.xml +(defn- xml-find-all-alts + "As for xi/find-all, but supports an alternative fallback set of tags (to + help with namespace messes in pom.xml files)." + [xml ks1 ks2] + (if-let [result (seq (xi/find-all xml ks1))] + result + (seq (xi/find-all xml ks2)))) + +(defn- xml-find-first-string + "As for xi/find-first, but assumes the target is a single content tag (and + returns that, or nil if it's blank or the tag doesn't exist." + [xml ks] + (when-let [result (first (xi/find-first xml ks))] + (when-not (s/blank? result) + result))) + +(defn- xml-find-first-string-alts + "As for xml-find-first-string, but supports an alternative fallback set of + tags (to help with namespace messes in pom.xml files)." + [xml ks1 ks2] + (if-let [result (xml-find-first-string xml ks1)] + result + (xml-find-first-string xml ks2))) + +(defmulti pom->expressions-info + "Attempt to detect the license expression(s) (a map) reported in a pom.xml file. pom may be a java.io.InputStream, or anything that can be opened by clojure.java.io/input-stream. Note that if an InputStream is provided: 1. it's the caller's responsibility to open and close it - 2. a filename *must* be provided along with the stream (2nd arg) + 2. a filepath *must* be provided along with the stream (the 2nd arg) The result has metadata attached that describes how the identifiers in the expression(s) were determined." - {:arglists '([pom] [pom file-name])} + {:arglists '([pom] [pom filepath])} (fn [& args] (type (first args)))) -; Note: a few rare pom.xml files are missing the xmlns declation (e.g. 
software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags here -(defmethod pom->expressions java.io.InputStream - [pom-is fname] - (let [pom-xml (xml/parse pom-is) - licenses (seq (xi/find-all pom-xml [::pom/project ::pom/licenses ::pom/license])) - licenses-no-ns (seq (xi/find-all pom-xml [:project :licenses :license]))] - (if (or licenses licenses-no-ns) +; Note: a few rare pom.xml files are missing the xmlns declation (e.g. software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags +(defmethod pom->expressions-info java.io.InputStream + [pom-is filepath] + (let [pom-xml (xml/parse pom-is)] + (if-let [pom-licenses (xml-find-all-alts pom-xml [::pom/project ::pom/licenses] [:project :licenses])] ; block exists - process it - (let [name-uri-pairs (lcu/nset (concat (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) - (lcu/map-pad #(hash-map :name (lcu/strim %1) :url (lcu/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url])))) - licenses (map #(lcimd/prepend-source (licenses-from-pair %) fname) name-uri-pairs)] - (lcim/manual-fixes (apply lcimd/union licenses))) + (let [name-uri-pairs (some->> pom-licenses + (filter map?) ; Get rid of non-tag content (whitespace etc.) + (filter #(or (= ::pom/license (:tag %)) (= :license (:tag %)))) ; Get rid of non tags (which shouldn't exist, but Maven POMs are a shitshow...) 
+ (map #(identity (let [name (xml-find-first-string-alts % [::pom/license ::pom/name] [:license :name]) + url (xml-find-first-string-alts % [::pom/license ::pom/url] [:license :url])] + (when (or name url) + {:name name :url url})))) + set) + licenses (into {} (map #(lcimd/prepend-source (licenses-from-pair %) filepath) name-uri-pairs))] + (lcim/manual-fixes licenses)) ; License block doesn't exist, so attempt to lookup the parent pom and get it from there (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) @@ -115,16 +141,33 @@ :artifact-id (lcu/strim (first (xi/find-first parent-no-ns [:artifactId]))) :version (lcu/strim (first (xi/find-first parent-no-ns [:version])))}))] (when-not (empty? parent-gav) - (pom->expressions (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep + (pom->expressions-info (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep -(defmethod pom->expressions :default - ([pom] (pom->expressions pom (lcu/filename pom))) - ([pom fname] +(defmethod pom->expressions-info :default + ([pom] (pom->expressions-info pom (lcu/filepath pom))) + ([pom filepath] (when pom (with-open [pom-is (io/input-stream pom)] - (if-let [expressions (pom->expressions pom-is fname)] + (if-let [expressions (pom->expressions-info pom-is filepath)] expressions - (log/info (str "'" pom "'") "contains no license information")))))) + (log/info (str "'" filepath "'") "contains no license information")))))) + +(defn pom->expressions + "Attempt to detect the license expression(s) (a set) reported in a pom.xml + file. pom may be a java.io.InputStream, or anything that can be opened by + clojure.java.io/input-stream. + + Note that if an InputStream is provided: + 1. it's the caller's responsibility to open and close it + 2. 
a filepath *must* be provided along with the stream (the 2nd arg) + + The result has metadata attached that describes how the identifiers in the + expression(s) were determined." + ([pom] (pom->expressions pom (lcu/filepath pom))) + ([pom filepath] + (some-> (pom->expressions-info pom filepath) + keys + set))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/test/lice_comb/impl/metadata_test.clj b/test/lice_comb/impl/metadata_test.clj index 8d69bec..cc95a43 100644 --- a/test/lice_comb/impl/metadata_test.clj +++ b/test/lice_comb/impl/metadata_test.clj @@ -19,60 +19,70 @@ (ns lice-comb.impl.metadata-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.impl.metadata :refer [prepend-source union]])) + [lice-comb.impl.metadata :refer [prepend-source merge-maps]])) (use-fixtures :once fixture) (def md1 { - "Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("MIT")} -}) + "Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")})}) (def md2 { - "Apache-2.0" {:type :concluded :confidence :low :strategy :regex-matching :source '("Apache style license")} - "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")} - }) + "Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")})}) + +(def md3 { + "Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} + {:type :concluded 
:confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})}) + +(def mds (list md1 md2 md3)) (deftest prepend-source-tests (testing "nil/empty/blank" - (is (nil? (prepend-source nil nil))) - (is (nil? (prepend-source nil ""))) - (is (= #{} (prepend-source #{} nil))) - (is (nil? (meta (prepend-source #{} nil)))) - (is (= #{} (prepend-source #{} ""))) - (is (nil? (meta (prepend-source #{} ""))))) + (is (nil? (prepend-source nil nil))) + (is (nil? (prepend-source nil ""))) + (is (= {} (prepend-source {} nil))) + (is (= {} (prepend-source {} "")))) (testing "non-nil metadata that isn't lice-comb specific" - (is (= {} (meta (prepend-source (with-meta #{:a} {}) "foo")))) - (is (= {:foo "foo"} (meta (prepend-source (with-meta #{:a} {:foo "foo"}) "bar")))) + (is (= {:a "a"} (prepend-source {:a "a"} "foo")))) (testing "non-nil metadata that is lice-comb specific" - (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("pom.xml" "Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("pom.xml" "MIT")}} - (meta (prepend-source (with-meta #{:a} md1) "pom.xml")))) - (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("library.jar" "pom.xml" "Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("library.jar" "pom.xml" "MIT")}} - (meta (prepend-source (prepend-source (with-meta #{:a} md1) "pom.xml") "library.jar"))))))) - -(deftest union-tests - (testing "zero arg" - (is (= #{} (union)))) - (testing "one arg" - (is (nil? 
(union nil))) - (is (= #{} (union #{}))) - (is (= #{:foo} (union #{:foo})))) - (testing "two arg" - (is (= #{:foo :bar} (union #{:foo} #{:bar})))) - (testing "multi-arg" - (is (= #{:a :b :c} (union #{:a} #{:b} #{:c}))) - (is (= #{:a :b :c :d} (union #{:a} #{:b} #{:c} #{:d}))) - (is (= #{:a :b :c :d :e :f :g :h :i :j :k :l :m :n :o} (union #{:a :b} #{:c :d :e} #{:f :g :h :i} #{:j :k :l :m :n :o})))) - (testing "metadata" - (is (= {:foo "foo"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}))))) - (is (= {:foo "foo" :bar "bar"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:bar "bar"}))))) - (is (= {:foo "foo" :bar "bar" :blah "blah"} (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:bar "bar"}) (with-meta #{:g :h :i} {:blah "blah"}))))) - (is (thrown? clojure.lang.ExceptionInfo (meta (union (with-meta #{:a :b :c} {:foo "foo"}) (with-meta #{:d :e :f} {:foo "bar"}))))) ; Non lice-comb conflicting key in metadata maps = exception - (is (= {"Apache-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Apache Software Licence v2.0")} - "MIT" {:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source '("MIT")} - "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")} - } - (meta (union (with-meta #{:a :b :c} md1) (with-meta #{:d :e :f} md2))))))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("pom.xml" "Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "MIT")})} + (prepend-source md1 "pom.xml"))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("library.jar" "pom.xml" "Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("library.jar" "pom.xml" "MIT")})} + 
(prepend-source (prepend-source md1 "pom.xml") "library.jar"))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "Apache style license")} + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("pom.xml" "apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "Apache-2.0")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "GNU General Public License 3.0 or later")})} + (prepend-source md3 "pom.xml"))))) +(deftest merge-maps-tests + (testing "nil/empty" + (is (nil? (merge-maps))) + (is (nil? (merge-maps nil)))) + (testing "identity" + (is (= md1 (merge-maps md1)))) + (testing "merges" + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")} + {:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")})} + (merge-maps md1 md2))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} ; Note de-duping + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})} + (merge-maps md2 md3))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")} 
+ {:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} ; Note de-duping + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})} + (apply merge-maps mds))))) diff --git a/test/lice_comb/impl/regex_matching_test.clj b/test/lice_comb/impl/regex_matching_test.clj index 44116f9..4bffced 100644 --- a/test/lice_comb/impl/regex_matching_test.clj +++ b/test/lice_comb/impl/regex_matching_test.clj @@ -22,7 +22,7 @@ [rencg.api :as rencg] [lice-comb.impl.utils :as lcu] [lice-comb.test-boilerplate :refer [fixture testing-with-data]] - [lice-comb.impl.regex-matching :refer [init! version-re only-or-later-re agpl-re lgpl-re gpl-re gnu-re match-regexes]])) + [lice-comb.impl.regex-matching :refer [init! version-re only-or-later-re agpl-re lgpl-re gpl-re gnu-re matches]])) (use-fixtures :once fixture) @@ -234,5 +234,5 @@ (is (every? not-nil? 
(map (partial test-regex gnu-re) gnu-licenses))))) (deftest match-regexes-tests - (testing-with-data "GNU Family Regexes - correct identifier results" match-regexes gnu-licenses-and-ids) - (testing-with-data "CC Family Regexes - correct identifier results" match-regexes cc-by-licenses-and-ids)) + (testing-with-data "GNU Family Regexes - correct identifier results" #(mapcat keys (matches %)) gnu-licenses-and-ids) + (testing-with-data "CC Family Regexes - correct identifier results" #(mapcat keys (matches %)) cc-by-licenses-and-ids)) diff --git a/test/lice_comb/utils_test.clj b/test/lice_comb/impl/utils_test.clj similarity index 50% rename from test/lice_comb/utils_test.clj rename to test/lice_comb/impl/utils_test.clj index 67511a3..29927c2 100644 --- a/test/lice_comb/utils_test.clj +++ b/test/lice_comb/impl/utils_test.clj @@ -16,10 +16,11 @@ ; SPDX-License-Identifier: Apache-2.0 ; -(ns lice-comb.utils-test +(ns lice-comb.impl.utils-test (:require [clojure.test :refer [deftest testing is use-fixtures]] + [clojure.java.io :as io] [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.impl.utils :refer [simplify-uri]])) + [lice-comb.impl.utils :refer [simplify-uri filepath filename]])) (use-fixtures :once fixture) @@ -58,3 +59,60 @@ (is (= "http://gnu.org/software/classpath/license" (simplify-uri "https://www.gnu.org/software/classpath/license.html"))) (is (= "http://raw.githubusercontent.com/pmonks/lice-comb/main/license" (simplify-uri "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) (is (= "http://github.com/pmonks/lice-comb/blob/main/license" (simplify-uri "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))))) + +(deftest filepath-tests + (testing "Nil, empty or blank values" + (is (nil? 
(filepath nil))) + (is (= "" (filepath ""))) + (is (= "" (filepath " "))) + (is (= "" (filepath "\n"))) + (is (= "" (filepath "\t")))) + (testing "Files" + (is (= "/file.txt" (filepath (io/file "/file.txt")))) + (is (= "/some/path/or/other/file.txt" (filepath (io/file "/some/path/or/other/file.txt"))))) + (testing "Strings" + (is (= "file.txt" (filepath "file.txt"))) + (is (= "/some/path/or/other/file.txt" (filepath "/some/path/or/other/file.txt"))) + (is (= "https://www.google.com/" (filepath "https://www.google.com/"))) + (is (= "https://www.google.com/" (filepath " https://www.google.com/ "))) + (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))) + (testing "ZipEntries" + (is (= "file.txt" (filepath (java.util.zip.ZipEntry. "file.txt")))) + (is (= "/some/path/or/other/file.txt" (filepath (java.util.zip.ZipEntry. "/some/path/or/other/file.txt"))))) + (testing "URLs" + (is (= "https://www.google.com/" (filepath (io/as-url "https://www.google.com/")))) + (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath (io/as-url "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "URIs" + (is (= "https://www.google.com/" (filepath (java.net.URI. "https://www.google.com/")))) + (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath (java.net.URI. "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "InputStream" + (is (thrown? clojure.lang.ExceptionInfo (filepath (io/input-stream "deps.edn")))))) + +(deftest filename-tests + (testing "Nil, empty or blank values" + (is (nil? 
(filename nil))) + (is (= "" (filename ""))) + (is (= "" (filename " "))) + (is (= "" (filename "\n"))) + (is (= "" (filename "\t")))) + (testing "Files" + (is (= "file.txt" (filename (io/file "file.txt")))) + (is (= "file.txt" (filename (io/file "/some/path/or/other/file.txt"))))) + (testing "Strings" + (is (= "file.txt" (filename "file.txt"))) + (is (= "file.txt" (filename "/some/path/or/other/file.txt"))) + (is (= "" (filename "https://www.google.com"))) + (is (= "" (filename "https://www.google.com/"))) + (is (= "deps.edn" (filename "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))) + (testing "ZipEntries" + (is (= "file.txt" (filename (java.util.zip.ZipEntry. "file.txt")))) + (is (= "file.txt" (filename (java.util.zip.ZipEntry. "/some/path/or/other/file.txt"))))) + (testing "URLs" + (is (= "" (filename (io/as-url "https://www.google.com/")))) + (is (= "deps.edn" (filename (io/as-url "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "URIs" + (is (= "" (filename (java.net.URI. "https://www.google.com/")))) + (is (= "deps.edn" (filename (java.net.URI. "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))) + (testing "InputStream" + (is (thrown? clojure.lang.ExceptionInfo (filename (io/input-stream "deps.edn")))))) + diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 2da63c3..561fd4b 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -18,9 +18,9 @@ (ns lice-comb.matching-test (:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture valid=]] + [lice-comb.test-boilerplate :refer [fixture valid= valid-info=]] [lice-comb.impl.spdx :as lcis] - [lice-comb.matching :refer [init! unlisted? proprietary-commercial? text->ids name->expressions uri->ids]] + [lice-comb.matching :refer [init! unlisted? proprietary-commercial? 
text->ids name->expressions name->expressions-info uri->ids]] [spdx.licenses :as sl] [spdx.exceptions :as se])) @@ -71,7 +71,7 @@ (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary"))) (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Commercial"))) (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All rights reserved")))) - (testing "Expressions that are valid SPDX" + (testing "SPDX expressions" (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0 WITH Classpath-exception-2.0"))) (is (valid= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache-2.0 OR GPL-3.0"))) (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) @@ -704,31 +704,56 @@ (is (unlisted-only? (name->expressions "wisdragon"))) (is (unlisted-only? (name->expressions "wiseloong"))))) -;####TEST!!!! -(comment +(deftest name->expressions-info-tests + (testing "Nil, empty or blank" + (is (nil? (name->expressions-info nil))) + (is (nil? (name->expressions-info ""))) + (is (nil? (name->expressions-info " "))) + (is (nil? (name->expressions-info "\n"))) + (is (nil? 
(name->expressions-info "\t")))) + (testing "SPDX license ids" + (is (valid-info= {"AGPL-3.0-only" {:type :declared :strategy :spdx-expression :source '("AGPL-3.0")}} + (name->expressions-info "AGPL-3.0"))) + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0-with-classpath-exception")}} + (name->expressions-info "GPL-2.0-with-classpath-exception")))) + (testing "SPDX expressions" + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0 WITH Classpath-exception-2.0")}} + (name->expressions-info "GPL-2.0 WITH Classpath-exception-2.0")))) + (testing "Single expressions that are not valid SPDX" + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0 WITH Classpath-exception-2.0")}} + (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception")))) + (testing "Multiple expressions" + (is (valid-info= {"MIT" {:type :declared :strategy :spdx-listed-identifier-exact-match :source '("MIT")} + "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-name-matching :source '("BSD")}} + (name->expressions-info "MIT / BSD")))) + (testing "All names seen in POMs on Clojars as of 2023-07-13" + (is (valid-info= {"BSD-3-Clause" {:type :concluded :confidence :medium :strategy :spdx-listed-uri :source '("https://opensource.org/licenses/BSD-3-Clause")}} + (name->expressions-info "https://opensource.org/licenses/BSD-3-Clause"))) + (is (valid-info= {"EPL-2.0" {:type :concluded :confidence :medium :strategy :regex-name-matching :source '("Eclipse Public License - v 2.0")}} + (name->expressions-info "Eclipse Public License - v 2.0"))))) + (deftest uri->ids-tests (testing "Nil, empty or blank uri" - (is (nil? (uri->ids nil))) - (is (nil? (uri->ids ""))) - (is (nil? (uri->ids " "))) - (is (nil? (uri->ids "\n"))) - (is (nil? 
(uri->ids "\t")))) + (is (nil? (uri->ids nil))) + (is (nil? (uri->ids ""))) + (is (nil? (uri->ids " "))) + (is (nil? (uri->ids "\n"))) + (is (nil? (uri->ids "\t")))) (testing "URIs that appear verbatim in the SPDX license or exception lists" - (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (uri->ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace - (is (= #{"AGPL-3.0-or-later" "AGPL-3.0-only" "AGPL-3.0"} (uri->ids "https://www.gnu.org/licenses/agpl.txt"))) - (is (= #{"CC-BY-SA-4.0"} (uri->ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= #{"Classpath-exception-2.0"} (uri->ids "https://www.gnu.org/software/classpath/license.html")))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace + (is (= #{"AGPL-3.0-or-later"} (uri->ids "https://www.gnu.org/licenses/agpl.txt"))) + (is (= #{"CC-BY-SA-4.0"} (uri->ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= #{"Classpath-exception-2.0"} (uri->ids "https://www.gnu.org/software/classpath/license.html")))) (testing "URI variations that should be handled identically" - (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"Apache-2.0"} (uri->ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (uri->ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) 
(testing "URIs that appear in licensey things, but aren't in the SPDX license list as shown" - (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt")))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt")))) (testing "URIs that aren't in the SPDX license list, but do match via retrieval and full text matching" - (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) - (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))) - (is (= #{"Apache-2.0"} (uri->ids "HTTPS://GITHUB.COM/pmonks/lice-comb/blob/main/LICENSE"))))) -) \ No newline at end of file + (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "HTTPS://GITHUB.COM/pmonks/lice-comb/blob/main/LICENSE"))))) diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index 3afaef7..c4496b5 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -58,26 +58,48 @@ (defn valid= "Returns true if all of the following are true: - * s2 has metadata - * s2 is a set - * s2 is equal to s1 - * every entry in s2 is a valid SPDX license expression + * actual is a set + * actual equals expected + * everything in actual is a valid SPDX license expression Also prints (to stdout) which of the above is not true, in the event that any of them are not true." - [s1 s2] - (let [metadata? (or (nil? s2) (not-nil? (meta s2))) - is-a-set? (or (nil? s2) (set? s2)) - is-equal? (= s1 s2) - all-valid-expressions? (and (set? s2) (every? true? (map sexp/valid? s2))) - result (and metadata? 
- is-a-set? + [expected actual] + (let [is-a-set? (or (nil? actual) (set? actual)) + is-equal? (= (set expected) actual) + all-valid-expressions? (and is-a-set? (every? true? (map sexp/valid? actual))) + result (and is-a-set? is-equal? all-valid-expressions?)] ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) - (when-not metadata? (print "\n* Missing metadata")) (when-not is-a-set? (print "\n* Not a set")) - (when-not is-equal? (print "\n* Not equal to expected value")) + (when-not is-equal? (print "\n* Not equal to expected")) (when-not all-valid-expressions? (print "\n* Not all valid SPDX expressions")) result)) + +(defn valid-info= + "Returns true if all of the following are true: + * actual is a map + * the keys in actual are identical to expected-keys + * all vals in actual are maps + * every key in actual is a valid SPDX license expression + + Also prints (to stdout) which of the above is not true, in the event that any + of them are not true." + [expected actual] + (let [is-a-map? (or (nil? actual) (map? actual)) + is-equal? (= expected actual) + values-are-maps? (or (nil? actual) (every? map? (vals actual))) + all-valid-expressions? (and is-a-map? (every? true? (map sexp/valid? (keys actual)))) + result (and values-are-maps? + is-a-map? + is-equal? + all-valid-expressions?)] + ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message + (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) + (when-not is-a-map? (print "\n* Not a map")) + (when-not is-equal? (print "\n* Not equal to expected")) + (when-not values-are-maps? (print "\n* Not all values are maps")) + (when-not all-valid-expressions? 
(print "\n* Not all keys are valid SPDX expressions")) + result)) From 5fdbe78baaa4d7bd2b1d54d3320a37ef29d9b653 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Mon, 4 Sep 2023 12:42:51 -0700 Subject: [PATCH 21/34] :construction: Ongoing work on issue #3 --- src/lice_comb/files.clj | 6 ++++-- test/lice_comb/matching_test.clj | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index c8c4fa3..3097fa3 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -40,7 +40,8 @@ (throw (java.nio.file.NotDirectoryException. (str dir)))) (throw (java.io.FileNotFoundException. (str dir))))))) -(defn- probable-license-file? +; This is public because it's used in the tests +(defn probable-license-file? "Returns true if the given file-like thing (String, File, ZipEntry) is a probable license file, false otherwise." [f] @@ -50,7 +51,8 @@ (or (contains? probable-license-filenames fname) (s/ends-with? fname ".pom")))))) -(defn- probable-license-files +; This is public because it's used in the tests +(defn probable-license-files "Returns all probable license files in the given directory, recursively, as a set of java.io.File objects. dir may be a String or a java.io.File, either of which must refer to a readable directory." 
diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 561fd4b..da89f55 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -724,12 +724,12 @@ (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception")))) (testing "Multiple expressions" (is (valid-info= {"MIT" {:type :declared :strategy :spdx-listed-identifier-exact-match :source '("MIT")} - "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-name-matching :source '("BSD")}} + "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")}} (name->expressions-info "MIT / BSD")))) (testing "All names seen in POMs on Clojars as of 2023-07-13" (is (valid-info= {"BSD-3-Clause" {:type :concluded :confidence :medium :strategy :spdx-listed-uri :source '("https://opensource.org/licenses/BSD-3-Clause")}} (name->expressions-info "https://opensource.org/licenses/BSD-3-Clause"))) - (is (valid-info= {"EPL-2.0" {:type :concluded :confidence :medium :strategy :regex-name-matching :source '("Eclipse Public License - v 2.0")}} + (is (valid-info= {"EPL-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Eclipse Public License - v 2.0")}} (name->expressions-info "Eclipse Public License - v 2.0"))))) (deftest uri->ids-tests From 498eb5cbc332a4d761ae3626b318953bd6550a7d Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 5 Sep 2023 00:07:24 -0700 Subject: [PATCH 22/34] :construction: Ongoing work on issue #3 --- deps.edn | 1 + src/lice_comb/deps.clj | 48 +++++++---- src/lice_comb/files.clj | 74 +++++++++-------- src/lice_comb/impl/expressions_info.clj | 45 ++++++++++ src/lice_comb/impl/http.clj | 6 +- src/lice_comb/impl/matching.clj | 26 +++--- src/lice_comb/impl/regex_matching.clj | 29 +++---- src/lice_comb/impl/spdx.clj | 16 ++-- src/lice_comb/lein.clj | 72 ++++++++++++++++ src/lice_comb/matching.clj | 4 +- src/lice_comb/maven.clj | 82 ++++++++++--------- 
.../{impl/metadata.clj => utils.clj} | 60 +++++--------- test/lice_comb/deps_test.clj | 2 +- test/lice_comb/files_test.clj | 17 ++-- ...ata_test.clj => expressions_info_test.clj} | 8 +- test/lice_comb/matching_test.clj | 21 +++-- test/lice_comb/test_boilerplate.clj | 30 +++---- 17 files changed, 338 insertions(+), 203 deletions(-) create mode 100644 src/lice_comb/impl/expressions_info.clj create mode 100644 src/lice_comb/lein.clj rename src/lice_comb/{impl/metadata.clj => utils.clj} (60%) rename test/lice_comb/impl/{metadata_test.clj => expressions_info_test.clj} (95%) diff --git a/deps.edn b/deps.edn index 81252fd..7bc39ca 100644 --- a/deps.edn +++ b/deps.edn @@ -25,6 +25,7 @@ tolitius/xml-in {:mvn/version "0.1.1"} hato/hato {:mvn/version "0.9.0"} dev.weavejester/medley {:mvn/version "1.7.0"} + dom-top/dom-top {:mvn/version "1.0.8"} miikka/clj-base62 {:mvn/version "0.1.1"} com.github.pmonks/clj-spdx {:mvn/version "1.0.91"} com.github.pmonks/rencg {:mvn/version "1.0.34"}} diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index b91600f..72198ee 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -18,17 +18,18 @@ (ns lice-comb.deps "Functionality related to finding and determining license information from - deps in tools.deps lib-map format." - (:require [clojure.string :as s] - [spdx.licenses :as sl] - [lice-comb.maven :as lcmvn] - [lice-comb.files :as lcf] - [lice-comb.impl.data :as lcd] - [lice-comb.impl.metadata :as lcimd])) + dependencies in tools.deps lib-map format." + (:require [clojure.string :as s] + [dom-top.core :as dom] + [spdx.licenses :as sl] + [lice-comb.maven :as lcmvn] + [lice-comb.files :as lcf] + [lice-comb.impl.data :as lcd] + [lice-comb.impl.expressions-info :as lciei])) ;####TODO: FIGURE OUT HOW TO HANDLE METADATA FOR OVERRIDES / FALLBACKS!!!! 
-(def ^:private overrides-d (delay (lcd/load-edn-resource "lice_comb/deps/overrides.edn"))) -(def ^:private fallbacks-d (delay (lcd/load-edn-resource "lice_comb/deps/fallbacks.edn"))) +;(def ^:private overrides-d (delay (lcd/load-edn-resource "lice_comb/deps/overrides.edn"))) +;(def ^:private fallbacks-d (delay (lcd/load-edn-resource "lice_comb/deps/fallbacks.edn"))) ;(defn- check-overrides ; "Checks if an override should be used for the given dep" @@ -38,6 +39,7 @@ ; (:licenses (get @overrides-d gav (get @overrides-d ga)))))) ; Lookup overrides both with and without the version ;(defn- check-fallbacks +;####TODO: UPDATE FOR license-info MAP RATHER THAN ID SET ; "Checks if a fallback should be used for the given dep, given the set of ; detected ids" ; [ga ids] @@ -53,6 +55,19 @@ (when ga [(symbol (first (s/split (str ga) #"\$"))) info])) +(defmulti ^:private dep->string + "Converts a dep to a string." + {:arglists '([[ga info]])} + (fn [[_ info]] (:deps/manifest info))) + +(defmethod ^:private dep->string :mvn + [[ga info]] + (str ga "@" (:mvn/version info))) + +(defmethod ^:private dep->string :deps + [[ga info]] + (str ga "@" (:git/sha info) (when-let [tag (:git/tag info)] (str "/" tag)))) + (defmulti dep->expressions-info "Attempt to detect the SPDX license expression(s) (a map) in a tools.deps style dep (a MapEntry or two-element sequence of @@ -75,8 +90,9 @@ expressions ;(check-fallbacks ga (if-let [expressions (lcmvn/pom->expressions-info pom-uri)] expressions - (into {} (pmap #(lcimd/prepend-source (lcf/zip->expressions-info %) dep) (:paths info))))];)] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too - expressions))));) + (into {} (dom/real-pmap lcf/zip->expressions-info (:paths info))));) ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) + ] + (lciei/prepend-source expressions (dep->string dep))))));) (defmethod dep->expressions-info :deps [dep] @@ -86,7 +102,7 @@ ; (if-let [override (check-overrides ga 
version)] ; override ; (check-fallbacks ga - (lcf/dir->expressions-info (:deps/root info)))));)) + (lciei/prepend-source (lcf/dir->expressions-info (:deps/root info)) (dep->string dep)))));)) (defmethod dep->expressions-info nil [_]) @@ -114,7 +130,7 @@ `:lice-comb/license-info`)" [deps] (when deps - (into {} (pmap #(let [[k v] %] [k (assoc v :lice-comb/license-info (dep->expressions-info [k v]))]) deps)))) + (into {} (dom/real-pmap #(let [[k v] %] [k (assoc v :lice-comb/license-info (dep->expressions-info [k v]))]) deps)))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent @@ -122,6 +138,8 @@ this fn, as initialisation will occur implicitly anyway; it is provided to allow explicit control of the cost of initialisation to callers who need it." [] - @overrides-d - @fallbacks-d + (lcmvn/init!) + (lcf/init!) +; @overrides-d +; @fallbacks-d nil) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 3097fa3..2c8b99d 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -19,26 +19,33 @@ (ns lice-comb.files "Functionality related to finding and determining license information from files and directories." - (:require [clojure.string :as s] - [clojure.java.io :as io] - [lice-comb.matching :as lcmtch] - [lice-comb.maven :as lcmvn] - [lice-comb.impl.metadata :as lcimd] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [clojure.java.io :as io] + [lice-comb.matching :as lcmtch] + [lice-comb.maven :as lcmvn] + [lice-comb.impl.expressions-info :as lciei] + [lice-comb.impl.utils :as lciu])) (def ^:private probable-license-filenames #{"pom.xml" "license" "license.txt" "copying" "unlicense"}) ;TODO: consider "license.md" and #".+\.spdx" (see https://github.com/spdx/spdx-maven-plugin for why the latter is important)... (defn- ensure-readable-dir "Ensures dir (a String or File) refers to a readable directory, and returns it - as a File." + as a File. 
+ + Throws: + * java.io.FileNotFoundException if dir doesn't exist + * java.nio.file.AccessDeniedException if dir is not readable + * java.nio.file.NotDirectoryException if dir is not a directory" [dir] - (when dir - (let [dir (io/file dir)] - (if (.exists dir) - (if (.isDirectory dir) - dir - (throw (java.nio.file.NotDirectoryException. (str dir)))) - (throw (java.io.FileNotFoundException. (str dir))))))) + (let [result (io/file dir)] + (if (and result + (.exists result)) + (if (.canRead result) + (if (.isDirectory result) + result + (throw (java.nio.file.NotDirectoryException. (str result)))) + (throw (java.nio.file.AccessDeniedException. (str result)))) + (throw (java.io.FileNotFoundException. (str result)))))) ; This is public because it's used in the tests (defn probable-license-file? @@ -46,7 +53,7 @@ probable license file, false otherwise." [f] (and (not (nil? f)) - (let [fname (s/lower-case (lcu/filename f))] + (let [fname (s/lower-case (lciu/filename f))] (and (not (s/blank? fname)) (or (contains? probable-license-filenames fname) (s/ends-with? fname ".pom")))))) @@ -57,9 +64,10 @@ set of java.io.File objects. dir may be a String or a java.io.File, either of which must refer to a readable directory." [dir] - (when-let [dir (ensure-readable-dir dir)] - (some-> (seq (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq dir))) - set))) + (when dir + (when-let [d (ensure-readable-dir dir)] + (some-> (seq (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq d))) + set)))) (defn file->expressions-info "Attempts to determine the SPDX license expression(s) (a map) from the given @@ -70,15 +78,15 @@ The result has metadata attached that describes how the identifiers in the expression(s) were determined." - ([f] (file->expressions-info f (lcu/filepath f))) + ([f] (file->expressions-info f (lciu/filepath f))) ([f filepath] - (when (and f (not (s/blank? 
filepath))) - (let [fname (lcu/filename filepath) + (when f + (let [fname (lciu/filename filepath) lfname (s/lower-case fname)] - (lcimd/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname) + (lciei/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname) (s/ends-with? lfname ".pom") (lcmvn/pom->expressions-info f fname) - (instance? java.io.InputStream f) (lcmtch/text->ids f) - :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids is)))) ; Default is to assume it's a plain text file containing license text(s) + (instance? java.io.InputStream f) (lcmtch/text->ids-info f) + :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids-info is)))) ; Default is to assume it's a plain text file containing license text(s) filepath))))) (defn file->expressions @@ -90,7 +98,7 @@ The result has metadata attached that describes how the identifiers in the expression(s) were determined." - ([f] (file->expressions f (lcu/filepath f))) + ([f] (file->expressions f (lciu/filepath f))) ([f filepath] (some-> (file->expressions-info f filepath) keys @@ -114,10 +122,10 @@ entry (.getNextEntry zip-is)] (if entry (if (probable-license-file? entry) - (recur (merge result (lcimd/prepend-source (file->expressions-info zip-is (lcu/filename entry)) (lcu/filepath zip-file))) + (recur (merge result (file->expressions-info zip-is (lciu/filename entry))) (.getNextEntry zip-is)) (recur result (.getNextEntry zip-is))) - (when-not (empty? result) result))))))) + (when-not (empty? result) (lciei/prepend-source result (lciu/filepath zip-file))))))))) (defn zip->expressions "Attempt to detect the SPDX license expression(s) (a set) in a ZIP file. zip may be a @@ -159,11 +167,13 @@ ([dir] (dir->expressions-info dir nil)) ([dir {:keys [include-zips?] :or {include-zips? false}}] (when dir - (let [file-expressions (into {} (map file->expressions-info (probable-license-files dir)))] - (if include-zips? 
- (let [zip-expressions (into {} (map #(try (zip->expressions-info %) (catch Exception _ nil)) (zip-compressed-files dir)))] - (merge file-expressions zip-expressions)) - file-expressions))))) + (lciei/prepend-source + (let [file-expressions (into {} (map file->expressions-info (probable-license-files dir)))] + (if include-zips? + (let [zip-expressions (into {} (map #(try (zip->expressions-info %) (catch Exception _ nil)) (zip-compressed-files dir)))] + (merge file-expressions zip-expressions)) + file-expressions)) + (lciu/filepath dir))))) (defn dir->expressions "Attempt to detect the SPDX license expression(s) (a map) in a directory. dir diff --git a/src/lice_comb/impl/expressions_info.clj b/src/lice_comb/impl/expressions_info.clj new file mode 100644 index 0000000..bb92df6 --- /dev/null +++ b/src/lice_comb/impl/expressions_info.clj @@ -0,0 +1,45 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.expressions-info + "lice-comb expressions-info map helper functionality. Note: this namespace is + not part of the public API of lice-comb and may change without notice." + (:require [clojure.string :as s])) + +(defn prepend-source + "Prepends the given source s (a String) onto all metadata sub-maps in m (a + lice-comb expressions-info map)." + [m s] + (if (or (empty? m) (s/blank? s)) + m + (into {} (map #(if (sequential? 
(val %)) + (let [id (key %) + metadata-list (val %)] + (hash-map id (map (fn [x] (assoc x :source (conj (seq (:source x)) s))) metadata-list))) + %) + m)))) + +(defn merge-maps + "Merges any number of lice-comb expressions-info maps, by concatenating and + de-duping values for the same key (expression)." + [& maps] + (let [maps (filter identity maps)] + (when-not (empty? maps) + (let [grouped-maps (group-by first (mapcat identity maps))] + (into {} (map #(vec [% (seq (distinct (mapcat second (get grouped-maps %))))]) + (keys grouped-maps))))))) diff --git a/src/lice_comb/impl/http.clj b/src/lice_comb/impl/http.clj index 9d27bfc..935c109 100644 --- a/src/lice_comb/impl/http.clj +++ b/src/lice_comb/impl/http.clj @@ -22,7 +22,7 @@ (:require [clojure.string :as s] [clojure.java.io :as io] [hato.client :as hc] - [lice-comb.impl.utils :as lcu])) + [lice-comb.impl.utils :as lciu])) (def ^:private http-client-d (delay (hc/build-http-client {:connect-timeout 1000 :redirect-policy :always @@ -34,7 +34,7 @@ Note: does not throw - returns false on errors." [uri] (boolean - (when (lcu/valid-http-uri? (str uri)) + (when (lciu/valid-http-uri? (str uri)) (try (when-let [response (hc/head (str uri) {:http-client @http-client-d @@ -63,7 +63,7 @@ unable to do so (including for error conditions - there is no way to disambiguate errors from non-text content, for example)." [uri] - (when (lcu/valid-http-uri? uri) + (when (lciu/valid-http-uri? uri) (try (when-let [response (hc/get (cdn-uri uri) {:http-client @http-client-d diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index 21209e4..773f779 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -19,17 +19,17 @@ (ns lice-comb.impl.matching "Matching helper functionality. Note: this namespace is not part of the public API of lice-comb and may change without notice." 
- (:require [clojure.string :as s] - [clojure.set :as set] - [clojure.java.io :as io] - [spdx.exceptions :as se] - [spdx.matching :as sm] - [lice-comb.impl.spdx :as lcis] - [lice-comb.impl.regex-matching :as lcirm] - [lice-comb.impl.metadata :as lcimd] - [lice-comb.impl.3rd-party :as lc3] - [lice-comb.impl.http :as lcihttp] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [clojure.set :as set] + [clojure.java.io :as io] + [spdx.exceptions :as se] + [spdx.matching :as sm] + [lice-comb.impl.spdx :as lcis] + [lice-comb.impl.regex-matching :as lcirm] + [lice-comb.impl.expressions-info :as lciei] + [lice-comb.impl.3rd-party :as lc3] + [lice-comb.impl.http :as lcihttp] + [lice-comb.impl.utils :as lciu])) (def ^:private direct-replacements-map { #{"GPL-2.0-only" "Classpath-exception-2.0"} #{"GPL-2.0-only WITH Classpath-exception-2.0"} @@ -169,14 +169,14 @@ [uri] (when-not (s/blank? uri) (manual-fixes - (let [suri (lcu/simplify-uri uri)] + (let [suri (lciu/simplify-uri uri)] ; 1. see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) (if-let [ids (get @lcis/index-uri-to-id-d suri)] (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids)) ; 2. 
attempt to retrieve the text/plain contents of the uri and perform full license matching on it (when-let [license-text (lcihttp/get-text uri)] (when-let [ids (text->ids license-text)] - (lcimd/prepend-source ids (str "Text retrieved from " uri))))))))) + (lciei/prepend-source ids (str "content from " uri))))))))) (defn- string->ids-info "Converts the given String into a sequence of singleton maps (NOT A SINGLE diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj index 7d3fab4..b6ac781 100644 --- a/src/lice_comb/impl/regex_matching.clj +++ b/src/lice_comb/impl/regex_matching.clj @@ -21,9 +21,10 @@ part of the public API of lice-comb and may change without notice." (:require [clojure.string :as s] [medley.core :as med] + [dom-top.core :as dom] [rencg.api :as rencg] [lice-comb.impl.spdx :as lcis] - [lice-comb.impl.utils :as lcu])) + [lice-comb.impl.utils :as lciu])) (defn- get-rencgs "Get a value for an re-ncg, potentially looking at multiple ncgs in order @@ -92,7 +93,7 @@ [m] (let [clause-count1 (number-name-to-number (get-rencgs m ["clausecount1"])) clause-count2 (number-name-to-number (get-rencgs m ["clausecount2"])) - preferred-clause-count (case [(lcu/is-digits? clause-count1) (lcu/is-digits? clause-count2)] + preferred-clause-count (case [(lciu/is-digits? clause-count1) (lciu/is-digits? clause-count2)] [true true] clause-count1 [true false] clause-count1 [false true] clause-count2 @@ -193,7 +194,7 @@ for s." 
[s] {:id s - :regex (re-pattern (str "(?i)\\b" (lcu/escape-re s) "\\b")) + :regex (re-pattern (str "(?i)\\b" (lciu/escape-re s) "\\b")) :fn (constantly [s :medium])}) ; The regex for the GNU family is a nightmare, so we build it up (and test it) in pieces @@ -202,16 +203,16 @@ (def gpl-re #"(?GNU(?!\s+Classpath)|(?\d+([\._]\d+)?)?") (def only-or-later-re #"[\s-]*((?only)|(\(?or(\s+\(?at\s+your\s+(option|discretion)\)?)?(\s+any)?)?([\s-]*(?later|lator|newer|\+)))?") -(def gnu-re (lcu/re-concat "(?x)(?i)\\b(\n# Alternative 1: AGPL\n" - agpl-re - "\n# Alternative 2: LGPL\n|" - lgpl-re - "\n# Alternative 3: GPL\n|" - gpl-re - "\n)\n# Version\n" - version-re - "\n# Only/or-Later suffix\n" - only-or-later-re)) +(def gnu-re (lciu/re-concat "(?x)(?i)\\b(\n# Alternative 1: AGPL\n" + agpl-re + "\n# Alternative 2: LGPL\n|" + lgpl-re + "\n# Alternative 3: GPL\n|" + gpl-re + "\n)\n# Version\n" + version-re + "\n# Only/or-Later suffix\n" + only-or-later-re)) ; Regexes used for license name matching, along with functions for constructing an SPDX id and confidence metric from them (def ^:private license-name-matching-d (delay @@ -379,7 +380,7 @@ Results are in the order in which they appear in the string, and the function returns nil if there were no matches." 
[s] - (when-let [matches (seq (filter identity (map (partial match s) @license-name-matching-d)))] + (when-let [matches (seq (filter identity (dom/real-pmap (partial match s) @license-name-matching-d)))] (some->> matches (med/distinct-by :id) ;####TODO: THINK ABOUT MERGING INSTEAD OF DROPPING (sort-by :start) diff --git a/src/lice_comb/impl/spdx.clj b/src/lice_comb/impl/spdx.clj index c46dfb6..1407f95 100644 --- a/src/lice_comb/impl/spdx.clj +++ b/src/lice_comb/impl/spdx.clj @@ -23,7 +23,7 @@ [spdx.licenses :as sl] [spdx.exceptions :as se] [spdx.expressions :as sexp] - [lice-comb.impl.utils :as lcu])) + [lice-comb.impl.utils :as lciu])) ; The subset of SPDX license identifiers that we use; specifically excludes the superceded deprecated GPL family identifiers (def license-ids-d @@ -54,18 +54,18 @@ [list-entry] [(s/lower-case (s/trim (:name list-entry))) (:id list-entry)]) -(def index-name-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @license-list-d))) - (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (map name-to-id-tuple @exception-list-d)))))) +(def index-name-to-id-d (delay (merge (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (map name-to-id-tuple @license-list-d))) + (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (map name-to-id-tuple @exception-list-d)))))) (defn- urls-to-id-tuples "Extracts all urls for a given list (license or exception) entry." [list-entry] (let [id (:id list-entry) - simplified-uris (map lcu/simplify-uri (filter (complement s/blank?) (concat (:see-also list-entry) (get-in list-entry [:cross-refs :url]))))] + simplified-uris (map lciu/simplify-uri (filter (complement s/blank?) 
(concat (:see-also list-entry) (get-in list-entry [:cross-refs :url]))))] (map #(vec [% id]) simplified-uris))) -(def index-uri-to-id-d (delay (merge (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @license-list-d))) - (lcu/mapfonv #(lcu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @exception-list-d)))))) +(def index-uri-to-id-d (delay (merge (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @license-list-d))) + (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @exception-list-d)))))) (defn public-domain? "Is the given id lice-comb's custom 'public domain' LicenseRef?" @@ -100,7 +100,7 @@ unlisted license, with the given name appended as Base62 (since clj-spdx identifiers are basically constrained to [A-Z][a-z][0-9] ie. Base62)." [name] - (str unlisted-license-ref-prefix (when-not (s/blank? name) (str "-" (lcu/base62-encode name))))) + (str unlisted-license-ref-prefix (when-not (s/blank? name) (str "-" (lciu/base62-encode name))))) (defn unlisted->name "Get the original name of the given unlisted license. Returns nil if id is nil @@ -109,7 +109,7 @@ (when (unlisted? id) (str "Unlisted (" (if (> (count id) (count unlisted-license-ref-prefix)) - (lcu/base62-decode (subs id (+ 2 (count unlisted-license-ref-prefix)))) + (lciu/base62-decode (subs id (+ 2 (count unlisted-license-ref-prefix)))) "-original name not available-") ")"))) diff --git a/src/lice_comb/lein.clj b/src/lice_comb/lein.clj new file mode 100644 index 0000000..d391ba6 --- /dev/null +++ b/src/lice_comb/lein.clj @@ -0,0 +1,72 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. 
+; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.lein + "Functionality related to finding and determining license information from + dependencies in Leiningen's dependency vector format." + (:require [dom-top.core :as dom] + [lice-comb.deps :as lcd] + [lice-comb.impl.expressions-info :as lciei])) + +(defn- lein-dep->toolsdeps-dep + "Converts a leiningen style dependency vector into a (partial) tools.deps style + dependency map. This is partial in that just enough of the tools.deps style + map is constructed for lice-comb.deps to function." + [[ga version :as dep]] + (when dep + (hash-map ga {:mvn/version version :deps/manifest :mvn}))) ;####TODO: Synthesise :paths key (for paths to JAR files) + +(defn dep->expressions-info + "Attempt to detect the SPDX license expression(s) (a map) in a Leiningen + style dep (a vector of the form `[groupId/artifactId \"version\"]`)." + [dep] + (when-let [toolsdep-dep (lein-dep->toolsdeps-dep dep)] + (lciei/prepend-source (lcd/dep->expressions-info toolsdep-dep) (pr-str dep)))) + +(defn dep->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a Leiningen + style dep (a vector of the form `[groupId/artifactId \"version\"]`)." + [dep] + (some-> (dep->expressions-info dep) + keys + set)) + +(defn deps->expressions-info + "Attempt to detect all of the SPDX license expression(s) in a Leiningen style + dependency vector. The result is a map, where each entry in the map has a key + that is the Leiningen dep, and the value is the lice-comb expressions-info map + for that dep." 
+ [deps] + (into {} (dom/real-pmap #(vec [% (dep->expressions-info %)]) deps))) + +(defn deps->expressions-info + "Attempt to detect all of the SPDX license expression(s) in a Leiningen style + dependency vector. The result is a map, where each entry in the map has a key + that is the Leiningen dep, and the value is the set of SPDX license + expression(s) for that dep." + [deps] + (into {} (dom/real-pmap #(vec [% (dep->expressions %)]) deps))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." + [] + (lcd/init!) + nil) diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index 7d12888..c0d614c 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -25,7 +25,7 @@ [spdx.expressions :as sexp] [lice-comb.impl.spdx :as lcis] [lice-comb.impl.matching :as lcim] - [lice-comb.impl.utils :as lcu])) + [lice-comb.impl.utils :as lciu])) (defn public-domain? "Is the given id lice-comb's custom 'public domain' LicenseRef?" @@ -160,7 +160,7 @@ (if-let [normalised-expression (sexp/normalise name)] {normalised-expression (list {:type :declared :strategy :spdx-expression :source (list name)})} ; 2. If it's a URI, use URI matching (this is to handle messed up real world cases where license names in POMs contain a URI) - (if (lcu/valid-http-uri? name) + (if (lciu/valid-http-uri? 
name) (if-let [ids (uri->ids-info name)] ids {(lcis/name->unlisted name) (list {:type :concluded :confidence :low :strategy :unlisted :source (list name)})}) ; It was a URL, but we weren't able to resolve it to any ids, so return it as unlisted diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index d452023..e6bff68 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -19,17 +19,17 @@ (ns lice-comb.maven "Functionality related to finding and determining license information from Maven POMs." - (:require [clojure.string :as s] - [clojure.java.io :as io] - [clojure.data.xml :as xml] - [clojure.java.shell :as sh] - [clojure.tools.logging :as log] - [xml-in.core :as xi] - [lice-comb.matching :as lcmtch] - [lice-comb.impl.matching :as lcim] - [lice-comb.impl.metadata :as lcimd] - [lice-comb.impl.http :as lcihttp] - [lice-comb.impl.utils :as lcu])) + (:require [clojure.string :as s] + [clojure.java.io :as io] + [clojure.data.xml :as xml] + [clojure.java.shell :as sh] + [clojure.tools.logging :as log] + [xml-in.core :as xi] + [lice-comb.matching :as lcmtch] + [lice-comb.impl.matching :as lcim] + [lice-comb.impl.expressions-info :as lciei] + [lice-comb.impl.http :as lcihttp] + [lice-comb.impl.utils :as lciu])) (def ^:private local-maven-repo-d (delay @@ -68,10 +68,10 @@ pair. Returns nil if no matches were found." [{:keys [name url]}] ; 1. Look in the name field(s) - (if-let [name-expressions (lcimd/prepend-source (lcmtch/name->expressions-info name) " tag")] + (if-let [name-expressions (lciei/prepend-source (lcmtch/name->expressions-info name) "")] name-expressions ; 2. 
If the names didn't give us any licenses, look in the url field(s) (this tends to be slower and less accurate) - (when-let [uri-ids (lcimd/prepend-source (lcmtch/uri->ids-info url) " tag")] + (when-let [uri-ids (lciei/prepend-source (lcmtch/uri->ids-info url) "")] uri-ids))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") @@ -117,34 +117,36 @@ ; Note: a few rare pom.xml files are missing the xmlns declation (e.g. software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags (defmethod pom->expressions-info java.io.InputStream [pom-is filepath] - (let [pom-xml (xml/parse pom-is)] - (if-let [pom-licenses (xml-find-all-alts pom-xml [::pom/project ::pom/licenses] [:project :licenses])] - ; block exists - process it - (let [name-uri-pairs (some->> pom-licenses - (filter map?) ; Get rid of non-tag content (whitespace etc.) - (filter #(or (= ::pom/license (:tag %)) (= :license (:tag %)))) ; Get rid of non tags (which shouldn't exist, but Maven POMs are a shitshow...) 
- (map #(identity (let [name (xml-find-first-string-alts % [::pom/license ::pom/name] [:license :name]) - url (xml-find-first-string-alts % [::pom/license ::pom/url] [:license :url])] - (when (or name url) - {:name name :url url})))) - set) - licenses (into {} (map #(lcimd/prepend-source (licenses-from-pair %) filepath) name-uri-pairs))] - (lcim/manual-fixes licenses)) - ; License block doesn't exist, so attempt to lookup the parent pom and get it from there - (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) - parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) - parent-gav (merge {} - (when parent {:group-id (lcu/strim (first (xi/find-first parent [::pom/groupId]))) - :artifact-id (lcu/strim (first (xi/find-first parent [::pom/artifactId]))) - :version (lcu/strim (first (xi/find-first parent [::pom/version])))}) - (when parent-no-ns {:group-id (lcu/strim (first (xi/find-first parent-no-ns [:groupId]))) - :artifact-id (lcu/strim (first (xi/find-first parent-no-ns [:artifactId]))) - :version (lcu/strim (first (xi/find-first parent-no-ns [:version])))}))] - (when-not (empty? parent-gav) - (pom->expressions-info (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep + (lciei/prepend-source + (let [pom-xml (xml/parse pom-is)] + (if-let [pom-licenses (xml-find-all-alts pom-xml [::pom/project ::pom/licenses] [:project :licenses])] + ; block exists - process it + (let [name-uri-pairs (some->> pom-licenses + (filter map?) ; Get rid of non-tag content (whitespace etc.) + (filter #(or (= ::pom/license (:tag %)) (= :license (:tag %)))) ; Get rid of non tags (which shouldn't exist, but Maven POMs are a shitshow...) 
+ (map #(identity (let [name (xml-find-first-string-alts % [::pom/license ::pom/name] [:license :name]) + url (xml-find-first-string-alts % [::pom/license ::pom/url] [:license :url])] + (when (or name url) + {:name name :url url})))) + set) + licenses (into {} (map licenses-from-pair name-uri-pairs))] + (lcim/manual-fixes licenses)) + ; License block doesn't exist, so attempt to lookup the parent pom and get it from there + (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) + parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) + parent-gav (merge {} + (when parent {:group-id (lciu/strim (first (xi/find-first parent [::pom/groupId]))) + :artifact-id (lciu/strim (first (xi/find-first parent [::pom/artifactId]))) + :version (lciu/strim (first (xi/find-first parent [::pom/version])))}) + (when parent-no-ns {:group-id (lciu/strim (first (xi/find-first parent-no-ns [:groupId]))) + :artifact-id (lciu/strim (first (xi/find-first parent-no-ns [:artifactId]))) + :version (lciu/strim (first (xi/find-first parent-no-ns [:version])))}))] + (when-not (empty? parent-gav) + (pom->expressions-info (pom-uri-for-gav parent-gav)))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep + filepath)) (defmethod pom->expressions-info :default - ([pom] (pom->expressions-info pom (lcu/filepath pom))) + ([pom] (pom->expressions-info pom (lciu/filepath pom))) ([pom filepath] (when pom (with-open [pom-is (io/input-stream pom)] @@ -163,7 +165,7 @@ The result has metadata attached that describes how the identifiers in the expression(s) were determined." 
- ([pom] (pom->expressions pom (lcu/filepath pom))) + ([pom] (pom->expressions pom (lciu/filepath pom))) ([pom filepath] (some-> (pom->expressions-info pom filepath) keys diff --git a/src/lice_comb/impl/metadata.clj b/src/lice_comb/utils.clj similarity index 60% rename from src/lice_comb/impl/metadata.clj rename to src/lice_comb/utils.clj index 6c530eb..cb26a51 100644 --- a/src/lice_comb/impl/metadata.clj +++ b/src/lice_comb/utils.clj @@ -16,47 +16,23 @@ ; SPDX-License-Identifier: Apache-2.0 ; -(ns lice-comb.impl.metadata - "Metadata helper functionality. Note: this namespace is not part of - the public API of lice-comb and may change without notice." +(ns lice-comb.utils + "Miscellaneous functionality." (:require [clojure.string :as s])) -(defn prepend-source - "Prepends the given source s (a String) onto all metadata sub-maps in m (a - lice-comb id+metadata-list map)." - [m s] - (if (or (empty? m) (s/blank? s)) - m - (into {} (map #(if (sequential? (val %)) - (let [id (key %) - metadata-list (val %)] - (hash-map id (map (fn [x] (assoc x :source (conj (seq (:source x)) s))) metadata-list))) - %) - m)))) - -(defn merge-maps - "Merges any number of lice-comb maps, by concatenating and de-duping values - for the same key (expression)." - [& maps] - (let [maps (filter identity maps)] - (when-not (empty? 
maps) - (let [grouped-maps (group-by first (mapcat identity maps))] - (into {} (map #(vec [% (seq (distinct (mapcat second (get grouped-maps %))))]) - (keys grouped-maps))))))) - -(def ^:private strategies { +(def ^:private strategy->string { :spdx-expression "SPDX expression" :spdx-listed-identifier-exact-match "SPDX identifier" :spdx-listed-identifier-case-insensitive-match "SPDX identifier (case insensitive match)" :spdx-text-matching "SPDX license text matching" :spdx-listed-name "SPDX listed name (case insensitive match)" :spdx-listed-uri "SPDX listed URI (relaxed matching)" - :expression-inference "Inferred SPDX expression" - :regex-matching "Regular expression matching" - :unlisted "Unlisted"}) + :expression-inference "inferred SPDX expression" + :regex-matching "regular expression matching" + :unlisted "fallback to unlisted LicenseRef"}) -(defn- metadata-keyfn - "sort-by keyfn for lice-comb metadata maps" +(defn- info-keyfn + "sort-by keyfn for lice-comb info maps" [metadata] (str (case (:id metadata) nil "0" @@ -83,25 +59,27 @@ :regex-matching "7" :unlisted "8"))) -(defn- metadata-element->string - "Converts the metadata list for the given identifier into a human-readable - string." +(defn- license-info-element->string + "Converts the info list for the given identifier into a human-readable + string, using the information in license-info map m." 
[m id] (str id ":\n" - (when-let [metadata-list (sort-by metadata-keyfn (seq (get m id)))] + (when-let [info-list (sort-by info-keyfn (seq (get m id)))] (s/join "\n" (map #(str " " (when-let [md-id (:id %)] (when (not= id md-id) (str md-id " "))) (case (:type %) :declared "Declared" :concluded "Concluded") (when-let [confidence (:confidence %)] (str "\n Confidence: " (name confidence))) - (when-let [strategy (:strategy %)] (str "\n Strategy: " (get strategies strategy (name strategy)))) + (when-let [strategy (:strategy %)] (str "\n Strategy: " (get strategy->string strategy (name strategy)))) (when-let [source (seq (:source %))] (str "\n Source:\n > " (s/join "\n > " source)))) - metadata-list))))) + info-list))))) -(defn metadata->string - "Converts lice-comb map m into a human-readable string." +(defn license-info->string + "Converts lice-comb license-info map m into a human-readable string. This + function is mostly intended for debugging / developer discovery purposes, and + the content and format of the output may change without warning." 
[m] (when m (let [ids (sort (keys m))] - (s/join "\n\n" (map (partial metadata-element->string m) ids))))) + (s/join "\n\n" (map (partial license-info-element->string m) ids))))) diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index dbf27bf..09b9e92 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -38,7 +38,7 @@ (is (valid= #{"Apache-2.0"} (dep->expressions ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) (is (valid= #{"EPL-1.0"} (dep->expressions ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) - (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) + (is (valid= #{"BSD-4-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) (is (valid= #{(lcis/public-domain)} (dep->expressions ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index 72db742..bb0e1aa 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -56,7 +56,8 @@ (is (= false (probable-license-file? "pm.xml")))) (testing "Filenames including paths" (is (= true (probable-license-file? "/path/to/a/project/containing/a/pom.xml"))) - (is (= false (probable-license-file? "/a/different/path/to/some/NOTICES"))))) + (is (= false (probable-license-file? "/a/different/path/to/some/NOTICES"))) + (is (= true (probable-license-file? 
"https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom"))))) (deftest probable-license-files-tests (testing "Nil, empty, or blank directory" @@ -65,6 +66,8 @@ (is (thrown? java.io.FileNotFoundException (probable-license-files " "))) (is (thrown? java.io.FileNotFoundException (probable-license-files "\n"))) (is (thrown? java.io.FileNotFoundException (probable-license-files "\t")))) + (testing "Doesn't exist" + (is (thrown? java.io.FileNotFoundException (probable-license-files "THIS_DIRECTORY_DOESNT_EXIST")))) (testing "Not a directory" (is (thrown? java.nio.file.NotDirectoryException (probable-license-files "deps.edn")))) (testing "A real directory" @@ -72,6 +75,7 @@ (io/file (str test-data-path "/with-parent.pom")) (io/file (str test-data-path "/no-xml-ns.pom")) (io/file (str test-data-path "/simple.pom")) + (io/file (str test-data-path "/complex.pom")) (io/file (str test-data-path "/CC-BY-4.0/LICENSE")) (io/file (str test-data-path "/MPL-2.0/LICENSE"))} (probable-license-files test-data-path))))) @@ -126,9 +130,10 @@ (is (thrown? java.io.FileNotFoundException (dir->expressions "this_directory_does_not_exist"))) (is (thrown? 
java.nio.file.NotDirectoryException (dir->expressions "deps.edn")))) (testing "Valid directory" - (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0"} - (dir->expressions ".")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (valid= ;#{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0"} ; CC-BY-4.0 failing due to https://github.com/spdx/license-list-XML/issues/1960 + #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0"} + (dir->expressions ".")))) (testing "Valid directory - include ZIP compressed files" - (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0" "AGPL-3.0-or-later"} - (dir->expressions "." {:include-zips? true})))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 -) + (is (valid= ;#{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0" "AGPL-3.0-or-later"} ; CC-BY-4.0 failing due to https://github.com/spdx/license-list-XML/issues/1960 + #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "AGPL-3.0-or-later"} + (dir->expressions "." {:include-zips? 
true}))))) diff --git a/test/lice_comb/impl/metadata_test.clj b/test/lice_comb/impl/expressions_info_test.clj similarity index 95% rename from test/lice_comb/impl/metadata_test.clj rename to test/lice_comb/impl/expressions_info_test.clj index cc95a43..d75bdd5 100644 --- a/test/lice_comb/impl/metadata_test.clj +++ b/test/lice_comb/impl/expressions_info_test.clj @@ -16,10 +16,10 @@ ; SPDX-License-Identifier: Apache-2.0 ; -(ns lice-comb.impl.metadata-test - (:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.impl.metadata :refer [prepend-source merge-maps]])) +(ns lice-comb.impl.expressions-info-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] + [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.impl.expressions-info :refer [prepend-source merge-maps]])) (use-fixtures :once fixture) diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index da89f55..68ef6e2 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -151,6 +151,7 @@ (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License v2.0 w/Classpath exception"))) (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 (GPL2), with the classpath exception"))) (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2+CE"))) ;#### !!!! (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) (is (valid= #{"JSON"} (name->expressions "JSON License"))) (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest @@ -712,24 +713,26 @@ (is (nil? 
(name->expressions-info "\n"))) (is (nil? (name->expressions-info "\t")))) (testing "SPDX license ids" - (is (valid-info= {"AGPL-3.0-only" {:type :declared :strategy :spdx-expression :source '("AGPL-3.0")}} + (is (valid-info= {"AGPL-3.0-only" (list {:type :declared :strategy :spdx-expression :source (list "AGPL-3.0")})} (name->expressions-info "AGPL-3.0"))) - (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0-with-classpath-exception")}} + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :declared :strategy :spdx-expression :source (list "GPL-2.0-with-classpath-exception")})} (name->expressions-info "GPL-2.0-with-classpath-exception")))) (testing "SPDX expressions" - (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0 WITH Classpath-exception-2.0")}} + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :declared :strategy :spdx-expression :source (list "GPL-2.0 WITH Classpath-exception-2.0")})} (name->expressions-info "GPL-2.0 WITH Classpath-exception-2.0")))) (testing "Single expressions that are not valid SPDX" - (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" {:type :declared :strategy :spdx-expression :source '("GPL-2.0 WITH Classpath-exception-2.0")}} + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :concluded :confidence :low :strategy :expression-inference} + {:id "GPL-2.0-only" :type :concluded :confidence :low :strategy :regex-matching :source (list "GNU General Public License, version 2")} + {:id "Classpath-exception-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list "Classpath Exception")})} (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception")))) (testing "Multiple expressions" - (is (valid-info= {"MIT" {:type :declared :strategy 
:spdx-listed-identifier-exact-match :source '("MIT")} - "BSD-4-Clause" {:type :concluded :confidence :low :strategy :regex-matching :source '("BSD")}} + (is (valid-info= {"MIT" (list {:id "MIT" :type :concluded :confidence :high :strategy :regex-matching :source (list "MIT")}) + "BSD-4-Clause" (list {:id "BSD-4-Clause" :type :concluded :confidence :low :strategy :regex-matching :source (list "BSD")})} (name->expressions-info "MIT / BSD")))) - (testing "All names seen in POMs on Clojars as of 2023-07-13" - (is (valid-info= {"BSD-3-Clause" {:type :concluded :confidence :medium :strategy :spdx-listed-uri :source '("https://opensource.org/licenses/BSD-3-Clause")}} + (testing "Some names from Clojars" + (is (valid-info= {"BSD-3-Clause" (list {:id "BSD-3-Clause" :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list "https://opensource.org/licenses/BSD-3-Clause")})} (name->expressions-info "https://opensource.org/licenses/BSD-3-Clause"))) - (is (valid-info= {"EPL-2.0" {:type :concluded :confidence :medium :strategy :regex-matching :source '("Eclipse Public License - v 2.0")}} + (is (valid-info= {"EPL-2.0" (list {:id "EPL-2.0" :type :concluded :confidence :medium :strategy :regex-matching :source (list "Eclipse Public License - v 2.0")})} (name->expressions-info "Eclipse Public License - v 2.0"))))) (deftest uri->ids-tests diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index c4496b5..71bd4c4 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -73,7 +73,7 @@ all-valid-expressions?)] ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) - (when-not is-a-set? (print "\n* Not a set")) + (when-not is-a-set? (print "\n* Not a set:" (type actual))) (when-not is-equal? (print "\n* Not equal to expected")) (when-not all-valid-expressions? 
(print "\n* Not all valid SPDX expressions")) result)) @@ -82,24 +82,24 @@ "Returns true if all of the following are true: * actual is a map * the keys in actual are identical to expected-keys - * all vals in actual are maps + * all vals in actual are lists * every key in actual is a valid SPDX license expression Also prints (to stdout) which of the above is not true, in the event that any of them are not true." [expected actual] - (let [is-a-map? (or (nil? actual) (map? actual)) - is-equal? (= expected actual) - values-are-maps? (or (nil? actual) (every? map? (vals actual))) - all-valid-expressions? (and is-a-map? (every? true? (map sexp/valid? (keys actual)))) - result (and values-are-maps? - is-a-map? - is-equal? - all-valid-expressions?)] + (let [is-a-map? (or (nil? actual) (map? actual)) + is-equal? (= expected actual) + values-are-sequentials? (or (nil? actual) (every? sequential? (vals actual))) + all-valid-expressions? (and is-a-map? (every? true? (map sexp/valid? (keys actual)))) + result (and values-are-sequentials? + is-a-map? + is-equal? + all-valid-expressions?)] ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message - (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) - (when-not is-a-map? (print "\n* Not a map")) - (when-not is-equal? (print "\n* Not equal to expected")) - (when-not values-are-maps? (print "\n* Not all values are maps")) - (when-not all-valid-expressions? (print "\n* Not all keys are valid SPDX expressions")) + (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) + (when-not is-a-map? (print "\n* Not a map:" (type actual))) + (when-not is-equal? (print "\n* Not equal to expected")) + (when-not values-are-sequentials? (print "\n* Not all values are sequential:" (pr-str (map type (vals actual))))) + (when-not all-valid-expressions? 
(print "\n* Not all keys are valid SPDX expressions")) result)) From 2a6aaf02dcaf2bf83527bbf5b0b3851bcbe39de8 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 5 Sep 2023 16:05:45 -0700 Subject: [PATCH 23/34] :arrow_up: Upgrade dependencies --- .github/workflows/ci.yml | 2 +- .github/workflows/dependencies.yml | 2 +- .github/workflows/deploy.yml | 2 +- .github/workflows/docs.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 92fdba4..55911ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-java@v3 with: distribution: 'temurin' diff --git a/.github/workflows/dependencies.yml b/.github/workflows/dependencies.yml index 57bb42b..d971671 100644 --- a/.github/workflows/dependencies.yml +++ b/.github/workflows/dependencies.yml @@ -11,5 +11,5 @@ jobs: container: image: uochan/antq steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - run: java -jar /tmp/antq/antq.jar --skip=pom --error-format="::error file={{file}}::{{message}}" diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6128aff..6ea8c91 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: environment: clojars steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Make sure we get the full history, or else the version number gets screwed up - uses: actions/setup-java@v3 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ad96b57..58d3454 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-java@v3 with: distribution: 'temurin' From 645ed1c6052b19714b68d181e992cda24e2d3b65 Mon Sep 17 
00:00:00 2001 From: Peter Monks Date: Tue, 5 Sep 2023 21:38:06 -0700 Subject: [PATCH 24/34] :construction: Ongoing work on issue #3 --- deps.edn | 2 +- resources/lice_comb/names.edn | 8 ++ src/lice_comb/deps.clj | 6 +- src/lice_comb/files.clj | 81 +++++--------- src/lice_comb/impl/expressions_info.clj | 11 +- src/lice_comb/impl/matching.clj | 100 ++++++++++-------- src/lice_comb/impl/utils.clj | 44 ++++++++ src/lice_comb/lein.clj | 4 +- src/lice_comb/maven.clj | 57 +++++----- src/lice_comb/utils.clj | 6 +- test/lice_comb/files_test.clj | 58 +++++----- test/lice_comb/impl/expressions_info_test.clj | 14 +-- test/lice_comb/matching_test.clj | 12 +-- test/lice_comb/test_boilerplate.clj | 3 +- 14 files changed, 226 insertions(+), 180 deletions(-) create mode 100644 resources/lice_comb/names.edn diff --git a/deps.edn b/deps.edn index 7bc39ca..366757f 100644 --- a/deps.edn +++ b/deps.edn @@ -27,7 +27,7 @@ dev.weavejester/medley {:mvn/version "1.7.0"} dom-top/dom-top {:mvn/version "1.0.8"} miikka/clj-base62 {:mvn/version "0.1.1"} - com.github.pmonks/clj-spdx {:mvn/version "1.0.91"} + com.github.pmonks/clj-spdx {:mvn/version "1.0.95"} com.github.pmonks/rencg {:mvn/version "1.0.34"}} :aliases {:build {:deps {com.github.pmonks/pbr {:mvn/version "RELEASE"}} diff --git a/resources/lice_comb/names.edn b/resources/lice_comb/names.edn new file mode 100644 index 0000000..eaa163d --- /dev/null +++ b/resources/lice_comb/names.edn @@ -0,0 +1,8 @@ +; Map of name values seen in the wild that are too ambiguous or cursed to support any reasonable form of automated parsing +{ + ;Seen in https://repo.maven.apache.org/maven2/com/sun/mail/all/1.4.7/all-1.4.7.pom + "GPLv2+CE" {"GPL-2.0-only WITH Classpath-exception-2.0" + ({:type :concluded :confidence :high :strategy :manual-verification :source ("GPLv2+CE")} + {:id "GPL-2.0-only" :type :concluded :confidence :high :strategy :manual-verification :source ("GPLv2+CE" "GPLv2")} + {:id "Classpath-exception-2.0" :type :concluded :confidence :high 
:strategy :manual-verification :source ("GPLv2+CE" "CE")})} +} \ No newline at end of file diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 72198ee..edfcb9c 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -71,7 +71,7 @@ (defmulti dep->expressions-info "Attempt to detect the SPDX license expression(s) (a map) in a tools.deps style dep (a MapEntry or two-element sequence of - `[groupId/artifactId dep-info]`). + `['groupId/artifactId dep-info]`). The result has metadata attached that describes how the identifiers in the expression(s) were determined." @@ -92,7 +92,7 @@ expressions (into {} (dom/real-pmap lcf/zip->expressions-info (:paths info))));) ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) ] - (lciei/prepend-source expressions (dep->string dep))))));) + (lciei/prepend-source (dep->string dep) expressions)))));) (defmethod dep->expressions-info :deps [dep] @@ -102,7 +102,7 @@ ; (if-let [override (check-overrides ga version)] ; override ; (check-fallbacks ga - (lciei/prepend-source (lcf/dir->expressions-info (:deps/root info)) (dep->string dep)))));)) + (lciei/prepend-source (dep->string dep) (lcf/dir->expressions-info (:deps/root info))))));)) (defmethod dep->expressions-info nil [_]) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 2c8b99d..ee8e96f 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -28,25 +28,6 @@ (def ^:private probable-license-filenames #{"pom.xml" "license" "license.txt" "copying" "unlicense"}) ;TODO: consider "license.md" and #".+\.spdx" (see https://github.com/spdx/spdx-maven-plugin for why the latter is important)... -(defn- ensure-readable-dir - "Ensures dir (a String or File) refers to a readable directory, and returns it - as a File. 
- - Throws: - * java.io.FileNotFoundException if dir doesn't exist - * java.nio.file.AccessDeniedException if dir is not readable - * java.nio.file.NotDirectoryException if dir is not a directory" - [dir] - (let [result (io/file dir)] - (if (and result - (.exists result)) - (if (.canRead result) - (if (.isDirectory result) - result - (throw (java.nio.file.NotDirectoryException. (str result)))) - (throw (java.nio.file.AccessDeniedException. (str result)))) - (throw (java.io.FileNotFoundException. (str result)))))) - ; This is public because it's used in the tests (defn probable-license-file? "Returns true if the given file-like thing (String, File, ZipEntry) is a @@ -64,40 +45,33 @@ set of java.io.File objects. dir may be a String or a java.io.File, either of which must refer to a readable directory." [dir] - (when dir - (when-let [d (ensure-readable-dir dir)] - (some-> (seq (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq d))) - set)))) + (when (lciu/readable-dir? dir) + (some-> (seq (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq (io/file dir)))) + set))) (defn file->expressions-info "Attempts to determine the SPDX license expression(s) (a map) from the given file (an InputStream or something that can have an io/input-stream opened on it). If an InputStream is provided, it must already be open and the associated - filepath should also be provided as the second parameter (it is optional in - other cases). - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + filepath must be provided as the second parameter (it is optional in other + cases)." ([f] (file->expressions-info f (lciu/filepath f))) ([f filepath] - (when f + (when (lciu/readable-file? 
f) (let [fname (lciu/filename filepath) lfname (s/lower-case fname)] - (lciei/prepend-source (cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname) + (lciei/prepend-source filepath + (cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname) (s/ends-with? lfname ".pom") (lcmvn/pom->expressions-info f fname) - (instance? java.io.InputStream f) (lcmtch/text->ids-info f) - :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids-info is)))) ; Default is to assume it's a plain text file containing license text(s) - filepath))))) + (instance? java.io.InputStream f) (doall (lcmtch/text->ids-info f)) + :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids-info is))))))))) ; Default is to assume it's a plain text file containing license text(s) (defn file->expressions "Attempts to determine the SPDX license expression(s) (a set) from the given file (an InputStream or something that can have an io/input-stream opened on it). If an InputStream is provided, it must already be open and the associated filepath should also be provided as the second parameter (it is optional in - other cases). - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + other cases)." ([f] (file->expressions f (lciu/filepath f))) ([f filepath] (some-> (file->expressions-info f filepath) @@ -109,12 +83,9 @@ String or a java.io.File, both of which must refer to a ZIP-format compressed file. - Throws on invalid zip file (doesn't exist, not readable, not ZIP format, etc.). - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + Throws on invalid zip format file." [zip] - (when zip + (when (lciu/readable-file? zip) (let [zip-file (io/file zip)] (java.util.zip.ZipFile. zip-file) ; This no-op forces validation of the zip file - ZipInputStream does not reliably perform validation (with-open [zip-is (java.util.zip.ZipInputStream. 
(io/input-stream zip-file))] @@ -125,17 +96,14 @@ (recur (merge result (file->expressions-info zip-is (lciu/filename entry))) (.getNextEntry zip-is)) (recur result (.getNextEntry zip-is))) - (when-not (empty? result) (lciei/prepend-source result (lciu/filepath zip-file))))))))) + (when-not (empty? result) (lciei/prepend-source (lciu/filepath zip-file) result)))))))) (defn zip->expressions "Attempt to detect the SPDX license expression(s) (a set) in a ZIP file. zip may be a String or a java.io.File, both of which must refer to a ZIP-format compressed file. - Throws on invalid zip file (doesn't exist, not readable, not ZIP format, etc.). - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + Throws on invalid zip format file." [zip] (some-> (zip->expressions-info zip) keys @@ -146,11 +114,11 @@ recursively, as a set of java.io.File objects. dir may be a String or a java.io.File, either of which must refer to a readable directory." [dir] - (when-let [dir (ensure-readable-dir dir)] + (when (lciu/readable-dir? dir) (some-> (seq (filter #(and (.isFile ^java.io.File %) (or (s/ends-with? (str %) ".zip") (s/ends-with? (str %) ".jar"))) - (file-seq dir))) + (file-seq (io/file dir)))) set))) (defn dir->expressions-info @@ -166,14 +134,13 @@ expression(s) were determined." ([dir] (dir->expressions-info dir nil)) ([dir {:keys [include-zips?] :or {include-zips? false}}] - (when dir - (lciei/prepend-source - (let [file-expressions (into {} (map file->expressions-info (probable-license-files dir)))] - (if include-zips? - (let [zip-expressions (into {} (map #(try (zip->expressions-info %) (catch Exception _ nil)) (zip-compressed-files dir)))] - (merge file-expressions zip-expressions)) - file-expressions)) - (lciu/filepath dir))))) + (when (lciu/readable-dir? dir) + (lciei/prepend-source (lciu/filepath dir) + (let [file-expressions (into {} (map file->expressions-info (probable-license-files dir)))] + (if include-zips? 
+ (let [zip-expressions (into {} (map #(try (zip->expressions-info %) (catch Exception _ nil)) (zip-compressed-files dir)))] + (merge file-expressions zip-expressions)) + file-expressions)))))) (defn dir->expressions "Attempt to detect the SPDX license expression(s) (a map) in a directory. dir diff --git a/src/lice_comb/impl/expressions_info.clj b/src/lice_comb/impl/expressions_info.clj index bb92df6..f7bb77b 100644 --- a/src/lice_comb/impl/expressions_info.clj +++ b/src/lice_comb/impl/expressions_info.clj @@ -24,13 +24,18 @@ (defn prepend-source "Prepends the given source s (a String) onto all metadata sub-maps in m (a lice-comb expressions-info map)." - [m s] - (if (or (empty? m) (s/blank? s)) + [s m] + (if (or (s/blank? s) (empty? m)) m (into {} (map #(if (sequential? (val %)) (let [id (key %) metadata-list (val %)] - (hash-map id (map (fn [x] (assoc x :source (conj (seq (:source x)) s))) metadata-list))) + (hash-map id (map (fn [x] (assoc x :source (let [old-source (seq (:source x)) + new-source (if (not= s (first old-source)) ; Only add s if it isn't already there + (conj old-source s) + old-source)] + new-source))) + metadata-list))) %) m)))) diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index 773f779..f98dc5c 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -29,8 +29,11 @@ [lice-comb.impl.expressions-info :as lciei] [lice-comb.impl.3rd-party :as lc3] [lice-comb.impl.http :as lcihttp] + [lice-comb.impl.data :as lcid] [lice-comb.impl.utils :as lciu])) +(def ^:private cursed-names-d (delay (lcid/load-edn-resource "lice_comb/names.edn"))) + (def ^:private direct-replacements-map { #{"GPL-2.0-only" "Classpath-exception-2.0"} #{"GPL-2.0-only WITH Classpath-exception-2.0"} #{"GPL-2.0-or-later" "Classpath-exception-2.0"} #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} @@ -168,22 +171,23 @@ determined." [uri] (when-not (s/blank? uri) - (manual-fixes - (let [suri (lciu/simplify-uri uri)] - ; 1. 
see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) - (if-let [ids (get @lcis/index-uri-to-id-d suri)] - (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids)) - ; 2. attempt to retrieve the text/plain contents of the uri and perform full license matching on it - (when-let [license-text (lcihttp/get-text uri)] - (when-let [ids (text->ids license-text)] - (lciei/prepend-source ids (str "content from " uri))))))))) + (lciei/prepend-source uri + (manual-fixes + (let [suri (lciu/simplify-uri uri)] + ; 1. see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) + (if-let [ids (get @lcis/index-uri-to-id-d suri)] + (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids)) + ; 2. attempt to retrieve the text/plain contents of the uri and perform full license matching on it + (when-let [license-text (lcihttp/get-text uri)] + (when-let [ids (text->ids license-text)] + ids)))))))) (defn- string->ids-info - "Converts the given String into a sequence of singleton maps (NOT A SINGLE - MAP!), each of which has a key is that is an SPDX identifier (either a listed - SPDX license or exception id), and whose value is a list of meta-information - about how that identifier was found. The result sequence is ordered in the - same order of appearance as the source values in s. + "Converts the given String into a sequence of singleton maps (NOT A LICE-COMB + EXPRESSION INFO MAP!), each of which has a key is that is an SPDX identifier + (either a listed SPDX license or exception id), and whose value is a list of + meta-information about how that identifier was found. The result sequence is + ordered in the same order of appearance as the source values in s. 
If no listed SPDX license or exception identifiers are found, returns a singleton sequence containing a map with a lice-comb specific 'unlisted' @@ -198,24 +202,28 @@ 5. Returning a lice-comb specific 'unlisted' LicenseRef" [s] (when-not (s/blank? s) - ; 1. Is it an SPDX license or exception id? - (let [s (s/trim s)] - (if-let [id (get @lcis/spdx-ids-d (s/lower-case s))] - (if (= id s) - (list {id (list {:id id :type :declared :strategy :spdx-listed-identifier-exact-match :source (list s)})}) - (list {id (list {:id id :type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)})})) - ; 2. Is it an SPDX license or exception name? - (if-let [ids (get @lcis/index-name-to-id-d (s/trim (s/lower-case s)))] - (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)})) ids) - ; 3. Is it a URI? If so, perform URI matching on it (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central) - (if-let [ids (uri->ids s)] - ids - ; 4. Attempt regex name matching - (if-let [ids (lcirm/matches s)] - ids - ; 5. No clue, so return a single unlisted SPDX LicenseRef - (let [id (lcis/name->unlisted s)] - (list {id (list {:id id :type :concluded :confidence :low :strategy :unlisted :source (list s)})}))))))))) + (let [s (s/trim s) + ids (or ; 1. Is it an SPDX license or exception id? + (when-let [id (get @lcis/spdx-ids-d (s/lower-case s))] + (if (= id s) + (list {id (list {:id id :type :declared :strategy :spdx-listed-identifier-exact-match :source (list s)})}) + (list {id (list {:id id :type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)})}))) + + ; 2. Is it the name of one or more SPDX licenses or exceptions? 
+ (when-let [ids (get @lcis/index-name-to-id-d (s/lower-case s))] + (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)})) ids)) + + ; 3. Might it be a URI? (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central) + (when-let [ids (uri->ids s)] + (map #(hash-map (key %) (val %)) ids)) + + ; 4. Attempt regex name matching + (lcirm/matches s) + + ; 5. No clue, so return a single unlisted SPDX LicenseRef + (let [id (lcis/name->unlisted s)] + (list {id (list {:id id :type :concluded :confidence :low :strategy :unlisted :source (list s)})})))] + (map (partial lciei/prepend-source s) ids)))) (defn- filter-blanks "Filter blank strings out of coll" @@ -300,16 +308,23 @@ The keys in the maps are the detected SPDX license and exception identifiers, and each value contains information about how that identifiers was determined." [name] - (some->> (split-on-operators name) - (drop-while keyword?) - (lc3/rdrop-while keyword?) - (map #(if (keyword? %) % (string->ids-info %))) - flatten - (filter identity) - (drop-while keyword?) - (lc3/rdrop-while keyword?) - seq - build-spdx-expressions-map)) + (when-not (s/blank? name) + (let [name (s/trim name)] + (lciei/prepend-source name + (or ; 1. Is it a cursed name? + (get @cursed-names-d name) + + ; 2. Attempt to construct an SPDX expression from the name + (some->> (split-on-operators name) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + (map #(if (keyword? %) % (string->ids-info %))) + flatten + (filter identity) + (drop-while keyword?) + (lc3/rdrop-while keyword?) + seq + build-spdx-expressions-map)))))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent @@ -322,4 +337,5 @@ (lcis/init!) (lcirm/init!) (lcihttp/init!) 
+ @cursed-names-d nil) diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index d635402..ed0686c 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -124,6 +124,50 @@ (s/replace #"\.[\p{Alnum}]{3,}\z" "")) ; Strip file type extension (if any) luri))))) +(defn readable-dir? + "Is d (a String or File) a readable directory?" + [d] + (let [d (io/file d)] + (and d + (.exists d) + (.canRead d) + (.isDirectory d)))) + +(defmulti readable-file? + "Is f (a String, File, InputStream, or Reader) a readable file?" + type) + +(defmethod readable-file? nil + [_]) + +(defmethod readable-file? java.io.File + [f] + (and f + (.exists f) + (.canRead f) + (not (.isDirectory f)))) + +(defmethod readable-file? java.lang.String + [s] + (or (valid-http-uri? s) + (readable-file? (io/file s)))) + +(defmethod readable-file? java.io.InputStream + [_] + true) + +(defmethod readable-file? java.io.Reader + [_] + true) + +(defmethod readable-file? java.net.URL + [_] + true) + +(defmethod readable-file? java.net.URI + [_] + true) + (defmulti filepath "Returns the full path and name of the given file-like thing (String, File, ZipEntry, URI, URL)." diff --git a/src/lice_comb/lein.clj b/src/lice_comb/lein.clj index d391ba6..301203d 100644 --- a/src/lice_comb/lein.clj +++ b/src/lice_comb/lein.clj @@ -36,7 +36,7 @@ style dep (a vector of the form `[groupId/artifactId \"version\"]`)." [dep] (when-let [toolsdep-dep (lein-dep->toolsdeps-dep dep)] - (lciei/prepend-source (lcd/dep->expressions-info toolsdep-dep) (pr-str dep)))) + (lciei/prepend-source (pr-str dep) (lcd/dep->expressions-info toolsdep-dep)))) (defn dep->expressions "Attempt to detect the SPDX license expression(s) (a set) in a Leiningen @@ -54,7 +54,7 @@ [deps] (into {} (dom/real-pmap #(vec [% (dep->expressions-info %)]) deps))) -(defn deps->expressions-info +(defn deps->expressions "Attempt to detect all of the SPDX license expression(s) in a Leiningen style dependency vector. 
The result is a map, where each entry in the map has a key that is the Leiningen dep, and the value is the set of SPDX license diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index e6bff68..8d172c3 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -68,10 +68,10 @@ pair. Returns nil if no matches were found." [{:keys [name url]}] ; 1. Look in the name field(s) - (if-let [name-expressions (lciei/prepend-source (lcmtch/name->expressions-info name) "")] + (if-let [name-expressions (lciei/prepend-source "" (lcmtch/name->expressions-info name))] name-expressions ; 2. If the names didn't give us any licenses, look in the url field(s) (this tends to be slower and less accurate) - (when-let [uri-ids (lciei/prepend-source (lcmtch/uri->ids-info url) "")] + (when-let [uri-ids (lciei/prepend-source "" (lcmtch/uri->ids-info url))] uri-ids))) (xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") @@ -117,33 +117,32 @@ ; Note: a few rare pom.xml files are missing the xmlns declation (e.g. software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags (defmethod pom->expressions-info java.io.InputStream [pom-is filepath] - (lciei/prepend-source - (let [pom-xml (xml/parse pom-is)] - (if-let [pom-licenses (xml-find-all-alts pom-xml [::pom/project ::pom/licenses] [:project :licenses])] - ; block exists - process it - (let [name-uri-pairs (some->> pom-licenses - (filter map?) ; Get rid of non-tag content (whitespace etc.) - (filter #(or (= ::pom/license (:tag %)) (= :license (:tag %)))) ; Get rid of non tags (which shouldn't exist, but Maven POMs are a shitshow...) 
- (map #(identity (let [name (xml-find-first-string-alts % [::pom/license ::pom/name] [:license :name]) - url (xml-find-first-string-alts % [::pom/license ::pom/url] [:license :url])] - (when (or name url) - {:name name :url url})))) - set) - licenses (into {} (map licenses-from-pair name-uri-pairs))] - (lcim/manual-fixes licenses)) - ; License block doesn't exist, so attempt to lookup the parent pom and get it from there - (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) - parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) - parent-gav (merge {} - (when parent {:group-id (lciu/strim (first (xi/find-first parent [::pom/groupId]))) - :artifact-id (lciu/strim (first (xi/find-first parent [::pom/artifactId]))) - :version (lciu/strim (first (xi/find-first parent [::pom/version])))}) - (when parent-no-ns {:group-id (lciu/strim (first (xi/find-first parent-no-ns [:groupId]))) - :artifact-id (lciu/strim (first (xi/find-first parent-no-ns [:artifactId]))) - :version (lciu/strim (first (xi/find-first parent-no-ns [:version])))}))] - (when-not (empty? parent-gav) - (pom->expressions-info (pom-uri-for-gav parent-gav)))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep - filepath)) + (lciei/prepend-source filepath + (let [pom-xml (xml/parse pom-is)] + (if-let [pom-licenses (xml-find-all-alts pom-xml [::pom/project ::pom/licenses] [:project :licenses])] + ; block exists - process it + (let [name-uri-pairs (some->> pom-licenses + (filter map?) ; Get rid of non-tag content (whitespace etc.) + (filter #(or (= ::pom/license (:tag %)) (= :license (:tag %)))) ; Get rid of non tags (which shouldn't exist, but Maven POMs are a shitshow...) 
+ (map #(identity (let [name (xml-find-first-string-alts % [::pom/license ::pom/name] [:license :name]) + url (xml-find-first-string-alts % [::pom/license ::pom/url] [:license :url])] + (when (or name url) + {:name name :url url})))) + set) + licenses (into {} (map licenses-from-pair name-uri-pairs))] + (lcim/manual-fixes licenses)) + ; License block doesn't exist, so attempt to lookup the parent pom and get it from there + (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) + parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) + parent-gav (merge {} + (when parent {:group-id (lciu/strim (first (xi/find-first parent [::pom/groupId]))) + :artifact-id (lciu/strim (first (xi/find-first parent [::pom/artifactId]))) + :version (lciu/strim (first (xi/find-first parent [::pom/version])))}) + (when parent-no-ns {:group-id (lciu/strim (first (xi/find-first parent-no-ns [:groupId]))) + :artifact-id (lciu/strim (first (xi/find-first parent-no-ns [:artifactId]))) + :version (lciu/strim (first (xi/find-first parent-no-ns [:version])))}))] + (when-not (empty? 
parent-gav) + (pom->expressions-info (pom-uri-for-gav parent-gav)))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep (defmethod pom->expressions-info :default ([pom] (pom->expressions-info pom (lciu/filepath pom))) diff --git a/src/lice_comb/utils.clj b/src/lice_comb/utils.clj index cb26a51..402dc9b 100644 --- a/src/lice_comb/utils.clj +++ b/src/lice_comb/utils.clj @@ -29,7 +29,8 @@ :spdx-listed-uri "SPDX listed URI (relaxed matching)" :expression-inference "inferred SPDX expression" :regex-matching "regular expression matching" - :unlisted "fallback to unlisted LicenseRef"}) + :unlisted "fallback to unlisted LicenseRef" + :manual-verification "manual verification"}) (defn- info-keyfn "sort-by keyfn for lice-comb info maps" @@ -57,7 +58,8 @@ :spdx-listed-uri "5" :expression-inference "6" :regex-matching "7" - :unlisted "8"))) + :unlisted "8" + :manual-verification "9"))) (defn- license-info-element->string "Converts the info list for the given identifier into a human-readable diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index bb0e1aa..260e22a 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -61,15 +61,15 @@ (deftest probable-license-files-tests (testing "Nil, empty, or blank directory" - (is (nil? (probable-license-files nil))) - (is (thrown? java.io.FileNotFoundException (probable-license-files ""))) - (is (thrown? java.io.FileNotFoundException (probable-license-files " "))) - (is (thrown? java.io.FileNotFoundException (probable-license-files "\n"))) - (is (thrown? java.io.FileNotFoundException (probable-license-files "\t")))) + (is (nil? (probable-license-files nil))) + (is (nil? (probable-license-files ""))) + (is (nil? (probable-license-files " "))) + (is (nil? (probable-license-files "\n"))) + (is (nil? (probable-license-files "\t")))) (testing "Doesn't exist" - (is (thrown? 
java.io.FileNotFoundException (probable-license-files "THIS_DIRECTORY_DOESNT_EXIST")))) + (is (nil? (probable-license-files "THIS_DIRECTORY_DOESNT_EXIST")))) (testing "Not a directory" - (is (thrown? java.nio.file.NotDirectoryException (probable-license-files "deps.edn")))) + (is (nil? (probable-license-files "deps.edn")))) (testing "A real directory" (is (= #{(io/file (str test-data-path "/asf-cat-1.0.12.pom")) (io/file (str test-data-path "/with-parent.pom")) @@ -82,13 +82,15 @@ (deftest file->expressions-tests (testing "Nil, empty, or blank filename" - (is (nil? (file->expressions nil))) - (is (thrown? java.io.FileNotFoundException (file->expressions ""))) - (is (thrown? java.io.FileNotFoundException (file->expressions " "))) - (is (thrown? java.io.FileNotFoundException (file->expressions "\n"))) - (is (thrown? java.io.FileNotFoundException (file->expressions "\t")))) + (is (nil? (file->expressions nil))) + (is (nil? (file->expressions ""))) + (is (nil? (file->expressions " "))) + (is (nil? (file->expressions "\n"))) + (is (nil? (file->expressions "\t")))) (testing "Non-existent files" - (is (thrown? java.io.FileNotFoundException (file->expressions "this_file_does_not_exist")))) + (is (nil? (file->expressions "this_file_does_not_exist")))) + (testing "Handed a directory" + (is (nil? (file->expressions ".")))) (testing "Files on disk" ; (is (= #{"CC-BY-4.0"} (file->expressions (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 (is (valid= #{"MPL-2.0"} (file->expressions (str test-data-path "/MPL-2.0/LICENSE"))))) @@ -106,13 +108,15 @@ (deftest zip->expressions-tests (testing "Nil, empty, or blank zip file name" - (is (nil? (zip->expressions nil))) - (is (thrown? java.io.FileNotFoundException (zip->expressions ""))) ; Note the hodgepodge of different thrown exception types here - java.util.zip is a mess! - (is (thrown? java.nio.file.NoSuchFileException (zip->expressions " "))) - (is (thrown? 
java.nio.file.NoSuchFileException (zip->expressions "\n"))) - (is (thrown? java.nio.file.NoSuchFileException (zip->expressions "\t")))) + (is (nil? (zip->expressions nil))) + (is (nil? (zip->expressions ""))) + (is (nil? (zip->expressions " "))) + (is (nil? (zip->expressions "\n"))) + (is (nil? (zip->expressions "\t")))) (testing "Non-existent zip file" - (is (thrown? java.nio.file.NoSuchFileException (zip->expressions "this_zip_file_does_not_exist")))) + (is (nil? (zip->expressions "this_zip_file_does_not_exist")))) + (testing "Handed a directory" + (is (nil? (file->expressions ".")))) (testing "Invalid zip file" (is (thrown? java.util.zip.ZipException (zip->expressions (str test-data-path "/bad.zip"))))) (testing "Valid zip file" @@ -121,18 +125,18 @@ (deftest dir->expressions-tests (testing "Nil, empty, or blank directory name" - (is (nil? (dir->expressions nil))) - (is (thrown? java.io.FileNotFoundException (dir->expressions ""))) - (is (thrown? java.io.FileNotFoundException (dir->expressions " "))) - (is (thrown? java.io.FileNotFoundException (dir->expressions "\n"))) - (is (thrown? java.io.FileNotFoundException (dir->expressions "\t")))) + (is (nil? (dir->expressions nil))) + (is (nil? (dir->expressions ""))) + (is (nil? (dir->expressions " "))) + (is (nil? (dir->expressions "\n"))) + (is (nil? (dir->expressions "\t")))) (testing "Non-existent or invalid directory" - (is (thrown? java.io.FileNotFoundException (dir->expressions "this_directory_does_not_exist"))) - (is (thrown? java.nio.file.NotDirectoryException (dir->expressions "deps.edn")))) + (is (nil? (dir->expressions "this_directory_does_not_exist"))) + (is (nil? 
(dir->expressions "deps.edn")))) (testing "Valid directory" (is (valid= ;#{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0"} ; CC-BY-4.0 failing due to https://github.com/spdx/license-list-XML/issues/1960 #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0"} - (dir->expressions ".")))) + (dir->expressions ".")))) (testing "Valid directory - include ZIP compressed files" (is (valid= ;#{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0" "AGPL-3.0-or-later"} ; CC-BY-4.0 failing due to https://github.com/spdx/license-list-XML/issues/1960 #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "AGPL-3.0-or-later"} diff --git a/test/lice_comb/impl/expressions_info_test.clj b/test/lice_comb/impl/expressions_info_test.clj index d75bdd5..87d01f6 100644 --- a/test/lice_comb/impl/expressions_info_test.clj +++ b/test/lice_comb/impl/expressions_info_test.clj @@ -42,23 +42,23 @@ (deftest prepend-source-tests (testing "nil/empty/blank" (is (nil? (prepend-source nil nil))) - (is (nil? (prepend-source nil ""))) - (is (= {} (prepend-source {} nil))) - (is (= {} (prepend-source {} "")))) + (is (= {} (prepend-source nil {}))) + (is (nil? 
(prepend-source "" nil))) + (is (= {} (prepend-source "" {})))) (testing "non-nil metadata that isn't lice-comb specific" - (is (= {:a "a"} (prepend-source {:a "a"} "foo")))) + (is (= {:a "a"} (prepend-source "foo" {:a "a"})))) (testing "non-nil metadata that is lice-comb specific" (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("pom.xml" "Apache Software Licence v2.0")}) "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "MIT")})} - (prepend-source md1 "pom.xml"))) + (prepend-source "pom.xml" md1))) (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("library.jar" "pom.xml" "Apache Software Licence v2.0")}) "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("library.jar" "pom.xml" "MIT")})} - (prepend-source (prepend-source md1 "pom.xml") "library.jar"))) + (prepend-source "library.jar" (prepend-source "pom.xml" md1)))) (is (= {"Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "Apache style license")} {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("pom.xml" "apache-2.0")} {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "Apache-2.0")}) "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "GNU General Public License 3.0 or later")})} - (prepend-source md3 "pom.xml"))))) + (prepend-source "pom.xml" md3))))) (deftest merge-maps-tests (testing "nil/empty" diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 68ef6e2..dc5b0df 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -151,7 +151,7 @@ (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License v2.0 w/Classpath exception"))) 
(is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 (GPL2), with the classpath exception"))) (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2+CE"))) ;#### !!!! + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2+CE"))) ; From JavaMail (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) (is (valid= #{"JSON"} (name->expressions "JSON License"))) (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest @@ -721,13 +721,13 @@ (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :declared :strategy :spdx-expression :source (list "GPL-2.0 WITH Classpath-exception-2.0")})} (name->expressions-info "GPL-2.0 WITH Classpath-exception-2.0")))) (testing "Single expressions that are not valid SPDX" - (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :concluded :confidence :low :strategy :expression-inference} - {:id "GPL-2.0-only" :type :concluded :confidence :low :strategy :regex-matching :source (list "GNU General Public License, version 2")} - {:id "Classpath-exception-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list "Classpath Exception")})} + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :concluded :confidence :low :strategy :expression-inference :source (list "GNU General Public License, version 2 with the GNU Classpath Exception")} + {:id "GPL-2.0-only" :type :concluded :confidence :low :strategy :regex-matching :source (list "GNU General Public License, version 2 with the GNU Classpath Exception" "GNU General Public License, version 2")} + {:id 
"Classpath-exception-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list "GNU General Public License, version 2 with the GNU Classpath Exception" "the GNU Classpath Exception" "Classpath Exception")})} (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception")))) (testing "Multiple expressions" - (is (valid-info= {"MIT" (list {:id "MIT" :type :concluded :confidence :high :strategy :regex-matching :source (list "MIT")}) - "BSD-4-Clause" (list {:id "BSD-4-Clause" :type :concluded :confidence :low :strategy :regex-matching :source (list "BSD")})} + (is (valid-info= {"BSD-4-Clause" (list {:id "BSD-4-Clause" :type :concluded :confidence :low :strategy :regex-matching :source (list "MIT / BSD" "BSD")}) + "MIT" (list {:id "MIT" :type :concluded :confidence :high :strategy :regex-matching :source (list "MIT / BSD" "MIT")})} (name->expressions-info "MIT / BSD")))) (testing "Some names from Clojars" (is (valid-info= {"BSD-3-Clause" (list {:id "BSD-3-Clause" :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list "https://opensource.org/licenses/BSD-3-Clause")})} diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index 71bd4c4..d231f12 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -20,6 +20,8 @@ (:require [clojure.spec.alpha :as spec] [spdx.expressions :as sexp])) +(println "\n☔️ Running tests on Clojure" (clojure-version) "/ JVM" (System/getProperty "java.version") (str "(" (System/getProperty "java.vm.name") " v" (System/getProperty "java.vm.version") ")\n")) + ; Here we hack up a "global once" function (def ^:private global-setup (delay ; Because java.util.logging is a hot mess @@ -29,7 +31,6 @@ ; Enable spec validation (spec/check-asserts true) - (println "\n☔️ Running tests on Clojure" (clojure-version) "/ JVM" (System/getProperty "java.version") (str "(" (System/getProperty "java.vm.name") " v" 
(System/getProperty "java.vm.version") ")\n")) nil)) (defn fixture From 086a85c6d5937a4b55030d588c08fdd4cff4cb43 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Tue, 5 Sep 2023 22:32:52 -0700 Subject: [PATCH 25/34] :construction: Ongoing work on issue #3 --- src/lice_comb/impl/regex_matching.clj | 2 +- test/lice_comb/deps_test.clj | 6 ++++-- test/lice_comb/matching_test.clj | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj index b6ac781..d45b2f2 100644 --- a/src/lice_comb/impl/regex_matching.clj +++ b/src/lice_comb/impl/regex_matching.clj @@ -245,7 +245,7 @@ :pad-ver? true :latest-ver "1.0"} {:id "BSD" - :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?" + :regex #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Licen[cs]e|Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?" 
:fn bsd-id-constructor} {:id "CC0" :regex #"(?i)\bCC\s*0" diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index 09b9e92..ea07f87 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -131,8 +131,8 @@ (is (valid= #{"EPL-1.0"} (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) (is (valid= #{"EPL-1.0"} (dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) (testing "Valid deps - multi license" - (is (valid= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (valid= #{"EPL-1.0" "LGPL-2.1-only"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (valid= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (valid= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) (is (valid= #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} (dep->expressions ['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) (is (valid= #{"Apache-2.0" "LGPL-2.1-or-later"} (dep->expressions ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (dep->expressions ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) @@ -140,6 +140,7 @@ ; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 (is (valid= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) +(comment (deftest deps-expressions-test (testing "Nil and empty deps" (is (nil? 
(deps-expressions nil))) @@ -202,3 +203,4 @@ 'camel-snake-kebab/camel-snake-kebab {:deps/manifest :mvn :mvn/version "0.4.2"} 'tolitius/xml-in {:deps/manifest :mvn :mvn/version "0.1.1"} 'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}))))) +) \ No newline at end of file diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index dc5b0df..7954ecf 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -129,6 +129,7 @@ (is (valid= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) (is (valid= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License (BSD3)"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD License 3"))) (is (valid= #{"BSD-3-Clause-Attribution"} (name->expressions "BSD 3-Clause Attribution"))) (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD"))) (is (valid= #{"CC-BY-3.0"} (name->expressions "Attribution 3.0 Unported"))) From ed72a62c5ed63a018bc24015b547260eab7680fe Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Wed, 6 Sep 2023 10:29:24 -0700 Subject: [PATCH 26/34] :books: Add demo section to readme --- README.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aba851c..c1d2f42 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ # lice-comb -A Clojure library for software license detection. It does this by combing through `tools.deps` dependency maps, Maven POMs, directory structures & ZIP files, and attempting to detect what license(s) they contain. +A Clojure library for software _lice_nse detection. It does this by _comb_ing through tools.deps and Leiningen dependencies, directory structures, and JAR & ZIP files, and attempting to detect what SPDX license expression(s) they contain. 
This library leverages, and is inspired by, the *excellent* [SPDX project](https://spdx.dev/). It's a great shame that it doesn't have greater traction in the Java & Clojure (and wider open source) communities. If you're new to SPDX and would prefer to read a primer rather than dry specification documents, I can thoroughly recommend [David A. Wheeler's SPDX Tutorial](https://github.com/david-a-wheeler/spdx-tutorial#spdx-tutorial). @@ -17,7 +17,7 @@ This library leverages, and is inspired by, the *excellent* [SPDX project](https * `lice-comb` (all versions) requires an internet connection. -* `lice-comb` (all versions) assumes Maven is installed and in the `PATH` (but has fallback logic if it isn't). +* `lice-comb` (all versions) assumes Maven is installed and in the `PATH` (but has fallback logic if it isn't available). * `lice-comb` (v2.0+) requires JDK 11 or higher. @@ -46,6 +46,59 @@ $ lein try com.github.pmonks/lice-comb $ deps-try com.github.pmonks/lice-comb ``` +### Demo + +```clojure +;; License name and full text matching +(require '[lice-comb.matching :as lcm]) + +(lcm/name->expressions "Apache") +;=> #{"Apache-2.0"} + +(lcm/name->expressions "The MIT license") +;=> #{"MIT"} + +(lcm/name->expressions "GNU Public License 2.0 w/ the GNU Classpath Exception") +;=> #{"GPL-2.0-only WITH Classpath-exception-2.0"} + +(lcm/text->ids (slurp "https://www.apache.org/licenses/LICENSE-2.0.txt")) +;=> #{"Apache-2.0"} + +;; License extraction from Maven poms, including ones that aren't locally downloaded +(require '[lice-comb.maven :as lcmvn]) + +(lcmvn/pom->expressions (str (System/getProperty "user.home") "/.m2/repository/org/clojure/clojure/1.11.1/clojure-1.11.1.pom")) +;=> #{"EPL-1.0"} + +(lcmvn/pom->expressions "https://repo1.maven.org/maven2/org/springframework/spring-core/6.0.11/spring-core-6.0.11.pom") +;=> #{"Apache-2.0"} + +;; License extraction from tools.deps dependency maps +(require '[lice-comb.deps :as lcd]) + +(lcd/dep->expressions ['org.clojure/clojure 
{:deps/manifest :mvn :mvn/version "1.11.1"}]) +;=> #{"EPL-1.0"} + +;; Information about matches +(lcm/name->expressions-info "Apache-2.0") +;=> {"Apache-2.0" ({:type :declared, :strategy :spdx-expression, :source ("Apache-2.0")})} + +(lcm/name->expressions-info "GNU Public License 2.0 or later w/ the GNU Classpath Exception") +;=> {"GPL-2.0-or-later WITH Classpath-exception-2.0" +; ({:type :concluded, :confidence :low, :strategy :expression-inference, :source ("GNU Public License 2.0 or later w/ the GNU Classpath Exception")} +; {:id "GPL-2.0-or-later", :type :concluded, :confidence :medium, :strategy :regex-matching, :source ("GNU Public License 2.0 or later w/ the GNU Classpath Exception" +; "GNU Public License 2.0 or later")} +; {:id "Classpath-exception-2.0", :type :concluded, :confidence :low, :strategy :regex-matching, :source ("GNU Public License 2.0 or later w/ the GNU Classpath Exception" +; "the GNU Classpath Exception" +; "Classpath Exception")})} + +(lcmvn/pom->expressions-info "https://repo.clojars.org/canvas/canvas/0.1.6/canvas-0.1.6.pom") +;=> {"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" +; ({:type :declared, :strategy :spdx-expression, :source ("https://repo.clojars.org/canvas/canvas/0.1.6/canvas-0.1.6.pom" +; "" +; "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0")})} +``` + ### API Documentation [API documentation is available here](https://pmonks.github.io/lice-comb/), or [here on cljdoc](https://cljdoc.org/d/com.github.pmonks/lice-comb/). 
From 75bd5a73b1fdb04251e99c60c3efb3f2e3b5f402 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Wed, 6 Sep 2023 10:35:18 -0700 Subject: [PATCH 27/34] :arrow_up: Upgrade dependencies --- .github/workflows/ci.yml | 2 +- .github/workflows/deploy.yml | 2 +- .github/workflows/docs.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55911ae..b0d33e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@11.0 + - uses: DeLaGuardo/setup-clojure@12.0 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6ea8c91..efe2fb5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@11.0 + - uses: DeLaGuardo/setup-clojure@12.0 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 58d3454..e38ad38 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@11.0 + - uses: DeLaGuardo/setup-clojure@12.0 with: cli: latest - uses: actions/cache@v3 From d7bed89e2fa5d8e6bac8a42c2c7b126ba3afe04b Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Wed, 6 Sep 2023 20:24:10 -0700 Subject: [PATCH 28/34] :construction: Ongoing work on issue #3 --- resources/lice_comb/deps/fallbacks.edn | 5 -- resources/lice_comb/deps/overrides.edn | 3 -- resources/lice_comb/matching/aliases.edn | 62 ------------------------ src/lice_comb/deps.clj | 49 +++---------------- test/lice_comb/deps_test.clj | 6 +-- 5 files changed, 11 insertions(+), 114 deletions(-) delete mode 100644 resources/lice_comb/deps/fallbacks.edn delete mode 100644 
resources/lice_comb/deps/overrides.edn delete mode 100644 resources/lice_comb/matching/aliases.edn diff --git a/resources/lice_comb/deps/fallbacks.edn b/resources/lice_comb/deps/fallbacks.edn deleted file mode 100644 index e4f612a..0000000 --- a/resources/lice_comb/deps/fallbacks.edn +++ /dev/null @@ -1,5 +0,0 @@ -{ -; borkdude/sci.impl.reflector {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/borkdude/sci/blob/master/LICENSE"} -; org.ow2.asm/asm {:spdx true :licenses #{"BSD-3-Clause"} :evidence "https://asm.ow2.io/license.html"} -; slipset/deps-deploy {:spdx true :licenses #{"EPL-1.0"} :evidence "https://github.com/slipset/deps-deploy/blob/master/LICENSE"} -} \ No newline at end of file diff --git a/resources/lice_comb/deps/overrides.edn b/resources/lice_comb/deps/overrides.edn deleted file mode 100644 index 6784832..0000000 --- a/resources/lice_comb/deps/overrides.edn +++ /dev/null @@ -1,3 +0,0 @@ -{ -; javax.mail/mail {:spdx true :licenses #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} :evidence "https://javaee.github.io/javamail/JavaMail-License"} -} \ No newline at end of file diff --git a/resources/lice_comb/matching/aliases.edn b/resources/lice_comb/matching/aliases.edn deleted file mode 100644 index a04832a..0000000 --- a/resources/lice_comb/matching/aliases.edn +++ /dev/null @@ -1,62 +0,0 @@ -; Important note: we can't use regexes as map keys as they don't implement equality / hash 🙄 -; Instead we use the string representation, and compile-on-demand+memoize - -; Note escaping of \, as these are string, not regex, literals -{ - "3-clause\\s+bsd\\s+license" #{"BSD-3-Clause"} - "apache(\\s+software)?(\\s+license(s)?(\\s*[,-])?)?(\\s+v(ersion)?)?\\s*1(\\.0)?" #{"Apache-1.0"} - "apache(\\s+software)?(\\s+license(s)?(\\s*[,-])?)?(\\s+v(ersion)?)?\\s*1\\.1" #{"Apache-1.1"} - "apache(\\s+software)?(\\s+license(s)?(\\s*[,-])?)?(\\s+v(ersion)?)?\\s*2(\\.0)?" #{"Apache-2.0"} - "apache(\\s+software)?(\\s+license(s)?)?" 
#{"Apache-1.0"} ; Assume earliest version - "attribution\\s+3\\.0\\s+unported" #{"CC-BY-3.0"} - "attribution\\s+4\\.0\\s+international" #{"CC-BY-4.0"} - "bouncy\\s+castle\\s+licence" #{"MIT"} ; See https://github.com/spdx/license-list-XML/issues/910 - "bsd\\s+3-clause\\s+attribution" #{"BSD-3-Clause-Attribution"} - "bsd\\s+3-clause\\s+license" #{"BSD-3-Clause"} - "bsd\\s+license\\s+3" #{"BSD-3-Clause"} - "cc0(\\s+1(\\.0)?)?(\\s+universal)?" #{"CC0-1.0"} - "cddl" #{"CDDL-1.0"} - "cddl/gplv2\\+ce" #{"CDDL-1.0" "GPL-2.0-with-classpath-exception"} - "cddl\\+gpl\\s+license" #{"CDDL-1.0" "GPL-2.0"} - "cddl\\s+1(\\.0)?\\+gpl\\s+license" #{"CDDL-1.1" "GPL-2.0"} - "cddl\\s+1\\.1\\+gpl\\s+license" #{"CDDL-1.1" "GPL-2.0"} - "cddl\\s+\\+\\s+gpl\\s*v2\\s+with\\s+classpath\\s+exception" #{"CDDL-1.0" "GPL-2.0-with-classpath-exception"} - "common\\s+development\\s+and\\s+distribution\\s+license(\\s+\\(cddl\\))?\\s+version\\s+1(\\.0|\\s+|\\z)" #{"CDDL-1.0"} - "common\\s+development\\s+and\\s+distribution\\s+license(\\s+\\(cddl\\))?\\s+version\\s+1\\.1" #{"CDDL-1.1"} - "copyright(\\s+\\(c\\)|©)?\\s+2011\\s+matthew\\s+lee\\s+hinman" #{"MIT"} - "copyright\\s+\\(c\\)\\s+2000-2012\\s+jason\\s+hunter\\s+\\&\\s+brett\\s+mclaughlin" #{"NON-SPDX-JDOM"} ; Note: not an SPDX license - "creative\\s+commons(\\s+legal\\s+code)?\\s+attribution\\s+3\\.0\\s+unported" #{"CC-BY-3.0"} - "creative\\s+commons\\s+attribution-sharealike\\s+4\\.0\\s+international\\s+public\\s+license" #{"CC-BY-SA-4.0"} - "cup\\s+parser\\s+generator\\s+copyright\\s+notice,\\s+license,\\s+and\\s+disclaimer" #{"MIT"} ; See https://www.apache.org/legal/resolved.html#category-a - "do\\s+what\\s+the\\s+fuck\\s+you\\s+want\\s+to\\s+public\\s+license" #{"WTFPL"} - "dual\\s+license\\s+consisting\\s+of\\s+the\\s+cddl\\s+v1(\\.0)?\\s+and\\s+gpl\\s+v2" #{"CDDL-1.0" "GPL-2.0"} - "dual\\s+license\\s+consisting\\s+of\\s+the\\s+cddl\\s+v1\\.1\\s+and\\s+gpl\\s+v2" #{"CDDL-1.1" "GPL-2.0"} - 
"eclipse\\s+distribution\\s+license\\s+-\\s+v\\s+1\\.0" #{"BSD-3-Clause"} ; See https://wiki.spdx.org/view/Legal_Team/License_List/Licenses_Under_Consideration#Processed_License_Requests - "eclipse\\s+public\\s+license" #{"EPL-1.0"} ; Assume earliest version - "eclipse\\s+public\\s+license(\\s*[,-])?\\s*v(ersion)?\\s*1(\\.0|\\s+|\\z)" #{"EPL-1.0"} - "eclipse\\s+public\\s+license(\\s*[,-])?\\s*v(ersion)?\\s*1\\.1" #{"EPL-1.1"} - "eclipse\\s+public\\s+license(\\s*[,-])?\\s*v(ersion)?\\s*2(\\.0|\\s+|\\z)" #{"EPL-2.0"} - "json\\.org" #{"JSON"} - "gnu\\s+affero\\s+general\\s+public\\s+license" #{"AGPL-3.0"} ; Assume earliest version - "gnu\\s+affero\\s+general\\s+public\\s+license(\\s+v(ersion)?)?\\s+3" #{"AGPL-3.0"} - "gnu\\s+general\\s+public\\s+license" #{"GPL-1.0"} ; Assume earliest version - "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+1" #{"GPL-1.0"} - "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+2" #{"GPL-2.0"} - "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+2(\\s+\\(gpl2\\))?(\\s*[,-])?\\s+with\\s+the(\\s+gnu)?\\s+classpath\\s+exception" #{"GPL-2.0-with-classpath-exception"} - "gnu\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+3" #{"GPL-3.0"} - "gnu\\s+lesser\\s+general\\s+public\\s+license" #{"LGPL-2.1"} ; Assume earliest version (note: "lesser" didn't appear until v2.1 - it was "library before that") - "gnu\\s+lesser\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+2\\.1" #{"LGPL-2.1"} - "gnu\\s+lesser\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+3" #{"LGPL-3.0"} - "gnu\\s+library\\s+general\\s+public\\s+license" #{"LGPL-2.0"} ; There was only ever one version of the "GNU Library General Public License" (v2.0) - "Library" was renamed to "Lesser" as of v2.1 - "gwt\\s+terms" #{"Apache-2.0"} ; See http://www.gwtproject.org/terms.html - "lesser\\s+general\\s+public\\s+license(\\s*[,-])?(\\s+v(ersion)?)?\\s+3\\s+or\\s+greater" #{"LGPL-3.0"} - 
"mozilla\\s+public\\s+license" #{"MPL-1.0"} - "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+1(\\.0|\\s+|\\z)" #{"MPL-1.0"} - "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+1.1" #{"MPL-1.1"} - "mozilla\\s+public\\s+license(\\s+v(ersion)?)?\\s+2(\\.0|\\s+|\\z)" #{"MPL-2.0"} - "new\\s+bsd\\s+license" #{"BSD-3-Clause"} - "public\\s+domain" #{"LicenseRef-lice-comb-PUBLIC-DOMAIN"} - "similar\\s+to\\s+apache\\s+license\\s+but\\s+with\\s+the\\s+acknowledgment\\s+clause\\s+removed" #{"Plexus"} ; Note: see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - "the\\s+mx4j\\s+license(\\s*[,-])?\\s+version\\s+1\\.0" #{"Apache-1.1"} ; See https://wiki.spdx.org/view/Legal_Team/License_List/Licenses_Under_Consideration#Processed_License_Requests - "this\\s+is\\s+free\\s+and\\s+unencumbered\\s+software\\s+released\\s+into\\s+the\\s+public\\s+domain\\." #{"Unlicense"} -} \ No newline at end of file diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index edfcb9c..f0e3cf5 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -21,33 +21,10 @@ dependencies in tools.deps lib-map format." (:require [clojure.string :as s] [dom-top.core :as dom] - [spdx.licenses :as sl] [lice-comb.maven :as lcmvn] [lice-comb.files :as lcf] - [lice-comb.impl.data :as lcd] [lice-comb.impl.expressions-info :as lciei])) -;####TODO: FIGURE OUT HOW TO HANDLE METADATA FOR OVERRIDES / FALLBACKS!!!! 
-;(def ^:private overrides-d (delay (lcd/load-edn-resource "lice_comb/deps/overrides.edn"))) -;(def ^:private fallbacks-d (delay (lcd/load-edn-resource "lice_comb/deps/fallbacks.edn"))) - -;(defn- check-overrides -; "Checks if an override should be used for the given dep" -; ([ga] (check-overrides ga nil)) -; ([ga v] -; (let [gav (symbol (str ga (when v (str "@" v))))] -; (:licenses (get @overrides-d gav (get @overrides-d ga)))))) ; Lookup overrides both with and without the version - -;(defn- check-fallbacks -;####TODO: UPDATE FOR license-info MAP RATHER THAN ID SET -; "Checks if a fallback should be used for the given dep, given the set of -; detected ids" -; [ga ids] -; (if (or (empty? ids) -; (every? #(not (sl/listed-id? %)) ids)) -; (:licenses (get @fallbacks-d ga {:licenses ids})) -; ids)) - (defn- normalise-dep "Normalises a dep, by removing any classifier suffixes from the artifact-id (e.g. the $blah suffix in com.foo/bar$blah)." @@ -83,26 +60,18 @@ (when dep (let [[ga info] (normalise-dep dep) [group-id artifact-id] (s/split (str ga) #"/") - version (:mvn/version info)] -; (if-let [override (check-overrides ga version)] -; override - (let [pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) - expressions ;(check-fallbacks ga - (if-let [expressions (lcmvn/pom->expressions-info pom-uri)] - expressions - (into {} (dom/real-pmap lcf/zip->expressions-info (:paths info))));) ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) - ] - (lciei/prepend-source (dep->string dep) expressions)))));) + version (:mvn/version info) + pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) + expressions (if-let [expressions (lcmvn/pom->expressions-info pom-uri)] + expressions + (into {} (dom/real-pmap lcf/zip->expressions-info (:paths info))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) + (lciei/prepend-source (dep->string dep) expressions)))) (defmethod dep->expressions-info :deps [dep] (when dep - (let [[ga 
info] (normalise-dep dep) - version (:git/sha info)] -; (if-let [override (check-overrides ga version)] -; override -; (check-fallbacks ga - (lciei/prepend-source (dep->string dep) (lcf/dir->expressions-info (:deps/root info))))));)) + (let [[_ info] (normalise-dep dep)] + (lciei/prepend-source (dep->string dep) (lcf/dir->expressions-info (:deps/root info)))))) (defmethod dep->expressions-info nil [_]) @@ -140,6 +109,4 @@ [] (lcmvn/init!) (lcf/init!) -; @overrides-d -; @fallbacks-d nil) diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index ea07f87..d550ba9 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -127,9 +127,9 @@ (is (valid= #{(lcis/public-domain)} (dep->expressions ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) (is (valid= #{"Apache-2.0"} (dep->expressions ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) (is (valid= #{"Apache-2.0"} (dep->expressions ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) - (testing "Valid deps - no licenses in deployed artifacts -> leverage fallbacks" - (is (valid= #{"EPL-1.0"} (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) - (is (valid= #{"EPL-1.0"} (dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) + (testing "Valid deps - no licenses in deployed artifacts" + (is (nil? (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) + (is (nil? 
(dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) (testing "Valid deps - multi license" (is (valid= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) (is (valid= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) From 194ac22fe5835d2b2b924fc3d61ea08b8096258b Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Thu, 7 Sep 2023 17:51:10 -0700 Subject: [PATCH 29/34] :construction: Ongoing work on issue #3 --- src/lice_comb/lein.clj | 12 ++++--- test/lice_comb/lein_test.clj | 70 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 test/lice_comb/lein_test.clj diff --git a/src/lice_comb/lein.clj b/src/lice_comb/lein.clj index 301203d..439c109 100644 --- a/src/lice_comb/lein.clj +++ b/src/lice_comb/lein.clj @@ -25,11 +25,11 @@ (defn- lein-dep->toolsdeps-dep "Converts a leiningen style dependency vector into a (partial) tools.deps style - dependency map. This is partial in that just enough of the tools.deps style - map is constructed for lice-comb.deps to function." + dependency MapEntry. This is partial in that just enough of the tools.deps style + info map (in the value) is constructed for lice-comb.deps to function." [[ga version :as dep]] (when dep - (hash-map ga {:mvn/version version :deps/manifest :mvn}))) ;####TODO: Synthesise :paths key (for paths to JAR files) + [ga {:mvn/version version :deps/manifest :mvn}])) ;####TODO: Synthesise :paths key (for paths to JAR files) (defn dep->expressions-info "Attempt to detect the SPDX license expression(s) (a map) in a Leiningen @@ -52,7 +52,8 @@ that is the Leiningen dep, and the value is the lice-comb expressions-info map for that dep." 
[deps] - (into {} (dom/real-pmap #(vec [% (dep->expressions-info %)]) deps))) + (when deps + (into {} (dom/real-pmap #(vec [% (dep->expressions-info %)]) deps)))) (defn deps->expressions "Attempt to detect all of the SPDX license expression(s) in a Leiningen style @@ -60,7 +61,8 @@ that is the Leiningen dep, and the value is the set of SPDX license expression(s) for that dep." [deps] - (into {} (dom/real-pmap #(vec [% (dep->expressions %)]) deps))) + (when deps + (into {} (dom/real-pmap #(vec [% (dep->expressions %)]) deps)))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/test/lice_comb/lein_test.clj b/test/lice_comb/lein_test.clj new file mode 100644 index 0000000..86fb625 --- /dev/null +++ b/test/lice_comb/lein_test.clj @@ -0,0 +1,70 @@ +; +; Copyright © 2021 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.lein-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] + [lice-comb.test-boilerplate :refer [fixture valid=]] + [lice-comb.impl.spdx :as lcis] + [lice-comb.lein :refer [dep->expressions deps->expressions]])) + +(use-fixtures :once fixture) + +; We keep these short, as this is basically just a thin wrapper around lice-comb.deps +(deftest dep->ids-tests + (testing "Nil deps" + (is (nil? (dep->expressions nil)))) + (testing "Invalid deps" + (is (nil? 
(dep->expressions ['com.github.pmonks/invalid-project "0.0.1"]))) ; Invalid GA + (is (nil? (dep->expressions ['org.clojure/clojure "1.0.0-SNAPSHOT"])))) ; Invalid V + (testing "Valid deps - single license" + (is (= #{"Apache-2.0"} (dep->expressions ['com.github.pmonks/asf-cat "1.0.12"]))) + (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/clojure "1.10.3"]))) + (is (= #{"BSD-4-Clause"} (dep->expressions ['org.ow2.asm/asm "5.2"]))) + (is (= #{(lcis/public-domain)} (dep->expressions ['aopalliance/aopalliance "1.0"]))) + (is (= #{"CDDL-1.0"} (dep->expressions ['javax.activation/activation "1.1.1"]))) + (is (= #{"CC0-1.0"} (dep->expressions ['net.i2p.crypto/eddsa "0.3.0"]))) + (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal "4.0.250"]))) + (is (= #{"Apache-2.0"} (dep->expressions ['software.amazon.ion/ion-java "1.0.0"])))) + (testing "Valid deps - no licenses in deployed artifacts -> leverage fallbacks" + (is (nil? (dep->expressions ['slipset/deps-deploy "0.2.0"]))) + (is (nil? (dep->expressions ['borkdude/sci.impl.reflector "0.0.1"])))) + (testing "Valid deps - multi license" + (is (= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-classic "1.2.7"]))) ; Note: implies LGPL-2.1-only, but name is ambiguous + (is (= #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} + (dep->expressions ['javax.mail/mail "1.4.7"])))) + (testing "Valid deps - Maven classifiers" +; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->expressions ['com.github.jnr/jffi$native "1.3.11}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 + (is (= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native "1.3.11"]))))) + +; Note: we can't use valid= or valid-info= here, since the results from deps->expressions are unique +(deftest deps-expressions-test + (testing "Nil and empty deps" + (is (nil? 
(deps->expressions nil))) + (is (= {} (deps->expressions [])))) + (testing "Single deps" + (is (= {['org.clojure/clojure "1.10.3"] #{"EPL-1.0"}} + (deps->expressions [['org.clojure/clojure "1.10.3"]])))) + (testing "Multiple deps" + (is (= {'[org.clojure/clojure "1.10.3"] #{"EPL-1.0"} + '[org.clojure/spec.alpha "0.2.194"] #{"EPL-1.0"} + '[cheshire/cheshire "5.10.1"] #{"MIT"} + '[com.fasterxml.jackson.core/jackson-core "2.12.4"] #{"Apache-2.0"}} + (deps->expressions [['org.clojure/clojure "1.10.3"] + ['org.clojure/spec.alpha "0.2.194"] + ['cheshire/cheshire "5.10.1"] + ['com.fasterxml.jackson.core/jackson-core "2.12.4"]]))))) From ebc44be6c76d6c4fd1a1d9114e0d03f8385331fb Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Thu, 7 Sep 2023 19:03:07 -0700 Subject: [PATCH 30/34] :books: Updates to demo --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c1d2f42..e8dbc02 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,12 @@ $ deps-try com.github.pmonks/lice-comb ;; License name and full text matching (require '[lice-comb.matching :as lcm]) +; Initialise the matching namespace +; Notes: +; 1. This is slow (takes ~1 minute on my laptop), almost all of which is Spdx-Java-Library's initialisation (see https://github.com/spdx/Spdx-Java-Library/issues/193) +; 2. This step is optional, though initialisation will still happen regardless, and when it does you'll incur the same cost +(lcm/init!) 
+ (lcm/name->expressions "Apache") ;=> #{"Apache-2.0"} @@ -79,7 +85,8 @@ $ deps-try com.github.pmonks/lice-comb (lcd/dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.11.1"}]) ;=> #{"EPL-1.0"} -;; Information about matches +;; Information about matches (useful for better understanding how lice-comb arrived at a given set of expressions, and +;; how confident it is in the values it's providing) (lcm/name->expressions-info "Apache-2.0") ;=> {"Apache-2.0" ({:type :declared, :strategy :spdx-expression, :source ("Apache-2.0")})} From 82847fb2fc4fcd2f57ebefe21159cc8155e98de2 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Thu, 7 Sep 2023 22:44:32 -0700 Subject: [PATCH 31/34] :art: Clean up fn names and locations and docstrings --- README.md | 16 ++++ resources/lice_comb/names.edn | 4 +- src/lice_comb/deps.clj | 39 ++++----- src/lice_comb/files.clj | 72 ++++++++-------- src/lice_comb/impl/expressions_info.clj | 31 ++++++- src/lice_comb/impl/http.clj | 37 ++++++++ src/lice_comb/impl/matching.clj | 109 +++++++++--------------- src/lice_comb/impl/regex_matching.clj | 36 ++++---- src/lice_comb/impl/utils.clj | 2 +- src/lice_comb/lein.clj | 4 +- src/lice_comb/matching.clj | 80 +++++++++++------ src/lice_comb/maven.clj | 69 +++------------ src/lice_comb/utils.clj | 16 ++-- test/lice_comb/matching_test.clj | 19 ++++- 14 files changed, 291 insertions(+), 243 deletions(-) diff --git a/README.md b/README.md index e8dbc02..8181ee0 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,22 @@ $ deps-try com.github.pmonks/lice-comb ; ({:type :declared, :strategy :spdx-expression, :source ("https://repo.clojars.org/canvas/canvas/0.1.6/canvas-0.1.6.pom" ; "" ; "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0")})} + +;; Pretty print expressions-info +(require '[lice-comb.utils :as lcu]) + +(println (lcu/expressions-info->string (lcd/dep->expressions-info ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) +;=> Apache-2.0: +; 
Concluded +; Confidence: high +; Strategy: regular expression matching +; Source: +; > com.amazonaws/aws-java-sdk-s3@1.12.129 +; > https://repo.maven.apache.org/maven2/com/amazonaws/aws-java-sdk-s3/1.12.129/aws-java-sdk-s3-1.12.129.pom +; > https://repo.maven.apache.org/maven2/com/amazonaws/aws-java-sdk-pom/1.12.129/aws-java-sdk-pom-1.12.129.pom +; > +; > Apache License, Version 2.0 +nil ``` ### API Documentation diff --git a/resources/lice_comb/names.edn b/resources/lice_comb/names.edn index eaa163d..1511f80 100644 --- a/resources/lice_comb/names.edn +++ b/resources/lice_comb/names.edn @@ -1,6 +1,6 @@ -; Map of name values seen in the wild that are too ambiguous or cursed to support any reasonable form of automated parsing +; Map of name values seen in the wild that are too ambiguous / cursed to support any reasonable form of automated parsing { - ;Seen in https://repo.maven.apache.org/maven2/com/sun/mail/all/1.4.7/all-1.4.7.pom + ; Seen in https://repo.maven.apache.org/maven2/com/sun/mail/all/1.4.7/all-1.4.7.pom "GPLv2+CE" {"GPL-2.0-only WITH Classpath-exception-2.0" ({:type :concluded :confidence :high :strategy :manual-verification :source ("GPLv2+CE")} {:id "GPL-2.0-only" :type :concluded :confidence :high :strategy :manual-verification :source ("GPLv2+CE" "GPLv2")} diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index f0e3cf5..3483cec 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -17,12 +17,13 @@ ; (ns lice-comb.deps - "Functionality related to finding and determining license information from - dependencies in tools.deps lib-map format." + "Functionality related to combing tools.deps dependency maps and lib maps for + license information." 
(:require [clojure.string :as s] [dom-top.core :as dom] [lice-comb.maven :as lcmvn] [lice-comb.files :as lcf] + [lice-comb.impl.http :as lcihttp] [lice-comb.impl.expressions-info :as lciei])) (defn- normalise-dep @@ -46,12 +47,9 @@ (str ga "@" (:git/sha info) (when-let [tag (:git/tag info)] (str "/" tag)))) (defmulti dep->expressions-info - "Attempt to detect the SPDX license expression(s) (a map) in a tools.deps - style dep (a MapEntry or two-element sequence of - `['groupId/artifactId dep-info]`). - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + "Returns an expressions-info map for the given tools.deps dep (a MapEntry or + two-element vector of `['groupId/artifactId dep-info]`), or nil if no + expressions were found." {:arglists '([[ga info]])} (fn [[_ info]] (:deps/manifest info))) @@ -61,7 +59,7 @@ (let [[ga info] (normalise-dep dep) [group-id artifact-id] (s/split (str ga) #"/") version (:mvn/version info) - pom-uri (lcmvn/pom-uri-for-gav group-id artifact-id version) + pom-uri (lcihttp/gav->pom-uri group-id artifact-id version) expressions (if-let [expressions (lcmvn/pom->expressions-info pom-uri)] expressions (into {} (dom/real-pmap lcf/zip->expressions-info (:paths info))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) @@ -82,24 +80,27 @@ {:dep dep}))) (defn dep->expressions - "Attempt to detect the SPDX license expression(s) (a set) in a tools.deps - style dep (a MapEntry or two-element sequence of - `[groupId/artifactId dep-info]`). - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + "Returns a set of SPDX expressions (Strings) for the given tools.deps dep (a + MapEntry or two-element vector of `['groupId/artifactId dep-info-map]`), or + nil if no expressions were found." 
[dep] (some-> (dep->expressions-info dep) keys set)) (defn deps-expressions - "Attempt to detect the SPDX license expression(s) in a tools.deps 'lib map', - returning a new lib map with the licenses assoc'ed in (in key - `:lice-comb/license-info`)" + "Takes a tools.deps lib map and returns a new lib map with an expressions-info + map assoc'ed into each dep's info map, in key `:lice-comb/license-info`. + If no license information was found for a given dep, the lib map entry for + that dep will be returned unchanged (it will not have the + `:lice-comb/license-info` key in the info map)." [deps] (when deps - (into {} (dom/real-pmap #(let [[k v] %] [k (assoc v :lice-comb/license-info (dep->expressions-info [k v]))]) deps)))) + (into {} (dom/real-pmap #(if-let [expressions-info (dep->expressions-info %)] + (let [[k v] %] + [k (assoc v :lice-comb/license-info expressions-info)]) + %) + deps)))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index ee8e96f..8d286e8 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -17,8 +17,8 @@ ; (ns lice-comb.files - "Functionality related to finding and determining license information from - files and directories." + "Functionality related to combing files, directories, and ZIP format archives + for license information." (:require [clojure.string :as s] [clojure.java.io :as io] [lice-comb.matching :as lcmtch] @@ -50,11 +50,13 @@ set))) (defn file->expressions-info - "Attempts to determine the SPDX license expression(s) (a map) from the given - file (an InputStream or something that can have an io/input-stream opened on - it). If an InputStream is provided, it must already be open and the associated - filepath must be provided as the second parameter (it is optional in other - cases)." 
+ "Returns an expressions-info map for the given file (an InputStream or + something that can have an io/input-stream opened on it), or nil if no + expressions were found. + + If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." ([f] (file->expressions-info f (lciu/filepath f))) ([f filepath] (when (lciu/readable-file? f) @@ -67,11 +69,13 @@ :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids-info is))))))))) ; Default is to assume it's a plain text file containing license text(s) (defn file->expressions - "Attempts to determine the SPDX license expression(s) (a set) from the given - file (an InputStream or something that can have an io/input-stream opened on - it). If an InputStream is provided, it must already be open and the associated - filepath should also be provided as the second parameter (it is optional in - other cases)." + "Returns a set of SPDX expressions (Strings) for the given file (an + InputStream or something that can have an io/input-stream opened on it), or + nil if no expressions were found. + + If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." ([f] (file->expressions f (lciu/filepath f))) ([f filepath] (some-> (file->expressions-info f filepath) @@ -79,11 +83,11 @@ set))) (defn zip->expressions-info - "Attempt to detect the SPDX license expression(s) (a map) in a ZIP file. zip may be a - String or a java.io.File, both of which must refer to a ZIP-format compressed - file. + "Returns an expressions-info map for the given ZIP file (a String or a File, + which must refer to a ZIP-format compressed file), or nil if no expressions + were found. - Throws on invalid zip format file." 
+ Throws if the file is not a valid ZIP." [zip] (when (lciu/readable-file? zip) (let [zip-file (io/file zip)] @@ -99,20 +103,20 @@ (when-not (empty? result) (lciei/prepend-source (lciu/filepath zip-file) result)))))))) (defn zip->expressions - "Attempt to detect the SPDX license expression(s) (a set) in a ZIP file. zip may be a - String or a java.io.File, both of which must refer to a ZIP-format compressed - file. + "Returns a set of SPDX expressions (Strings) for the given ZIP file (a String + or a File, which must refer to a ZIP-format compressed file), or nil if no + expressions were found. - Throws on invalid zip format file." + Throws if the file is not a valid ZIP." [zip] (some-> (zip->expressions-info zip) keys set)) (defn- zip-compressed-files - "Returns all probable ZIP compressed files in the given directory, - recursively, as a set of java.io.File objects. dir may be a String or a - java.io.File, either of which must refer to a readable directory." + "Returns a set of all probable ZIP compressed files (Files) in the given + directory, recursively, or nil if there are none. dir may be a String or a + java.io.File, and must refer to a readable directory." [dir] (when (lciu/readable-dir? dir) (some-> (seq (filter #(and (.isFile ^java.io.File %) @@ -122,16 +126,13 @@ set))) (defn dir->expressions-info - "Attempt to detect the SPDX license expression(s) (a set) in a directory. dir - may be a String or a java.io.File, both of which must refer to a - readable directory. + "Returns an expressions-info map for the given dir (a String or a File, + which must refer to a readable directory), or nil if no expressions were + found. The optional `opts` map has these keys: * `include-zips?` (boolean, default false) - controls whether zip compressed - files found in the directory are included in the scan or not - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." 
+ files found in the directory are recursively included in the scan or not" ([dir] (dir->expressions-info dir nil)) ([dir {:keys [include-zips?] :or {include-zips? false}}] (when (lciu/readable-dir? dir) @@ -143,16 +144,13 @@ file-expressions)))))) (defn dir->expressions - "Attempt to detect the SPDX license expression(s) (a map) in a directory. dir - may be a String or a java.io.File, both of which must refer to a - readable directory. + "Returns a set of SPDX expressions (Strings) for the given dir (a String or + a File, which must refer to a readable directory), or nil if no expressions + were found. The optional `opts` map has these keys: * `include-zips?` (boolean, default false) - controls whether zip compressed - files found in the directory are included in the scan or not - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + files found in the directory are recursively included in the scan or not" ([dir] (dir->expressions dir nil)) ([dir opts] (some-> (dir->expressions-info dir opts) diff --git a/src/lice_comb/impl/expressions_info.clj b/src/lice_comb/impl/expressions_info.clj index f7bb77b..4346cc3 100644 --- a/src/lice_comb/impl/expressions_info.clj +++ b/src/lice_comb/impl/expressions_info.clj @@ -22,8 +22,8 @@ (:require [clojure.string :as s])) (defn prepend-source - "Prepends the given source s (a String) onto all metadata sub-maps in m (a - lice-comb expressions-info map)." + "Prepends the given source s (a String) onto the :source sequence of all + expression-info sub-maps in m (an expressions-info map)." [s m] (if (or (s/blank? s) (empty? m)) m @@ -40,11 +40,34 @@ m)))) (defn merge-maps - "Merges any number of lice-comb expressions-info maps, by concatenating and - de-duping values for the same key (expression)." + "Merges any number of expressions-info maps, by concatenating and de-duping + values for the same key (expression)." 
[& maps] (let [maps (filter identity maps)] (when-not (empty? maps) (let [grouped-maps (group-by first (mapcat identity maps))] (into {} (map #(vec [% (seq (distinct (mapcat second (get grouped-maps %))))]) (keys grouped-maps))))))) + +(def ^:private confidence-sort { + :low 0 + :medium 1 + :high 2}) + +(defn sort-confidences + "Sorts a sequence of confidences from low to high." + [cs] + (when cs + (sort-by confidence-sort cs))) + +(defn lowest-confidence + "Returns the lowest confidence in a sequence of confidences." + [cs] + (when cs + (first (sort-confidences cs)))) + +(defn highest-confidence + "Returns the highest confidence in a sequence of confidences." + [cs] + (when cs + (last (sort-confidences cs)))) diff --git a/src/lice_comb/impl/http.clj b/src/lice_comb/impl/http.clj index 935c109..49083bf 100644 --- a/src/lice_comb/impl/http.clj +++ b/src/lice_comb/impl/http.clj @@ -21,6 +21,7 @@ the public API of lice-comb and may change without notice." (:require [clojure.string :as s] [clojure.java.io :as io] + [clojure.java.shell :as sh] [hato.client :as hc] [lice-comb.impl.utils :as lciu])) @@ -74,6 +75,41 @@ (catch Exception _ nil)))) +(def ^:private local-maven-repo-d + (delay + (try + ; The command: + ; mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout + ; determines where the local repository is located. + (let [sh-result (sh/sh "mvn" "help:evaluate" "-Dexpression=settings.localRepository" "-q" "-DforceStdout")] + (if (zero? (:exit sh-result)) + (s/trim (:out sh-result)) + (str (System/getProperty "user.home") "/.m2/repository"))) + (catch java.io.IOException _ + (str (System/getProperty "user.home") "/.m2/repository"))))) + +; TODO: make this configurable +(def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"}) + +(defn gav->pom-uri + "Returns a java.net.URI pointing to the POM for the given GAV (a map), or nil + if one cannot be found. 
The returned URI is guaranteed to be resolvable - + either to a file that exists in the local Maven cache, or to an HTTP- + accessible resource on a remote Maven repository (i.e. Maven Central or + Clojars) that resolves." + ([{:keys [group-id artifact-id version]}] (gav->pom-uri group-id artifact-id version)) + ([group-id artifact-id version] + (when (and (not (s/blank? group-id)) + (not (s/blank? artifact-id)) + (not (s/blank? version))) + (let [gav-path (str (s/replace group-id "." "/") "/" artifact-id "/" version "/" artifact-id "-" version ".pom") + local-pom (io/file (str @local-maven-repo-d "/" gav-path))] + (if (and (.exists local-pom) + (.isFile local-pom)) + (.toURI local-pom) + (when-let [remote-uri (first (filter uri-resolves? (map #(str % "/" gav-path) remote-maven-repos)))] + (java.net.URI. remote-uri))))))) + (defn init! "Initialises this namespace upon first call (and does nothing on subsequent calls), returning nil. Consumers of this namespace are not required to call @@ -81,4 +117,5 @@ allow explicit control of the cost of initialisation to callers who need it." [] @http-client-d + @local-maven-repo-d nil) diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index f98dc5c..d6a623b 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -109,19 +109,8 @@ fix-mpl-2)) (defmulti text->ids - "Attempts to determine the SPDX license and/or exception identifier(s) (a map) - within the given license text (a String, Reader, InputStream, or something - that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). - The result has metadata attached that describes how the identifiers were - determined. - - Notes: - * this function implements the SPDX matching guidelines (via clj-spdx). - See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ - * the caller is expected to open & close a Reader or InputStream passed to - this function (e.g. 
using clojure.core/with-open) - * you cannot pass a String representation of a filename to this method - you - should pass filenames through clojure.java.io/file first" + "Returns an expressions-info map for the given license text, or nil if no matches + are found." {:arglists '([text])} type) @@ -151,55 +140,30 @@ (text->ids r)))) (defn uri->ids - "Returns the SPDX license and/or exception identifiers (a map) for the given - uri, or nil if there aren't any. It does this via two steps: - 1. Seeing if the given URI is in the license or exception list, and returning - the ids of the associated licenses and/or exceptions if so - 2. Attempting to retrieve the plain text content of the given URI and - performing full SPDX license matching on the result if there was one - - Notes on step 1: - 1. this does not perform exact matching; rather it simplifies URIs in various - ways to avoid irrelevant differences, including performing a - case-insensitive comparison, ignoring protocol differences (http vs https), - ignoring extensions representing MIME types (.txt vs .html, etc.), etc. - See lice-comb.impl.utils/simplify-uri for exact details. - 2. URIs in the SPDX license and exception lists are not unique - the same URI - may represent multiple licenses and/or exceptions. - - The result has metadata attached that describes how the identifiers were - determined." + "Returns an expressions-info map for the given license uri, or nil if no matches + are found." [uri] (when-not (s/blank? uri) (lciei/prepend-source uri (manual-fixes (let [suri (lciu/simplify-uri uri)] - ; 1. see if the URI string matches any of the URIs in the SPDX license list (using "simplified" URIs) - (if-let [ids (get @lcis/index-uri-to-id-d suri)] - (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids)) - ; 2. attempt to retrieve the text/plain contents of the uri and perform full license matching on it + (or ; 1. 
Does the simplified URI match any of the simplified URIs in the SPDX license or exception lists? + (when-let [ids (get @lcis/index-uri-to-id-d suri)] + (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids))) + + ; 2. attempt to retrieve the text/plain contents of the uri and perform license text matching on it (when-let [license-text (lcihttp/get-text uri)] (when-let [ids (text->ids license-text)] ids)))))))) (defn- string->ids-info - "Converts the given String into a sequence of singleton maps (NOT A LICE-COMB - EXPRESSION INFO MAP!), each of which has a key is that is an SPDX identifier - (either a listed SPDX license or exception id), and whose value is a list of - meta-information about how that identifier was found. The result sequence is - ordered in the same order of appearance as the source values in s. - - If no listed SPDX license or exception identifiers are found, returns a - singleton sequence containing a map with a lice-comb specific 'unlisted' - LicenseRef. - - This involves: - 1. Seeing if it's a listed license or exception id - 2. Seeing if it's a listed license or exception name - 3. Checking if the value is a URI, and if so performing URI matching on it - 4. Using regexes to attempt to identify the license(s) and/or - exception(s) - 5. Returning a lice-comb specific 'unlisted' LicenseRef" + "Converts the given string (a fragment of a license name) into a sequence of + singleton expressions-info maps (one per expression), ordered in the same + order of appearance as they appear in s. + + If no listed SPDX license or exception identifiers are found in s, returns a + sequence containing a single expressions-info map with a lice-comb specific + 'unlisted' LicenseRef that encodes s." [s] (when-not (s/blank? s) (let [s (s/trim s) @@ -232,8 +196,8 @@ (seq (filter #(or (not (string? %)) (not (s/blank? 
%))) coll)))) (defn- map-split-and-interpose - "Maps over the given sequence, splitting strings using the given regex - and interposing the given value, returning a (flattened) sequence." + "Maps over the given sequence, splitting strings using the given regex re and + interposing the given value int, returning a (flattened) sequence." [re int coll] (mapcat #(if-not (string? %) [%] @@ -259,6 +223,14 @@ (def ^:private push conj) ; With lists-as-stacks conj == push +(defn- calculate-confidence-for-expression + "Calculate the confidence for an expression, as the lowest confidence in the + expression-infos for the identifiers that make up the expression" + [expression-infos] + (if-let [confidence (lciei/lowest-confidence (filter identity (map :confidence expression-infos)))] + confidence + :high)) ; For when none of the components have a confidence (i.e. they're all :type :declared) + (defn- process-expression-element "Processes a single new expression element e (either a keyword representing an SPDX operator, or a map representing an SPDX identifier) in the context of @@ -273,6 +245,7 @@ (case (count (take-while keyword? s)) ; No keywords? Push e onto s 0 (push s e) + ; One keyword? See if we should "collapse" the prior value, the keyword and e into an SPDX expression fragment and push the result onto s 1 (let [kw (peek s) operator (s/upper-case (name kw)) @@ -283,17 +256,19 @@ (push s-minus-2 e) ; s had one keyword on it (which is invalid), so drop it and push e on (if (or (not= :with kw) ; If the prior keyword was :and or :or, or :with and the current element is a listed exception id, build an SPDX expression fragment and push the result onto s (se/listed-id? 
(first (keys e)))) - (let [k (s/join " " [(first (keys prior)) operator (first (keys e))]) - v (distinct (concat (list {:type :concluded :confidence :low :strategy :expression-inference}) - (first (vals prior)) - (first (vals e))))] + (let [k (s/join " " [(first (keys prior)) operator (first (keys e))]) + expression-infos (concat (first (vals prior)) (first (vals e))) + v (distinct (concat (list {:type :concluded :confidence (calculate-confidence-for-expression expression-infos) :strategy :expression-inference}) + expression-infos))] (push s-minus-2 {k v})) (push s-minus-1 e)))) ; We had a :with operator without a valid exception id following it, so simply drop the :with keyword from the stack and push the current element on + ; Many keywords? That's invalid (since we dedupe them when they get pushed on, so this means they're different), so drop all of them and push e onto s (push (drop-while keyword? s) e)))) -(defn- build-spdx-expressions-map - "Builds a single SPDX expressions map from the given list of keywords and SPDX expession maps." +(defn- build-expressions-info-map + "Builds an expressions-info map from the given sequence of keywords and SPDX + expression maps." [l] (loop [result '() f (first l) @@ -302,11 +277,8 @@ (recur (process-expression-element result f) (first r) (rest r)) (manual-fixes (into {} result))))) -(defn attempt-to-build-expressions - "Attempts to build SPDX expression(s) (a map) from the given name. - - The keys in the maps are the detected SPDX license and exception identifiers, - and each value contains information about how that identifiers was determined." +(defn name->expressions-info + "Returns an expressions-info map for the given license name." [name] (when-not (s/blank? name) (let [name (s/trim name)] @@ -314,17 +286,14 @@ (or ; 1. Is it a cursed name? (get @cursed-names-d name) - ; 2. Attempt to construct an SPDX expression from the name + ; 2. 
Construct an expressions-info map from the name (some->> (split-on-operators name) (drop-while keyword?) (lc3/rdrop-while keyword?) (map #(if (keyword? %) % (string->ids-info %))) flatten - (filter identity) - (drop-while keyword?) - (lc3/rdrop-while keyword?) seq - build-spdx-expressions-map)))))) + build-expressions-info-map)))))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj index d45b2f2..afbb302 100644 --- a/src/lice_comb/impl/regex_matching.clj +++ b/src/lice_comb/impl/regex_matching.clj @@ -61,12 +61,14 @@ [m] (when m (let [version (get-rencgs m ["version"]) - confidence (if (or (and (s/blank? version) - (not (s/blank? (:latest-ver m)))) - (and (:pad-ver? m) - (not (s/includes? version ".")))) - :low ; We required a version but either didn't get one or it was incomplete - :medium) ; We didn't require a version, or it was complete + confidence (if (s/blank? (:latest-ver m)) + :high ; We didn't need a version + (if (s/blank? version) + :low ; Version not provided at all + (if (and (:pad-ver? m) + (not (s/includes? version "."))) + :medium ; We got a partial version + :high))) ; We got a full version version (if (s/blank? version) (:latest-ver m) version) @@ -101,9 +103,9 @@ clause-count1 clause-count2)) [clause-count confidence] (case preferred-clause-count - ("2" "simplified") ["2" :medium] - ("3" "new" "revised" "modified" "aduna") ["3" :medium] - ("4" "original") ["4" :medium] + ("2" "simplified") ["2" :high] + ("3" "new" "revised" "modified" "aduna") ["3" :high] + ("4" "original") ["4" :high] ["4" :low]) ; Note: we default to 4 clause, since it was the original form of the BSD license suffix (case (get-rencgs m ["suffix"]) "patent" "Patent" @@ -124,7 +126,7 @@ id-with-suffix (str base-id "-" suffix)] (if (contains? 
@lcis/license-ids-d id-with-suffix) ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it [id-with-suffix confidence] - [(assert-listed-id base-id) confidence]))) + [(assert-listed-id base-id) (if (= confidence :low) :low :medium)]))) ; The suffix we got wasn't valid, which knocks down confidence (defn- cc-id-constructor "An SPDX id constructor specific to the Creative Commons family of licenses." @@ -134,10 +136,11 @@ sa? (not (s/blank? (get-rencgs m ["sharealike"]))) version (get-rencgs m ["version"] "") version (s/replace version #"\p{Punct}+" ".") - confidence (if (or (s/blank? version) - (not (s/includes? version "."))) + confidence (if (s/blank? version) :low - :medium) + (if (s/includes? version ".") + :high + :medium)) version (if (s/blank? version) (:latest-ver m) version) @@ -173,10 +176,11 @@ (contains? m "gpl") "GPL") version (get-rencgs m ["version"] "") version (s/replace version #"\p{Punct}+" ".") - confidence (if (or (s/blank? version) - (not (s/includes? version "."))) + confidence (if (s/blank? version) :low - :medium) + (if (s/includes? version ".") + :high + :medium)) version (if (s/blank? version) (:latest-ver m) version) diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index ed0686c..4266011 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -141,7 +141,7 @@ [_]) (defmethod readable-file? java.io.File - [f] + [^java.io.File f] (and f (.exists f) (.canRead f) diff --git a/src/lice_comb/lein.clj b/src/lice_comb/lein.clj index 439c109..edc56ca 100644 --- a/src/lice_comb/lein.clj +++ b/src/lice_comb/lein.clj @@ -17,8 +17,8 @@ ; (ns lice-comb.lein - "Functionality related to finding and determining license information from - dependencies in Leiningen's dependency vector format." + "Functionality related to combing Leiningen dependency sequences for license + information." 
(:require [dom-top.core :as dom] [lice-comb.deps :as lcd] [lice-comb.impl.expressions-info :as lciei])) diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj index c0d614c..97579bd 100644 --- a/src/lice_comb/matching.clj +++ b/src/lice_comb/matching.clj @@ -17,8 +17,39 @@ ; (ns lice-comb.matching - "Matching functionality, some of which is provided by - https://github.com/pmonks/clj-spdx" + "The core matching functionality within lice-comb. Matching is provided for + three categories of input, and uses a different process for each: + 1. License names + 2. License uris + 3. License texts + + Each matching fn has two variants: + 1. A 'simple' version that returns a set of SPDX expressions (Strings) + 2. An 'info' version that returns an 'expressions-info map' + + An expressions-info map has this structure: + * key: an SPDX expression (String, which may be a single SPDX license + identifier) + * value: a sequence of 'expression-info' maps + + Each lice-comb expression-info map has this structure: + * :id (String, optional): + The SPDX identifier within the expression that this info map refers to. + * :type (either :declared or :concluded, mandatory): + Whether this identifier was unambiguously declared within the input or + was instead concluded by lice-comb (see SPDX specification for more detail + on the definition of these two terms). + * :confidence (one of: :high, :medium, :low, only provided when :type = :concluded): + Indicates the approximate confidence lice-comb has in its conclusions for + this particular SPDX identifier. + * :strategy (a keyword, mandatory): + The strategy lice-comb used to determine this particular SPDX identifier. + See the source for lice-comb.utils for an up-to-date list of all possible + values. 
+ * :source (a sequence of Strings): + The list of sources used to arrive at this SPDX identifier, starting from + the most general (the input) to the most specific (the smallest subset of + the input that was used to make this determination)." (:require [clojure.string :as s] [spdx.licenses :as sl] [spdx.exceptions :as se] @@ -57,12 +88,9 @@ :else id))) (defn text->ids-info - "Attempts to determine the SPDX license and/or exception identifier(s) (a map) - within the given license text (a String, Reader, InputStream, or something - that is accepted by clojure.java.io/reader - File, URL, URI, Socket, etc.). - - The keys in the maps are the detected SPDX license and exception identifiers, - and each value contains information about how that identifiers was determined. + "Returns an expressions-info map for the given license text (a String, Reader, + InputStream, or something that is accepted by clojure.java.io/reader - File, + URL, URI, Socket, etc.), or nil if no expressions were found. Notes: * this function implements the SPDX matching guidelines (via clj-spdx). @@ -70,18 +98,15 @@ * the caller is expected to open & close a Reader or InputStream passed to this function (e.g. using clojure.core/with-open) * you cannot pass a String representation of a filename to this method - you - should pass filenames through clojure.java.io/file first - - The result has metadata attached that describes how the identifiers were - determined." + should pass filenames through clojure.java.io/file (or similar) first" [text] (lcim/text->ids text)) (defn text->ids - "Attempts to determine the SPDX license and/or exception identifier(s) (a set - of Strings) within the given license text (a String, Reader, InputStream, or - something that is accepted by clojure.java.io/reader - File, URL, URI, Socket, - etc.). 
+ "Returns a set of SPDX expressions (Strings) for the given license text (a + String, Reader, InputStream, or something that is accepted by + clojure.java.io/reader - File, URL, URI, Socket, etc.), or nil if no + expressions were found. Notes: * this function implements the SPDX matching guidelines (via clj-spdx). @@ -89,17 +114,22 @@ * the caller is expected to open & close a Reader or InputStream passed to this function (e.g. using clojure.core/with-open) * you cannot pass a String representation of a filename to this method - you - should pass filenames through clojure.java.io/file first - - The result has metadata attached that describes how the identifiers were - determined." + should pass filenames through clojure.java.io/file (or similar) first" [text] (some-> (text->ids-info text) keys set)) (defn uri->ids-info - "Returns the SPDX license and/or exception identifiers (a map) for the given + "Returns an expressions-info map for the given license uri (a String, URL, or + URI). + + Notes: + * This is done + + + + Returns the SPDX license and/or exception identifiers (a map) for the given uri, or nil if there aren't any. It does this via two steps: 1. Seeing if the given URI is in the license or exception list, and returning the ids of the associated licenses and/or exceptions if so @@ -142,10 +172,10 @@ set)) (defn name->expressions-info - "Attempts to determine the SPDX license expression(s) (a map) from the given - 'license name' (a String), or nil if there aren't any. This involves: + "Returns a lice-comb expressions-info map for the given 'license name' (a + String), or nil if there isn't one. This involves: 1. Determining whether the name is a valid SPDX license expression, and if so - normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it + normalising it (see clj-spdx's spdx.expressions/normalise fn) 2. Checking if the name is actually a URI, and if so performing URL matching on it (as per url->ids-info) 3. 
attempting to construct one or more SPDX license expressions from the @@ -165,7 +195,7 @@ ids {(lcis/name->unlisted name) (list {:type :concluded :confidence :low :strategy :unlisted :source (list name)})}) ; It was a URL, but we weren't able to resolve it to any ids, so return it as unlisted ; 3. Attempt to build SPDX expression(s) from the name - (lcim/attempt-to-build-expressions name)))))) + (lcim/name->expressions-info name)))))) (defn name->expressions "Attempts to determine the SPDX license expression(s) (a set of Strings) from diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index 8d172c3..857f1b1 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -17,12 +17,10 @@ ; (ns lice-comb.maven - "Functionality related to finding and determining license information from - Maven POMs." + "Functionality related to combing Maven POMs for license information." (:require [clojure.string :as s] [clojure.java.io :as io] [clojure.data.xml :as xml] - [clojure.java.shell :as sh] [clojure.tools.logging :as log] [xml-in.core :as xi] [lice-comb.matching :as lcmtch] @@ -31,38 +29,6 @@ [lice-comb.impl.http :as lcihttp] [lice-comb.impl.utils :as lciu])) -(def ^:private local-maven-repo-d - (delay - (try - ; The command: - ; mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout - ; determines where the local repository is located. - (let [sh-result (sh/sh "mvn" "help:evaluate" "-Dexpression=settings.localRepository" "-q" "-DforceStdout")] - (if (zero? 
(:exit sh-result)) - (s/trim (:out sh-result)) - (str (System/getProperty "user.home") "/.m2/repository"))) - (catch java.io.IOException _ - (str (System/getProperty "user.home") "/.m2/repository"))))) - -; TODO: make this configurable -(def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"}) - -(defn pom-uri-for-gav - "Attempts to locate the POM for the given GAV, which is a URI that may point - to a file in the local Maven repository or a remote Maven repository (e.g. on - Maven Central or Clojars)." - ([{:keys [group-id artifact-id version]}] (pom-uri-for-gav group-id artifact-id version)) - ([group-id artifact-id version] - (when (and (not (s/blank? group-id)) - (not (s/blank? artifact-id)) - (not (s/blank? version))) - (let [gav-path (str (s/replace group-id "." "/") "/" artifact-id "/" version "/" artifact-id "-" version ".pom") - local-pom (io/file (str @local-maven-repo-d "/" gav-path))] - (if (and (.exists local-pom) - (.isFile local-pom)) - (.toURI local-pom) - (first (filter lcihttp/uri-resolves? (map #(str % "/" gav-path) remote-maven-repos)))))))) - (defn- licenses-from-pair "Attempts to determine the license(s) (a map) from a POM license name/URL pair. Returns nil if no matches were found." @@ -101,16 +67,13 @@ (xml-find-first-string xml ks2))) (defmulti pom->expressions-info - "Attempt to detect the license expression(s) (a map) reported in a pom.xml - file. pom may be a java.io.InputStream, or anything that can be opened by - clojure.java.io/input-stream. + "Returns an expressions-info map for the given POM file (an InputStream or + something that can have an io/input-stream opened on it), or nil if no + expressions were found. - Note that if an InputStream is provided: - 1. it's the caller's responsibility to open and close it - 2. 
a filepath *must* be provided along with the stream (the 2nd arg) - - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." + If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." {:arglists '([pom] [pom filepath])} (fn [& args] (type (first args)))) @@ -142,7 +105,7 @@ :artifact-id (lciu/strim (first (xi/find-first parent-no-ns [:artifactId]))) :version (lciu/strim (first (xi/find-first parent-no-ns [:version])))}))] (when-not (empty? parent-gav) - (pom->expressions-info (pom-uri-for-gav parent-gav)))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep + (pom->expressions-info (lcihttp/gav->pom-uri parent-gav)))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep (defmethod pom->expressions-info :default ([pom] (pom->expressions-info pom (lciu/filepath pom))) @@ -154,16 +117,13 @@ (log/info (str "'" filepath "'") "contains no license information")))))) (defn pom->expressions - "Attempt to detect the license expression(s) (a set) reported in a pom.xml - file. pom may be a java.io.InputStream, or anything that can be opened by - clojure.java.io/input-stream. - - Note that if an InputStream is provided: - 1. it's the caller's responsibility to open and close it - 2. a filepath *must* be provided along with the stream (the 2nd arg) + "Returns a set of SPDX expressions (Strings) for the given POM file (an + InputStream or something that can have an io/input-stream opened on it), or + nil if no expressions were found. - The result has metadata attached that describes how the identifiers in the - expression(s) were determined." 
+ If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." ([pom] (pom->expressions pom (lciu/filepath pom))) ([pom filepath] (some-> (pom->expressions-info pom filepath) @@ -177,5 +137,4 @@ allow explicit control of the cost of initialisation to callers who need it." [] (lcmtch/init!) - @local-maven-repo-d nil) diff --git a/src/lice_comb/utils.clj b/src/lice_comb/utils.clj index 402dc9b..8736849 100644 --- a/src/lice_comb/utils.clj +++ b/src/lice_comb/utils.clj @@ -32,7 +32,7 @@ :unlisted "fallback to unlisted LicenseRef" :manual-verification "manual verification"}) -(defn- info-keyfn +(defn- expression-info-keyfn "sort-by keyfn for lice-comb info maps" [metadata] (str (case (:id metadata) @@ -61,12 +61,12 @@ :unlisted "8" :manual-verification "9"))) -(defn- license-info-element->string - "Converts the info list for the given identifier into a human-readable - string, using the information in license-info map m." +(defn- expression-info->string + "Converts the given expression-info map into a human-readable string, using + the information in license-info map m." [m id] (str id ":\n" - (when-let [info-list (sort-by info-keyfn (seq (get m id)))] + (when-let [info-list (sort-by expression-info-keyfn (seq (get m id)))] (s/join "\n" (map #(str " " (when-let [md-id (:id %)] (when (not= id md-id) (str md-id " "))) (case (:type %) @@ -77,11 +77,11 @@ (when-let [source (seq (:source %))] (str "\n Source:\n > " (s/join "\n > " source)))) info-list))))) -(defn license-info->string - "Converts lice-comb license-info map m into a human-readable string. This +(defn expressions-info->string + "Converts the given expressions-info map into a human-readable string. This function is mostly intended for debugging / developer discovery purposes, and the content and format of the output may change without warning." 
[m] (when m (let [ids (sort (keys m))] - (s/join "\n\n" (map (partial license-info-element->string m) ids))))) + (s/join "\n\n" (map (partial expression-info->string m) ids))))) diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 7954ecf..afa4202 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -721,11 +721,22 @@ (testing "SPDX expressions" (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :declared :strategy :spdx-expression :source (list "GPL-2.0 WITH Classpath-exception-2.0")})} (name->expressions-info "GPL-2.0 WITH Classpath-exception-2.0")))) + (testing "License ids that aren't SPDX ids" + (is (valid-info= {"Apache-2.0" (list {:id "Apache-2.0" :type :concluded :confidence :high :strategy :regex-matching :source (list "Apache Software License version 2.0")})} + (name->expressions-info "Apache Software License version 2.0"))) + (is (valid-info= {"Apache-2.0" (list {:id "Apache-2.0" :type :concluded :confidence :medium :strategy :regex-matching :source (list "Apache License 2")})} + (name->expressions-info "Apache License 2"))) + (is (valid-info= {"Apache-2.0" (list {:id "Apache-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list "Apache")})} + (name->expressions-info "Apache")))) (testing "Single expressions that are not valid SPDX" (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :concluded :confidence :low :strategy :expression-inference :source (list "GNU General Public License, version 2 with the GNU Classpath Exception")} - {:id "GPL-2.0-only" :type :concluded :confidence :low :strategy :regex-matching :source (list "GNU General Public License, version 2 with the GNU Classpath Exception" "GNU General Public License, version 2")} - {:id "Classpath-exception-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list "GNU General Public License, version 2 with the GNU Classpath Exception" 
"the GNU Classpath Exception" "Classpath Exception")})} - (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception")))) + {:id "GPL-2.0-only" :type :concluded :confidence :medium :strategy :regex-matching :source (list "GNU General Public License, version 2 with the GNU Classpath Exception" "GNU General Public License, version 2")} + {:id "Classpath-exception-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list "GNU General Public License, version 2 with the GNU Classpath Exception" "the GNU Classpath Exception" "Classpath Exception")})} + (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :concluded :confidence :high :strategy :expression-inference :source (list "GNU General Public License, version 2.0 with the Classpath Exception 2.0")} + {:id "GPL-2.0-only" :type :concluded :confidence :high :strategy :regex-matching :source (list "GNU General Public License, version 2.0 with the Classpath Exception 2.0" "GNU General Public License, version 2.0")} + {:id "Classpath-exception-2.0" :type :concluded :confidence :high :strategy :regex-matching :source (list "GNU General Public License, version 2.0 with the Classpath Exception 2.0" "the Classpath Exception 2.0" "Classpath Exception 2.0")})} + (name->expressions-info "GNU General Public License, version 2.0 with the Classpath Exception 2.0")))) (testing "Multiple expressions" (is (valid-info= {"BSD-4-Clause" (list {:id "BSD-4-Clause" :type :concluded :confidence :low :strategy :regex-matching :source (list "MIT / BSD" "BSD")}) "MIT" (list {:id "MIT" :type :concluded :confidence :high :strategy :regex-matching :source (list "MIT / BSD" "MIT")})} @@ -733,7 +744,7 @@ (testing "Some names from Clojars" (is (valid-info= {"BSD-3-Clause" (list {:id "BSD-3-Clause" :type :concluded :confidence :medium :strategy :spdx-listed-uri :source 
(list "https://opensource.org/licenses/BSD-3-Clause")})} (name->expressions-info "https://opensource.org/licenses/BSD-3-Clause"))) - (is (valid-info= {"EPL-2.0" (list {:id "EPL-2.0" :type :concluded :confidence :medium :strategy :regex-matching :source (list "Eclipse Public License - v 2.0")})} + (is (valid-info= {"EPL-2.0" (list {:id "EPL-2.0" :type :concluded :confidence :high :strategy :regex-matching :source (list "Eclipse Public License - v 2.0")})} (name->expressions-info "Eclipse Public License - v 2.0"))))) (deftest uri->ids-tests From f5db753d502367c49c8b2f009b1a5a9256f311c0 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Thu, 7 Sep 2023 22:49:32 -0700 Subject: [PATCH 32/34] :books: Fix formatting --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8181ee0..ca6bc8f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ # lice-comb -A Clojure library for software _lice_nse detection. It does this by _comb_ing through tools.deps and Leiningen dependencies, directory structures, and JAR & ZIP files, and attempting to detect what SPDX license expression(s) they contain. +A Clojure library for software *lice*nse detection. It does this by *comb*ing through tools.deps and Leiningen dependencies, directory structures, and JAR & ZIP files, and attempting to detect what SPDX license expression(s) they contain. This library leverages, and is inspired by, the *excellent* [SPDX project](https://spdx.dev/). It's a great shame that it doesn't have greater traction in the Java & Clojure (and wider open source) communities. If you're new to SPDX and would prefer to read a primer rather than dry specification documents, I can thoroughly recommend [David A. Wheeler's SPDX Tutorial](https://github.com/david-a-wheeler/spdx-tutorial#spdx-tutorial). 
From 21cb69ca4de1d70662b92644f33ff3ba2d4d040a Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Fri, 8 Sep 2023 11:21:28 -0700 Subject: [PATCH 33/34] :arrow_up: Upgrade dependencies --- .github/workflows/ci.yml | 2 +- .github/workflows/deploy.yml | 2 +- .github/workflows/docs.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0d33e0..d0f35df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@12.0 + - uses: DeLaGuardo/setup-clojure@12.1 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index efe2fb5..3c059d4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@12.0 + - uses: DeLaGuardo/setup-clojure@12.1 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e38ad38..e2cf762 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,7 @@ jobs: with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@12.0 + - uses: DeLaGuardo/setup-clojure@12.1 with: cli: latest - uses: actions/cache@v3 From 787dc8478738ead0a05ab26816216e901b2c4cf5 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Sat, 9 Sep 2023 10:47:19 -0700 Subject: [PATCH 34/34] :art: More tweaks --- README.md | 10 +++++----- src/lice_comb/impl/matching.clj | 4 ++-- test/lice_comb/matching_test.clj | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ca6bc8f..fe25e56 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ # lice-comb -A Clojure library for software *lice*nse detection. 
It does this by *comb*ing through tools.deps and Leiningen dependencies, directory structures, and JAR & ZIP files, and attempting to detect what SPDX license expression(s) they contain. +A Clojure library for software *lice*nse detection. It does this by *comb*ing through tools.deps and Leiningen dependencies, directory structures, and JAR & ZIP files, attempting to detect what license(s) they contain, and then normalising them into [SPDX license expression(s)](https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/). This library leverages, and is inspired by, the *excellent* [SPDX project](https://spdx.dev/). It's a great shame that it doesn't have greater traction in the Java & Clojure (and wider open source) communities. If you're new to SPDX and would prefer to read a primer rather than dry specification documents, I can thoroughly recommend [David A. Wheeler's SPDX Tutorial](https://github.com/david-a-wheeler/spdx-tutorial#spdx-tutorial). @@ -49,7 +49,7 @@ $ deps-try com.github.pmonks/lice-comb ### Demo ```clojure -;; License name and full text matching +;; License name, uri and full text matching (require '[lice-comb.matching :as lcm]) ; Initialise the matching namespace @@ -61,15 +61,15 @@ $ deps-try com.github.pmonks/lice-comb (lcm/name->expressions "Apache") ;=> #{"Apache-2.0"} -(lcm/name->expressions "The MIT license") -;=> #{"MIT"} - (lcm/name->expressions "GNU Public License 2.0 w/ the GNU Classpath Exception") ;=> #{"GPL-2.0-only WITH Classpath-exception-2.0"} (lcm/text->ids (slurp "https://www.apache.org/licenses/LICENSE-2.0.txt")) ;=> #{"Apache-2.0"} +(lcm/uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt") +;=> #{"Apache-2.0"} + ;; License extraction from Maven poms, including ones that aren't locally downloaded (require '[lice-comb.maven :as lcmvn]) diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index d6a623b..243ee30 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj 
@@ -149,7 +149,7 @@ (let [suri (lciu/simplify-uri uri)] (or ; 1. Does the simplified URI match any of the simplified URIs in the SPDX license or exception lists? (when-let [ids (get @lcis/index-uri-to-id-d suri)] - (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list uri)})) ids))) + (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-listed-uri :source (list uri)})) ids))) ; 2. attempt to retrieve the text/plain contents of the uri and perform license text matching on it (when-let [license-text (lcihttp/get-text uri)] @@ -175,7 +175,7 @@ ; 2. Is it the name of one or more SPDX licenses or exceptions? (when-let [ids (get @lcis/index-name-to-id-d (s/lower-case s))] - (map #(hash-map % (list {:id % :type :concluded :confidence :medium :strategy :spdx-listed-name :source (list s)})) ids)) + (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-listed-name :source (list s)})) ids)) ; 3. Might it be a URI? 
(this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central) (when-let [ids (uri->ids s)] diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index afa4202..0945dea 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -742,7 +742,7 @@ "MIT" (list {:id "MIT" :type :concluded :confidence :high :strategy :regex-matching :source (list "MIT / BSD" "MIT")})} (name->expressions-info "MIT / BSD")))) (testing "Some names from Clojars" - (is (valid-info= {"BSD-3-Clause" (list {:id "BSD-3-Clause" :type :concluded :confidence :medium :strategy :spdx-listed-uri :source (list "https://opensource.org/licenses/BSD-3-Clause")})} + (is (valid-info= {"BSD-3-Clause" (list {:id "BSD-3-Clause" :type :concluded :confidence :high :strategy :spdx-listed-uri :source (list "https://opensource.org/licenses/BSD-3-Clause")})} (name->expressions-info "https://opensource.org/licenses/BSD-3-Clause"))) (is (valid-info= {"EPL-2.0" (list {:id "EPL-2.0" :type :concluded :confidence :high :strategy :regex-matching :source (list "Eclipse Public License - v 2.0")})} (name->expressions-info "Eclipse Public License - v 2.0")))))