diff --git a/.github/ISSUE_TEMPLATE/Invalid_id_constructed.md b/.github/ISSUE_TEMPLATE/Invalid_id_constructed.md new file mode 100644 index 0000000..1dfbd7a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/Invalid_id_constructed.md @@ -0,0 +1,13 @@ +--- +name: 🐛 Invalid SPDX identifier constructed +about: When the library constructs an invalid SPDX identifier. 😢 + +--- + +## `lice-comb` API(s) you were calling, if known: + +_e.g. `lice-comb.deps/deps-licenses`_ + +## Input data that you provided to that API: + +_e.g. a license name, or the URI of a file containing the license text, or the `tools.deps` coordinate of the dependency, etc._ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 92fdba4..d0f35df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,12 +17,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-java@v3 with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@11.0 + - uses: DeLaGuardo/setup-clojure@12.1 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/dependencies.yml b/.github/workflows/dependencies.yml index 57bb42b..d971671 100644 --- a/.github/workflows/dependencies.yml +++ b/.github/workflows/dependencies.yml @@ -11,5 +11,5 @@ jobs: container: image: uochan/antq steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - run: java -jar /tmp/antq/antq.jar --skip=pom --error-format="::error file={{file}}::{{message}}" diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6128aff..3c059d4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,14 +10,14 @@ jobs: environment: clojars steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # Make sure we get the full history, or else the version number gets screwed up - uses: actions/setup-java@v3 with: distribution: 'temurin' java-version: 17 - - 
uses: DeLaGuardo/setup-clojure@11.0 + - uses: DeLaGuardo/setup-clojure@12.1 with: cli: latest - uses: actions/cache@v3 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ad96b57..e2cf762 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,12 +9,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-java@v3 with: distribution: 'temurin' java-version: 17 - - uses: DeLaGuardo/setup-clojure@11.0 + - uses: DeLaGuardo/setup-clojure@12.1 with: cli: latest - uses: actions/cache@v3 diff --git a/.gitignore b/.gitignore index 7c96961..131b017 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .idea +.nvd *.iml pom.xml pom.xml.asc diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..223bc5f --- /dev/null +++ b/NOTICE @@ -0,0 +1,4 @@ +lice-comb +Copyright © 2021 Peter Monks (https://github.com/pmonks) + +This project contains source code for rdrop-while, which is copyright Joshua Suskalo (https://github.com/IGJoshua) 2023 and licensed as "CC0-1.0 OR MIT". For details, see https://discord.com/channels/729136623421227082/732641743723298877/1141786961875583097. diff --git a/README.md b/README.md index dfb2c8a..fe25e56 100644 --- a/README.md +++ b/README.md @@ -9,36 +9,134 @@ # lice-comb -A Clojure library for software license detection. It does this by combing through text, files, and even entire directory structures, and attempting to detect what license(s) they contain. +A Clojure library for software *lice*nse detection. It does this by *comb*ing through tools.deps and Leiningen dependencies, directory structures, and JAR & ZIP files, attempting to detect what license(s) they contain, and then normalising them into [SPDX license expression(s)](https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/). This library leverages, and is inspired by, the *excellent* [SPDX project](https://spdx.dev/). 
It's a great shame that it doesn't have greater traction in the Java & Clojure (and wider open source) communities. If you're new to SPDX and would prefer to read a primer rather than dry specification documents, I can thoroughly recommend [David A. Wheeler's SPDX Tutorial](https://github.com/david-a-wheeler/spdx-tutorial#spdx-tutorial). -## Using the library +## System Requirements -### Documentation +* `lice-comb` (all versions) requires an internet connection. -[API documentation is available here](https://pmonks.github.io/lice-comb/). +* `lice-comb` (all versions) assumes Maven is installed and in the `PATH` (but has fallback logic if it isn't available). -[An FAQ is available here](https://github.com/pmonks/lice-comb/wiki/FAQ). +* `lice-comb` (v2.0+) requires JDK 11 or higher. + +## Installation + +`lice-comb` is available as a Maven artifact from [Clojars](https://clojars.org/com.github.pmonks/lice-comb). + +### Trying it Out + +#### Clojure CLI -### Dependency +```shell +$ # Where #.#.# is replaced with an actual version number (see badge above) +$ clj -Sdeps '{:deps {com.github.pmonks/lice-comb {:mvn/version "#.#.#"}}}' +``` + +#### Leiningen + +```shell +$ lein try com.github.pmonks/lice-comb +``` -Express the correct maven dependencies in your `deps.edn`: +#### deps-try -```edn -{:deps {com.github.pmonks/lice-comb {:mvn/version "LATEST_CLOJARS_VERSION"}}} +```shell +$ deps-try com.github.pmonks/lice-comb ``` -### Require one or more of the namespaces +### Demo ```clojure -(ns your.ns - (:require [lice-comb.deps :as lcd] - [lice-comb.files :as lcf] - [lice-comb.maven :as lcm] - [lice-comb.spdx :as lcs])) +;; License name, uri and full text matching +(require '[lice-comb.matching :as lcm]) + +; Initialise the matching namespace +; Notes: +; 1. This is slow (takes ~1 minute on my laptop), almost all of which is Spdx-Java-Library's initialisation (see https://github.com/spdx/Spdx-Java-Library/issues/193) +; 2. 
This step is optional, though initialisation will still happen regardless, and when it does you'll incur the same cost +(lcm/init!) + +(lcm/name->expressions "Apache") +;=> #{"Apache-2.0"} + +(lcm/name->expressions "GNU Public License 2.0 w/ the GNU Classpath Exception") +;=> #{"GPL-2.0-only WITH Classpath-exception-2.0"} + +(lcm/text->ids (slurp "https://www.apache.org/licenses/LICENSE-2.0.txt")) +;=> #{"Apache-2.0"} + +(lcm/uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt") +;=> #{"Apache-2.0"} + +;; License extraction from Maven poms, including ones that aren't locally downloaded +(require '[lice-comb.maven :as lcmvn]) + +(lcmvn/pom->expressions (str (System/getProperty "user.home") "/.m2/repository/org/clojure/clojure/1.11.1/clojure-1.11.1.pom")) +;=> #{"EPL-1.0"} + +(lcmvn/pom->expressions "https://repo1.maven.org/maven2/org/springframework/spring-core/6.0.11/spring-core-6.0.11.pom") +;=> #{"Apache-2.0"} + +;; License extraction from tools.deps dependency maps +(require '[lice-comb.deps :as lcd]) + +(lcd/dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.11.1"}]) +;=> #{"EPL-1.0"} + +;; Information about matches (useful for better understanding how lice-comb arrived at a given set of expressions, and +;; how confident it is in the values it's providing) +(lcm/name->expressions-info "Apache-2.0") +;=> {"Apache-2.0" ({:type :declared, :strategy :spdx-expression, :source ("Apache-2.0")})} + +(lcm/name->expressions-info "GNU Public License 2.0 or later w/ the GNU Classpath Exception") +;=> {"GPL-2.0-or-later WITH Classpath-exception-2.0" +; ({:type :concluded, :confidence :low, :strategy :expression-inference, :source ("GNU Public License 2.0 or later w/ the GNU Classpath Exception")} +; {:id "GPL-2.0-or-later", :type :concluded, :confidence :medium, :strategy :regex-matching, :source ("GNU Public License 2.0 or later w/ the GNU Classpath Exception" +; "GNU Public License 2.0 or later")} +; {:id "Classpath-exception-2.0", :type 
:concluded, :confidence :low, :strategy :regex-matching, :source ("GNU Public License 2.0 or later w/ the GNU Classpath Exception" +; "the GNU Classpath Exception" +; "Classpath Exception")})} + +(lcmvn/pom->expressions-info "https://repo.clojars.org/canvas/canvas/0.1.6/canvas-0.1.6.pom") +;=> {"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" +; ({:type :declared, :strategy :spdx-expression, :source ("https://repo.clojars.org/canvas/canvas/0.1.6/canvas-0.1.6.pom" +; "" +; "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0")})} + +;; Pretty print expressions-info +(require '[lice-comb.utils :as lcu]) + +(println (lcu/expressions-info->string (lcd/dep->expressions-info ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) +;=> Apache-2.0: +; Concluded +; Confidence: high +; Strategy: regular expression matching +; Source: +; > com.amazonaws/aws-java-sdk-s3@1.12.129 +; > https://repo.maven.apache.org/maven2/com/amazonaws/aws-java-sdk-s3/1.12.129/aws-java-sdk-s3-1.12.129.pom +; > https://repo.maven.apache.org/maven2/com/amazonaws/aws-java-sdk-pom/1.12.129/aws-java-sdk-pom-1.12.129.pom +; > +; > Apache License, Version 2.0 +nil ``` +### API Documentation + +[API documentation is available here](https://pmonks.github.io/lice-comb/), or [here on cljdoc](https://cljdoc.org/d/com.github.pmonks/lice-comb/). + +[An FAQ is available here](https://github.com/pmonks/lice-comb/wiki/FAQ). 
+ +## Upgrading + +### 1.x -> 2.x + +The implementation of [issue #3](https://github.com/pmonks/lice-comb/issues/3) resulted in a number of unavoidable breaking changes, including: + +* A wholesale change from returning sets of SPDX identifiers to returning sets of SPDX expressions +* The creation of [a dedicated SPDX-specific library (`clj-spdx`)](https://github.com/pmonks/clj-spdx) that leverages [the official SPDX Java library](https://github.com/spdx/Spdx-Java-Library) + ## Contributor Information [Contributor FAQ](https://github.com/pmonks/lice-comb/wiki/FAQ#contributor-faqs) diff --git a/deps.edn b/deps.edn index 2859165..366757f 100644 --- a/deps.edn +++ b/deps.edn @@ -19,12 +19,16 @@ {:paths ["src" "resources"] :deps {org.clojure/tools.logging {:mvn/version "1.2.4"} + commons-validator/commons-validator {:mvn/version "1.7"} org.clojure/data.xml {:mvn/version "0.2.0-alpha8"} - cheshire/cheshire {:mvn/version "5.11.0"} clj-xml-validation/clj-xml-validation {:mvn/version "1.0.2"} - camel-snake-kebab/camel-snake-kebab {:mvn/version "0.4.3"} - tolitius/xml-in {:mvn/version "0.1.1"}} + tolitius/xml-in {:mvn/version "0.1.1"} + hato/hato {:mvn/version "0.9.0"} + dev.weavejester/medley {:mvn/version "1.7.0"} + dom-top/dom-top {:mvn/version "1.0.8"} + miikka/clj-base62 {:mvn/version "0.1.1"} + com.github.pmonks/clj-spdx {:mvn/version "1.0.95"} + com.github.pmonks/rencg {:mvn/version "1.0.34"}} :aliases - {:build {:deps {io.github.clojure/tools.build {:git/tag "v0.9.4" :git/sha "76b78fe"} - com.github.pmonks/pbr {:mvn/version "RELEASE"}} + {:build {:deps {com.github.pmonks/pbr {:mvn/version "RELEASE"}} :ns-default pbr.build}}} diff --git a/pbr.clj b/pbr.clj index 911d20d..cca13cc 100644 --- a/pbr.clj +++ b/pbr.clj @@ -19,7 +19,7 @@ (def lib 'com.github.pmonks/lice-comb) #_{:clj-kondo/ignore [:unresolved-namespace]} -(def version (format "1.0.%s" (b/git-count-revs nil))) +(def version (format "2.0.%s" (b/git-count-revs nil))) (defn set-opts [opts] diff --git 
a/resources/lice_comb/names.edn b/resources/lice_comb/names.edn new file mode 100644 index 0000000..1511f80 --- /dev/null +++ b/resources/lice_comb/names.edn @@ -0,0 +1,8 @@ +; Map of name values seen in the wild that are too ambiguous / cursed to support any reasonable form of automated parsing +{ + ; Seen in https://repo.maven.apache.org/maven2/com/sun/mail/all/1.4.7/all-1.4.7.pom + "GPLv2+CE" {"GPL-2.0-only WITH Classpath-exception-2.0" + ({:type :concluded :confidence :high :strategy :manual-verification :source ("GPLv2+CE")} + {:id "GPL-2.0-only" :type :concluded :confidence :high :strategy :manual-verification :source ("GPLv2+CE" "GPLv2")} + {:id "Classpath-exception-2.0" :type :concluded :confidence :high :strategy :manual-verification :source ("GPLv2+CE" "CE")})} +} \ No newline at end of file diff --git a/src/lice_comb/data.clj b/src/lice_comb/data.clj deleted file mode 100644 index 34795b1..0000000 --- a/src/lice_comb/data.clj +++ /dev/null @@ -1,27 +0,0 @@ -; -; Copyright © 2021 Peter Monks -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; SPDX-License-Identifier: Apache-2.0 -; - -(ns lice-comb.data - "Data handling functionality." - (:require [lice-comb.utils :as u])) - -(defn uri-for-data - "Returns a URI (as a string) for the given data file. May be a local file path or a URI to a remote resource." 
- [file] - (when file - (str (u/getenv "LICE_COMB_DATA_DIR" "https://raw.githubusercontent.com/pmonks/lice-comb/data") file))) diff --git a/src/lice_comb/deps.clj b/src/lice_comb/deps.clj index 09c6480..3483cec 100644 --- a/src/lice_comb/deps.clj +++ b/src/lice_comb/deps.clj @@ -17,88 +17,97 @@ ; (ns lice-comb.deps - "deps (in tools.deps lib-map format) related functionality." - (:require [clojure.string :as s] - [clojure.reflect :as cr] - [clojure.edn :as edn] - [lice-comb.spdx :as spdx] - [lice-comb.maven :as mvn] - [lice-comb.files :as f] - [lice-comb.data :as d] - [lice-comb.utils :as u])) - -(def ^:private overrides-uri (d/uri-for-data "/deps/overrides.edn")) -(def ^:private overrides (try - (edn/read-string (slurp overrides-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " overrides-uri ". Please check your internet connection and try again.") {} e))))) - -(def ^:private fallbacks-uri (d/uri-for-data "/deps/fallbacks.edn")) -(def ^:private fallbacks (try - (edn/read-string (slurp fallbacks-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " fallbacks-uri ". Please check your internet connection and try again.") {} e))))) - -(defn- check-overrides - "Checks if an override should be used for the given dep" - ([ga] (check-overrides ga nil)) - ([ga v] - (let [gav (symbol (str ga (when v (str "@" v))))] - (:licenses (get overrides gav (get overrides ga)))))) ; Lookup overrides both with and without the version - -(defn- check-fallbacks - "Checks if a fallback should be used for the given dep, given the set of detected ids" - [ga ids] - (if (or (empty? ids) - (every? #(not (spdx/spdx-id? %)) ids)) - (:licenses (get fallbacks ga {:licenses ids})) - ids)) + "Functionality related to combing tools.deps dependency maps and lib maps for + license information." 
+ (:require [clojure.string :as s] + [dom-top.core :as dom] + [lice-comb.maven :as lcmvn] + [lice-comb.files :as lcf] + [lice-comb.impl.http :as lcihttp] + [lice-comb.impl.expressions-info :as lciei])) (defn- normalise-dep - "Normalises a dep, by removing any classifier suffixes from the artifact-id (e.g. the $blah suffix in com.foo/bar$blah)." + "Normalises a dep, by removing any classifier suffixes from the artifact-id + (e.g. the $blah suffix in com.foo/bar$blah)." [[ga info]] (when ga [(symbol (first (s/split (str ga) #"\$"))) info])) -(defmulti dep->ids - "Attempt to detect the license(s) in a tools.deps style dep (a MapEntry or two-element sequence of [groupId/artifactId dep-info])." +(defmulti ^:private dep->string + "Converts a dep to a string." + {:arglists '([[ga info]])} + (fn [[_ info]] (:deps/manifest info))) + +(defmethod ^:private dep->string :mvn + [[ga info]] + (str ga "@" (:mvn/version info))) + +(defmethod ^:private dep->string :deps + [[ga info]] + (str ga "@" (:git/sha info) (when-let [tag (:git/tag info)] (str "/" tag)))) + +(defmulti dep->expressions-info + "Returns an expressions-info map for the given tools.dep dep (a MapEntry or + two-element vector of `['groupId/artifactId dep-info]`), or nil if no + expressions were found." 
{:arglists '([[ga info]])} (fn [[_ info]] (:deps/manifest info))) -(defmethod dep->ids :mvn +(defmethod dep->expressions-info :mvn [dep] (when dep (let [[ga info] (normalise-dep dep) [group-id artifact-id] (s/split (str ga) #"/") - version (:mvn/version info)] - (if-let [override (check-overrides ga version)] - override - (let [pom-uri (mvn/pom-uri-for-gav group-id artifact-id version) - license-ids (check-fallbacks ga - (if-let [license-ids (mvn/pom->ids pom-uri)] - license-ids - (u/nset (mapcat f/zip->ids (:paths info)))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) too - license-ids))))) + version (:mvn/version info) + pom-uri (lcihttp/gav->pom-uri group-id artifact-id version) + expressions (if-let [expressions (lcmvn/pom->expressions-info pom-uri)] + expressions + (into {} (dom/real-pmap lcf/zip->expressions-info (:paths info))))] ; If we didn't find any licenses in the dep's POM, check the dep's JAR(s) + (lciei/prepend-source (dep->string dep) expressions)))) -(defmethod dep->ids :deps +(defmethod dep->expressions-info :deps [dep] (when dep - (let [[ga info] (normalise-dep dep) - version (:git/sha info)] - (if-let [override (check-overrides ga version)] - override - (check-fallbacks ga (f/dir->ids (:deps/root info))))))) + (let [[_ info] (normalise-dep dep)] + (lciei/prepend-source (dep->string dep) (lcf/dir->expressions-info (:deps/root info)))))) -(defmethod dep->ids nil +(defmethod dep->expressions-info nil [_]) -(defmethod dep->ids :default +(defmethod dep->expressions-info :default [dep] - (throw (ex-info (str "Unexpected manifest type '" (:deps/manifest (second dep)) "' for dependency " dep) {:dep dep}))) + (throw (ex-info (str "Unexpected manifest type '" (:deps/manifest (second dep)) "' for dependency " dep) + {:dep dep}))) -(defn deps-licenses - "Attempt to detect the license(s) in a tools.deps 'lib map', returning a new lib map with the licenses assoc'ed in (in key :lice-comb/licenses)" +(defn dep->expressions + 
"Returns a set of SPDX expressions (Strings) for the given tools.deps dep (a + MapEntry or two-element vector of `['groupId/artifactId dep-info-map]`), or + nil if no expressions were found." + [dep] + (some-> (dep->expressions-info dep) + keys + set)) + +(defn deps-expressions + "Takes a tools.deps lib map and returns a new lib map with an expressions-info + map assoc'ed into each dep's info map, in key `:lice-comb/license-info`. + If no license information was found for a given dep, the lib map entry for + that dep will be returned unchanged (it will not have the + `:lice-comb/license-info` key in the info map)." [deps] (when deps - (into {} - (pmap #(let [[k v] %] [k (assoc v :lice-comb/licenses (dep->ids [k v]))]) deps)))) + (into {} (dom/real-pmap #(if-let [expressions-info (not-empty (dep->expressions-info %))] ; not-empty: an empty expressions-info map means "nothing found", same as nil + (let [[k v] %] + [k (assoc v :lice-comb/license-info expressions-info)]) + %) + deps)))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." + [] + (lcmvn/init!) + (lcf/init!) + nil) diff --git a/src/lice_comb/files.clj b/src/lice_comb/files.clj index 6999f1e..8d286e8 100644 --- a/src/lice_comb/files.clj +++ b/src/lice_comb/files.clj @@ -17,64 +17,152 @@ ; (ns lice-comb.files - "Files related functionality." - (:require [clojure.string :as s] - [clojure.set :as set] - [clojure.java.io :as io] - [lice-comb.spdx :as spdx] - [lice-comb.maven :as mvn] - [lice-comb.utils :as u])) + "Functionality related to combing files, directories, and ZIP format archives + for license information." 
+ (:require [clojure.string :as s] + [clojure.java.io :as io] + [lice-comb.matching :as lcmtch] + [lice-comb.maven :as lcmvn] + [lice-comb.impl.expressions-info :as lciei] + [lice-comb.impl.utils :as lciu])) (def ^:private probable-license-filenames #{"pom.xml" "license" "license.txt" "copying" "unlicense"}) ;TODO: consider "license.md" and #".+\.spdx" (see https://github.com/spdx/spdx-maven-plugin for why the latter is important)... +; This is public because it's used in the tests (defn probable-license-file? - "Returns true if the given file-like thing (String, File, ZipEntry) is a probable license file, false otherwise." + "Returns true if the given file-like thing (String, File, ZipEntry) is a + probable license file, false otherwise." [f] (and (not (nil? f)) - (let [fname (s/lower-case (u/filename f))] + (let [fname (s/lower-case (lciu/filename f))] (and (not (s/blank? fname)) (or (contains? probable-license-filenames fname) (s/ends-with? fname ".pom")))))) +; This is public because it's used in the tests (defn probable-license-files - "Returns all probable license files in the given directory, recursively, as a set of java.io.File objects. dir may be a String or a java.io.File, both of which must refer to a directory." + "Returns all probable license files in the given directory, recursively, as a + set of java.io.File objects. dir may be a String or a java.io.File, either of + which must refer to a readable directory." [dir] - (when dir - (let [dir (io/file dir)] - (if (.exists dir) ; Note: we have to do this, because file-seq does weird things when handed a file that doesn't exist - (if (.isDirectory dir) - (u/nset (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq (io/file dir)))) - (throw (java.nio.file.NotDirectoryException. (str dir)))) - (throw (java.io.FileNotFoundException. 
(str dir))))))) - -(defn file->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given file (an InputStream or something that can have an io/input-stream opened on it). - If an InputStream is provided, the associated filename MUST also be provided as the second parameter." - ([f] (file->ids f (u/filename f))) - ([f fname] - (when (and f fname) - (let [fname (s/lower-case fname)] - (cond (= fname "pom.xml") (mvn/pom->ids f) - (s/ends-with? fname ".pom") (mvn/pom->ids f) - :else (spdx/text->ids f)))))) - -(defn dir->ids - "Attempt to detect the license(s) in a directory. dir may be a String or a java.io.File, both of which must refer to a directory." - [dir] - (when dir - (u/nset (mapcat file->ids (probable-license-files dir))))) + (when (lciu/readable-dir? dir) + (some-> (seq (filter #(and (.isFile ^java.io.File %) (probable-license-file? %)) (file-seq (io/file dir)))) + set))) + +(defn file->expressions-info + "Returns an expressions-info map for the given file (an InputStream or + something that can have an io/input-stream opened on it), or nil if no + expressions were found. + + If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." + ([f] (file->expressions-info f (lciu/filepath f))) + ([f filepath] + (when (lciu/readable-file? f) + (let [fname (lciu/filename filepath) + lfname (s/lower-case fname)] + (lciei/prepend-source filepath + (cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname) + (s/ends-with? lfname ".pom") (lcmvn/pom->expressions-info f fname) + (instance? 
java.io.InputStream f) (doall (lcmtch/text->ids-info f)) + :else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids-info is))))))))) ; Default is to assume it's a plain text file containing license text(s) + +(defn file->expressions + "Returns a set of SPDX expressions (Strings) for the given file (an + InputStream or something that can have an io/input-stream opened on it), or + nil if no expressions were found. -(defn zip->ids - "Attempt to detect the license(s) in a ZIP file. zip may be a String or a java.io.File, both of which must refer to a ZIP-format compressed file." + If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." + ([f] (file->expressions f (lciu/filepath f))) + ([f filepath] + (some-> (file->expressions-info f filepath) + keys + set))) + +(defn zip->expressions-info + "Returns an expressions-info map for the given ZIP file (a String or a File, + which must refer to a ZIP-format compressed file), or nil if no expressions + were found. + + Throws if the file is not a valid ZIP." [zip] - (when zip + (when (lciu/readable-file? zip) (let [zip-file (io/file zip)] - (java.util.zip.ZipFile. zip-file) ; This forces validation of the zip file - ZipInputStream does not reliably perform validation + (.close (java.util.zip.ZipFile. zip-file)) ; Opening (then immediately closing, so no file handle leaks) a ZipFile forces validation of the zip file - ZipInputStream does not reliably perform validation (with-open [zip-is (java.util.zip.ZipInputStream. (io/input-stream zip-file))] - (loop [licenses nil - entry (.getNextEntry zip-is)] + (loop [result {} + entry (.getNextEntry zip-is)] (if entry (if (probable-license-file? 
entry) - (recur (set/union licenses (file->ids zip-is (u/filename entry))) (.getNextEntry zip-is)) - (recur licenses (.getNextEntry zip-is))) - licenses)))))) + (recur (merge result (file->expressions-info zip-is (lciu/filename entry))) + (.getNextEntry zip-is)) + (recur result (.getNextEntry zip-is))) + (when-not (empty? result) (lciei/prepend-source (lciu/filepath zip-file) result)))))))) + +(defn zip->expressions + "Returns a set of SPDX expressions (Strings) for the given ZIP file (a String + or a File, which must refer to a ZIP-format compressed file), or nil if no + expressions were found. + + Throws if the file is not a valid ZIP." + [zip] + (some-> (zip->expressions-info zip) + keys + set)) + +(defn- zip-compressed-files + "Returns a set of all probable ZIP compressed files (Files) in the given + directory, recursively, or nil if there are none. dir may be a String or a + java.io.File, and must refer to a readable directory." + [dir] + (when (lciu/readable-dir? dir) + (some-> (seq (filter #(and (.isFile ^java.io.File %) + (or (s/ends-with? (str %) ".zip") + (s/ends-with? (str %) ".jar"))) + (file-seq (io/file dir)))) + set))) + +(defn dir->expressions-info + "Returns an expressions-info map for the given dir (a String or a File, + which must refer to a readable directory), or nil if no expressions were + found. + + The optional `opts` map has these keys: + * `include-zips?` (boolean, default false) - controls whether zip compressed + files found in the directory are recursively included in the scan or not" + ([dir] (dir->expressions-info dir nil)) + ([dir {:keys [include-zips?] :or {include-zips? false}}] + (when (lciu/readable-dir? dir) + (lciei/prepend-source (lciu/filepath dir) + (let [file-expressions (into {} (map file->expressions-info (probable-license-files dir)))] + (if include-zips? 
+ (let [zip-expressions (into {} (map #(try (zip->expressions-info %) (catch Exception _ nil)) (zip-compressed-files dir)))] + (merge file-expressions zip-expressions)) + file-expressions)))))) + +(defn dir->expressions + "Returns a set of SPDX expressions (Strings) for the given dir (a String or + a File, which must refer to a readable directory), or nil if no expressions + were found. + + The optional `opts` map has these keys: + * `include-zips?` (boolean, default false) - controls whether zip compressed + files found in the directory are recursively included in the scan or not" + ([dir] (dir->expressions dir nil)) + ([dir opts] + (some-> (dir->expressions-info dir opts) + keys + set))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." + [] + (lcmtch/init!) + (lcmvn/init!) + nil) diff --git a/src/lice_comb/impl/3rd_party.clj b/src/lice_comb/impl/3rd_party.clj new file mode 100644 index 0000000..72067cc --- /dev/null +++ b/src/lice_comb/impl/3rd_party.clj @@ -0,0 +1,37 @@ +;;;; lice_comb.impl.3rd_party.clj +;;; +;;; Code obtained from third party sources, but not available via standard +;;; package-consumption mechanisms (i.e. as Maven artifacts) +;;; +;;; Copyright and license information is on a per-code-snippet basis, and +;;; is communicated inline via further comments. 
+;;; +(ns lice-comb.impl.3rd-party) + +;; rdrop-while is copyright © Joshua Suskalo (https://github.com/IGJoshua) 2023 and licensed as "CC0-1.0 OR MIT" +;; +;; Source: https://discord.com/channels/729136623421227082/732641743723298877/1141786961875583097 +;; Link to request access: https://discord.gg/discljord +;; +;; Note that the lice-comb project elects to consume this code under the MIT license +(defn rdrop-while + "As for clojure.core/drop-while, but drops from the end of the + sequence backwards, rather than the front forwards. More efficient + when provided with a vector rather than a list." + ([pred coll] + (if (reversible? coll) + (take (- (count coll) (count (take-while pred (rseq coll)))) coll) + (reverse (drop-while pred (reverse coll))))) + ([pred] + (fn [rf] + (let [stash (volatile! [])] + (fn + ([] (rf)) + ([acc] (rf acc)) + ([acc elt] + (if (pred elt) + (do (vswap! stash conj elt) + acc) + (let [res (reduce rf acc (conj @stash elt))] + (vreset! stash []) + res)))))))) diff --git a/src/lice_comb/impl/data.clj b/src/lice_comb/impl/data.clj new file mode 100644 index 0000000..cd490d9 --- /dev/null +++ b/src/lice_comb/impl/data.clj @@ -0,0 +1,53 @@ +; +; Copyright © 2021 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.data + "Data handling functionality. Note: this namespace is not part of the public + API of lice-comb and may change without notice." 
+ (:require [clojure.string :as s] + [clojure.java.io :as io] + [clojure.reflect :as cr] + [clojure.edn :as edn])) + +(defn load-string-resource + "Loads the given classpath resources from the classpath, returning it as a + String. Throws ex-info on error. + + Notes: + * Classpath resource paths must not start with a forward slash ('/'). + * The JVM does not support hyphens ('-') in classpath resource path elements. + Use underscore ('_') instead. + * Unlike during class loading, Clojure does not automatically switch hyphens + in classpath resource path elements to underscores. This inconsistency can + be a time-wasting trap." + [path] + (when-not (s/blank? path) + (try + (if-let [resource (io/resource path)] + (slurp resource) + (throw (ex-info (str "No resource found in classpath at " path) {}))) + (catch clojure.lang.ExceptionInfo ie + (throw ie)) + (catch Exception e + (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " path) {} e)))))) + +(defn load-edn-resource + "Loads and parses the given EDN file from the classpath." + [path] + (when-let [edn-string (load-string-resource path)] + (edn/read-string edn-string))) diff --git a/src/lice_comb/impl/expressions_info.clj b/src/lice_comb/impl/expressions_info.clj new file mode 100644 index 0000000..4346cc3 --- /dev/null +++ b/src/lice_comb/impl/expressions_info.clj @@ -0,0 +1,73 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. 
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.impl.expressions-info
+  "lice-comb expressions-info map helper functionality. Note: this namespace is
+  not part of the public API of lice-comb and may change without notice."
+  (:require [clojure.string :as s]))
+
+(defn prepend-source
+  "Prepends the given source s (a String) onto the :source sequence of all
+  expression-info sub-maps in m (an expressions-info map).
+
+  Returns m unchanged if s is blank or m is empty (including nil). Entries of
+  m whose value is not sequential are passed through untouched."
+  [s m]
+  (if (or (s/blank? s) (empty? m))
+    m
+    (into {} (map #(if (sequential? (val %))
+                     (let [id            (key %)
+                           metadata-list (val %)]
+                       (hash-map id (map (fn [x] (assoc x :source (let [old-source (seq (:source x))
+                                                                        new-source (if (not= s (first old-source))  ; Only add s if it isn't already there
+                                                                                     (conj old-source s)
+                                                                                     old-source)]
+                                                                    new-source)))
+                                         metadata-list)))
+                     %)
+                  m))))
+
+(defn merge-maps
+  "Merges any number of expressions-info maps, by concatenating and de-duping
+  values for the same key (expression). nil maps are ignored; returns nil if
+  no non-nil maps are provided."
+  [& maps]
+  (let [maps (filter identity maps)]
+    (when-not (empty? maps)
+      ; Flatten all maps into one sequence of [k v] entries, then group the entries by key
+      (let [grouped-maps (group-by first (mapcat identity maps))]
+        (into {} (map #(vec [% (seq (distinct (mapcat second (get grouped-maps %))))])
+                      (keys grouped-maps)))))))
+
+; Sort order for confidence keywords; anything not listed here maps to nil
+(def ^:private confidence-sort {
+  :low    0
+  :medium 1
+  :high   2})
+
+(defn sort-confidences
+  "Sorts a sequence of confidences from low to high. Returns nil if cs is nil."
+  [cs]
+  (when cs
+    (sort-by confidence-sort cs)))
+
+(defn lowest-confidence
+  "Returns the lowest confidence in a sequence of confidences, or nil if cs is
+  nil or empty."
+  [cs]
+  (when cs
+    (first (sort-confidences cs))))
+
+(defn highest-confidence
+  "Returns the highest confidence in a sequence of confidences, or nil if cs is
+  nil or empty."
+  [cs]
+  (when cs
+    (last (sort-confidences cs))))
diff --git a/src/lice_comb/impl/http.clj b/src/lice_comb/impl/http.clj
new file mode 100644
index 0000000..49083bf
--- /dev/null
+++ b/src/lice_comb/impl/http.clj
@@ -0,0 +1,121 @@
+;
+; Copyright © 2023 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.impl.http
+  "HTTP helper functionality. Note: this namespace is not part of
+  the public API of lice-comb and may change without notice."
+  (:require [clojure.string     :as s]
+            [clojure.java.io    :as io]
+            [clojure.java.shell :as sh]
+            [hato.client        :as hc]
+            [lice-comb.impl.utils :as lciu]))
+
+; Shared HTTP client, built lazily on first use (see init!)
+(def ^:private http-client-d (delay (hc/build-http-client {:connect-timeout 1000
+                                                           :redirect-policy :always
+                                                           :cookie-policy   :none})))
+
+(defn uri-resolves?
+  "Does the given URI resolve (i.e. does the resource it points to exist)?
+  Only an HTTP 200 response to a HEAD request counts as resolving.
+
+  Note: does not throw - returns false on errors."
+  [uri]
+  (boolean
+    (when (lciu/valid-http-uri? (str uri))
+      (try
+        (when-let [response (hc/head (str uri)
+                                     {:http-client @http-client-d
+                                      ; Request headers go under :headers, and the header name is "user-agent"
+                                      :headers     {"user-agent" "com.github.pmonks/lice-comb"}})]
+          (= 200 (:status response)))
+        (catch Exception _
+          false)))))
+
+(defn- cdn-uri
+  "Converts raw URIs into CDN URIs, for these 'known' hosts:
+
+  * github.com e.g. https://github.com/pmonks/lice-comb/blob/main/LICENSE -> https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE
+
+  If the given URI is not known (or cannot be parsed as a URL), returns the
+  input unchanged."
+  [uri]
+  (if-let [^java.net.URL uri-obj (try (io/as-url uri) (catch Exception _ nil))]
+    (case (s/lower-case (.getHost uri-obj))
+      "github.com" (-> uri
+                       (s/replace #"(?i)github\.com" "raw.githubusercontent.com")
+                       (s/replace "/blob/" "/"))
+      uri)  ; Default case
+    uri))
+
+(defn get-text
+  "Attempts to get plain text as a String from the given URI, returning nil if
+  unable to do so (including for error conditions - there is no way to
+  disambiguate errors from non-text content, for example)."
+  [uri]
+  (let [uri (str uri)]  ; Coerce to a String, as uri-resolves? does
+    (when (lciu/valid-http-uri? uri)
+      (try
+        (when-let [response (hc/get (cdn-uri uri)
+                                    {:http-client @http-client-d
+                                     :accept      "text/plain;q=1,*/*;q=0"  ; Kindly request that the server only return text/plain... ...even though this gets ignored a lot of the time 🙄
+                                     ; Request headers go under :headers, and the header name is "user-agent"
+                                     :headers     {"user-agent" "com.github.pmonks/lice-comb"}})]
+          ; Only hand back bodies the server actually labelled as text/plain
+          (when (= :text/plain (:content-type response))
+            (:body response)))
+        (catch Exception _
+          nil)))))
+
+; Location of the local Maven repository, determined lazily on first use (see init!)
+(def ^:private local-maven-repo-d
+  (delay
+    (try
+      ; The command:
+      ;     mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout
+      ; determines where the local repository is located.
+      (let [sh-result (sh/sh "mvn" "help:evaluate" "-Dexpression=settings.localRepository" "-q" "-DforceStdout")]
+        (if (zero? (:exit sh-result))
+          (s/trim (:out sh-result))
+          (str (System/getProperty "user.home") "/.m2/repository")))  ; mvn failed, so fall back on the default location
+      (catch java.io.IOException _
+        (str (System/getProperty "user.home") "/.m2/repository")))))  ; mvn could not be run, so fall back on the default location
+
+; TODO: make this configurable
+(def ^:private remote-maven-repos #{"https://repo.maven.apache.org/maven2" "https://repo.clojars.org"})
+
+(defn gav->pom-uri
+  "Returns a java.net.URI pointing to the POM for the given GAV (a map), or nil
+  if one cannot be found. The returned URI is guaranteed to be resolvable -
+  either to a file that exists in the local Maven cache, or to an HTTP-
+  accessible resource on a remote Maven repository (i.e. Maven Central or
+  Clojars) that resolves.
+
+  Returns nil if any of group-id, artifact-id or version is blank."
+  ([{:keys [group-id artifact-id version]}] (gav->pom-uri group-id artifact-id version))
+  ([group-id artifact-id version]
+   (when (and (not (s/blank? group-id))
+              (not (s/blank? artifact-id))
+              (not (s/blank? version)))
+     (let [gav-path  (str (s/replace group-id "." "/") "/" artifact-id "/" version "/" artifact-id "-" version ".pom")
+           local-pom (io/file (str @local-maven-repo-d "/" gav-path))]
+       ; Prefer the local cache; only go to the network if the POM isn't there
+       (if (and (.exists local-pom)
+                (.isFile local-pom))
+         (.toURI local-pom)
+         (when-let [remote-uri (first (filter uri-resolves? (map #(str % "/" gav-path) remote-maven-repos)))]
+           (java.net.URI. remote-uri)))))))
+
+(defn init!
+  "Initialises this namespace upon first call (and does nothing on subsequent
+  calls), returning nil. Consumers of this namespace are not required to call
+  this fn, as initialisation will occur implicitly anyway; it is provided to
+  allow explicit control of the cost of initialisation to callers who need it."
+  []
+  @http-client-d
+  @local-maven-repo-d
+  nil)
diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj
new file mode 100644
index 0000000..243ee30
--- /dev/null
+++ b/src/lice_comb/impl/matching.clj
@@ -0,0 +1,310 @@
+;
+; Copyright © 2023 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.impl.matching
+  "Matching helper functionality. Note: this namespace is not part of
+  the public API of lice-comb and may change without notice."
+  (:require [clojure.string                  :as s]
+            [clojure.set                     :as set]
+            [clojure.java.io                 :as io]
+            [spdx.exceptions                 :as se]
+            [spdx.matching                   :as sm]
+            [lice-comb.impl.spdx             :as lcis]
+            [lice-comb.impl.regex-matching   :as lcirm]
+            [lice-comb.impl.expressions-info :as lciei]
+            [lice-comb.impl.3rd-party        :as lc3]
+            [lice-comb.impl.http             :as lcihttp]
+            [lice-comb.impl.data             :as lcid]
+            [lice-comb.impl.utils            :as lciu]))
+
+; Names that are looked up verbatim (see name->expressions-info), loaded lazily from the classpath
+(def ^:private cursed-names-d (delay (lcid/load-edn-resource "lice_comb/names.edn")))
+
+(def ^:private direct-replacements-map {
+  #{"GPL-2.0-only"     "Classpath-exception-2.0"} #{"GPL-2.0-only WITH Classpath-exception-2.0"}
+  #{"GPL-2.0-or-later" "Classpath-exception-2.0"} #{"GPL-2.0-or-later WITH Classpath-exception-2.0"}
+  #{"GPL-3.0-only"     "Classpath-exception-2.0"} #{"GPL-3.0-only WITH Classpath-exception-2.0"}
+  #{"GPL-3.0-or-later" "Classpath-exception-2.0"} #{"GPL-3.0-or-later WITH Classpath-exception-2.0"}
+  })
+
+(defn- direct-replacements
+  "Self-evident direct replacements. Returns ids unchanged unless it is equal
+  to one of the keys of direct-replacements-map."
+  [ids]
+  (get direct-replacements-map ids ids))
+
+(def ^:private gpl-ids-with-only-or-later #{"AGPL-1.0"
+                                            "AGPL-3.0"
+                                            "GFDL-1.1"
+                                            "GFDL-1.2"
+                                            "GFDL-1.3"
+                                            "GPL-1.0"
+                                            "GPL-2.0"
+                                            "GPL-3.0"
+                                            "LGPL-2.0"
+                                            "LGPL-2.1"
+                                            "LGPL-3.0"})
+
+(defn- dis
+  "Remove the given key(s) from the associative collection (set or map).
+  Returns nil for any other kind of collection."
+  [associative & ks]
+  (cond (set? associative) (apply disj   associative ks)
+        (map? associative) (apply dissoc associative ks)))
+
+(defn- fix-gpl-only-or-later
+  "If the keys of ids includes both an 'only' and an 'or-later' variant of the
+  same underlying GNU family identifier, remove the 'only' variant."
+  [ids]
+  (loop [result ids
+         f      (first gpl-ids-with-only-or-later)
+         r      (rest  gpl-ids-with-only-or-later)]
+    (if f
+      (recur (if (and (contains? result (str f "-only"))
+                      (contains? result (str f "-or-later")))
+               (dis result (str f "-only"))
+               result)
+             (first r)
+             (rest  r))
+      result)))
+
+(defn- fix-public-domain-cc0
+  "If the keys of ids includes both CC0-1.0 and lice-comb's public domain
+  LicenseRef, remove the LicenseRef as it's redundant."
+  [ids]
+  (if (and (contains? ids (lcis/public-domain))
+           (contains? ids "CC0-1.0"))
+    (dis ids (lcis/public-domain))
+    ids))
+
+(defn- fix-mpl-2
+  "If the keys of ids includes both MPL-2.0 and MPL-2.0-no-copyleft-exception,
+  remove the MPL-2.0-no-copyleft-exception as it's redundant."
+  [ids]
+  (if (and (contains? ids "MPL-2.0")
+           (contains? ids "MPL-2.0-no-copyleft-exception"))
+    (dis ids "MPL-2.0-no-copyleft-exception")
+    ids))
+
+(defn manual-fixes
+  "Manually fix certain invalid combinations of license identifiers in a set or
+  map. Returns nil if ids is nil."
+  [ids]
+  (some-> ids
+          direct-replacements
+          fix-gpl-only-or-later
+          fix-public-domain-cc0
+          fix-mpl-2))
+
+(defmulti text->ids
+  "Returns an expressions-map for the given license text, or nil if no matches
+  are found."
+  {:arglists '([text])}
+  type)
+
+(defmethod text->ids java.lang.String
+  [s]
+  ; These clj-spdx APIs are *expensive*, so we parallelise them
+  (let [f-lic (future (sm/licenses-within-text   s @lcis/license-ids-d))
+        f-exc (future (sm/exceptions-within-text s @lcis/exception-ids-d))
+        ids   (set/union @f-lic @f-exc)]
+    ; Test with seq rather than truthiness: an empty set must yield nil (as
+    ; documented), not an empty map, or callers' when-let/or fallbacks get skipped
+    (when (seq ids)
+      (manual-fixes (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-text-matching})) ids))))))
+
+(defmethod text->ids java.io.Reader
+  [r]
+  ; Read everything into a String and delegate to the String implementation
+  (let [sw (java.io.StringWriter.)]
+    (io/copy r sw)
+    (text->ids (str sw))))
+
+(defmethod text->ids java.io.InputStream
+  [is]
+  (text->ids (io/reader is)))
+
+(defmethod text->ids :default
+  [src]
+  ; Anything else is handed to io/reader; the reader opened here is closed when we're done
+  (when src
+    (with-open [r (io/reader src)]
+      (text->ids r))))
+
+(defn uri->ids
+  "Returns an expressions-map for the given license uri, or nil if no matches
+  are found."
+  [uri]
+  (when-not (s/blank? uri)
+    (lciei/prepend-source uri
+                          (manual-fixes
+                            (let [suri (lciu/simplify-uri uri)]
+                              (or ; 1. Does the simplified URI match any of the simplified URIs in the SPDX license or exception lists?
+                                  (when-let [ids (get @lcis/index-uri-to-id-d suri)]
+                                    (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-listed-uri :source (list uri)})) ids)))
+
+                                  ; 2. attempt to retrieve the text/plain contents of the uri and perform license text matching on it
+                                  (when-let [license-text (lcihttp/get-text uri)]
+                                    (when-let [ids (text->ids license-text)]
+                                      ids))))))))
+
+(defn- string->ids-info
+  "Converts the given string (a fragment of a license name) into a sequence of
+  singleton expressions-info maps (one per expression), ordered in the same
+  order of appearance as they appear in s.
+
+  If no listed SPDX license or exception identifiers are found in s, returns a
+  sequence containing a single expressions-info map with a lice-comb specific
+  'unlisted' LicenseRef that encodes s.
+
+  Returns nil if s is blank."
+  [s]
+  (when-not (s/blank? s)
+    (let [s   (s/trim s)
+          ids (or ; 1. Is it an SPDX license or exception id?
+                  (when-let [id (get @lcis/spdx-ids-d (s/lower-case s))]
+                    (if (= id s)
+                      (list {id (list {:id id :type :declared                     :strategy :spdx-listed-identifier-exact-match            :source (list s)})})
+                      (list {id (list {:id id :type :concluded :confidence :high :strategy :spdx-listed-identifier-case-insensitive-match :source (list s)})})))
+
+                  ; 2. Is it the name of one or more SPDX licenses or exceptions?
+                  (when-let [ids (get @lcis/index-name-to-id-d (s/lower-case s))]
+                    (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-listed-name :source (list s)})) ids))
+
+                  ; 3. Might it be a URI?  (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central)
+                  ;    Note: seq, so that an empty result falls through to the next strategy instead of ending the search
+                  (when-let [ids (seq (uri->ids s))]
+                    (map #(hash-map (key %) (val %)) ids))
+
+                  ; 4. Attempt regex name matching
+                  (lcirm/matches s)
+
+                  ; 5. No clue, so return a single unlisted SPDX LicenseRef
+                  (let [id (lcis/name->unlisted s)]
+                    (list {id (list {:id id :type :concluded :confidence :low :strategy :unlisted :source (list s)})})))]
+      (map (partial lciei/prepend-source s) ids))))
+
+(defn- filter-blanks
+  "Filter blank strings out of coll. Non-string elements are always kept.
+  Returns nil if coll is empty or nothing is left."
+  [coll]
+  (when (seq coll)
+    (seq (filter #(or (not (string? %)) (not (s/blank? %))) coll))))
+
+(defn- map-split-and-interpose
+  "Maps over the given sequence, splitting strings using the given regex re and
+  interposing the given value int, returning a (flattened) sequence.
+  Non-string elements are passed through as-is."
+  [re int coll]
+  (mapcat #(if-not (string? %)
+             [%]
+             (let [splits (s/split % re)]
+               (if (nil? int)
+                 splits
+                 (interpose int splits))))
+          coll))
+
+(defn split-on-operators
+  "Case insensitively splits a string based on license operators (and,
+  or, with), but only if they're not also part of a license name (e.g.
+  'Common Development and Distribution License', 'GNU General Public
+  License version 2.0 or (at your option) any later version', etc.).
+
+  Returns a sequence of trimmed strings and operator keywords (:and, :or,
+  :with), or nil if s is blank. Note that 'and/or' style separators split the
+  string without contributing an operator keyword."
+  [s]
+  (when-not (s/blank? s)
+    ; The hyphen is escaped so the class means 'slash, hyphen or backslash';
+    ; unescaped, "/-\\" is a range from '/' to '\' that matches digits and
+    ; letters and does not match '-' at all
+    (->> (s/split (s/trim s) #"(?i)\band[/\-\\]+or\b")
+         (map-split-and-interpose #"(?i)(\band\b|\&)(?!\s+(distribution|all\s+rights\s+reserved))" :and)
+         (map-split-and-interpose #"(?i)\bor\b(?!\s*(-?later|lator|newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?|([\"']?(Revised|Modified)[\"']?)))" :or)
+         (map-split-and-interpose #"(?i)\b(with\b|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)" :with)
+         filter-blanks
+         (map #(if (string? %) (s/trim %) %)))))
+
+(def ^:private push conj)   ; With lists-as-stacks conj == push
+
+(defn- calculate-confidence-for-expression
+  "Calculate the confidence for an expression, as the lowest confidence in the
+  expression-infos for the identifiers that make up the expression"
+  [expression-infos]
+  (if-let [confidence (lciei/lowest-confidence (filter identity (map :confidence expression-infos)))]
+    confidence
+    :high))   ; For when none of the components have a confidence (i.e. they're all :type :declared)
+
+(defn- process-expression-element
+  "Processes a single new expression element e (either a keyword representing
+  an SPDX operator, or a map representing an SPDX identifier) in the context of
+  stack (list) s. Returns the updated stack, which is always a list so that
+  peek and pop keep working on it."
+  [s e]
+  (if (keyword? e)
+    ; e is a keyword (SPDX operator): only push a keyword if the prior element was an id, or it's different to the prior keyword
+    (if (= (peek s) e)
+      s
+      (push s e))
+    ; e is a singleton map with an SPDX identifier as a key: depending on how many keywords are currently at the top of s...
+    (case (count (take-while keyword? s))
+      ; No keywords? Push e onto s
+      0 (push s e)
+
+      ; One keyword? See if we should "collapse" the prior value, the keyword and e into an SPDX expression fragment and push the result onto s
+      1 (let [kw        (peek s)
+              operator  (s/upper-case (name kw))
+              s-minus-1 (pop s)
+              prior     (peek s-minus-1)]
+          (if (nil? prior)
+            ; s had only the one keyword on it (which is invalid), so drop it and push e on.
+            ; s-minus-1 is already empty here, and popping an empty list throws, so we must not pop again.
+            (push s-minus-1 e)
+            (let [s-minus-2 (pop s-minus-1)]
+              (if (or (not= :with kw)  ; If the prior keyword was :and or :or, or :with and the current element is a listed exception id, build an SPDX expression fragment and push the result onto s
+                      (se/listed-id? (first (keys e))))
+                (let [k                (s/join " " [(first (keys prior)) operator (first (keys e))])
+                      expression-infos (concat (first (vals prior)) (first (vals e)))
+                      v                (distinct (concat (list {:type :concluded :confidence (calculate-confidence-for-expression expression-infos) :strategy :expression-inference})
+                                                         expression-infos))]
+                  (push s-minus-2 {k v}))
+                (push s-minus-1 e)))))  ; We had a :with operator without a valid exception id following it, so simply drop the :with keyword from the stack and push the current element on
+
+      ; Many keywords? That's invalid (since we dedupe them when they get pushed on, so this means they're different), so drop all of them and push e onto s
+      ; Note: drop-while returns a lazy seq, which doesn't support peek/pop, so turn it back into a list first
+      (push (apply list (drop-while keyword? s)) e))))
+
+(defn- build-expressions-info-map
+  "Builds an expressions-info map from the given sequence of keywords and SPDX
+  expression maps."
+  [l]
+  (loop [result '()
+         f      (first l)
+         r      (rest l)]
+    (if f
+      (recur (process-expression-element result f) (first r) (rest r))
+      (manual-fixes (into {} result)))))
+
+(defn name->expressions-info
+  "Returns an expressions-info map for the given license name, or nil if name
+  is blank."
+  [name]
+  (when-not (s/blank? name)
+    (let [name (s/trim name)]
+      (lciei/prepend-source name
+                            (or ; 1. Is it a cursed name?
+                                (get @cursed-names-d name)
+
+                                ; 2. Construct an expressions-info map from the name
+                                (some->> (split-on-operators name)
+                                         (drop-while keyword?)       ; Operators with nothing before them are meaningless
+                                         (lc3/rdrop-while keyword?)  ; As are operators with nothing after them
+                                         (map #(if (keyword? %) % (string->ids-info %)))
+                                         flatten
+                                         seq
+                                         build-expressions-info-map))))))
+
+(defn init!
+  "Initialises this namespace upon first call (and does nothing on subsequent
+  calls), returning nil. Consumers of this namespace are not required to call
+  this fn, as initialisation will occur implicitly anyway; it is provided to
+  allow explicit control of the cost of initialisation to callers who need it.
+
+  Note: this method has a substantial performance cost."
+  []
+  (lcis/init!)
+  (lcirm/init!)
+  (lcihttp/init!)
+  @cursed-names-d
+  nil)
diff --git a/src/lice_comb/impl/regex_matching.clj b/src/lice_comb/impl/regex_matching.clj
new file mode 100644
index 0000000..afbb302
--- /dev/null
+++ b/src/lice_comb/impl/regex_matching.clj
@@ -0,0 +1,407 @@
+;
+; Copyright © 2023 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.impl.regex-matching
+  "Helper functionality focused on regex matching. Note: this namespace is not
+  part of the public API of lice-comb and may change without notice."
+  (:require [clojure.string       :as s]
+            [medley.core          :as med]
+            [dom-top.core         :as dom]
+            [rencg.api            :as rencg]
+            [lice-comb.impl.spdx  :as lcis]
+            [lice-comb.impl.utils :as lciu]))
+
+(defn- get-rencgs
+  "Get a value for an re-ncg, potentially looking at multiple ncgs in order
+  until a non-blank value is found. Also trims and lower-cases the value, and
+  replaces all whitespace with a single space. Returns default (nil if not
+  provided) when none of the names has a non-blank value in m."
+  ([m names] (get-rencgs m names nil))
+  ([m names default]
+    (loop [f (first names)
+           r (rest  names)]
+      (if f
+        (let [value (get m f)]
+          (if (s/blank? value)
+            (recur (first r) (rest r))
+            (-> value
+                (s/trim)
+                (s/lower-case)
+                (s/replace #"\s+" " "))))
+        default))))
+
+(defn- assert-listed-id
+  "Checks that the id is a listed SPDX identifier (license or exception) and
+  throws if not. Returns the id."
+  [id]
+  (if (or (contains? @lcis/license-ids-d   id)
+          (contains? @lcis/exception-ids-d id))
+    id
+    (throw (ex-info (str "Invalid SPDX id constructed: '" id
+                         "' - please raise an issue at "
+                         "https://github.com/pmonks/lice-comb/issues/new?assignees=pmonks&labels=bug&template=Invalid_id_constructed.md&title=Invalid+SPDX+identifer+constructed:+" id)
+                    {:id id}))))
+
+(defn- generic-id-constructor
+  "A generic SPDX id constructor which works for many simple regexes.
+
+  m is the merged regex element / match map built by match. Returns a
+  [id confidence] pair, or nil if m is nil. Throws (via assert-listed-id) if
+  the constructed id is not a listed SPDX identifier."
+  [m]
+  (when m
+    (let [version    (get-rencgs m ["version"])
+          confidence (if (s/blank? (:latest-ver m))
+                       :high      ; We didn't need a version
+                       (if (s/blank? version)
+                         :low     ; Version not provided at all
+                         (if (and (:pad-ver? m)
+                                  (not (s/includes? version ".")))
+                           :medium    ; We got a partial version
+                           :high)))   ; We got a full version
+          ; No version in the text? Fall back on the element's :latest-ver
+          version    (if (s/blank? version)
+                       (:latest-ver m)
+                       version)
+          ; Pad a bare major version (e.g. "2") out to major.minor (e.g. "2.0")
+          version    (if (and (:pad-ver? m)
+                              (not (s/includes? version ".")))
+                       (str version ".0")
+                       version)
+          id         (str (:id m) (when-not (s/blank? version) (str "-" version)))]
+      [(assert-listed-id id) confidence])))
+
+(defn- number-name-to-number
+  "Converts the name of a number to that number (as a string). e.g.
+  \"two\" -> \"2\". Returns s unchanged if it's not a number name, and nil if
+  s is nil."
+  [^String s]
+  (when s
+    (case s
+      "two"   "2"
+      "three" "3"
+      "four"  "4"
+      s)))
+
+(defn- bsd-id-constructor
+  "An SPDX id constructor specific to the BSD family of licenses. Returns a
+  [id confidence] pair."
+  [m]
+  (let [clause-count1             (number-name-to-number (get-rencgs m ["clausecount1"]))
+        clause-count2             (number-name-to-number (get-rencgs m ["clausecount2"]))
+        ; Prefer whichever clause count is numeric (the first one wins a tie), otherwise a recognised descriptive word in the first position
+        preferred-clause-count    (case [(lciu/is-digits? clause-count1) (lciu/is-digits? clause-count2)]
+                                    [true true]   clause-count1
+                                    [true false]  clause-count1
+                                    [false true]  clause-count2
+                                    (if (contains? #{"simplified" "new" "revised" "modified" "aduna"} clause-count1)
+                                      clause-count1
+                                      clause-count2))
+        [clause-count confidence] (case preferred-clause-count
+                                    ("2" "simplified")                       ["2" :high]
+                                    ("3" "new" "revised" "modified" "aduna") ["3" :high]
+                                    ("4" "original")                         ["4" :high]
+                                    ["4" :low])   ; Note: we default to 4 clause, since it was the original form of the BSD license
+        suffix                    (case (get-rencgs m ["suffix"])
+                                    "patent"                                              "Patent"
+                                    "views"                                               "Views"
+                                    "attribution"                                         "Attribution"
+                                    "clear"                                               "Clear"
+                                    "lbnl"                                                "LBNL"
+                                    "modification"                                        "Modification"
+                                    ("no military license" "no military licence")         "No-Military-License"
+                                    ("no nuclear license" "no nuclear licence")           "No-Nuclear-License"
+                                    ("no nuclear license 2014" "no nuclear licence 2014") "No-Nuclear-License-2014"
+                                    "no nuclear warranty"                                 "No-Nuclear-Warranty"
+                                    "open mpi"                                            "Open-MPI"
+                                    "shortened"                                           "Shortened"
+                                    "uc"                                                  "UC"
+                                    nil)
+        base-id                   (str (:id m) "-" clause-count "-Clause")
+        ; NOTE(review): when suffix is nil this is base-id plus a trailing "-", so the lookup below
+        ; presumably fails and a :high confidence drops to :medium even though no suffix was given - confirm this is intended
+        id-with-suffix            (str base-id "-" suffix)]
+    (if (contains? @lcis/license-ids-d id-with-suffix)  ; Not all suffixes are valid with all BSD clause counts, so check that it's valid before returning it
+      [id-with-suffix confidence]
+      [(assert-listed-id base-id) (if (= confidence :low) :low :medium)])))  ; The suffix we got wasn't valid, which knocks down confidence
+
+(defn- cc-id-constructor
+  "An SPDX id constructor specific to the Creative Commons family of licenses.
+  Returns a [id confidence] pair."
+  [m]
+  (let [nc?            (not (s/blank? (get-rencgs m ["noncommercial"])))
+        nd?            (not (s/blank? (get-rencgs m ["noderivatives"])))
+        sa?            (not (s/blank? (get-rencgs m ["sharealike"])))
+        version        (get-rencgs m ["version"] "")
+        version        (s/replace version #"\p{Punct}+" ".")   ; Normalise any punctuation separator to a dot
+        confidence     (if (s/blank? version)
+                         :low
+                         (if (s/includes? version ".")
+                           :high
+                           :medium))
+        version        (if (s/blank? version)
+                         (:latest-ver m)
+                         version)
+        version        (if (s/includes? version ".")
+                         version
+                         (str version ".0"))
+        base-id        (str "CC-BY-"
+                            (when nc? "NC-")
+                            (when nd? "ND-")
+                            (when (and (not nd?) sa?) "SA-")   ; SA and ND are incompatible (and have no SPDX id as a result), and if both are (erroneously) specified we conservatively choose ND
+                            version)
+        region         (case (get-rencgs m ["region"])
+                         "australia"                                            "AU"
+                         "austria"                                              "AT"
+                         ("england" "england and wales" "england & wales" "uk") "UK"
+                         "france"                                               "FR"
+                         "germany"                                              "DE"
+                         "igo"                                                  "IGO"
+                         "japan"                                                "JP"
+                         "netherlands"                                          "NL"
+                         ("united states" "usa" "us")                           "US"
+                         nil)
+        id-with-region (str base-id (when-not (s/blank? region) (str "-" region)))]
+    (if (contains? @lcis/license-ids-d id-with-region)  ; Not all license variants and versions have a region specific identifier, so check that it's valid before returning it
+      [id-with-region confidence]
+      [(assert-listed-id base-id) confidence])))
+
+(defn- gpl-id-constructor
+  "An SPDX id constructor specific to the GNU family of licenses. Returns a
+  [id confidence] pair."
+  [m]
+  ; The variant is chosen by which key is present in m (checked in this order); nil if none of them is
+  (let [variant    (cond (contains? m "agpl") "AGPL"
+                         (contains? m "lgpl") "LGPL"
+                         (contains? m "gpl")  "GPL")
+        version    (get-rencgs m ["version"] "")
+        version    (s/replace version #"\p{Punct}+" ".")   ; Normalise any punctuation separator to a dot
+        confidence (if (s/blank? version)
+                     :low
+                     (if (s/includes? version ".")
+                       :high
+                       :medium))
+        version    (if (s/blank? version)
+                     (:latest-ver m)
+                     version)
+        version    (if (s/includes? version ".")
+                     version
+                     (str version ".0"))
+        suffix     (if (contains? m "orLater")
+                     "or-later"
+                     "only")   ; Note: we (conservatively) default to "only" when we don't have an explicit suffix
+        id         (str variant "-" version "-" suffix)]
+    [(assert-listed-id id) confidence]))
+
+(defn- simple-regex-match
+  "Constructs a 'simple' name match structure that's a case-insensitive match
+  for s."
+  [s]
+  {:id    s
+   :regex (re-pattern (str "(?i)\\b" (lciu/escape-re s) "\\b"))
+   :fn    (constantly [s :medium])})
+
+; The regex for the GNU family is a nightmare, so we build it up (and test it) in pieces
+; NOTE(review): several regex literals in this file contain "(?" followed directly by an alternative
+; (e.g. "(?AGPL|Affero)"). The constructors above look matches up by name ("agpl", "version",
+; "clausecount1", ...), so these look like named groups whose "<name>" part has been lost - confirm
+; against the upstream file before relying on them.
+; NOTE(review): gnu-re below references version-re, but no def for it is visible here, and gpl-re
+; appears to be truncated - verify.
+(def agpl-re          #"(?AGPL|Affero)(\s+GNU)?(\s+General)?(\s+Public)?(\s+Licen[cs]e)?(\s+\(?AGPL\)?)?")
+(def lgpl-re          #"(?L\s?GPL|GNU\s+(Library|Lesser)|(Library|Lesser)\s+(L?GPL|General\s+Public\s+Licen[cs]e))(\s+or\s+Lesser)?(\s+General)?(\s+Pub?lic)?(\s+Licen[cs]e)?(\s+\(?LGPL\)?)?")
+(def gpl-re           #"(?GNU(?!\s+Classpath)|(?\d+([\._]\d+)?)?")
+(def only-or-later-re #"[\s-]*((?only)|(\(?or(\s+\(?at\s+your\s+(option|discretion)\)?)?(\s+any)?)?([\s-]*(?later|lator|newer|\+)))?")
+(def gnu-re           (lciu/re-concat "(?x)(?i)\\b(\n# Alternative 1: AGPL\n"
+                                      agpl-re
+                                      "\n# Alternative 2: LGPL\n|"
+                                      lgpl-re
+                                      "\n# Alternative 3: GPL\n|"
+                                      gpl-re
+                                      "\n)\n# Version\n"
+                                      version-re
+                                      "\n# Only/or-Later suffix\n"
+                                      only-or-later-re))
+
+; Regexes used for license name matching, along with functions for constructing an SPDX id and confidence metric from them
+; Each element is a map with :id, :regex and :fn keys, plus (for the version-aware constructors) :pad-ver? and :latest-ver
+(def ^:private license-name-matching-d (delay
+  (concat
+    ; By default we add most SPDX ids as "simple" regex matches
+    (map simple-regex-match (disj @lcis/license-ids-d   "MIT" "Zlib"))
+    (map simple-regex-match (disj @lcis/exception-ids-d "Classpath-exception-2.0"))
+    [
+    {:id         "AFL"
+     :regex      #"(?i)\bAcademic(\s+Free)?(\s+Licen[cs]e)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "3.0"}
+    {:id         "Apache"
+     :regex      #"(?i)\b(ASL|Apache)(\s+Software)?(\s+Licen[cs]e(s)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?(?!.*acknowledgment\s+clause\s+removed)\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "2.0"}
+    {:id         "Artistic"
+     :regex      #"(?i)\bArtistic\s+Licen[cs]e(\s*V(ersion)?)?[\s,-]*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "2.0"}
+    {:id         "Beerware"
+     :regex      #"(?i)\bBeer-?ware\b"
+     :fn         (constantly ["Beerware" :medium])}
+    {:id         "BSL"
+     :regex      #"(?i)\bBoost(\s+Software)?(\s+Licen[cs]e)?[\s,-]*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "1.0"}
+    {:id         "BSD"
+     :regex      #"(?i)\b(?\p{Alnum}+)?[\s,-]*(C(lause)?|Type)?\s*\bBSD[\s-]*\(?(Licen[cs]e|Type|C(lause)?)?[\s-]*(?\p{Alnum}+)?([\s-]+Clause)?(?\s+(Patent|Views|Attribution|Clear|LBNL|Modification|No\s+Military\s+Licen[cs]e|No\s+Nuclear\s+Licen[cs]e([\s-]+2014)?|No\s+Nuclear\s+Warranty|Open\s+MPI|Shortened|UC))?"
+     :fn         bsd-id-constructor}
+    {:id         "CC0"
+     :regex      #"(?i)\bCC\s*0"
+     :fn         (constantly ["CC0-1.0" :medium])}
+    {:id         "CECILL"
+     :regex      #"(?i)\bCeCILL(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?(\s+Agreement)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "2.1"}
+    {:id         "Classpath-exception"
+     :regex      #"(?i)\bClasspath[\s-]+exception(\s*V(ersion)?)?[\s-]*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "2.0"}
+    {:id         "CDDL"
+     :regex      #"(?i)(CDDL|Common\s+Development\s+(and|\&)?\s+Distribution\s+Licen[cs]e)(\s+\(?CDDL\)?)?[\s,-]*(\s*V(ersion)?)?\s*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "1.1"}
+    {:id         "CPL"
+     :regex      #"(?i)Common\s+Public\s+Licen[cs]e[\s,-]*(\s*V(ersion)?)?(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "1.0"}
+    {:id         "Creative commons family"
+     :regex      #"(?i)(\bCC[\s-]BY|Creative[\s-]+Commons(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?Non\s*Commercial|NC)|(?No[\s-]*Deriv(ative)?s?|ND)|(?Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\s*(?Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?"
+     :fn         cc-id-constructor
+     :pad-ver?   true
+     :latest-ver "4.0"}
+    {:id         "EPL"   ; Eclipse Public License (EPL) - v 1.0
+     :regex      #"(?i)\b(EPL|Eclipse(\s+Public)?(\s+Licen?[cs]e)?)(\s*\(EPL\))?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b"   ; Note: optional "n" in "license" is because of a known typo
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "2.0"}
+    {:id         "EUPL"
+     :regex      #"(?i)\bEuropean\s+Union(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(\(?EUPL\)?)?[\s,-]*(V(ersion)?)?(\.)?\s*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "1.2"}
+    {:id         "FreeBSD"
+     :regex      #"(?i)\bFreeBSD\b"
+     :fn         (constantly ["BSD-2-Clause-FreeBSD" :medium])}
+    {:id         "GNU license family"
+     :regex      gnu-re
+     :fn         gpl-id-constructor
+     :pad-ver?   true
+     ; NOTE(review): this is a number, whereas every other :latest-ver in this vector is a string - confirm that's intentional
+     :latest-ver 3.0}
+    {:id         "Hippocratic"
+     :regex      #"(?i)\bHippocratic\b"
+     :fn         (constantly ["Hippocratic-2.1" :medium])}   ; There are no other listed versions of this license
+    {:id         "LLVM-exception"
+     :regex      #"(?i)\bLLVM[\s-]+Exception\b"
+     :fn         (constantly ["LLVM-exception" :medium])}
+    {:id         "MIT"
+     :regex      #"(?i)\b(MIT|Bouncy\s+Castle)(?![\s/]*(X11|ISC))(\s+Public)?(\s+Licen[cs]e)?\b"
+     :fn         (constantly ["MIT" :medium])}
+    {:id         "MPL"
+     :regex      #"(?i)\b(MPL|Mozilla)(\s+Public)?(\s+Licen[cs]e)?[\s,-]*(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "2.0"}
+    {:id         "MX4J"
+     :regex      #"(?i)\bMX4J\s+Licen[cs]e(,?\s+v(ersion)?\s*1\.0)?\b"
+     :fn         (constantly ["Apache-1.1" :medium])}   ; See https://github.com/spdx/license-list-XML/pull/594 - the MX4J license *is* the Apache-1.1 license, according to SPDX
+    {:id         "NASA"
+     :regex      #"(?i)\bNASA(\s+Open)?(\s+Source)?(\s+Agreement)?[\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "1.3"}
+    {:id         "Plexus"
+     :regex      #"(?i)\bApache\s+Licen[cs]e(\s+but)?(\s+with)?(\s+the)?\s+acknowledgment\s+clause\s+removed\b"
+     :fn         (constantly ["Plexus" :medium])}
+    {:id         "Proprietary or commercial"
+     :regex      #"(?i)\b(Propriet[aoe]ry|Commercial|All\s+Rights\s+Reserved|Private)\b"
+     :fn         (constantly [(lcis/proprietary-commercial) :medium])}
+    {:id         "Public Domain"
+     :regex      #"(?i)\bPublic\s+Domain(?![\s\(]*CC\s*0)"
+     :fn         (constantly [(lcis/public-domain) :medium])}
+    {:id         "Ruby"
+     :regex      #"(?i)\bRuby(\s+Licen[cs]e)?\b"
+     :fn         (constantly ["Ruby" :medium])}
+    {:id         "SGI-B"
+     :regex      #"(?i)\bSGI(\s+Free)?(\s+Software)?(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?\d+(\.\d+)?)?)?\b"
+     :fn         generic-id-constructor
+     :pad-ver?   true
+     :latest-ver "2.0"}
+    {:id         "Unlicense"
+     :regex      #"(?i)\bUnlicen[cs]e\b"
+     :fn         (constantly ["Unlicense" :medium])}
+    {:id         "WTFPL"
+     :regex      #"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b"
+     :fn         (constantly ["WTFPL" :medium])}
+    {:id         "Zlib"
+     :regex      #"\b(?i)zlib(?![\s/]+libpng)\b"
+     :fn         (constantly ["Zlib" :medium])}
+    ])))
+
+(defn- match
+  "If a match occured for the given regex element when tested against string s,
+  returns a map containing the following keys:
+  * :id         The SPDX license or exception identifier that was determined
+  * :type       The 'type' of match - will always have the value :concluded
+  * :confidence The confidence of the match: either :high, :medium, or :low
+  * :strategy   The matching strategy - will always have the value :regex-matching
+  * :source     A list of strings containing source information (specifically
+                the portion of the string s that matched this regex element)
+  * :start      The start index of the given match within s
+
+  Returns nil if there was no match."
+  [s elem]
+  (when-let [match (rencg/re-find-ncg (:regex elem) s)]
+    ; The constructor fn gets one map holding the input string (:name), the element's own keys, and whatever re-find-ncg returned
+    (let [[id confidence] ((:fn elem) (merge {:name s} elem match))
+          source          (s/trim (subs s (:start match) (:end match)))]
+      {:id         id
+       :type       :concluded
+       :confidence (if (= source id) :high confidence)   ; The matched text is exactly the id, so confidence is :high
+       :strategy   :regex-matching
+       :source     (list source)
+       :start      (:start match)})))
+
+(defn matches
+  "Returns a sequence (NOT A SET!) of maps where each key is a SPDX license or
+  exception identifier (a String) that was found in s, and the value is a
+  sequence containing a single map describing how the identifier was determined.
+  The map contains these keys:
+  * :type       The 'type' of match - will always have the value :concluded
+  * :confidence The confidence of the match: either :high, :medium, or :low
+  * :strategy   The matching strategy - will always have the value :regex-matching
+  * :source     A sequence of strings containing source information
+                (specifically the substring of s that matched this identifier)
+
+  Results are in the order in which they appear in the string, and the function
+  returns nil if there were no matches."
+  [s]
+  ; Every regex element is tried against s (via dom/real-pmap); non-matches come back as nil and are filtered out
+  (when-let [matches (seq (filter identity (dom/real-pmap (partial match s) @license-name-matching-d)))]
+    (some->> matches
+             (med/distinct-by :id)    ;####TODO: THINK ABOUT MERGING INSTEAD OF DROPPING
+             (sort-by :start)
+             (map #(hash-map (:id %) (list {:id         (:id %)   ; We duplicate this here in case the result gets merged into an expression
+                                            :type       (:type %)
+                                            :confidence (:confidence %)
+                                            :strategy   (:strategy %)
+                                            :source     (:source %)}))))))
+
+(defn init!
+  "Initialises this namespace upon first call (and does nothing on subsequent
+  calls), returning nil. Consumers of this namespace are not required to call
+  this fn, as initialisation will occur implicitly anyway; it is provided to
+  allow explicit control of the cost of initialisation to callers who need it.
+
+  Note: this method has a substantial performance cost."
+  []
+  (lcis/init!)
+  @license-name-matching-d
+  nil)
diff --git a/src/lice_comb/impl/spdx.clj b/src/lice_comb/impl/spdx.clj
new file mode 100644
index 0000000..1407f95
--- /dev/null
+++ b/src/lice_comb/impl/spdx.clj
@@ -0,0 +1,139 @@
+;
+; Copyright © 2023 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.spdx + "SPDX-related functionality. Note: this namespace is not part of the public + API of lice-comb and may change without notice." + (:require [clojure.string :as s] + [spdx.licenses :as sl] + [spdx.exceptions :as se] + [spdx.expressions :as sexp] + [lice-comb.impl.utils :as lciu])) + +; The subset of SPDX license identifiers that we use; specifically excludes the superceded deprecated GPL family identifiers +(def license-ids-d + (delay + (disj (set (filter #(not (s/ends-with? % "+")) (sl/ids))) + "AGPL-1.0" "AGPL-3.0" "GPL-1.0" "GPL-2.0" "GPL-3.0" "LGPL-2.0" "LGPL-2.1" "LGPL-3.0" + "GPL-2.0-with-autoconf-exception" "GPL-2.0-with-bison-exception" "GPL-2.0-with-classpath-exception" + "GPL-2.0-with-font-exception" "GPL-2.0-with-GCC-exception" "GPL-3.0-with-autoconf-exception" + "GPL-3.0-with-GCC-exception"))) + +; The subset of SPDX exception identifiers that we use; right now this is all of them (placeholder in case we need to use a subset in future) +(def exception-ids-d (delay (se/ids))) + +; The license and exception lists +(def license-list-d (delay (map sl/id->info @license-ids-d))) +(def exception-list-d (delay (map se/id->info @exception-ids-d))) + +; The unlisted license refs lice-comb uses (note: the unlisted one usually has a hyphen then a base62 suffix appended) +(def ^:private public-domain-license-ref "LicenseRef-lice-comb-PUBLIC-DOMAIN") +(def ^:private proprietary-commercial-license-ref "LicenseRef-lice-comb-PROPRIETARY-COMMERCIAL") +(def ^:private 
unlisted-license-ref-prefix "LicenseRef-lice-comb-UNLISTED") + +; Lower case id map +(def spdx-ids-d (delay (merge (into {} (map #(vec [(s/lower-case %) %]) @license-ids-d)) + (into {} (map #(vec [(s/lower-case %) %]) @exception-ids-d))))) + +(defn- name-to-id-tuple + [list-entry] + [(s/lower-case (s/trim (:name list-entry))) (:id list-entry)]) + +(def index-name-to-id-d (delay (merge (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (map name-to-id-tuple @license-list-d))) + (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (map name-to-id-tuple @exception-list-d)))))) + +(defn- urls-to-id-tuples + "Extracts all urls for a given list (license or exception) entry." + [list-entry] + (let [id (:id list-entry) + simplified-uris (map lciu/simplify-uri (filter (complement s/blank?) (concat (:see-also list-entry) (get-in list-entry [:cross-refs :url]))))] + (map #(vec [% id]) simplified-uris))) + +(def index-uri-to-id-d (delay (merge (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @license-list-d))) + (lciu/mapfonv #(lciu/nset (map second %)) (group-by first (mapcat urls-to-id-tuples @exception-list-d)))))) + +(defn public-domain? + "Is the given id lice-comb's custom 'public domain' LicenseRef?" + [id] + (= (s/lower-case id) (s/lower-case public-domain-license-ref))) + +(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) + representing public domain." + :arglists '([])} + public-domain + (constantly public-domain-license-ref)) + +(defn proprietary-commercial? + "Is the given id lice-comb's custom 'proprietary / commercial' LicenseRef?" + [id] + (= (s/lower-case id) (s/lower-case proprietary-commercial-license-ref))) + +(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) + representing a proprietary / commercial license." + :arglists '([])} + proprietary-commercial + (constantly proprietary-commercial-license-ref)) + +(defn unlisted? 
+ "Is the given id a lice-comb custom 'unlisted' LicenseRef?" + [id] + (when id + (s/starts-with? (s/lower-case id) (s/lower-case unlisted-license-ref-prefix)))) + +(defn name->unlisted + "Constructs a valid SPDX id (a LicenseRef specific to lice-comb) for an + unlisted license, with the given name appended as Base62 (since clj-spdx + identifiers are basically constrained to [A-Z][a-z][0-9] ie. Base62)." + [name] + (str unlisted-license-ref-prefix (when-not (s/blank? name) (str "-" (lciu/base62-encode name))))) + +(defn unlisted->name + "Returns a human readable name for the given unlisted license id, by decoding the Base62 suffix appended by name->unlisted. Returns nil if id is nil + or is not a lice-comb unlisted LicenseRef." + [id] + (when (unlisted? id) + (str "Unlisted (" + (if (> (count id) (inc (count unlisted-license-ref-prefix))) ; Is there anything after the prefix and its "-" separator? + (lciu/base62-decode (subs id (inc (count unlisted-license-ref-prefix)))) ; Skip the prefix plus the single "-" separator that name->unlisted inserts + "-original name not available-") + ")"))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it. + + Note: this method has a substantial performance cost." + [] + ; Parallelise initialisation of the spdx.licenses and spdx.exceptions namespaces, as they're both sloooooooow (~1.5 mins total) + (let [sl-init (future (sl/init!)) + se-init (future (se/init!))] + @sl-init + @se-init) + (sexp/init!)
+ + ; Serially initialise this namespace's dependent state - they're all pretty fast (< 1s) + @license-ids-d + @exception-ids-d + @license-list-d + @exception-list-d + @spdx-ids-d + @index-uri-to-id-d + @index-name-to-id-d + nil) diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj new file mode 100644 index 0000000..4266011 --- /dev/null +++ b/src/lice_comb/impl/utils.clj @@ -0,0 +1,251 @@ +; +; Copyright © 2021 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.utils + "General purpose utility fns that I seem to end up needing in every single + project I write... Note: this namespace is not part of the public API of + lice-comb and may change without notice." + (:require [clojure.string :as s] + [clojure.java.io :as io] + [clj-base62.core :as base62])) + +(defn mapfonv + "Returns a new map where f has been applied to all of the values of m." + [f m] + (when m + (into {} + (for [[k v] m] + [k (f v)])))) + +(defn map-pad + "Like map, but when presented with multiple collections of different lengths, + 'pads out' the missing elements with nil rather than terminating early." 
+ [f & cs] + (loop [result [] ; A vector, so that conj appends and results stay in input order + firsts (map first cs) + rests (map rest cs)] + (if-not (seq (keep identity firsts)) ; Stop once the first element of every collection is nil + (seq result) ; nil when there were no results + (recur (conj result (apply f firsts)) + (map first rests) + (map rest rests))))) + +(defn strim + "nil safe version of clojure.string/trim" + [^String s] + (when s (s/trim s))) + +(defn is-digits? + "Does the given string contains digits only?" + [^String s] + (boolean ; Eliminate nil-punning + (when-not (s/blank? s) + (every? #(Character/isDigit ^Character %) s)))) + +(defn nset + "nil preserving version of clojure.core/set" + [coll] + (some-> (seq coll) + set)) + +(defn escape-re + "Escapes the given string for use in a regex." + [s] + (when s + (s/escape s {\< "\\<" + \( "\\(" + \[ "\\[" + \{ "\\{" + \\ "\\\\" + \^ "\\^" + \- "\\-" + \= "\\=" + \$ "\\$" + \! "\\!" + \| "\\|" + \] "\\]" + \} "\\}" + \) "\\)" + \? "\\?" + \* "\\*" + \+ "\\+" + \. "\\." + \> "\\>" + }))) + +(defn re-concat + "Concatenate all of the given regexes or strings into a single regex." + [& res] + (re-pattern (apply str res))) + +(defn base62-encode + "Encodes the given string to Base62/UTF-8." + [^String s] + (when s + (base62/encode (.getBytes s (java.nio.charset.StandardCharsets/UTF_8))))) + +(defn base62-decode + "Decodes the given Base62/UTF-8 string." + [^String s] + (when s + (java.lang.String. ^bytes (base62/decode s) (java.nio.charset.StandardCharsets/UTF_8)))) + +(defn valid-http-uri? + "Returns true if given string is a valid HTTP or HTTPS URI." + [s] + ; Note: no nil check needed since the isValid method handles null sanely + (.isValid (org.apache.commons.validator.routines.UrlValidator. ^"[Ljava.lang.String;" (into-array String ["http" "https"])) s)) + +(defn simplify-uri + "Simplifies a URI (which can be a string, java.net.URL, or java.net.URI) if + possible, returning a String. Returns nil if the input is nil or blank." + [uri] + (let [uri (str uri)] + (when-not (s/blank? uri) + (let [luri (s/lower-case (s/trim uri))] + (if (valid-http-uri?
luri) + (-> luri + (s/replace #"\Ahttps?://(www\.)?" "http://") ; Normalise to http and strip any www. extension on hostname + (s/replace #"\.[\p{Alnum}]{3,}\z" "")) ; Strip file type extension (if any) + luri))))) + +(defn readable-dir? + "Is d (a String or File) a readable directory?" + [d] + (let [d (io/file d)] + (and d + (.exists d) + (.canRead d) + (.isDirectory d)))) + +(defmulti readable-file? + "Is f (a String, File, InputStream, or Reader) a readable file?" + type) + +(defmethod readable-file? nil + [_]) + +(defmethod readable-file? java.io.File + [^java.io.File f] + (and f + (.exists f) + (.canRead f) + (not (.isDirectory f)))) + +(defmethod readable-file? java.lang.String + [s] + (or (valid-http-uri? s) + (readable-file? (io/file s)))) + +(defmethod readable-file? java.io.InputStream + [_] + true) + +(defmethod readable-file? java.io.Reader + [_] + true) + +(defmethod readable-file? java.net.URL + [_] + true) + +(defmethod readable-file? java.net.URI + [_] + true) + +(defmulti filepath + "Returns the full path and name of the given file-like thing (String, File, + ZipEntry, URI, URL)." + type) + +(defmethod filepath nil + [_]) + +(defmethod filepath java.io.File + [^java.io.File f] + (.getPath f)) + +(defmethod filepath java.lang.String + [s] + (when s + (let [s (s/trim s)] + (if (valid-http-uri? s) + (filepath (io/as-url s)) + (filepath (io/file s)))))) + +(defmethod filepath java.util.zip.ZipEntry + [^java.util.zip.ZipEntry ze] + (.getName ze)) + +(defmethod filepath java.net.URI + [^java.net.URI uri] + (str uri)) + +(defmethod filepath java.net.URL + [^java.net.URL url] + (str url)) + +(defmethod filepath java.io.InputStream + [_] + (throw (ex-info "Cannot determine filepath of an InputStream - did you forget to provide it separately?" {}))) + +(defmulti filename + "Returns just the name component of the given file-like thing (String, File, + ZipEntry, URI, URL), excluding any parents." 
+ type) + +(defmethod filename nil + [_]) + +(defmethod filename java.io.File + [^java.io.File f] + (.getName f)) + +(defmethod filename java.lang.String + [s] + (when s + (let [s (s/trim s)] + (if (valid-http-uri? s) + (filename (io/as-url s)) + (filename (io/file s)))))) + +(defmethod filename java.util.zip.ZipEntry + [^java.util.zip.ZipEntry ze] + (filename (.getName ze))) + +(defmethod filename java.net.URI + [^java.net.URI uri] + (filename (.getPath uri))) + +(defmethod filename java.net.URL + [^java.net.URL url] + (filename (.getPath url))) + +(defmethod filename java.io.InputStream + [_] + (throw (ex-info "Cannot determine filename of an InputStream - did you forget to provide it separately?" {}))) + +(defn getenv + "Obtain the given environment variable, returning default (or nil, if default + is not provided) if it isn't set." + ([var] (getenv var nil)) + ([var default] + (let [val (System/getenv var)] + (if-not (s/blank? val) + val + default)))) diff --git a/src/lice_comb/lein.clj b/src/lice_comb/lein.clj new file mode 100644 index 0000000..edc56ca --- /dev/null +++ b/src/lice_comb/lein.clj @@ -0,0 +1,74 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.lein + "Functionality related to combing Leiningen dependency sequences for license + information." 
+ (:require [dom-top.core :as dom] + [lice-comb.deps :as lcd] + [lice-comb.impl.expressions-info :as lciei])) + +(defn- lein-dep->toolsdeps-dep + "Converts a leiningen style dependency vector into a (partial) tools.deps style + dependency MapEntry. This is partial in that just enough of the tools.deps style + info map (in the value) is constructed for lice-comb.deps to function." + [[ga version :as dep]] + (when dep + [ga {:mvn/version version :deps/manifest :mvn}])) ;####TODO: Synthesise :paths key (for paths to JAR files) + +(defn dep->expressions-info + "Attempt to detect the SPDX license expression(s) (a map) in a Leiningen + style dep (a vector of the form `[groupId/artifactId \"version\"]`)." + [dep] + (when-let [toolsdep-dep (lein-dep->toolsdeps-dep dep)] + (lciei/prepend-source (pr-str dep) (lcd/dep->expressions-info toolsdep-dep)))) + +(defn dep->expressions + "Attempt to detect the SPDX license expression(s) (a set) in a Leiningen + style dep (a vector of the form `[groupId/artifactId \"version\"]`)." + [dep] + (some-> (dep->expressions-info dep) + keys + set)) + +(defn deps->expressions-info + "Attempt to detect all of the SPDX license expression(s) in a Leiningen style + dependency vector. The result is a map, where each entry in the map has a key + that is the Leiningen dep, and the value is the lice-comb expressions-info map + for that dep." + [deps] + (when deps + (into {} (dom/real-pmap #(vec [% (dep->expressions-info %)]) deps)))) + +(defn deps->expressions + "Attempt to detect all of the SPDX license expression(s) in a Leiningen style + dependency vector. The result is a map, where each entry in the map has a key + that is the Leiningen dep, and the value is the set of SPDX license + expression(s) for that dep." + [deps] + (when deps + (into {} (dom/real-pmap #(vec [% (dep->expressions %)]) deps)))) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. 
Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." + [] + (lcd/init!) + nil) diff --git a/src/lice_comb/matching.clj b/src/lice_comb/matching.clj new file mode 100644 index 0000000..97579bd --- /dev/null +++ b/src/lice_comb/matching.clj @@ -0,0 +1,224 @@ +; +; Copyright © 2021 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.matching + "The core matching functionality within lice-comb. Matching is provided for + three categories of input, and uses a different process for each: + 1. License names + 2. License uris + 3. License texts + + Each matching fn has two variants: + 1. A 'simple' version that returns a set of SPDX expressions (Strings) + 2. An 'info' version that returns an 'expressions-info map' + + An expressions-info map has this structure: + * key: an SPDX expression (String), which may be a single SPDX license + identifier) + * value: a sequence of 'expression-info' maps + + Each lice-comb expression-info map has this structure: + * :id (String, optional): + The SPDX identifier within the expression that this info map refers to. 
+ * :type (either :declared or :concluded, mandatory): + Whether this identifier was unambiguously declared within the input or + was instead concluded by lice-comb (see SPDX specification for more detail + on the definition of these two terms). + * :confidence (one of: :high, :medium, :low, only provided when :type = :concluded): + Indicates the approximate confidence lice-comb has in its conclusions for + this particular SPDX identifier. + * :strategy (a keyword, mandatory): + The strategy lice-comb used to determine this particular SPDX identifier. + See the source for lice-comb.utils for an up-to-date list of all possible + values. + * :source (a sequence of Strings): + The list of sources used to arrive at this SPDX identifier, starting from + the most general (the input) to the most specific (the smallest subset of + the input that was used to make this determination)." + (:require [clojure.string :as s] + [spdx.licenses :as sl] + [spdx.exceptions :as se] + [spdx.expressions :as sexp] + [lice-comb.impl.spdx :as lcis] + [lice-comb.impl.matching :as lcim] + [lice-comb.impl.utils :as lciu])) + +(defn public-domain? + "Is the given id lice-comb's custom 'public domain' LicenseRef?" + [id] + (lcis/public-domain? id)) + +(defn proprietary-commercial? + "Is the given id lice-comb's custom 'proprietary / commercial' LicenseRef?" + [id] + (lcis/proprietary-commercial? id)) + +(defn unlisted? + "Is the given id a lice-comb custom 'unlisted' LicenseRef?" + [id] + (lcis/unlisted? id)) + +(defn id->name + "Returns the human readable name of the given license or exception identifier; + either the official SPDX license or exception name or, if the id is a + lice-comb specific LicenseRef, a lice-comb specific name. Returns the id + verbatim if unable to determine a name. Returns nil if the id is blank." + [id] + (when-not (s/blank? id) + (cond (sl/listed-id? id) (:name (sl/id->info id)) + (se/listed-id? id) (:name (se/id->info id)) + (public-domain? 
id) "Public domain" + (proprietary-commercial? id) "Proprietary/commercial" + (unlisted? id) (lcis/unlisted->name id) + :else id))) + +(defn text->ids-info + "Returns an expressions-info map for the given license text (a String, Reader, + InputStream, or something that is accepted by clojure.java.io/reader - File, + URL, URI, Socket, etc.), or nil if no expressions were found. + + Notes: + * this function implements the SPDX matching guidelines (via clj-spdx). + See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ + * the caller is expected to open & close a Reader or InputStream passed to + this function (e.g. using clojure.core/with-open) + * you cannot pass a String representation of a filename to this method - you + should pass filenames through clojure.java.io/file (or similar) first" + [text] + (lcim/text->ids text)) + +(defn text->ids + "Returns a set of SPDX expressions (Strings) for the given license text (a + String, Reader, InputStream, or something that is accepted by + clojure.java.io/reader - File, URL, URI, Socket, etc.), or nil if no + expressions were found. + + Notes: + * this function implements the SPDX matching guidelines (via clj-spdx). + See https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/ + * the caller is expected to open & close a Reader or InputStream passed to + this function (e.g. using clojure.core/with-open) + * you cannot pass a String representation of a filename to this method - you + should pass filenames through clojure.java.io/file (or similar) first" + [text] + (some-> (text->ids-info text) + keys + set)) + +(defn uri->ids-info + "Returns an expressions-info map for the given license uri (a String, URL, or + URI). + + Notes: + * This is done via the two steps described below + + + + Returns the SPDX license and/or exception identifiers (a map) for the given + uri, or nil if there aren't any. It does this via two steps: + 1.
Seeing if the given URI is in the license or exception list, and returning + the ids of the associated licenses and/or exceptions if so + 2. Attempting to retrieve the plain text content of the given URI and + performing full SPDX license matching on the result if there was one + + Notes on step 1: + 1. this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + See lice-comb.impl.utils/simplify-uri for exact details. + 2. URIs in the SPDX license and exception lists are not unique - the same URI + may represent multiple licenses and/or exceptions. + + The keys in the maps are the detected SPDX license and exception identifiers, + and each value contains information about how that identifiers was determined." + [uri] + (lcim/uri->ids uri)) + +(defn uri->ids + "Returns the SPDX license and/or exception identifiers (a set of Strings) for + the given uri, or nil if there aren't any. It does this via two steps: + 1. Seeing if the given URI is in the license or exception list, and returning + the ids of the associated licenses and/or exceptions if so + 2. Attempting to retrieve the plain text content of the given URI and + performing full SPDX license matching on the result if there was one + + Notes on step 1: + 1. this does not perform exact matching; rather it simplifies URIs in various + ways to avoid irrelevant differences, including performing a + case-insensitive comparison, ignoring protocol differences (http vs https), + ignoring extensions representing MIME types (.txt vs .html, etc.), etc. + See lice-comb.impl.utils/simplify-uri for exact details. + 2. URIs in the SPDX license and exception lists are not unique - the same URI + may represent multiple licenses and/or exceptions." 
+ [uri] + (some-> (uri->ids-info uri) + keys + set)) + +(defn name->expressions-info + "Returns a lice-comb expressions-info map for the given 'license name' (a + String), or nil if there isn't one. This involves: + 1. Determining whether the name is a valid SPDX license expression, and if so + normalising it (see clj-spdx's spdx.expressions/normalise fn) + 2. Checking if the name is actually a URI, and if so performing URI matching + on it (as per uri->ids-info) + 3. Attempting to construct one or more SPDX license expressions from the + name + + The keys in the map are the detected SPDX license expressions, + and each value contains information about how that expression was determined." + [name] + (when-not (s/blank? name) + (let [name (s/trim name)] + ; 1. If it's a valid SPDX expression, return the normalised rendition of it in a map + (if-let [normalised-expression (sexp/normalise name)] + {normalised-expression (list {:type :declared :strategy :spdx-expression :source (list name)})} + ; 2. If it's a URI, use URI matching (this is to handle messed up real world cases where license names in POMs contain a URI) + (if (lciu/valid-http-uri? name) + (if-let [ids (uri->ids-info name)] + ids + {(lcis/name->unlisted name) (list {:type :concluded :confidence :low :strategy :unlisted :source (list name)})}) ; It was a URL, but we weren't able to resolve it to any ids, so return it as unlisted + ; 3. Attempt to build SPDX expression(s) from the name + (lcim/name->expressions-info name)))))) + +(defn name->expressions + "Attempts to determine the SPDX license expression(s) (a set of Strings) from + the given 'license name' (a String), or nil if there aren't any. This involves: + 1. Determining whether the name is a valid SPDX license expression, and if so + normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it + 2. Checking if the name is actually a URI, and if so performing URI matching + on it (as per uri->ids) + 3.
attempting to construct one or more SPDX license expressions from the + name" + [name] + (some-> (name->expressions-info name) + keys + set)) + +(defn init! + "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it. + + Note: this method has a substantial performance cost." + [] + (lcis/init!) + (lcim/init!) + nil) diff --git a/src/lice_comb/maven.clj b/src/lice_comb/maven.clj index efb3a47..857f1b1 100644 --- a/src/lice_comb/maven.clj +++ b/src/lice_comb/maven.clj @@ -17,92 +17,124 @@ ; (ns lice-comb.maven - "Maven related functionality, mostly related to POMs." - (:require [clojure.string :as s] - [clojure.java.io :as io] - [clojure.data.xml :as xml] - [clojure.tools.logging :as log] - [xml-in.core :as xi] - [clojure.java.shell :as sh] - [lice-comb.spdx :as spdx] - [lice-comb.utils :as u])) + "Functionality related to combing Maven POMs for license information." + (:require [clojure.string :as s] + [clojure.java.io :as io] + [clojure.data.xml :as xml] + [clojure.tools.logging :as log] + [xml-in.core :as xi] + [lice-comb.matching :as lcmtch] + [lice-comb.impl.matching :as lcim] + [lice-comb.impl.expressions-info :as lciei] + [lice-comb.impl.http :as lcihttp] + [lice-comb.impl.utils :as lciu])) -(def ^:private local-maven-repo - (try - (let [sh-result (sh/sh "mvn" "help:evaluate" "-Dexpression=settings.localRepository" "-q" "-DforceStdout")] - (if (= 0 (:exit sh-result)) - (s/trim (:out sh-result)) - (str (System/getProperty "user.home") "/.m2/repository"))) - (catch java.io.IOException _ - (str (System/getProperty "user.home") "/.m2/repository")))) +(defn- licenses-from-pair + "Attempts to determine the license(s) (a map) from a POM license name/URL + pair. Returns nil if no matches were found." 
+ [{:keys [name url]}] + ; 1. Look in the name field(s) + (if-let [name-expressions (lciei/prepend-source "" (lcmtch/name->expressions-info name))] + name-expressions + ; 2. If the names didn't give us any licenses, look in the url field(s) (this tends to be slower and less accurate) + (when-let [uri-ids (lciei/prepend-source "" (lcmtch/uri->ids-info url))] + uri-ids))) -(def ^:private remote-maven-repos #{"https://repo1.maven.org/maven2" "https://repo.clojars.org"}) +(xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") -(defn- uri-resolves? - "Does the given URI resolve (i.e. does the resource it points to exist)?" - [^java.net.URI uri] - (and uri - (let [http (doto ^java.net.HttpURLConnection (.openConnection (.toURL uri)) - (.setRequestMethod "HEAD"))] - (= 200 (.getResponseCode http))))) +(defn- xml-find-all-alts + "As for xi/find-all, but supports an alternative fallback set of tags (to + help with namespace messes in pom.xml files)." + [xml ks1 ks2] + (if-let [result (seq (xi/find-all xml ks1))] + result + (seq (xi/find-all xml ks2)))) -(defn pom-uri-for-gav - "Attempts to locate the POM for the given GAV, which is a URI that may point to a file in the local Maven repository or a remote Maven repository (e.g. on Maven Central or Clojars)." - ([{:keys [group-id artifact-id version]}] (pom-uri-for-gav group-id artifact-id version)) - ([group-id artifact-id version] - (when (and (not (s/blank? group-id)) - (not (s/blank? artifact-id)) - (not (s/blank? version))) - (let [gav-path (str (s/replace group-id "." "/") "/" artifact-id "/" version "/" artifact-id "-" version ".pom") - local-pom (io/file (str local-maven-repo "/" gav-path))] - (if (and (.exists local-pom) - (.isFile local-pom)) - (.toURI local-pom) - (first (filter uri-resolves? (map #(java.net.URI. 
(str % "/" gav-path)) remote-maven-repos)))))))) +(defn- xml-find-first-string + "As for xi/find-first, but assumes the target is a single content tag (and + returns that, or nil if it's blank or the tag doesn't exist." + [xml ks] + (when-let [result (first (xi/find-first xml ks))] + (when-not (s/blank? result) + result))) -(defn- licenses-from-pair - "Attempts to determine the license(s) (a set) from a POM license name/URL pair." - [{:keys [name url]}] - (if-let [license (spdx/uri->id url)] - #{license} - (if-let [licenses (spdx/name->ids name)] - licenses - (when name #{(str "NON-SPDX-Unknown (" name ")")})))) ; Last resort - return the license name +(defn- xml-find-first-string-alts + "As for xml-find-first-string, but supports an alternative fallback set of + tags (to help with namespace messes in pom.xml files)." + [xml ks1 ks2] + (if-let [result (xml-find-first-string xml ks1)] + result + (xml-find-first-string xml ks2))) -(xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0") +(defmulti pom->expressions-info + "Returns an expressions-info map for the given POM file (an InputStream or + something that can have an io/input-stream opened on it), or nil if no + expressions were found. + + If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." + {:arglists '([pom] [pom filepath])} + (fn [& args] (type (first args)))) + +; Note: a few rare pom.xml files are missing the xmlns declation (e.g. 
software.amazon.ion/ion-java) - so we look for both namespaced and non-namespaced versions of all tags +(defmethod pom->expressions-info java.io.InputStream + [pom-is filepath] + (lciei/prepend-source filepath + (let [pom-xml (xml/parse pom-is)] + (if-let [pom-licenses (xml-find-all-alts pom-xml [::pom/project ::pom/licenses] [:project :licenses])] + ; block exists - process it + (let [name-uri-pairs (some->> pom-licenses + (filter map?) ; Get rid of non-tag content (whitespace etc.) + (filter #(or (= ::pom/license (:tag %)) (= :license (:tag %)))) ; Get rid of non tags (which shouldn't exist, but Maven POMs are a shitshow...) + (map #(identity (let [name (xml-find-first-string-alts % [::pom/license ::pom/name] [:license :name]) + url (xml-find-first-string-alts % [::pom/license ::pom/url] [:license :url])] + (when (or name url) + {:name name :url url})))) + set) + licenses (into {} (map licenses-from-pair name-uri-pairs))] + (lcim/manual-fixes licenses)) + ; License block doesn't exist, so attempt to lookup the parent pom and get it from there + (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) + parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) + parent-gav (merge {} + (when parent {:group-id (lciu/strim (first (xi/find-first parent [::pom/groupId]))) + :artifact-id (lciu/strim (first (xi/find-first parent [::pom/artifactId]))) + :version (lciu/strim (first (xi/find-first parent [::pom/version])))}) + (when parent-no-ns {:group-id (lciu/strim (first (xi/find-first parent-no-ns [:groupId]))) + :artifact-id (lciu/strim (first (xi/find-first parent-no-ns [:artifactId]))) + :version (lciu/strim (first (xi/find-first parent-no-ns [:version])))}))] + (when-not (empty? 
parent-gav) + (pom->expressions-info (lcihttp/gav->pom-uri parent-gav)))))))) ; Note: naive (stack consuming) recursion, which is fine here as pom hierarchies are rarely very deep + +(defmethod pom->expressions-info :default + ([pom] (pom->expressions-info pom (lciu/filepath pom))) + ([pom filepath] + (when pom + (with-open [pom-is (io/input-stream pom)] + (if-let [expressions (pom->expressions-info pom-is filepath)] + expressions + (log/info (str "'" filepath "'") "contains no license information")))))) -(defmulti pom->ids - "Attempt to detect the license(s) reported in a pom.xml file. pom may be a java.io.InputStream, or anything that can be opened by clojure.java.io/input-stream." - {:arglists '([pom])} - type) +(defn pom->expressions + "Returns a set of SPDX expressions (Strings) for the given POM file (an + InputStream or something that can have an io/input-stream opened on it), or + nil if no expressions were found. -(defmethod pom->ids java.io.InputStream - [pom-is] - (let [pom-xml (xml/parse pom-is) - licenses (seq (xi/find-all pom-xml [::pom/project ::pom/licenses ::pom/license])) - licenses-no-ns (seq (xi/find-all pom-xml [:project :licenses :license]))] ; Note: a few rare pom.xml files are missing the xmlns declation (e.g. 
software.amazon.ion/ion-java) - this case catches those - (if (or licenses licenses-no-ns) - ; Licenses block exists - process it - (let [name-uri-pairs (u/nset (concat (u/map-pad #(hash-map :name (u/strim %1) :url (u/strim %2)) (xi/find-all licenses [::pom/name]) (xi/find-all licenses [::pom/url])) - (u/map-pad #(hash-map :name (u/strim %1) :url (u/strim %2)) (xi/find-all licenses-no-ns [:name]) (xi/find-all licenses-no-ns [:url]))))] - (u/nset (mapcat licenses-from-pair name-uri-pairs))) - ; License block doesn't exist, so attempt to lookup the parent pom and get it from there - (let [parent (seq (xi/find-first pom-xml [::pom/project ::pom/parent])) - parent-no-ns (seq (xi/find-first pom-xml [:project :parent])) - parent-gav (merge {} - (when parent {:group-id (u/strim (first (xi/find-first parent [::pom/groupId]))) - :artifact-id (u/strim (first (xi/find-first parent [::pom/artifactId]))) - :version (u/strim (first (xi/find-first parent [::pom/version])))}) - (when parent-no-ns {:group-id (u/strim (first (xi/find-first parent-no-ns [:groupId]))) - :artifact-id (u/strim (first (xi/find-first parent-no-ns [:artifactId]))) - :version (u/strim (first (xi/find-first parent-no-ns [:version])))}))] - (when-not (empty? parent-gav) - (pom->ids (pom-uri-for-gav parent-gav))))))) ; Note: naive (stack consuming) recursion + If an InputStream is provided, it is the caller's responsibility to open and + close it, and a filepath associated with the InputStream *must* be provided as + the second parameter (it is optional for other types of input)." + ([pom] (pom->expressions pom (lciu/filepath pom))) + ([pom filepath] + (some-> (pom->expressions-info pom filepath) + keys + set))) -(defmethod pom->ids :default - [pom] - (when pom - (with-open [pom-is (io/input-stream pom)] - (if-let [pom-licenses (pom->ids pom-is)] - pom-licenses - (log/info (str "'" pom "'") "contains no license information"))))) +(defn init! 
+ "Initialises this namespace upon first call (and does nothing on subsequent + calls), returning nil. Consumers of this namespace are not required to call + this fn, as initialisation will occur implicitly anyway; it is provided to + allow explicit control of the cost of initialisation to callers who need it." + [] + (lcmtch/init!) + nil) diff --git a/src/lice_comb/spdx.clj b/src/lice_comb/spdx.clj deleted file mode 100644 index a20065b..0000000 --- a/src/lice_comb/spdx.clj +++ /dev/null @@ -1,140 +0,0 @@ -; -; Copyright © 2021 Peter Monks -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; SPDX-License-Identifier: Apache-2.0 -; - -(ns lice-comb.spdx - "SPDX related functionality." - (:require [clojure.string :as s] - [clojure.java.io :as io] - [clojure.reflect :as cr] - [clojure.edn :as edn] - [clojure.tools.logging :as log] - [cheshire.core :as json] - [lice-comb.data :as d] - [lice-comb.utils :as u])) - -(def ^:private spdx-license-list-uri "https://spdx.org/licenses/licenses.json") -(def ^:private spdx-license-list (try - (json/parse-string (slurp spdx-license-list-uri) u/clojurise-json-key) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " spdx-license-list-uri ". 
Please check your internet connection and try again.") {} e))))) - -(def ^:private aliases-uri (d/uri-for-data "/spdx/aliases.edn")) -(def ^:private aliases (try - (edn/read-string (slurp aliases-uri)) - (catch Exception e - (throw (ex-info (str "Unexpected " (cr/typename (type e)) " while reading " aliases-uri ". Please check your internet connection and try again.") {} e))))) - -(def license-list-version - "The version of the license list in use." - (:license-list-version spdx-license-list)) - -(def license-list - "The SPDX license list." - (:licenses spdx-license-list)) - -; Alternative indexes into the SPDX list -(def ^:private idx-id-to-info (into {} (map #(vec [(:license-id %) %]) license-list))) -(def ^:private idx-lname-to-id (apply merge (map #(hash-map (s/trim (s/lower-case (:name %))) (:license-id %)) license-list))) -(def ^:private idx-uri-to-id (into {} (mapcat (fn [lic] (map #(vec [(u/simplify-uri %) (:license-id lic)]) (:see-also lic))) license-list))) -(def ^:private idx-regex-to-id (merge aliases - (apply merge (map #(hash-map (s/replace (u/escape-re (s/lower-case (:name %))) #"\s+" "\\\\s+") #{(:license-id %)}) license-list)))) - -; Store regexes in reverse size order, on the assumption that longer regexes are more specific and should be processed first -; Note: `regexes` actually contains string representations, since regexes in Clojure don't implement equality / hash 🙄 -(def ^:private regexes (reverse (sort-by #(count %) (concat (keys idx-regex-to-id) (keys idx-regex-to-id))))) -(def ^:private re-pattern-mem (memoize re-pattern)) ; So we memomize re-pattern to save having to recompile the regex string representations every time we use them - -(def ids - "All SPDX license identifiers in the list." - (keys idx-id-to-info)) - -(defn id->info - "Returns the SPDX license information for the given SPDX license identifier, or nil if unable to do so." 
- [spdx-id] - (when spdx-id - (get idx-id-to-info spdx-id))) - -(defn id->spdx-name - "Returns the official license name for the given SPDX id, or nil if unable to do so." - [spdx-id] - (when spdx-id - (:name (id->info spdx-id)))) - -(defn spdx-name->id - "Returns the SPDX license identifier equivalent of the given license name (matched case insensitively), or nil if unable to do so." - [name] - (when name - (get idx-lname-to-id (s/trim (s/lower-case name))))) - -(defn uri->id - "Returns the SPDX license identifier equivalent for the given uri, or nil if unable to do so. - - Notes: - 1. this does not perform exact matching; rather it checks whether the given uri matches the start of any of the known license uris. - 2. uris in the SPDX license list are not unique to a license (approximately 70 out of 600 are duplicates)" - [uri] - (when uri - (let [simplified-uri (u/simplify-uri uri) - uri-match (first (filter (partial s/starts-with? simplified-uri) (keys idx-uri-to-id)))] - (get idx-uri-to-id uri-match)))) - -(defn spdx-id? - "Is the given identifier an SPDX identifier?" - [id] - (when id - (not (s/starts-with? id "NON-SPDX")))) - -(defn id->name - "Returns the license name of the given id; either the official SPDX name or (if the id is not an SPDX id) an unofficial name. Returns the id as-is if unable to determine its name." - [id] - (if (spdx-id? id) - (id->spdx-name id) - (case id - "NON-SPDX-Public-Domain" "Public domain" - id))) - -(defn name->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given license name (a string). Returns nil if unable to do so." - [name] - (when (not (s/blank? name)) - (let [name (s/trim name)] - (if-let [exact-id-match (id->info name)] ; First we exact match on the id, for those cases where someone has used the SPDX id as the name (e.g. 
in a pom.xml file) - #{(:license-id exact-id-match)} - (if-let [exact-name-match (spdx-name->id name)] ; Then we exact match on the name (albeit case-insensitively) - #{exact-name-match} - (if-let [re-name-match (get idx-regex-to-id (first (filter #(re-find (re-pattern-mem %) (s/lower-case name)) regexes)))] ; Then the last resort is to match on the regexes - re-name-match - (log/warn "Unable to find a license for" (str "'" name "'")))))))) - -(defmulti text->ids - "Attempts to determine the SPDX license identifier(s) (a set) from the given license text (an InputStream, or something that can have an io/input-stream opened on it)." - {:arglists '([text])} - type) - -; TODO: https://github.com/pmonks/lice-comb/issues/3 -(defmethod text->ids java.io.InputStream - [is] - (let [rdr (io/reader is) ; Note: we don't wrap this in "with-open", since the input-stream we're handed is closed by the calling fn - first-lines (s/trim (s/join " " (take 2 (remove s/blank? (map s/trim (line-seq rdr))))))] ; Take the first two non-blank lines, since many licenses put the name on line 1, and the version on line 2 - (name->ids first-lines))) - -(defmethod text->ids :default - [src] - (when src - (with-open [is (io/input-stream src)] - (text->ids is)))) diff --git a/src/lice_comb/utils.clj b/src/lice_comb/utils.clj index 65cc648..8736849 100644 --- a/src/lice_comb/utils.clj +++ b/src/lice_comb/utils.clj @@ -1,5 +1,5 @@ ; -; Copyright © 2021 Peter Monks +; Copyright © 2023 Peter Monks ; ; Licensed under the Apache License, Version 2.0 (the "License"); ; you may not use this file except in compliance with the License. @@ -17,126 +17,71 @@ ; (ns lice-comb.utils - "General purpose utility fns that I seem to end up needing in every single project I write..." - (:require [clojure.string :as s] - [clojure.java.io :as io])) - -(defn clojurise-json-key - "Converts JSON-style string keys (e.g. \"fullName\") to Clojure keyword keys (e.g. :full-name)." 
- [k] - (when k - (keyword - (s/replace - (s/join "-" - (map s/lower-case - (s/split k #"(? "\\>" - }))) - -(defn simplify-uri - "Simplifies a URI (which can be a string, java.net.URL, or java.net.URI). Returns a string." - [uri] - (when uri - (s/replace (s/replace (s/lower-case (s/trim (str uri))) - "https://" "http://") - "://www." "://"))) - -(defmulti filename - "Returns just the name component of the given file or path string, excluding any parents." - type) - -(defmethod filename nil - [_]) - -(defmethod filename java.io.File - [^java.io.File f] - (.getName f)) - -(defmethod filename java.lang.String - [s] - (filename (io/file s))) - -(defmethod filename java.util.zip.ZipEntry - [^java.util.zip.ZipEntry ze] - (filename (.getName ze))) ; Note that Zip Entry names include the entire path - -(defmethod filename java.net.URI - [^java.net.URI uri] - (filename (.getPath uri))) - -(defmethod filename java.net.URL - [^java.net.URL url] - (filename (.getPath url))) - -(defn getenv - "Obtain the given environment variable, returning default (or nil, if default is not provided) if it isn't set." - ([var] (getenv var nil)) - ([var default] - (let [val (System/getenv var)] - (if-not (s/blank? val) - val - default)))) + "Miscellaneous functionality." 
+ (:require [clojure.string :as s])) + +(def ^:private strategy->string { + :spdx-expression "SPDX expression" + :spdx-listed-identifier-exact-match "SPDX identifier" + :spdx-listed-identifier-case-insensitive-match "SPDX identifier (case insensitive match)" + :spdx-text-matching "SPDX license text matching" + :spdx-listed-name "SPDX listed name (case insensitive match)" + :spdx-listed-uri "SPDX listed URI (relaxed matching)" + :expression-inference "inferred SPDX expression" + :regex-matching "regular expression matching" + :unlisted "fallback to unlisted LicenseRef" + :manual-verification "manual verification"}) + +(defn- expression-info-keyfn + "sort-by keyfn for lice-comb info maps" + [metadata] + (str (case (:id metadata) + nil "0" + "1") + "-" + (case (:type metadata) + :declared "0" + :concluded "1") + "-" + (case (:confidence metadata) + nil "0" + :high "1" + :medium "2" + :low "3") + "-" + (case (:strategy metadata) + :spdx-expression "0" + :spdx-listed-identifier-exact-match "1" + :spdx-listed-identifier-case-insensitive-match "2" + :spdx-text-matching "3" + :spdx-listed-name "4" + :spdx-listed-uri "5" + :expression-inference "6" + :regex-matching "7" + :unlisted "8" + :manual-verification "9"))) + +(defn- expression-info->string + "Converts the given expression-info map into a human-readable string, using + the information in license-info map m." 
+ [m id] + (str id ":\n" + (when-let [info-list (sort-by expression-info-keyfn (seq (get m id)))] + (s/join "\n" (map #(str " " + (when-let [md-id (:id %)] (when (not= id md-id) (str md-id " "))) + (case (:type %) + :declared "Declared" + :concluded "Concluded") + (when-let [confidence (:confidence %)] (str "\n Confidence: " (name confidence))) + (when-let [strategy (:strategy %)] (str "\n Strategy: " (get strategy->string strategy (name strategy)))) + (when-let [source (seq (:source %))] (str "\n Source:\n > " (s/join "\n > " source)))) + info-list))))) + +(defn expressions-info->string + "Converts the given expressions-info map into a human-readable string. This + function is mostly intended for debugging / developer discovery purposes, and + the content and format of the output may change without warning." + [m] + (when m + (let [ids (sort (keys m))] + (s/join "\n\n" (map (partial expression-info->string m) ids))))) diff --git a/test/lice_comb/data/complex.pom b/test/lice_comb/data/complex.pom new file mode 100644 index 0000000..a317375 --- /dev/null +++ b/test/lice_comb/data/complex.pom @@ -0,0 +1,21 @@ + + + + + Apache License 2.0 + http://www.apache.org/licenses/LICENSE-2.0.html + + + mit license + + + gpl 2.0 with classpath exception + + + https://opensource.org/licenses/BSD-3-Clause + + + Unlicense AND CC0-1.0 + + + diff --git a/test/lice_comb/data/pom-in-a-zip.zip b/test/lice_comb/data/pom-in-a-zip.zip new file mode 100644 index 0000000..2ed5086 Binary files /dev/null and b/test/lice_comb/data/pom-in-a-zip.zip differ diff --git a/test/lice_comb/deps_test.clj b/test/lice_comb/deps_test.clj index 6142b75..d550ba9 100644 --- a/test/lice_comb/deps_test.clj +++ b/test/lice_comb/deps_test.clj @@ -18,8 +18,9 @@ (ns lice-comb.deps-test (:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.deps :refer [dep->ids deps-licenses]])) + [lice-comb.test-boilerplate :refer [fixture valid=]] + 
[lice-comb.impl.spdx :as lcis] + [lice-comb.deps :refer [dep->expressions deps-expressions]])) (use-fixtures :once fixture) @@ -27,129 +28,130 @@ (deftest dep->ids-tests (testing "Nil deps" - (is (nil? (dep->ids nil)))) + (is (nil? (dep->expressions nil)))) (testing "Unknown dep types" - (is (thrown? clojure.lang.ExceptionInfo (dep->ids ['com.github.pmonks/lice-comb {:deps/manifest :invalid :mvn/version "1.0.0"}])))) + (is (thrown? clojure.lang.ExceptionInfo (dep->expressions ['com.github.pmonks/lice-comb {:deps/manifest :invalid :mvn/version "1.0.0"}])))) (testing "Invalid deps" - (is (nil? (dep->ids ['com.github.pmonks/invalid-project {:deps/manifest :mvn :mvn/version "0.0.1"}]))) ; Invalid GA - (is (nil? (dep->ids ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.0.0-SNAPSHOT"}])))) ; Invalid V + (is (nil? (dep->expressions ['com.github.pmonks/invalid-project {:deps/manifest :mvn :mvn/version "0.0.1"}]))) ; Invalid GA + (is (nil? (dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.0.0-SNAPSHOT"}])))) ; Invalid V (testing "Valid deps - single license" - (is (= #{"Apache-2.0"} (dep->ids ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) - (is (= #{"NON-SPDX-Public-Domain"} (dep->ids ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids 
['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile {:deps/manifest :mvn :mvn/version "2.13.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['com.google.guava/guava {:deps/manifest :mvn :mvn/version "31.0.1-jre"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) - (is (= #{"CDDL-1.0"} (dep->ids ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) - (is (= #{"CDDL-1.0"} (dep->ids ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['javax.enterprise/cdi-api {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) - (is (= #{"CC0-1.0"} (dep->ids ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids 
['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids 
['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"GPL-3.0"} (dep->ids ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"MIT"} (dep->ids ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.codec {:deps/manifest :mvn :mvn/version "0.1.1"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.json 
{:deps/manifest :mvn :mvn/version "2.4.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer {:deps/manifest :mvn :mvn/version "1.1.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) - (is (= #{"MIT"} (dep->ids ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn :mvn/version "2.6.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-component-annotations {:deps/manifest :mvn :mvn/version "2.1.0"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) - (is (= #{"EPL-1.0"} 
(dep->ids ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) - (is (= #{"Plexus"} (dep->ids ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version "2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"BSD-3-Clause"} (dep->ids ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"MIT"} (dep->ids ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version "1.7.32"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "1.7"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) - (is (= #{"NON-SPDX-Public-Domain"} (dep->ids ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) - (is (= #{"Apache-2.0"} (dep->ids ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) - (is (= #{"Apache-2.0"} (dep->ids 
['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) - (testing "Valid deps - no licenses in deployed artifacts -> leverage fallbacks" - (is (= #{"EPL-1.0"} (dep->ids ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) - (is (= #{"EPL-1.0"} (dep->ids ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.github.pmonks/asf-cat {:deps/manifest :mvn :mvn/version "1.0.12"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}]))) + (is (valid= #{"BSD-4-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "5.2"}]))) + (is (valid= #{(lcis/public-domain)} (dep->expressions ['aopalliance/aopalliance {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-core {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-kms {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-s3 {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.amazonaws/aws-java-sdk-sts {:deps/manifest :mvn :mvn/version "1.12.129"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-cbor {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.fasterxml.jackson.dataformat/jackson-dataformat-smile {:deps/manifest :mvn :mvn/version "2.13.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.google.guava/guava {:deps/manifest :mvn :mvn/version 
"31.0.1-jre"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-api {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-mock {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-noop {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['io.opentracing/opentracing-util {:deps/manifest :mvn :mvn/version "0.33.0"}]))) + (is (valid= #{"CDDL-1.0"} (dep->expressions ['javax.activation/activation {:deps/manifest :mvn :mvn/version "1.1.1"}]))) + (is (valid= #{"CDDL-1.0"} (dep->expressions ['javax.annotation/jsr250-api {:deps/manifest :mvn :mvn/version "1.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['javax.enterprise/cdi-api {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['javax.inject/javax.inject {:deps/manifest :mvn :mvn/version "1"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['junit/junit {:deps/manifest :mvn :mvn/version "4.13.2"}]))) + (is (valid= #{"CC0-1.0"} (dep->expressions ['net.i2p.crypto/eddsa {:deps/manifest :mvn :mvn/version "0.3.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['net.jpountz.lz4/lz4 {:deps/manifest :mvn :mvn/version "1.3.0"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-application {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-bdiv3 {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} 
(dep->expressions ['org.activecomponents.jadex/jadex-kernel-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-component {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-micro {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-microservice {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-kernel-model-bpmn {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-platform-bridge {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-rules-eca {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-binary {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-json {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-traverser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-serialization-xml {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-base {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions 
['org.activecomponents.jadex/jadex-transport-relay {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-tcp {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-transport-websocket {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-bytecode {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-commons {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-concurrent {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-gui {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-javaparser {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-nativetools {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-util-security {:deps/manifest :mvn :mvn/version "4.0.250"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.bouncycastle/bcpkix-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.bouncycastle/bcprov-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.bouncycastle/bcutil-jdk15on {:deps/manifest :mvn :mvn/version "1.70"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/core.async {:deps/manifest :mvn :mvn/version "1.5.648"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.codec 
{:deps/manifest :mvn :mvn/version "0.1.1"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.json {:deps/manifest :mvn :mvn/version "2.4.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.priority-map {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.0.8"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/data.zip {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/java.classpath {:deps/manifest :mvn :mvn/version "1.0.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer {:deps/manifest :mvn :mvn/version "1.1.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.analyzer.jvm {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.cli {:deps/manifest :mvn :mvn/version "1.0.206"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.deps.alpha {:deps/manifest :mvn :mvn/version "0.12.1090"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.gitlibs {:deps/manifest :mvn :mvn/version "2.4.172"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.logging {:deps/manifest :mvn :mvn/version "1.2.2"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.clojure/tools.namespace {:deps/manifest :mvn :mvn/version "1.2.0"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.codehaus.mojo/animal-sniffer-annotations {:deps/manifest :mvn :mvn/version "1.20"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-classworlds {:deps/manifest :mvn :mvn/version "2.6.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-component-annotations 
{:deps/manifest :mvn :mvn/version "2.1.0"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-interpolation {:deps/manifest :mvn :mvn/version "1.26"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.codehaus.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "2.0"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.inject {:deps/manifest :mvn :mvn/version "0.3.5"}]))) + (is (valid= #{"EPL-1.0"} (dep->expressions ['org.eclipse.sisu/org.eclipse.sisu.plexus {:deps/manifest :mvn :mvn/version "0.3.5"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.hamcrest/hamcrest-core {:deps/manifest :mvn :mvn/version "2.2"}]))) + (is (valid= #{"Plexus"} (dep->expressions ['org.jdom/jdom2 {:deps/manifest :mvn :mvn/version "2.0.6.1"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.nanohttpd/nanohttpd-websocket {:deps/manifest :mvn :mvn/version "2.3.1"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-analysis {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-tree {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"BSD-3-Clause"} (dep->expressions ['org.ow2.asm/asm-util {:deps/manifest :mvn :mvn/version "9.2"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.slf4j/jul-to-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.slf4j/log4j-over-slf4j {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-api {:deps/manifest :mvn :mvn/version "1.7.32"}]))) + (is (valid= #{"MIT"} (dep->expressions ['org.slf4j/slf4j-nop {:deps/manifest :mvn :mvn/version 
"1.7.32"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-cipher {:deps/manifest :mvn :mvn/version "1.7"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.sonatype.plexus/plexus-sec-dispatcher {:deps/manifest :mvn :mvn/version "1.4"}]))) + (is (valid= #{(lcis/public-domain)} (dep->expressions ['org.tukaani/xz {:deps/manifest :mvn :mvn/version "1.9"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['org.xerial.snappy/snappy-java {:deps/manifest :mvn :mvn/version "1.1.8.4"}]))) + (is (valid= #{"Apache-2.0"} (dep->expressions ['software.amazon.ion/ion-java {:deps/manifest :mvn :mvn/version "1.0.0"}])))) + (testing "Valid deps - no licenses in deployed artifacts" + (is (nil? (dep->expressions ['slipset/deps-deploy {:deps/manifest :mvn :mvn/version "0.2.0"}]))) + (is (nil? (dep->expressions ['borkdude/sci.impl.reflector {:deps/manifest :mvn :mvn/version "0.0.1"}])))) (testing "Valid deps - multi license" - (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->ids ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"EPL-1.0" "LGPL-2.1"} (dep->ids ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) - (is (= #{"CDDL-1.1" "GPL-2.0-with-classpath-exception"} (dep->ids ['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) - (is (= #{"Apache-2.0" "LGPL-2.1-or-later"} (dep->ids ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) - (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (dep->ids ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) + (is (valid= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-classic {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (valid= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-core {:deps/manifest :mvn :mvn/version "1.2.7"}]))) + (is (valid= #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"} (dep->expressions 
['javax.mail/mail {:deps/manifest :mvn :mvn/version "1.4.7"}]))) + (is (valid= #{"Apache-2.0" "LGPL-2.1-or-later"} (dep->expressions ['net.java.dev.jna/jna-platform {:deps/manifest :mvn :mvn/version "5.10.0"}]))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (dep->expressions ['org.checkerframework/checker-compat-qual {:deps/manifest :mvn :mvn/version "2.5.5"}])))) (testing "Valid deps - Maven classifiers" -; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->ids ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 - (is (= #{"Apache-2.0"} (dep->ids ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) +; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) ; Blocked on https://github.com/jnr/jffi/issues/141 + (is (valid= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}]))))) -(deftest deps-licenses-test +(comment +(deftest deps-expressions-test (testing "Nil and empty deps" - (is (nil? (deps-licenses nil))) - (is (= {} (deps-licenses {})))) + (is (nil? 
(deps-expressions nil))) + (is (= {} (deps-expressions {})))) (testing "Single deps" - (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-licenses {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}}) 'org.clojure/clojure)))) - (is (= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-licenses {'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}) 'com.github.athos/clj-check)))) ; Note: we use this git dep, as it's used earlier in the build, so we can be sure it's been downloaded before this test is run -; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}})))))) ; Blocked on https://github.com/jnr/jffi/issues/141 - (is (= #{"Apache-2.0"} (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native)))) - (is (= (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi)) - (:lice-comb/licenses (get (deps-licenses {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native))))) + (is (valid= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"}}) 'org.clojure/clojure)))) + (is (valid= #{"EPL-1.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}) 'com.github.athos/clj-check)))) ; Note: we use this git dep, as it's used earlier in the build, so we can be sure it's been downloaded before this test is run +; (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}})))))) ; Blocked on https://github.com/jnr/jffi/issues/141 + (is 
(valid= #{"Apache-2.0"} (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native)))) + (is (= (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi)) + (:lice-comb/licenses (get (deps-expressions {'com.github.jnr/jffi$native {:deps/manifest :mvn :mvn/version "1.3.11"}}) 'com.github.jnr/jffi$native))))) (testing "Multiple deps" (is (= {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3" :lice-comb/licenses #{"EPL-1.0"}} 'org.clojure/spec.alpha {:deps/manifest :mvn :mvn/version "0.2.194" :lice-comb/licenses #{"EPL-1.0"}} @@ -166,7 +168,7 @@ 'clj-xml-validation/clj-xml-validation {:deps/manifest :mvn :mvn/version "1.0.2" :lice-comb/licenses #{"EPL-1.0"}} 'camel-snake-kebab/camel-snake-kebab {:deps/manifest :mvn :mvn/version "0.4.2" :lice-comb/licenses #{"EPL-1.0"}} 'tolitius/xml-in {:deps/manifest :mvn :mvn/version "0.1.1" :lice-comb/licenses #{"EPL-1.0"}}} - (deps-licenses {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"} + (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"} 'org.clojure/spec.alpha {:deps/manifest :mvn :mvn/version "0.2.194"} 'org.clojure/core.specs.alpha {:deps/manifest :mvn :mvn/version "0.2.56"} 'org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.2.0-alpha6"} @@ -191,7 +193,7 @@ 'camel-snake-kebab/camel-snake-kebab {:deps/manifest :mvn :mvn/version "0.4.2" :lice-comb/licenses #{"EPL-1.0"}} 'tolitius/xml-in {:deps/manifest :mvn :mvn/version "0.1.1" :lice-comb/licenses #{"EPL-1.0"}} 'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check") :lice-comb/licenses #{"EPL-1.0"}}} - (deps-licenses {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"} + (deps-expressions {'org.clojure/clojure {:deps/manifest :mvn :mvn/version "1.10.3"} 'org.clojure/spec.alpha 
{:deps/manifest :mvn :mvn/version "0.2.194"} 'org.clojure/core.specs.alpha {:deps/manifest :mvn :mvn/version "0.2.56"} 'org.clojure/data.xml {:deps/manifest :mvn :mvn/version "0.2.0-alpha6"} @@ -201,3 +203,4 @@ 'camel-snake-kebab/camel-snake-kebab {:deps/manifest :mvn :mvn/version "0.4.2"} 'tolitius/xml-in {:deps/manifest :mvn :mvn/version "0.1.1"} 'com.github.athos/clj-check {:deps/manifest :deps :deps/root (str gitlib-dir "/com.github.athos/clj-check")}}))))) +) \ No newline at end of file diff --git a/test/lice_comb/files_test.clj b/test/lice_comb/files_test.clj index f425500..260e22a 100644 --- a/test/lice_comb/files_test.clj +++ b/test/lice_comb/files_test.clj @@ -19,13 +19,17 @@ (ns lice-comb.files-test (:require [clojure.test :refer [deftest testing is use-fixtures]] [clojure.java.io :as io] - [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.files :refer [probable-license-file? probable-license-files file->ids dir->ids zip->ids]])) + [lice-comb.test-boilerplate :refer [fixture valid=]] + [lice-comb.files :refer [init! probable-license-file? probable-license-files file->expressions dir->expressions zip->expressions]])) (use-fixtures :once fixture) (def test-data-path "./test/lice_comb/data") +(deftest init!-tests + (testing "Nil response" + (is (nil? (init!))))) + (deftest probable-license-file?-tests (testing "Nil, empty or blank names" (is (= false (probable-license-file? nil))) @@ -52,84 +56,88 @@ (is (= false (probable-license-file? "pm.xml")))) (testing "Filenames including paths" (is (= true (probable-license-file? "/path/to/a/project/containing/a/pom.xml"))) - (is (= false (probable-license-file? "/a/different/path/to/some/NOTICES"))))) + (is (= false (probable-license-file? "/a/different/path/to/some/NOTICES"))) + (is (= true (probable-license-file? 
"https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom"))))) (deftest probable-license-files-tests (testing "Nil, empty, or blank directory" - (is (nil? (probable-license-files nil))) - (is (thrown? java.io.FileNotFoundException (probable-license-files ""))) - (is (thrown? java.io.FileNotFoundException (probable-license-files " "))) - (is (thrown? java.io.FileNotFoundException (probable-license-files "\n"))) - (is (thrown? java.io.FileNotFoundException (probable-license-files "\t")))) + (is (nil? (probable-license-files nil))) + (is (nil? (probable-license-files ""))) + (is (nil? (probable-license-files " "))) + (is (nil? (probable-license-files "\n"))) + (is (nil? (probable-license-files "\t")))) + (testing "Doesn't exist" + (is (nil? (probable-license-files "THIS_DIRECTORY_DOESNT_EXIST")))) (testing "Not a directory" - (is (thrown? java.nio.file.NotDirectoryException (probable-license-files "deps.edn")))) + (is (nil? (probable-license-files "deps.edn")))) (testing "A real directory" (is (= #{(io/file (str test-data-path "/asf-cat-1.0.12.pom")) (io/file (str test-data-path "/with-parent.pom")) (io/file (str test-data-path "/no-xml-ns.pom")) (io/file (str test-data-path "/simple.pom")) + (io/file (str test-data-path "/complex.pom")) (io/file (str test-data-path "/CC-BY-4.0/LICENSE")) (io/file (str test-data-path "/MPL-2.0/LICENSE"))} (probable-license-files test-data-path))))) -(deftest file->ids-tests +(deftest file->expressions-tests (testing "Nil, empty, or blank filename" - (is (nil? (file->ids nil))) - (is (thrown? java.io.FileNotFoundException (file->ids ""))) - (is (thrown? java.io.FileNotFoundException (file->ids " "))) - (is (thrown? java.io.FileNotFoundException (file->ids "\n"))) - (is (thrown? java.io.FileNotFoundException (file->ids "\t")))) + (is (nil? (file->expressions nil))) + (is (nil? (file->expressions ""))) + (is (nil? (file->expressions " "))) + (is (nil? 
(file->expressions "\n"))) + (is (nil? (file->expressions "\t")))) (testing "Non-existent files" - (is (thrown? java.io.FileNotFoundException (file->ids "this_file_does_not_exist")))) - (testing "License files" -; (is (= #{"Apache-1.0"} (file->ids "https://www.apache.org/licenses/LICENSE-1.0"))) ; Note: this page incorrectly lists itself as Apache 1.1 - (is (= #{"CC-BY-4.0"} (file->ids (str test-data-path "/CC-BY-4.0/LICENSE")))) - (is (= #{"MPL-2.0"} (file->ids (str test-data-path "/MPL-2.0/LICENSE")))) - (is (= #{"Apache-1.1"} (file->ids "https://www.apache.org/licenses/LICENSE-1.1"))) - (is (= #{"Apache-2.0"} (file->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) - (is (= #{"EPL-1.0"} (file->ids "https://www.eclipse.org/org/documents/epl-1.0/EPL-1.0.txt"))) - (is (= #{"EPL-2.0"} (file->ids "https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt"))) - (is (= #{"CDDL-1.0"} (file->ids "https://spdx.org/licenses/CDDL-1.0.txt"))) - (is (= #{"CDDL-1.1"} (file->ids "https://spdx.org/licenses/CDDL-1.1.txt"))) - (is (= #{"GPL-1.0"} (file->ids "https://www.gnu.org/licenses/gpl-1.0.txt"))) - (is (= #{"GPL-2.0"} (file->ids "https://www.gnu.org/licenses/gpl-2.0.txt"))) - (is (= #{"GPL-3.0"} (file->ids "https://www.gnu.org/licenses/gpl-3.0.txt"))) - (is (= #{"LGPL-2.0"} (file->ids "https://www.gnu.org/licenses/lgpl-2.0.txt"))) - (is (= #{"LGPL-2.1"} (file->ids "https://www.gnu.org/licenses/lgpl-2.1.txt"))) - (is (= #{"LGPL-3.0"} (file->ids "https://www.gnu.org/licenses/lgpl-3.0.txt"))) - (is (= #{"AGPL-3.0"} (file->ids "https://www.gnu.org/licenses/agpl-3.0.txt"))) -; (is (= #{"Unlicense"} (file->ids "https://unlicense.org/UNLICENSE"))) ; As of June 2023, unlicense.org no longer resolves - (is (= #{"WTFPL"} (file->ids "http://www.wtfpl.net/txt/copying/")))) + (is (nil? (file->expressions "this_file_does_not_exist")))) + (testing "Handed a directory" + (is (nil? 
(file->expressions ".")))) + (testing "Files on disk" +; (is (= #{"CC-BY-4.0"} (file->expressions (str test-data-path "/CC-BY-4.0/LICENSE")))) ; Failing due to https://github.com/spdx/license-list-XML/issues/1960 + (is (valid= #{"MPL-2.0"} (file->expressions (str test-data-path "/MPL-2.0/LICENSE"))))) + (testing "URLs" + (is (valid= #{"Apache-2.0"} (file->expressions "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (valid= #{"Apache-2.0"} (file->expressions (io/as-url "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) + (testing "InputStreams" + (is (thrown? clojure.lang.ExceptionInfo (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->expressions is)))) + (is (valid= #{"Apache-2.0"} (with-open [is (io/input-stream "https://www.apache.org/licenses/LICENSE-2.0.txt")] (file->expressions is "LICENSE_2.0.txt"))))) (testing "POM files" - (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/simple.pom")))) - (is (= #{"BSD-3-Clause"} (file->ids (str test-data-path "/no-xml-ns.pom")))) - (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/asf-cat-1.0.12.pom")))) - (is (= #{"Apache-2.0"} (file->ids (str test-data-path "/with-parent.pom")))))) - -(deftest dir->ids-tests - (testing "Nil, empty, or blank directory name" - (is (nil? (dir->ids nil))) - (is (thrown? java.io.FileNotFoundException (dir->ids ""))) - (is (thrown? java.io.FileNotFoundException (dir->ids " "))) - (is (thrown? java.io.FileNotFoundException (dir->ids "\n"))) - (is (thrown? java.io.FileNotFoundException (dir->ids "\t")))) - (testing "Non-existent or invalid directory" - (is (thrown? java.io.FileNotFoundException (dir->ids "this_directory_does_not_exist"))) - (is (thrown? 
java.nio.file.NotDirectoryException (dir->ids "deps.edn")))) - (testing "Valid directory" - (is (= #{"Apache-2.0" "BSD-3-Clause" "MPL-2.0" "CC-BY-4.0"} (dir->ids "."))))) + (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/simple.pom")))) + (is (valid= #{"BSD-3-Clause"} (file->expressions (str test-data-path "/no-xml-ns.pom")))) + (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/asf-cat-1.0.12.pom")))) + (is (valid= #{"Apache-2.0"} (file->expressions (str test-data-path "/with-parent.pom")))))) -(deftest zip->ids-tests +(deftest zip->expressions-tests (testing "Nil, empty, or blank zip file name" - (is (nil? (zip->ids nil))) - (is (thrown? java.io.FileNotFoundException (zip->ids ""))) ; Note the hodgepodge of different thrown exception types here - java.util.zip is a mess! - (is (thrown? java.nio.file.NoSuchFileException (zip->ids " "))) - (is (thrown? java.nio.file.NoSuchFileException (zip->ids "\n"))) - (is (thrown? java.nio.file.NoSuchFileException (zip->ids "\t")))) + (is (nil? (zip->expressions nil))) + (is (nil? (zip->expressions ""))) + (is (nil? (zip->expressions " "))) + (is (nil? (zip->expressions "\n"))) + (is (nil? (zip->expressions "\t")))) (testing "Non-existent zip file" - (is (thrown? java.nio.file.NoSuchFileException (zip->ids "this_zip_file_does_not_exist")))) + (is (nil? (zip->expressions "this_zip_file_does_not_exist")))) + (testing "Handed a directory" + (is (nil? (zip->expressions ".")))) (testing "Invalid zip file" - (is (thrown? java.util.zip.ZipException (zip->ids (str test-data-path "/bad.zip"))))) + (is (thrown? 
java.util.zip.ZipException (zip->expressions (str test-data-path "/bad.zip"))))) (testing "Valid zip file" - (is (= #{"Apache-2.0"} (zip->ids (str test-data-path "/good.zip")))))) + (is (valid= #{"Apache-2.0"} (zip->expressions (str test-data-path "/good.zip")))) + (is (valid= #{"AGPL-3.0-or-later"} (zip->expressions (str test-data-path "/pom-in-a-zip.zip")))))) +(deftest dir->expressions-tests + (testing "Nil, empty, or blank directory name" + (is (nil? (dir->expressions nil))) + (is (nil? (dir->expressions ""))) + (is (nil? (dir->expressions " "))) + (is (nil? (dir->expressions "\n"))) + (is (nil? (dir->expressions "\t")))) + (testing "Non-existent or invalid directory" + (is (nil? (dir->expressions "this_directory_does_not_exist"))) + (is (nil? (dir->expressions "deps.edn")))) + (testing "Valid directory" + (is (valid= ;#{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0"} ; CC-BY-4.0 failing due to https://github.com/spdx/license-list-XML/issues/1960 + #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0"} + (dir->expressions ".")))) + (testing "Valid directory - include ZIP compressed files" + (is (valid= ;#{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "CC-BY-4.0" "AGPL-3.0-or-later"} ; CC-BY-4.0 failing due to https://github.com/spdx/license-list-XML/issues/1960 + #{"GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Apache-2.0" "Unlicense AND CC0-1.0" "MIT" "MPL-2.0" "AGPL-3.0-or-later"} + (dir->expressions "." {:include-zips? 
true}))))) diff --git a/test/lice_comb/impl/expressions_info_test.clj b/test/lice_comb/impl/expressions_info_test.clj new file mode 100644 index 0000000..87d01f6 --- /dev/null +++ b/test/lice_comb/impl/expressions_info_test.clj @@ -0,0 +1,88 @@ +; +; Copyright © 2023 Peter Monks +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; SPDX-License-Identifier: Apache-2.0 +; + +(ns lice-comb.impl.expressions-info-test + (:require [clojure.test :refer [deftest testing is use-fixtures]] + [lice-comb.test-boilerplate :refer [fixture]] + [lice-comb.impl.expressions-info :refer [prepend-source merge-maps]])) + +(use-fixtures :once fixture) + +(def md1 { + "Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")})}) + +(def md2 { + "Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")})}) + +(def md3 { + "Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "GPL-3.0-or-later" '({:type :concluded 
:confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})}) + +(def mds (list md1 md2 md3)) + +(deftest prepend-source-tests + (testing "nil/empty/blank" + (is (nil? (prepend-source nil nil))) + (is (= {} (prepend-source nil {}))) + (is (nil? (prepend-source "" nil))) + (is (= {} (prepend-source "" {})))) + (testing "non-nil metadata that isn't lice-comb specific" + (is (= {:a "a"} (prepend-source "foo" {:a "a"})))) + (testing "non-nil metadata that is lice-comb specific" + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("pom.xml" "Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "MIT")})} + (prepend-source "pom.xml" md1))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("library.jar" "pom.xml" "Apache Software Licence v2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("library.jar" "pom.xml" "MIT")})} + (prepend-source "library.jar" (prepend-source "pom.xml" md1)))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "Apache style license")} + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("pom.xml" "apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("pom.xml" "Apache-2.0")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("pom.xml" "GNU General Public License 3.0 or later")})} + (prepend-source "pom.xml" md3))))) + +(deftest merge-maps-tests + (testing "nil/empty" + (is (nil? (merge-maps))) + (is (nil? 
(merge-maps nil)))) + (testing "identity" + (is (= md1 (merge-maps md1)))) + (testing "merges" + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")} + {:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")})} + (merge-maps md1 md2))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} ; Note de-duping + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})} + (merge-maps md2 md3))) + (is (= {"Apache-2.0" '({:type :concluded :confidence :medium :strategy :regex-matching :source ("Apache Software Licence v2.0")} + {:type :concluded :confidence :low :strategy :regex-matching :source ("Apache style license")} ; Note de-duping + {:type :concluded :confidence :medium :strategy :spdx-listed-identifier-case-insensitive-match :source ("apache-2.0")} + {:type :declared :strategy :spdx-listed-identifier-exact-match :source ("Apache-2.0")}) + "MIT" '({:type :concluded :confidence :high :strategy :spdx-listed-identifier-exact-match :source ("MIT")}) + "BSD-4-Clause" '({:type :concluded :confidence :low :strategy :regex-matching :source ("BSD")}) + "GPL-3.0-or-later" '({:type :concluded :confidence :low :strategy :regex-matching :source ("GNU General Public License 3.0 or later")})} + (apply merge-maps mds))))) 
diff --git a/test/lice_comb/impl/matching_test.clj b/test/lice_comb/impl/matching_test.clj
new file mode 100644
index 0000000..59a7d0a
--- /dev/null
+++ b/test/lice_comb/impl/matching_test.clj
@@ -0,0 +1,69 @@
+;
+; Copyright © 2023 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.impl.matching-test
+  (:require [clojure.test               :refer [deftest testing is use-fixtures]]
+            [lice-comb.test-boilerplate :refer [fixture]]
+            [lice-comb.impl.matching    :refer [split-on-operators]]))
+
+(use-fixtures :once fixture)
+
+(deftest split-on-operators-tests   ; Pins how split-on-operators breaks a license name into strings and :and / :or / :with keywords
+  (testing "nil/empty/blank"   ; nil, empty and whitespace-only input are all expected to yield nil
+    (is (nil? (split-on-operators nil)))
+    (is (nil? (split-on-operators "")))
+    (is (nil? (split-on-operators " "))))
+  (testing "Simple non-splits"   ; operator words glued to a neighbouring word must not split; the input comes back as a one-element list
+    (is (= '("foo") (split-on-operators "foo")))
+    (is (= '("Apache") (split-on-operators "Apache")))
+    (is (= '("Apache MIT BSD") (split-on-operators "Apache MIT BSD")))
+    (is (= '("ApacheandMIT") (split-on-operators "ApacheandMIT")))
+    (is (= '("Apacheand MIT") (split-on-operators "Apacheand MIT")))
+    (is (= '("Apache andMIT") (split-on-operators "Apache andMIT")))
+    (is (= '("ApacheorMIT") (split-on-operators "ApacheorMIT")))
+    (is (= '("Apacheor MIT") (split-on-operators "Apacheor MIT")))
+    (is (= '("Apache orMIT") (split-on-operators "Apache orMIT")))
+    (is (= '("ApachewithMIT") (split-on-operators "ApachewithMIT")))
+    (is (= '("Apachewith MIT") (split-on-operators "Apachewith MIT")))
+    (is (= '("Apache withMIT") (split-on-operators "Apache withMIT")))
+    (is (= '("Apachew/MIT") (split-on-operators "Apachew/MIT")))
+    (is (= '("Apachew/ MIT") (split-on-operators "Apachew/ MIT"))))
+  (testing "Simple and splits"   ; "and" in any letter case, or "&" with or without surrounding spaces, is expected to become :and
+    (is (= '("Apache" :and "MIT") (split-on-operators "Apache and MIT")))
+    (is (= '("Apache" :and "MIT") (split-on-operators "Apache AND MIT")))
+    (is (= '("Apache" :and "MIT") (split-on-operators "Apache aNd MIT")))
+    (is (= '("Apache" :and "MIT") (split-on-operators "Apache & MIT")))
+    (is (= '("Apache" :and "MIT") (split-on-operators "Apache &MIT")))
+    (is (= '("Apache" :and "MIT") (split-on-operators "Apache&MIT"))))
+  (testing "Simple or splits"   ; "or" in any letter case is expected to become :or
+    (is (= '("Apache" :or "MIT") (split-on-operators "Apache or MIT")))
+    (is (= '("Apache" :or "MIT") (split-on-operators "Apache OR MIT")))
+    (is (= '("Apache" :or "MIT") (split-on-operators "Apache oR MIT"))))
+  (testing "Simple with splits"   ; "with" in any letter case, or "w/" (space after it optional), is expected to become :with
+    (is (= '("Apache" :with "MIT") (split-on-operators "Apache with MIT")))
+    (is (= '("Apache" :with "MIT") (split-on-operators "Apache WITH MIT")))
+    (is (= '("Apache" :with "MIT") (split-on-operators "Apache wItH MIT")))
+    (is (= '("Apache" :with "MIT") (split-on-operators "Apache w/ MIT")))
+    (is (= '("Apache" :with "MIT") (split-on-operators "Apache w/MIT"))))
+  (testing "Complex non-splits"   ; operator words (or "&") that are part of a license name or phrase must be left intact
+    (is (= '("COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0") (split-on-operators "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0")))
+    (is (= '("Copyright & all rights reserved Lean Pixel") (split-on-operators "Copyright & all rights reserved Lean Pixel")))
+    (is (= '("GNU General Public License v3.0 or later") (split-on-operators "GNU General Public License v3.0 or later")))
+    (is (= '("GNU General Public License, Version 3 (or later)") (split-on-operators "GNU General Public License, Version 3 (or later)")))
+    (is (= '("GNU Lesser General Public License, version 2.1 or newer") (split-on-operators "GNU Lesser General Public License, version 2.1 or newer")))
+    (is (= '("LGPL-3.0-or-later") (split-on-operators "LGPL-3.0-or-later")))))
diff --git a/test/lice_comb/impl/regex_matching_test.clj b/test/lice_comb/impl/regex_matching_test.clj
new file mode 100644
index 0000000..4bffced
--- /dev/null
+++ b/test/lice_comb/impl/regex_matching_test.clj
@@ -0,0 +1,238 @@
+;
+; Copyright © 2023 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.impl.regex-matching-test
+  (:require [clojure.test                  :refer [deftest testing is use-fixtures]]
+            [clojure.set                   :as set]
+            [rencg.api                     :as rencg]
+            [lice-comb.impl.utils          :as lcu]
+            [lice-comb.test-boilerplate    :refer [fixture testing-with-data]]
+            [lice-comb.impl.regex-matching :refer [init! version-re only-or-later-re agpl-re lgpl-re gpl-re gnu-re matches]]))
+
+(use-fixtures :once fixture)
+
+(deftest init!-tests   ; init! is expected to return nil
+  (testing "Nil response"
+    (is (nil? (init!)))))
+
+(def agpl-licenses-and-ids {   ; AGPL family: license name -> expected list of SPDX ids
+  "AGPL" '("AGPL-3.0-only")
+  "AGPL v3" '("AGPL-3.0-only")
+  "AGPLv3" '("AGPL-3.0-only")
+  "Affero GNU Public License v3" '("AGPL-3.0-only")
+  "Affero General Public License" '("AGPL-3.0-only")
+  "Affero General Public License v3 or later (at your option)" '("AGPL-3.0-or-later")
+  "Affero General Public License version 3 or lator" '("AGPL-3.0-or-later")
+  "Affero General Public License," '("AGPL-3.0-only")
+  "GNU AFFERO GENERAL PUBLIC LICENSE Version 3" '("AGPL-3.0-only")
+  "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3" '("AGPL-3.0-only")
+  "GNU AGPL-V3 or later" '("AGPL-3.0-or-later")
+  "GNU AGPLv3" '("AGPL-3.0-only")
+  "GNU Affero General Public Licence" '("AGPL-3.0-only")
+  "GNU Affero General Public License (AGPL)" '("AGPL-3.0-only")
+  "GNU Affero General Public License (AGPL) version 3.0" '("AGPL-3.0-only")
+  "GNU Affero General Public License 3.0 (AGPL-3.0)" '("AGPL-3.0-only")
+  "GNU Affero General Public License Version 3" '("AGPL-3.0-only")
+  "GNU Affero General Public License v3" '("AGPL-3.0-only")
+  "GNU Affero General Public License v3.0" '("AGPL-3.0-only")
+  "GNU Affero General Public License v3.0 only" '("AGPL-3.0-only")
+  "GNU Affero General Public License, version 3" '("AGPL-3.0-only")
+  })
+
+(def lgpl-licenses-and-ids {   ; LGPL family: license name -> expected list of SPDX ids
+  "GNU General Lesser Public License (LGPL) version 3.0" '("LGPL-3.0-only")
+  "GNU LESSER GENERAL PUBLIC LICENSE" '("LGPL-3.0-only")
+  "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1" '("LGPL-2.1-only")
+  "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999" '("LGPL-2.1-only")
+  "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0" '("LGPL-3.0-only")
+  "GNU LGPL 3" '("LGPL-3.0-only")
+  "GNU LGPL v2.1" '("LGPL-2.1-only")
+  "GNU LGPL v3" '("LGPL-3.0-only")
+  "GNU LGPL version 3" '("LGPL-3.0-only")
+  "GNU LGPL-3.0" '("LGPL-3.0-only")
+  "GNU LGPLv3 " '("LGPL-3.0-only")
+  "GNU Lesser GPL" '("LGPL-3.0-only")
+  "GNU Lesser General Public Licence" '("LGPL-3.0-only")
+  "GNU Lesser General Public Licence 3.0" '("LGPL-3.0-only")
+  "GNU Lesser General Public License" '("LGPL-3.0-only")
+  "GNU Lesser General Public License (LGPL)" '("LGPL-3.0-only")
+  "GNU Lesser General Public License (LGPL) Version 3" '("LGPL-3.0-only")
+  "GNU Lesser General Public License - v 3" '("LGPL-3.0-only")
+  "GNU Lesser General Public License - v 3.0" '("LGPL-3.0-only")
+  "GNU Lesser General Public License - v3" '("LGPL-3.0-only")
+  "GNU Lesser General Public License 2.1" '("LGPL-2.1-only")
+  "GNU Lesser General Public License v2.1" '("LGPL-2.1-only")
+  "GNU Lesser General Public License v3.0" '("LGPL-3.0-only")
+  "GNU Lesser General Public License version 3" '("LGPL-3.0-only")
+  "GNU Lesser General Public License version 3.0" '("LGPL-3.0-only")
+  "GNU Lesser General Public License, Version 2.1" '("LGPL-2.1-only")
+  "GNU Lesser General Public License, Version 3" '("LGPL-3.0-only")
+  "GNU Lesser General Public License, Version 3 or later" '("LGPL-3.0-or-later")
+  "GNU Lesser General Public License, v. 3 or later" '("LGPL-3.0-or-later")
+  "GNU Lesser General Public License, version 2.1 or newer" '("LGPL-2.1-or-later")
+  "GNU Lesser General Public License, version 3 or later" '("LGPL-3.0-or-later")
+  "GNU Lesser General Public License, version 3.0 or (at your option) any later version" '("LGPL-3.0-or-later")
+  "GNU Lesser General Pulic License v2.1" '("LGPL-2.1-only")
+  "GNU Lesser Genereal Public License" '("LGPL-3.0-only")
+  "GNU Lesser Public License" '("LGPL-3.0-only")
+  "GNU Library General Public License" '("LGPL-3.0-only")
+  "GNU Library or Lesser General Public License (LGPL)" '("LGPL-3.0-only")
+  "GNU Library or Lesser General Public License (LGPL) 2.1" '("LGPL-2.1-only")
+  "GNU Library or Lesser General Public License (LGPL) V2.1" '("LGPL-2.1-only")
+  "Gnu Lesser Public License" '("LGPL-3.0-only")
+  "L GPL 3" '("LGPL-3.0-only")
+  "LGPL" '("LGPL-3.0-only")
+  "LGPL 2.1" '("LGPL-2.1-only")
+  "LGPL 3.0" '("LGPL-3.0-only")
+  "LGPL 3.0 (GNU Lesser General Public License)" '("LGPL-3.0-only")
+  "LGPL License" '("LGPL-3.0-only")
+  "LGPL Open Source license" '("LGPL-3.0-only")
+  "LGPL v3" '("LGPL-3.0-only")
+  "LGPLv2.1" '("LGPL-2.1-only")
+  "LGPLv3" '("LGPL-3.0-only")
+  "LGPLv3+" '("LGPL-3.0-or-later")
+  "Lesser GPL" '("LGPL-3.0-only")
+  "Lesser General Public License" '("LGPL-3.0-only")
+  "Lesser General Public License (LGPL)" '("LGPL-3.0-only")
+  "Licensed under GNU Lesser General Public License Version 3 or later (the " '("LGPL-3.0-or-later")
+  "lgpl_v2_1" '("LGPL-2.1-only")
+  })
+
+(def gpl-licenses-and-ids {   ; GPL family: license name -> expected list of SPDX ids
+  " GNU GENERAL PUBLIC LICENSE Version 3" '("GPL-3.0-only")
+  "GNU" '("GPL-3.0-only")
+  "GNU GENERAL PUBLIC LICENSE" '("GPL-3.0-only")
+  "GNU GENERAL PUBLIC LICENSE Version 2, June 1991" '("GPL-2.0-only")
+  "GNU GPL" '("GPL-3.0-only")
+  "GNU GPL 3" '("GPL-3.0-only")
+  "GNU GPL V2+" '("GPL-2.0-or-later")
+  "GNU GPL v 3.0" '("GPL-3.0-only")
+  "GNU GPL v. 3" '("GPL-3.0-only")
+  "GNU GPL v3" '("GPL-3.0-only")
+  "GNU GPL v3+" '("GPL-3.0-or-later")
+  "GNU GPL v3.0" '("GPL-3.0-only")
+  "GNU GPL, version 3, 29 June 2007" '("GPL-3.0-only")
+  "GNU GPLv3+" '("GPL-3.0-or-later")
+  "GNU General Public License" '("GPL-3.0-only")
+  "GNU General Public License (GPL)" '("GPL-3.0-only")
+  "GNU General Public License 2" '("GPL-2.0-only")
+  "GNU General Public License V3" '("GPL-3.0-only")
+  "GNU General Public License Version 3" '("GPL-3.0-only")
+  "GNU General Public License v2.0" '("GPL-2.0-only")
+  "GNU General Public License v3" '("GPL-3.0-only")
+  "GNU General Public License v3.0" '("GPL-3.0-only")
+  "GNU General Public License v3.0 or later" '("GPL-3.0-or-later")
+  "GNU General Public License, Version 2" '("GPL-2.0-only")
+  "GNU General Public License, Version 3" '("GPL-3.0-only")
+  "GNU General Public License, Version 3 (or later)" '("GPL-3.0-or-later")
+  "GNU General Public License, version 2" '("GPL-2.0-only")
+  "GNU General Public License, version 2 (GPL2)" '("GPL-2.0-only")
+  "GNU General Public License, version 3" '("GPL-3.0-only")
+  "GNU General Public License, version 3 (GPLv3)" '("GPL-3.0-only")
+  "GNU General Public License,version 2.0 or (at your option) any later version" '("GPL-2.0-or-later")
+  "GNU Public License" '("GPL-3.0-only")
+  "GNU Public License V. 3.0" '("GPL-3.0-only")
+  "GNU Public License V3" '("GPL-3.0-only")
+  "GNU Public License v2" '("GPL-2.0-only")
+  "GNU Public License, Version 2" '("GPL-2.0-only")
+  "GNU Public License, Version 2.0" '("GPL-2.0-only")
+  "GNU Public License, v2" '("GPL-2.0-only")
+  "GNU public licence V3.0" '("GPL-3.0-only")
+  "GNUv3" '("GPL-3.0-only")
+  "GPL" '("GPL-3.0-only")
+  "GPL 2.0+" '("GPL-2.0-or-later")
+  "GPL 3" '("GPL-3.0-only")
+  "GPL 3.0" '("GPL-3.0-only")
+  "GPL V3" '("GPL-3.0-only")
+  "GPL V3+" '("GPL-3.0-or-later")
+  "GPL v2" '("GPL-2.0-only")
+  "GPL v2+" '("GPL-2.0-or-later")
+  "GPL v3" '("GPL-3.0-only")
+  "GPL version 3" '("GPL-3.0-only")
+  "GPL-3" '("GPL-3.0-only")
+  "GPL3" '("GPL-3.0-only")
+  "GPLv2" '("GPL-2.0-only")
+  "GPLv3" '("GPL-3.0-only")
+  "General Public License 3" '("GPL-3.0-only")
+  "General Public License v3.0" '("GPL-3.0-only")
+  "The GNU General Public License" '("GPL-3.0-only")
+  "The GNU General Public License v3.0" '("GPL-3.0-only")
+  "The GNU General Public License, Version 2 " '("GPL-2.0-only")
+  })
+
+(def cc-by-licenses-and-ids {   ; Creative Commons family: license name -> expected list of SPDX ids
+  "Attribution 3.0 Unported" '("CC-BY-3.0")
+  "Attribution 4.0 International" '("CC-BY-4.0")
+  "Attribution-NonCommercial-NoDerivs 3.0 Unported" '("CC-BY-NC-ND-3.0")
+  "CC Attribution 4.0 International with exception for binary distribution" '("CC-BY-4.0")
+  "CC BY-NC" '("CC-BY-NC-4.0")
+  "Creative Commons 3.0" '("CC-BY-3.0")
+  "Creative Commons Attribution 2.5 License" '("CC-BY-2.5")
+  "Creative Commons Attribution License" '("CC-BY-4.0")
+  "Creative Commons Attribution Share Alike 4.0 International" '("CC-BY-SA-4.0")
+  "Creative Commons Attribution-NonCommercial 3.0" '("CC-BY-NC-3.0")
+  "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license" '("CC-BY-SA-3.0")
+  "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)" '("CC-BY-SA-3.0")
+  "Creative Commons Attribution-ShareAlike 3.0 Unported License" '("CC-BY-SA-3.0")
+  "Creative Commons Attribution-ShareAlike 3.0 Unported" '("CC-BY-SA-3.0")
+  "Creative Commons Attribution-ShareAlike 3.0" '("CC-BY-SA-3.0")
+  "Creative Commons Legal Code Attribution 3.0 Unported" '("CC-BY-3.0")
+  })
+
+(def gnu-licenses-and-ids (merge agpl-licenses-and-ids lgpl-licenses-and-ids gpl-licenses-and-ids))   ; all three GNU families in one map
+
+(def agpl-licenses (set (keys agpl-licenses-and-ids)))   ; just the names, per family
+(def lgpl-licenses (set (keys lgpl-licenses-and-ids)))
+(def gpl-licenses (set (keys gpl-licenses-and-ids)))
+
+(def gnu-licenses (set/union agpl-licenses lgpl-licenses gpl-licenses))
+
+; For testing individual GNU family regex components in isolation: each is (?i)\b + the family regex in a group + version-re + only-or-later-re
+(def agpl-only-re (lcu/re-concat #"(?i)\b" "(" agpl-re ")" version-re only-or-later-re))
+(def lgpl-only-re (lcu/re-concat #"(?i)\b" "(" lgpl-re ")" version-re only-or-later-re))
+(def gpl-only-re (lcu/re-concat #"(?i)\b" "(" gpl-re ")" version-re only-or-later-re))
+
+(def not-nil? (complement nil?))   ; true for any value other than nil
+
+; Runs re against s via rencg/re-find-ncg; a falsy result yields nil, otherwise the input is added to the result (as :input) to make troubleshooting test failures easier
+(defn test-regex
+  [re s]
+  (when-let [result (rencg/re-find-ncg re s)]
+    (assoc result :input s)))
+
+(deftest gnu-regex-components-tests   ; each family's isolated regex must match every name of its own family and none of the other two families
+  (testing "GNU Family Regexes - correct matching and non-matching - AGPL component"
+    (is (every? not-nil? (map (partial test-regex agpl-only-re) agpl-licenses)))
+    (is (every? nil? (map (partial test-regex agpl-only-re) lgpl-licenses)))
+    (is (every? nil? (map (partial test-regex agpl-only-re) gpl-licenses))))
+  (testing "GNU Family Regexes - correct matching and non-matching - LGPL component"
+    (is (every? nil? (map (partial test-regex lgpl-only-re) agpl-licenses)))
+    (is (every? not-nil? (map (partial test-regex lgpl-only-re) lgpl-licenses)))
+    (is (every? nil? (map (partial test-regex lgpl-only-re) gpl-licenses))))
+  (testing "GNU Family Regexes - correct matching and non-matching - GPL component"
+    (is (every? nil? (map (partial test-regex gpl-only-re) agpl-licenses)))
+    (is (every? nil? (map (partial test-regex gpl-only-re) lgpl-licenses)))
+    (is (every? not-nil? (map (partial test-regex gpl-only-re) gpl-licenses)))))
+
+(deftest combined-regex-components-tests   ; the combined gnu-re must match every name from all three GNU families
+  (testing "GNU Family Regexes - correct matching - combined GNU family regex"
+    (is (every? not-nil? (map (partial test-regex gnu-re) gnu-licenses)))))
+
+(deftest match-regexes-tests   ; NOTE(review): assumes matches returns a sequence of maps keyed by SPDX id - confirm in lice-comb.impl.regex-matching
+  (testing-with-data "GNU Family Regexes - correct identifier results" #(mapcat keys (matches %)) gnu-licenses-and-ids)
+  (testing-with-data "CC Family Regexes - correct identifier results" #(mapcat keys (matches %)) cc-by-licenses-and-ids))
diff --git a/test/lice_comb/impl/utils_test.clj b/test/lice_comb/impl/utils_test.clj
new file mode 100644
index 0000000..29927c2
--- /dev/null
+++ b/test/lice_comb/impl/utils_test.clj
@@ -0,0 +1,118 @@
+;
+; Copyright © 2023 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.impl.utils-test
+  (:require [clojure.test               :refer [deftest testing is use-fixtures]]
+            [clojure.java.io            :as io]
+            [lice-comb.test-boilerplate :refer [fixture]]
+            [lice-comb.impl.utils       :refer [simplify-uri filepath filename]]))
+
+(use-fixtures :once fixture)
+
+(def simplified-apache2-uri "http://apache.org/licenses/license-2.0")   ; expected simplified form of every Apache-2.0 URI variant below
+
+(deftest simplify-uri-tests   ; expectations: blank -> nil; everything is trimmed and lower-cased; http(s) URIs also lose "https", "www." and a file extension
+  (testing "Nil, empty or blank values"
+    (is (nil? (simplify-uri nil)))
+    (is (nil? (simplify-uri "")))
+    (is (nil? (simplify-uri " ")))
+    (is (nil? (simplify-uri "\n")))
+    (is (nil? (simplify-uri "\t"))))
+  (testing "Values that are not uris"
+    (is (= "foo" (simplify-uri "FOO")))
+    (is (= "foo" (simplify-uri "foo")))
+    (is (= "foobar" (simplify-uri " FoObAr "))))
+  (testing "Values that are non-http(s) uris"
+    (is (= "ftp://user@host/foo/bar.txt" (simplify-uri "ftp://user@host/foo/bar.txt")))
+    (is (= "ftp://user@host/foo/bar.txt" (simplify-uri "FTP://USER@HOST/FOO/BAR.TXT")))
+    (is (= "mailto:someone@example.com?subject=this%20is%20the%20subject&cc=someone_else@example.com&body=this%20is%20the%20body"
+           (simplify-uri "mailto:someone@example.com?subject=This%20is%20the%20subject&cc=someone_else@example.com&body=This%20is%20the%20body"))))
+  (testing "Valid uris that don't get simplified"
+    (is (= simplified-apache2-uri (simplify-uri simplified-apache2-uri)))
+    (is (= "http://creativecommons.org/licenses/by-sa/4.0/legalcode" (simplify-uri "http://creativecommons.org/licenses/by-sa/4.0/legalcode"))))
+  (testing "Valid uris that get simplified"
+    (is (= simplified-apache2-uri (simplify-uri "http://www.apache.org/licenses/LICENSE-2.0")))
+    (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/LICENSE-2.0")))
+    (is (= simplified-apache2-uri (simplify-uri "http://www.apache.org/licenses/LICENSE-2.0.html")))
+    (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/LICENSE-2.0.html")))
+    (is (= simplified-apache2-uri (simplify-uri "http://www.apache.org/licenses/LICENSE-2.0.txt")))   ; http + .txt (this line used to repeat the http + .html case)
+    (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/LICENSE-2.0.txt")))
+    (is (= simplified-apache2-uri (simplify-uri "https://www.apache.org/licenses/license-2.0.txt")))
+    (is (= simplified-apache2-uri (simplify-uri "http://apache.org/licenses/LICENSE-2.0.pdf")))
+    (is (= simplified-apache2-uri (simplify-uri " http://www.apache.org/licenses/LICENSE-2.0.html ")))
+    (is (= "http://gnu.org/licenses/agpl" (simplify-uri "https://www.gnu.org/licenses/agpl.txt")))
+    (is (= "http://gnu.org/software/classpath/license" (simplify-uri "https://www.gnu.org/software/classpath/license.html")))
+    (is (= "http://raw.githubusercontent.com/pmonks/lice-comb/main/license" (simplify-uri "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE")))
+    (is (= "http://github.com/pmonks/lice-comb/blob/main/license" (simplify-uri "https://github.com/pmonks/lice-comb/blob/main/LICENSE")))))
+
+(deftest filepath-tests   ; expectations: nil -> nil; strings are trimmed; File, ZipEntry, URL and URI yield their full path/location as a string
+  (testing "Nil, empty or blank values"
+    (is (nil? (filepath nil)))
+    (is (= "" (filepath "")))
+    (is (= "" (filepath " ")))
+    (is (= "" (filepath "\n")))
+    (is (= "" (filepath "\t"))))
+  (testing "Files"
+    (is (= "/file.txt" (filepath (io/file "/file.txt"))))
+    (is (= "/some/path/or/other/file.txt" (filepath (io/file "/some/path/or/other/file.txt")))))
+  (testing "Strings"
+    (is (= "file.txt" (filepath "file.txt")))
+    (is (= "/some/path/or/other/file.txt" (filepath "/some/path/or/other/file.txt")))
+    (is (= "https://www.google.com/" (filepath "https://www.google.com/")))
+    (is (= "https://www.google.com/" (filepath " https://www.google.com/ ")))
+    (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))
+  (testing "ZipEntries"
+    (is (= "file.txt" (filepath (java.util.zip.ZipEntry. "file.txt"))))
+    (is (= "/some/path/or/other/file.txt" (filepath (java.util.zip.ZipEntry. "/some/path/or/other/file.txt")))))
+  (testing "URLs"
+    (is (= "https://www.google.com/" (filepath (io/as-url "https://www.google.com/"))))
+    (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath (io/as-url "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))))
+  (testing "URIs"
+    (is (= "https://www.google.com/" (filepath (java.net.URI. "https://www.google.com/"))))
+    (is (= "https://github.com/pmonks/lice-comb/blob/main/deps.edn" (filepath (java.net.URI. "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))))
+  (testing "InputStream"
+    (with-open [stream (io/input-stream "deps.edn")] (is (thrown? clojure.lang.ExceptionInfo (filepath stream))))))   ; with-open closes the stream; previously it was leaked
+
+(deftest filename-tests   ; expectations: nil -> nil; otherwise only the last path segment, which is "" for a bare host or a trailing slash
+  (testing "Nil, empty or blank values"
+    (is (nil? (filename nil)))
+    (is (= "" (filename "")))
+    (is (= "" (filename " ")))
+    (is (= "" (filename "\n")))
+    (is (= "" (filename "\t"))))
+  (testing "Files"
+    (is (= "file.txt" (filename (io/file "file.txt"))))
+    (is (= "file.txt" (filename (io/file "/some/path/or/other/file.txt")))))
+  (testing "Strings"
+    (is (= "file.txt" (filename "file.txt")))
+    (is (= "file.txt" (filename "/some/path/or/other/file.txt")))
+    (is (= "" (filename "https://www.google.com")))
+    (is (= "" (filename "https://www.google.com/")))
+    (is (= "deps.edn" (filename "https://github.com/pmonks/lice-comb/blob/main/deps.edn"))))
+  (testing "ZipEntries"
+    (is (= "file.txt" (filename (java.util.zip.ZipEntry. "file.txt"))))
+    (is (= "file.txt" (filename (java.util.zip.ZipEntry. "/some/path/or/other/file.txt")))))
+  (testing "URLs"
+    (is (= "" (filename (io/as-url "https://www.google.com/"))))
+    (is (= "deps.edn" (filename (io/as-url "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))))
+  (testing "URIs"
+    (is (= "" (filename (java.net.URI. "https://www.google.com/"))))
+    (is (= "deps.edn" (filename (java.net.URI. "https://github.com/pmonks/lice-comb/blob/main/deps.edn")))))
+  (testing "InputStream"
+    (with-open [stream (io/input-stream "deps.edn")] (is (thrown? clojure.lang.ExceptionInfo (filename stream))))))   ; with-open closes the stream; previously it was leaked
+
diff --git a/test/lice_comb/lein_test.clj b/test/lice_comb/lein_test.clj
new file mode 100644
index 0000000..86fb625
--- /dev/null
+++ b/test/lice_comb/lein_test.clj
@@ -0,0 +1,70 @@
+;
+; Copyright © 2021 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.lein-test
+  (:require [clojure.test               :refer [deftest testing is use-fixtures]]
+            [lice-comb.test-boilerplate :refer [fixture valid=]]   ; NOTE(review): valid= is referred but never used in this file
+            [lice-comb.impl.spdx        :as lcis]
+            [lice-comb.lein             :refer [dep->expressions deps->expressions]]))
+
+(use-fixtures :once fixture)
+
+; We keep these short, as this is basically just a thin wrapper around lice-comb.deps
+(deftest dep->ids-tests   ; NOTE(review): despite the name, this exercises dep->expressions with Leiningen-style [artifact "version"] vectors
+  (testing "Nil deps"
+    (is (nil? (dep->expressions nil))))
+  (testing "Invalid deps"
+    (is (nil? (dep->expressions ['com.github.pmonks/invalid-project "0.0.1"])))   ; Invalid GA
+    (is (nil? (dep->expressions ['org.clojure/clojure "1.0.0-SNAPSHOT"]))))       ; Invalid V
+  (testing "Valid deps - single license"
+    (is (= #{"Apache-2.0"} (dep->expressions ['com.github.pmonks/asf-cat "1.0.12"])))
+    (is (= #{"EPL-1.0"} (dep->expressions ['org.clojure/clojure "1.10.3"])))
+    (is (= #{"BSD-4-Clause"} (dep->expressions ['org.ow2.asm/asm "5.2"])))
+    (is (= #{(lcis/public-domain)} (dep->expressions ['aopalliance/aopalliance "1.0"])))
+    (is (= #{"CDDL-1.0"} (dep->expressions ['javax.activation/activation "1.1.1"])))
+    (is (= #{"CC0-1.0"} (dep->expressions ['net.i2p.crypto/eddsa "0.3.0"])))
+    (is (= #{"GPL-3.0-only"} (dep->expressions ['org.activecomponents.jadex/jadex-distribution-minimal "4.0.250"])))
+    (is (= #{"Apache-2.0"} (dep->expressions ['software.amazon.ion/ion-java "1.0.0"]))))
+  (testing "Valid deps - no licenses in deployed artifacts -> leverage fallbacks"   ; NOTE(review): both cases currently expect nil, i.e. no license information found
+    (is (nil? (dep->expressions ['slipset/deps-deploy "0.2.0"])))
+    (is (nil? (dep->expressions ['borkdude/sci.impl.reflector "0.0.1"]))))
+  (testing "Valid deps - multi license"
+    (is (= #{"EPL-1.0" "LGPL-3.0-only"} (dep->expressions ['ch.qos.logback/logback-classic "1.2.7"])))   ; Note: implies LGPL-2.1-only, but name is ambiguous
+    (is (= #{"CDDL-1.1" "GPL-2.0-only WITH Classpath-exception-2.0"}
+           (dep->expressions ['javax.mail/mail "1.4.7"]))))
+  (testing "Valid deps - Maven classifiers"   ; the classifier is written after a $ in the artifact symbol
+;    (is (= #{"Apache-2.0" "LGPL-3.0-or-later"} (dep->expressions ['com.github.jnr/jffi$native "1.3.11"])))))   ; Blocked on https://github.com/jnr/jffi/issues/141
+    (is (= #{"Apache-2.0"} (dep->expressions ['com.github.jnr/jffi$native "1.3.11"])))))
+
+; Note: we can't use valid= or valid-info= here, since the results from deps->expressions are unique
+(deftest deps-expressions-test   ; deps->expressions is expected to return a map of dep vector -> set of expressions (nil for nil, {} for no deps)
+  (testing "Nil and empty deps"
+    (is (nil? (deps->expressions nil)))
+    (is (= {} (deps->expressions []))))
+  (testing "Single deps"
+    (is (= {['org.clojure/clojure "1.10.3"] #{"EPL-1.0"}}
+           (deps->expressions [['org.clojure/clojure "1.10.3"]]))))
+  (testing "Multiple deps"
+    (is (= {'[org.clojure/clojure "1.10.3"] #{"EPL-1.0"}
+            '[org.clojure/spec.alpha "0.2.194"] #{"EPL-1.0"}
+            '[cheshire/cheshire "5.10.1"] #{"MIT"}
+            '[com.fasterxml.jackson.core/jackson-core "2.12.4"] #{"Apache-2.0"}}
+           (deps->expressions [['org.clojure/clojure "1.10.3"]
+                               ['org.clojure/spec.alpha "0.2.194"]
+                               ['cheshire/cheshire "5.10.1"]
+                               ['com.fasterxml.jackson.core/jackson-core "2.12.4"]])))))
diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj
new file mode 100644
index 0000000..0945dea
--- /dev/null
+++ b/test/lice_comb/matching_test.clj
@@ -0,0 +1,774 @@
+;
+; Copyright © 2021 Peter Monks
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; SPDX-License-Identifier: Apache-2.0
+;
+
+(ns lice-comb.matching-test
+  (:require [clojure.test               :refer [deftest testing is use-fixtures]]
+            [lice-comb.test-boilerplate :refer [fixture valid= valid-info=]]
+            [lice-comb.impl.spdx        :as lcis]
+            [lice-comb.matching         :refer [init! unlisted? proprietary-commercial? text->ids name->expressions name->expressions-info uri->ids]]
+            [spdx.licenses              :as sl]
+            [spdx.exceptions            :as se]))
+
+(use-fixtures :once fixture)
+
+(defn unlisted-only?
+  "Does the given collection of ids hold exactly one element, and is that element unlisted (per unlisted?)? False whenever the count is not 1."
+  [ids]
+  (and (= 1 (count ids))
+       (unlisted? (first ids))))   ; only evaluated when there is exactly one id
+
+(deftest init!-tests   ; init! is expected to return nil
+  (testing "Nil response"
+    (is (nil? (init!)))))
+
+(deftest unlisted?-tests   ; expectations: nil -> nil; empty/blank strings -> false; an id built by lcis/name->unlisted -> true; every SPDX license and exception id -> false
+  (testing "Nil, empty or blank ids"
+    (is (nil? (unlisted? nil)))
+    (is (false? (unlisted? "")))
+    (is (false? (unlisted? " ")))
+    (is (false? (unlisted? "\n")))
+    (is (false? (unlisted? "\t"))))
+  (testing "Unlisted ids"
+    (is (true? (unlisted? (lcis/name->unlisted "foo")))))
+  (testing "Listed ids"
+    (is (true? (every? false? (map unlisted? (sl/ids)))))
+    (is (true? (every? false? (map unlisted? (se/ids)))))))
+
+(deftest name->expressions-tests
+  (testing "Nil, empty or blank"
+    (is (nil? (name->expressions nil)))
+    (is (nil? (name->expressions "")))
+    (is (nil? (name->expressions " ")))
+    (is (nil? (name->expressions "\n")))
+    (is (nil?
(name->expressions "\t")))) + (testing "SPDX license ids" + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL-3.0-only"))) + (is (valid= #{"Apache-2.0"} (name->expressions " Apache-2.0 "))) ; Test whitespace + (is (valid= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) + (is (valid= #{"CC-BY-SA-4.0"} (name->expressions "CC-BY-SA-4.0"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0-with-classpath-exception")))) + (testing "Public domain and proprietary/commercial" + (is (valid= #{(lcis/public-domain)} (name->expressions "Public Domain"))) + (is (valid= #{(lcis/public-domain)} (name->expressions "Public domain"))) ; Test lower case + (is (valid= #{(lcis/public-domain)} (name->expressions " Public domain "))) ; Test whitespace + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Commercial"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All rights reserved")))) + (testing "SPDX expressions" + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0 WITH Classpath-exception-2.0"))) + (is (valid= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache-2.0 OR GPL-3.0"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)")))) + (testing "Single expressions that are not valid SPDX" + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (valid= #{"Apache-2.0 OR GPL-3.0-only"} (name->expressions "Apache License version 2.0 or GNU General Public License version 3"))) 
+ (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "EPL-2.0 OR (GPL-2.0+ WITH Classpath-exception-2.0) OR MIT OR (BSD-3-Clause AND Apache-2.0)"))) + (is (valid= #{"Apache-2.0 AND MIT"} (name->expressions "Apache & MIT licence"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution Licence")))) + (testing "Expressions with weird operators" + (is (valid= #{"Apache-2.0"} (name->expressions "and and and Apache License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0 or or or"))) + (is (valid= #{"Apache-2.0 OR MIT"} (name->expressions "Apache License 2.0 or or or or or or or or MIT license"))) + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "Apache License 2.0 or and or and or and or and MIT license"))) + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "or and Apache Licence 2.0 or and or and or and or and MIT and or and"))) + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "Apache License 2.0 and/or MIT licence")))) + (testing "Multiple expressions" + (is (valid= #{"MIT" "BSD-4-Clause"} (name->expressions "MIT / BSD"))) + (is (valid= #{"Apache-2.0" "GPL-3.0-only"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3"))) + (is (valid= #{"Apache-2.0" "GPL-3.0-only WITH Classpath-exception-2.0"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3 with classpath exception"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR BSD-3-Clause AND Apache-2.0"} (name->expressions "Eclipse Public License or General Public License 2.0 or (at your discretion) later w/ classpath exception or MIT Licence or three clause bsd and Apache Licence")))) + (testing "Messed up license expressions" + (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "Apache with MIT")))) + (testing "Names seen in handpicked POMs on Maven Central" + 
(is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License (AGPL) version 3.0"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0 only"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License 1"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License 1.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License Version 1.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache License, Version 1.0"))) + (is (valid= #{"Apache-1.0"} (name->expressions "Apache Software License - Version 1.0"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache License 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache License Version 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache License, Version 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "Apache Software License - Version 1.1"))) + (is (valid= #{"Apache-1.1"} (name->expressions "The MX4J License, version 1.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions " Apache Software License, Version 2.0 "))) ; Test whitespace + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License - Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License Version 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software 
License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License v2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License (BSD3)"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD License 3"))) + (is (valid= #{"BSD-3-Clause-Attribution"} (name->expressions "BSD 3-Clause Attribution"))) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD"))) + (is (valid= #{"CC-BY-3.0"} (name->expressions "Attribution 3.0 Unported"))) + (is (valid= #{"CC-BY-3.0"} (name->expressions "Creative Commons Legal Code Attribution 3.0 Unported"))) + (is (valid= #{"CC-BY-4.0"} (name->expressions "Attribution 4.0 International"))) + (is (valid= #{"CC-BY-SA-4.0"} (name->expressions "Creative Commons Attribution Share Alike 4.0 International"))) + (is (valid= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) + (is (valid= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) + (is (valid= #{"CDDL-1.0"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) + (is (valid= #{"EPL-1.0"} 
(name->expressions "Eclipse Public License - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License, Version 1.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License v2.0 w/Classpath exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 (GPL2), with the classpath exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2+CE"))) ; From JavaMail + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) + (is (valid= #{"JSON"} (name->expressions "JSON License"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Library General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"MIT"} (name->expressions "Bouncy 
Castle Licence"))) ; Note spelling of "licence" + (is (valid= #{"MIT"} (name->expressions "MIT License"))) + (is (valid= #{"MIT"} (name->expressions "MIT license"))) ; Test capitalisation + (is (valid= #{"MIT"} (name->expressions "The MIT License"))) + (is (valid= #{"MPL-1.0"} (name->expressions "Mozilla Public License 1"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License Version 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Plexus"} (name->expressions "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (testing "All names seen in POMs on Clojars as of 2023-07-13" + (is (valid= #{"AFL-3.0"} (name->expressions "Academic Free License 3.0"))) + (is (valid= #{"AGPL-3.0-only" (lcis/proprietary-commercial)} (name->expressions "GNU Affero General Public License Version 3; Other commercial licenses available."))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL v3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "AGPLv3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "Affero GNU Public License v3"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "Affero General Public License"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "Affero General Public License,"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AFFERO GENERAL PUBLIC LICENSE, Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU AGPLv3"))) + (is (valid= #{"AGPL-3.0-only"} 
(name->expressions "GNU Affero General Public Licence"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License (AGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License 3.0 (AGPL-3.0)"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License v3.0"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, Version 3"))) + (is (valid= #{"AGPL-3.0-only"} (name->expressions "GNU Affero General Public License, version 3"))) + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License v3 or later (at your option)"))) + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "Affero General Public License version 3 or lator"))) ; Typo in "lator" + (is (valid= #{"AGPL-3.0-or-later"} (name->expressions "GNU AGPL-V3 or later"))) + (is (valid= #{"Apache-2.0 WITH LLVM-exception"} (name->expressions "Apache 2.0 with LLVM Exception"))) + (is (valid= #{"Apache-2.0"} (name->expressions " Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0 (CURRENT)"))) + (is (valid= #{"Apache-2.0"} (name->expressions "APACHE LICENSE, VERSION 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "APACHE"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Apache-2.0"} (name->expressions "ASL 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "ASL"))) ; Listed license missing version - we assume the 
latest + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2 Public License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2, see LICENSE"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2.0 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Licence, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License - Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License - v 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License - v2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License V2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License V2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License Version 2.0, January 2004"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License v 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License v2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, 2.0"))) + (is (valid= #{"Apache-2.0"} 
(name->expressions "Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, Version 2.0."))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, version 2."))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache License, version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Public License, version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License - v 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Software Licesne"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Sofware Licencse 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Sofware License 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache V2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache V2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache license version 2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache license, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache v2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache v2"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache v2.0"))) + (is (valid= 
#{"Apache-2.0"} (name->expressions "Apache"))) ; Listed license missing clause info + (is (valid= #{"Apache-2.0"} (name->expressions "Apache, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache-2.0 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache-2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "Apache2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache 2 License"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "The Apache Software License, Version 2.0"))) + (is (valid= #{"Apache-2.0"} (name->expressions "apache"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Apache-2.0"} (name->expressions "apache-2.0"))) + (is (valid= #{"Artistic-2.0" "GPL-3.0-only"} (name->expressions "Artistic License/GPL"))) ; Missing conjunction, so return 2 (singleton) expressions + (is (valid= #{"Artistic-2.0"} (name->expressions "Artistic License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"Artistic-2.0"} (name->expressions "Artistic-2.0"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "2-Clause BSD"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD (2 Clause)"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD (2-Clause)"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD (Type 2) Public License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2 Clause"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2 clause license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause Licence"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause \"Simplified\" License"))) + (is (valid= 
#{"BSD-2-Clause"} (name->expressions "BSD 2-Clause license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-Clause"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD 2-clause \"Simplified\" License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD C2"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "BSD-2-Clause"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "New BSD 2-clause license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "Simplified BSD License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "Simplified BSD license"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "The BSD 2-Clause License"))) + (is (valid= #{"BSD-2-Clause"} (name->expressions "Two clause BSD license"))) + (is (valid= #{"BSD-2-Clause-FreeBSD"} (name->expressions "FreeBSD License"))) + (is (valid= #{"BSD-3-Clause" "MIT"} (name->expressions "New-BSD / MIT"))) ; Missing conjunction, so return 2 (singleton) expressions + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-Clause BSD"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-clause BSD licence (Revised BSD licence), also included in the jar file"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-clause BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "3-clause license (New BSD License or Modified BSD License)"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Aduna BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3 Clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause 'New' or 'Revised' License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause \"New\" or \"Revised\" License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-Clause"))) 
+ (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD 3-clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD New, Version 3.0"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD-3"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "BSD-3-Clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Modified BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "New BSD License or Modified BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "New BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "New BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Revised BSD"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The 3-Clause BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The BSD 3-Clause License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The New BSD License"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "The New BSD license"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "Three Clause BSD-like License"))) +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/clafka/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/faraday-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/graphite-filter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/instrumented-ring-jetty-adapter/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= 
#{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-clojure/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/mr-edda/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/multi-atom/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/party/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/mixradio/radix/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/datagrep/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/riverford/durable-ref/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 +; (is (valid= #{"BSD-3-Clause"} (name->expressions "https://github.com/smsharman/sxm-clojure-ms/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 + (is (valid= #{"BSD-3-Clause"} (name->expressions "https://opensource.org/licenses/BSD-3-Clause"))) + (is (valid= #{"BSD-3-Clause"} (name->expressions "new BSD License"))) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD Standard License"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD license"))) ; Listed 
license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "BSD-style"))) ; Listed license missing clause info - we assume original (4 clause) + (is (valid= #{"BSD-4-Clause"} (name->expressions "The BSD License"))) + (is (valid= #{"BSL-1.0"} (name->expressions "Boost Software License - Version 1.0"))) + (is (valid= #{"Beerware"} (name->expressions "Beerware 42"))) + (is (valid= #{"Beerware"} (name->expressions "THE BEER-WARE LICENSE"))) + (is (valid= #{"CC-BY-2.5"} (name->expressions "Creative Commons Attribution 2.5 License"))) + (is (valid= #{"CC-BY-3.0"} (name->expressions "Creative Commons 3.0"))) + (is (valid= #{"CC-BY-4.0" (lcis/name->unlisted "exception for binary distribution")} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) ; The exception in this case doesn't map to any listed SPDX identifier (including CC-BY variants) + (is (valid= #{"CC-BY-4.0"} (name->expressions "CC-BY-4.0"))) + (is (valid= #{"CC-BY-4.0"} (name->expressions "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"CC-BY-NC-3.0"} (name->expressions "Creative Commons Attribution-NonCommercial 3.0"))) + (is (valid= #{"CC-BY-NC-4.0"} (name->expressions "CC BY-NC"))) ; Listed license missing version - we assume the latest + (is (valid= #{"CC-BY-NC-ND-3.0"} (name->expressions "Attribution-NonCommercial-NoDerivs 3.0 Unported"))) + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA) license"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US license id + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 US (CC-SA)"))) ; Note: the US suffix here is meaningless, as there is no CC-BY-SA-3.0-US 
license id + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported License"))) + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0 Unported"))) + (is (valid= #{"CC-BY-SA-3.0"} (name->expressions "Creative Commons Attribution-ShareAlike 3.0"))) + (is (valid= #{"CC-BY-SA-4.0"} (name->expressions "CC BY-SA 4.0"))) + (is (valid= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication"))) + (is (valid= #{"CC0-1.0"} (name->expressions "CC0 1.0 Universal"))) + (is (valid= #{"CC0-1.0"} (name->expressions "CC0"))) + (is (valid= #{"CC0-1.0"} (name->expressions "Public domain (CC0)"))) + (is (valid= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License (CDDL)"))) ; Listed license missing clause info + (is (valid= #{"CDDL-1.1"} (name->expressions "Common Development and Distribution License"))) ; Listed license missing clause info + (is (valid= #{"CECILL-2.1"} (name->expressions "CeCILL License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"CPL-1.0"} (name->expressions "Common Public License - v 1.0"))) + (is (valid= #{"CPL-1.0"} (name->expressions "Common Public License Version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "EPL 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "EPL-1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "EPL-v1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License (EPL) - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License - Version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License - v1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0 (EPL-1.0)"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions 
"Eclipse Public License v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License v1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public License, version 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "Eclipse Public Licese - v 1.0"))) + (is (valid= #{"EPL-1.0"} (name->expressions "https://github.com/cmiles74/uio/blob/master/LICENSE"))) + (is (valid= #{"EPL-2.0 AND LGPL-3.0-only"} (name->expressions "Dual: EPL and LGPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0 OR Apache-2.0"} (name->expressions "Double licensed under the Eclipse Public License (the same as Clojure) or the Apache Public License 2.0."))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath Exception"))) ; Listed exception missing version - we assume the latest + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "Eclipse Public License 2.0 OR GNU GPL v2+ with Classpath exception"))) + (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later"} (name->expressions "EPL-2.0 OR GPL-2.0-or-later"))) + (is (valid= #{"EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"EPL-2.0 OR GPL-3.0-or-later"} (name->expressions "EPL-2.0 OR GPL-3.0-or-later"))) + (is (valid= #{"EPL-2.0" "MIT"} (name->expressions "Eclipse Public MIT"))) ; Listed license missing version - 
we assume the latest ; Missing conjunction, so return 2 (singleton) expressions + (is (valid= #{"EPL-2.0"} (name->expressions "Copyright (C) 2013 Mathieu Gauthron. Distributed under the Eclipse Public License."))) + (is (valid= #{"EPL-2.0"} (name->expressions "Copyright (C) 2014 Mathieu Gauthron. Distributed under the Eclipse Public License."))) + (is (valid= #{"EPL-2.0"} (name->expressions "Distributed under the Eclipse Public License, the same as Clojure."))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "ECLIPSE PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "EPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "EPL-2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "EPLv2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public Licence"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License (EPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License - v 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License 2.0,"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License v2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License version 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} 
(name->expressions "Eclipse Public License, v. 2.0"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Public License, v2"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse Pulic License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse public license, the same as Clojure"))) + (is (valid= #{"EPL-2.0"} (name->expressions "Eclipse"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EPL-2.0"} (name->expressions "Some Eclipse Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"EUPL-1.1"} (name->expressions "European Union Public Licence (EUPL v.1.1)"))) + (is (valid= #{"EUPL-1.1"} (name->expressions "The European Union Public License, Version 1.1"))) + (is (valid= #{"EUPL-1.2"} (name->expressions "European Union Public Licence v. 1.2"))) + (is (valid= #{"EUPL-1.2"} (name->expressions "European Union Public License 1.2 or later"))) + (is (valid= #{"EUPL-1.2"} (name->expressions "European Union Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GNU General Public License, Version 2, with the Classpath Exception"))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPLv2 with Classpath exception"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU GENERAL PUBLIC LICENSE Version 2, June 1991"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License 2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU General Public License, version 2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License v2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License, Version 2.0"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GNU Public License, 
v2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPL v2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPL-2.0"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "GPLv2"))) + (is (valid= #{"GPL-2.0-only"} (name->expressions "The GNU General Public License, Version 2"))) + (is (valid= #{"GPL-2.0-or-later WITH Classpath-exception-2.0"} (name->expressions "GPL-2.0-or-later WITH Classpath-exception-2.0"))) + (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU General Public License,version 2.0 or (at your option) any later version"))) + (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU GPL V2+"))) + (is (valid= #{"GPL-2.0-or-later"} (name->expressions "GPL 2.0+"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions " GNU GENERAL PUBLIC LICENSE Version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL v 3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL v. 
3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU GPL, version 3, 29 June 2007"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License (GPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License V3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License Version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, Version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3 (GPLv3)"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU General Public License, version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU Public License V. 
3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU Public License V3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU public licence V3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNU"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GNUv3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL 3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL V3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL v3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL version 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL-3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL-3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL-3.0-only"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPL3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "GPLv3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "General Public License 3"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "General Public License v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "The GNU General Public License v3.0"))) + (is (valid= #{"GPL-3.0-only"} (name->expressions "The GNU General Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU GPL v3+"))) + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU GPLv3+"))) + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License v3.0 or later"))) + (is (valid= #{"GPL-3.0-or-later"} (name->expressions "GNU General Public License, Version 3 (or later)"))) + (is (valid= 
#{"GPL-3.0-or-later"} (name->expressions "GPL V3+"))) + (is (valid= #{"Hippocratic-2.1"} (name->expressions "Hippocratic License"))) + (is (valid= #{"ISC WITH Classpath-exception-2.0"} (name->expressions "ISC WITH Classpath-exception-2.0"))) + (is (valid= #{"ISC"} (name->expressions "ISC Licence"))) + (is (valid= #{"ISC"} (name->expressions "ISC License"))) + (is (valid= #{"ISC"} (name->expressions "ISC"))) + (is (valid= #{"ISC"} (name->expressions "MIT/ISC License"))) + (is (valid= #{"ISC"} (name->expressions "MIT/ISC"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU LGPL v2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License v2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Public License, Version 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Lesser General Pulic License v2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL) V2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPL 2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPL-2.1-only"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "LGPLv2.1"))) + (is (valid= #{"LGPL-2.1-only"} (name->expressions "lgpl_v2_1"))) + (is (valid= #{"LGPL-2.1-or-later"} (name->expressions "GNU Lesser General Public License, version 2.1 or newer"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU General Lesser Public License (LGPL) version 
3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LESSER GENERAL PUBLIC LICENSE, Version 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL v3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPL-3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU LGPLv3 "))) ; Note trailing space + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public Licence"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL) Version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License - v3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License v3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License version 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License"))) ; Listed license missing version - we assume the 
latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser General Public License, Version 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Genereal Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "GNU Library or Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "Gnu Lesser Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "L GPL 3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0 (GNU Lesser General Public License)"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL 3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL Open Source license"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL v3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPL-3.0-only"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "LGPLv3"))) + (is (valid= #{"LGPL-3.0-only"} (name->expressions "Lesser GPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "Lesser General Public License (LGPL)"))) ; Listed license missing version - we assume the latest + (is (valid= #{"LGPL-3.0-only"} (name->expressions "Lesser General Public License"))) ; Listed license missing version - we assume the latest + 
(is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, Version 3 or later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, v. 3 or later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3 or later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "GNU Lesser General Public License, version 3.0 or (at your option) any later version"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "LGPL-3.0-or-later"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "LGPLv3+"))) + (is (valid= #{"LGPL-3.0-or-later"} (name->expressions "Licensed under GNU Lesser General Public License Version 3 or later (the "))) ; Note trailing space + (is (valid= #{"Libpng"} (name->expressions "zlib/libpng License"))) + (is (valid= #{"MIT" "Apache-2.0" "BSD-3-Clause"} (name->expressions "MIT/Apache-2.0/BSD-3-Clause"))) + (is (valid= #{"MIT"} (name->expressions " MIT License"))) + (is (valid= #{"MIT"} (name->expressions "Distributed under an MIT-style license (see LICENSE for details)."))) + (is (valid= #{"MIT"} (name->expressions "Expat (MIT) license"))) + (is (valid= #{"MIT"} (name->expressions "MIT LICENSE"))) + (is (valid= #{"MIT"} (name->expressions "MIT Licence"))) + (is (valid= #{"MIT"} (name->expressions "MIT Licens"))) + (is (valid= #{"MIT"} (name->expressions "MIT License (MIT)"))) + (is (valid= #{"MIT"} (name->expressions "MIT License"))) + (is (valid= #{"MIT"} (name->expressions "MIT Public License"))) + (is (valid= #{"MIT"} (name->expressions "MIT license"))) + (is (valid= #{"MIT"} (name->expressions "MIT public License"))) + (is (valid= #{"MIT"} (name->expressions "MIT public license"))) + (is (valid= #{"MIT"} (name->expressions "MIT"))) + (is (valid= #{"MIT"} (name->expressions "MIT-style license (see LICENSE for details)."))) + (is (valid= #{"MIT"} (name->expressions "THE MIT LICENSE"))) + (is (valid= #{"MIT"} 
(name->expressions "The MIT Licence"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License (MIT) "))) ; Note trailing space + (is (valid= #{"MIT"} (name->expressions "The MIT License (MIT) | Open Source Initiative"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License (MIT)"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License"))) + (is (valid= #{"MIT"} (name->expressions "The MIT License."))) + (is (valid= #{"MIT"} (name->expressions "http://opensource.org/licenses/MIT"))) +; (is (valid= #{"MIT"} (name->expressions "https://github.com/clanhr/clanhr-service/blob/master/LICENSE"))) ; Failing due to https://github.com/spdx/Spdx-Java-Library/issues/182 + (is (valid= #{"MPL-1.0"} (name->expressions "Mozilla Public License Version 1.0"))) + (is (valid= #{"MPL-1.1"} (name->expressions "Mozilla Public License Version 1.1"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL 2"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL v2"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL"))) ; Listed license missing version - we assume the latest + (is (valid= #{"MPL-2.0"} (name->expressions "MPL-2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL-v2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "MPL2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public Licence 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License (Version 2.0)"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License Version 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License v2.0+"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License version 2"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License version 2.0"))) + (is (valid= 
#{"MPL-2.0"} (name->expressions "Mozilla Public License"))) ; Listed license missing version - we assume the latest + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License, v. 2.0"))) + (is (valid= #{"MPL-2.0"} (name->expressions "Mozilla Public License, version 2.0"))) + (is (valid= #{"NASA-1.3"} (name->expressions "NASA OPEN SOURCE AGREEMENT VERSION 1.3"))) + (is (valid= #{"NASA-1.3"} (name->expressions "NASA Open Source Agreement, Version 1.3"))) + (is (valid= #{"NCSA"} (name->expressions "University of Illinois/NCSA Open Source License"))) + (is (valid= #{"Ruby"} (name->expressions "Ruby License"))) + (is (valid= #{"SGI-B-2.0"} (name->expressions "SGI"))) ; Listed license missing version - we assume the latest + (is (valid= #{"SMPPL"} (name->expressions "SMPPL"))) + (is (valid= #{"Unlicense"} (name->expressions "The UnLicense"))) + (is (valid= #{"Unlicense"} (name->expressions "The Unlicence"))) + (is (valid= #{"Unlicense"} (name->expressions "The Unlicense"))) + (is (valid= #{"Unlicense"} (name->expressions "UnLicense"))) + (is (valid= #{"Unlicense"} (name->expressions "Unlicense License"))) + (is (valid= #{"Unlicense"} (name->expressions "Unlicense"))) + (is (valid= #{"Unlicense"} (name->expressions "unlicense"))) + (is (valid= #{"W3C"} (name->expressions "W3C Software license"))) + (is (valid= #{"WTFPL"} (name->expressions "DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE"))) + (is (valid= #{"WTFPL"} (name->expressions "DO-WTF-U-WANT-2"))) + (is (valid= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License"))) + (is (valid= #{"WTFPL"} (name->expressions "Do What The Fuck You Want To Public License, Version 2"))) + (is (valid= #{"WTFPL"} (name->expressions "WTFPL v2"))) + (is (valid= #{"WTFPL"} (name->expressions "WTFPL – Do What the Fuck You Want to Public License"))) + (is (valid= #{"WTFPL"} (name->expressions "WTFPL"))) + (is (valid= #{"X11"} (name->expressions "MIT X11 License"))) + (is (valid= #{"X11"} (name->expressions 
"MIT/X11"))) + (is (valid= #{"Zlib"} (name->expressions "Zlib License"))) + (is (valid= #{"Zlib"} (name->expressions "zlib License"))) + (is (valid= #{"Zlib"} (name->expressions "zlib license"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All Rights Reserved"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "All rights reserved"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Copyright & all rights reserved Lean Pixel"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Copyright 2013 The Fresh Diet. All rights reserved."))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Copyright 2017 All Rights Reserved"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Not fit for public use so formally proprietary software - this is not open-source"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Private License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Private"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietary"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Proprietory. Copyright Jayaraj Poroor. All Rights Reserved."))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Tulos Commercial License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "Wildbit Proprietary License"))) + (is (valid= #{(lcis/proprietary-commercial)} (name->expressions "proprietary"))) + (is (valid= #{(lcis/public-domain)} (name->expressions "Public Domain"))) + (is (valid= #{(str "GPL-2.0-or-later OR " (lcis/name->unlisted "Swiss Ephemeris"))} (name->expressions "GPL v2+ or Swiss Ephemeris"))) + (is (valid= #{(str "MIT AND " (lcis/proprietary-commercial))} (name->expressions "Dual MIT & Proprietary"))) + (is (unlisted-only? 
(name->expressions "${license.id}"))) + (is (unlisted-only? (name->expressions "A Clojure library for Google Cloud Pub/Sub."))) + (is (unlisted-only? (name->expressions "APGL"))) ; Probable typo + (is (unlisted-only? (name->expressions "Amazon Software License"))) + (is (unlisted-only? (name->expressions "BankersBox License"))) + (is (unlisted-only? (name->expressions "Bespoke"))) + (is (unlisted-only? (name->expressions "Bloomberg Open API"))) + (is (unlisted-only? (name->expressions "Bostock"))) + (is (unlisted-only? (name->expressions "Built In Project License"))) + (is (unlisted-only? (name->expressions "CRAPL License"))) + (is (unlisted-only? (name->expressions "Contact JMonkeyEngine forums for license details"))) + (is (unlisted-only? (name->expressions "Copyright (C) 2015 by Glowbox LLC"))) + (is (unlisted-only? (name->expressions "Copyright (c) 2011 Drew Colthorp"))) + (is (unlisted-only? (name->expressions "Copyright (c) 2017, Lingchao Xin"))) + (is (unlisted-only? (name->expressions "Copyright 2016, klaraHealth, Inc."))) + (is (unlisted-only? (name->expressions "Copyright 2017 Zensight"))) + (is (unlisted-only? (name->expressions "Copyright 4A Volcano. 2015."))) + (is (unlisted-only? (name->expressions "Copyright Ona Systems Inc."))) + (is (unlisted-only? (name->expressions "Copyright meissa GmbH"))) + (is (unlisted-only? (name->expressions "Copyright © SparX 2014"))) + (is (unlisted-only? (name->expressions "Copyright"))) + (is (unlisted-only? (name->expressions "Custom"))) + (is (unlisted-only? (name->expressions "Cydeas Public License"))) + (is (unlisted-only? (name->expressions "Don't steal my stuff"))) + (is (unlisted-only? (name->expressions "Dropbox ToS"))) + (is (unlisted-only? (name->expressions "FIXME: choose"))) + (is (unlisted-only? (name->expressions "Firebase ToS"))) + (is (unlisted-only? (name->expressions "GG Public License"))) + (is (unlisted-only? (name->expressions "Google Maps ToS"))) + (is (unlisted-only? 
(name->expressions "GraphiQL license"))) + (is (unlisted-only? (name->expressions "Hackthorn Innovation Ltd"))) + (is (unlisted-only? (name->expressions "Hackthorn Innovation copyright"))) + (is (unlisted-only? (name->expressions "Heap ToS"))) + (is (unlisted-only? (name->expressions "Interel"))) + (is (unlisted-only? (name->expressions "JLGL Backend"))) + (is (unlisted-only? (name->expressions "Jedis License"))) + (is (unlisted-only? (name->expressions "Jiegao Owned"))) + (is (unlisted-only? (name->expressions "LICENSE"))) + (is (unlisted-only? (name->expressions "Libre Uso MX"))) + (is (unlisted-only? (name->expressions "License of respective package"))) + (is (unlisted-only? (name->expressions "License"))) + (is (unlisted-only? (name->expressions "Like Clojure."))) + (is (unlisted-only? (name->expressions "Mixed"))) + (is (unlisted-only? (name->expressions "Multiple"))) + (is (unlisted-only? (name->expressions "OTN License Agreement"))) + (is (unlisted-only? (name->expressions "Open Source Community License - Type C version 1.0"))) + (is (unlisted-only? (name->expressions "Other License"))) + (is (unlisted-only? (name->expressions "Provisdom"))) + (is (unlisted-only? (name->expressions "Research License 1.0"))) + (is (unlisted-only? (name->expressions "Restricted Distribution."))) + (is (unlisted-only? (name->expressions "SYNNEX China Owned"))) + (is (unlisted-only? (name->expressions "See the LICENSE file"))) + (is (unlisted-only? (name->expressions "Shen License"))) + (is (unlisted-only? (name->expressions "Slick2D License"))) + (is (unlisted-only? (name->expressions "Stripe ToS"))) + (is (unlisted-only? (name->expressions "TODO"))) + (is (unlisted-only? (name->expressions "TODO: Choose a license"))) + (is (unlisted-only? (name->expressions "The I Haven't Got Around To This Yet License"))) + (is (unlisted-only? (name->expressions "To ill!"))) + (is (unlisted-only? (name->expressions "UNLICENSED"))) + (is (unlisted-only? 
(name->expressions "University of Buffalo Public License"))) + (is (unlisted-only? (name->expressions "Unknown"))) + (is (unlisted-only? (name->expressions "VNETLPL - Limited Public License"))) + (is (unlisted-only? (name->expressions "VNet PL"))) + (is (unlisted-only? (name->expressions "Various"))) + (is (unlisted-only? (name->expressions "Vimeo License"))) + (is (unlisted-only? (name->expressions "WIP"))) + (is (unlisted-only? (name->expressions "YouTube ToS"))) + (is (unlisted-only? (name->expressions "avi license"))) + (is (unlisted-only? (name->expressions "esl-sdk-external-signer-verification"))) + (is (unlisted-only? (name->expressions "https://github.com/jaycfields/jry/blob/master/README.md#license"))) ; We don't support full text matching in Markdown yet + (is (unlisted-only? (name->expressions "jank license"))) + (is (unlisted-only? (name->expressions "name"))) + (is (unlisted-only? (name->expressions "none"))) + (is (unlisted-only? (name->expressions "state-node license"))) + (is (unlisted-only? (name->expressions "trove"))) + (is (unlisted-only? (name->expressions "url"))) + (is (unlisted-only? (name->expressions "wisdragon"))) + (is (unlisted-only? (name->expressions "wiseloong"))))) + +(deftest name->expressions-info-tests + (testing "Nil, empty or blank" + (is (nil? (name->expressions-info nil))) + (is (nil? (name->expressions-info ""))) + (is (nil? (name->expressions-info " "))) + (is (nil? (name->expressions-info "\n"))) + (is (nil? 
(name->expressions-info "\t")))) + (testing "SPDX license ids" + (is (valid-info= {"AGPL-3.0-only" (list {:type :declared :strategy :spdx-expression :source (list "AGPL-3.0")})} + (name->expressions-info "AGPL-3.0"))) + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :declared :strategy :spdx-expression :source (list "GPL-2.0-with-classpath-exception")})} + (name->expressions-info "GPL-2.0-with-classpath-exception")))) + (testing "SPDX expressions" + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :declared :strategy :spdx-expression :source (list "GPL-2.0 WITH Classpath-exception-2.0")})} + (name->expressions-info "GPL-2.0 WITH Classpath-exception-2.0")))) + (testing "License ids that aren't SPDX ids" + (is (valid-info= {"Apache-2.0" (list {:id "Apache-2.0" :type :concluded :confidence :high :strategy :regex-matching :source (list "Apache Software License version 2.0")})} + (name->expressions-info "Apache Software License version 2.0"))) + (is (valid-info= {"Apache-2.0" (list {:id "Apache-2.0" :type :concluded :confidence :medium :strategy :regex-matching :source (list "Apache License 2")})} + (name->expressions-info "Apache License 2"))) + (is (valid-info= {"Apache-2.0" (list {:id "Apache-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list "Apache")})} + (name->expressions-info "Apache")))) + (testing "Single expressions that are not valid SPDX" + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :concluded :confidence :low :strategy :expression-inference :source (list "GNU General Public License, version 2 with the GNU Classpath Exception")} + {:id "GPL-2.0-only" :type :concluded :confidence :medium :strategy :regex-matching :source (list "GNU General Public License, version 2 with the GNU Classpath Exception" "GNU General Public License, version 2")} + {:id "Classpath-exception-2.0" :type :concluded :confidence :low :strategy :regex-matching :source (list 
"GNU General Public License, version 2 with the GNU Classpath Exception" "the GNU Classpath Exception" "Classpath Exception")})} + (name->expressions-info "GNU General Public License, version 2 with the GNU Classpath Exception"))) + (is (valid-info= {"GPL-2.0-only WITH Classpath-exception-2.0" (list {:type :concluded :confidence :high :strategy :expression-inference :source (list "GNU General Public License, version 2.0 with the Classpath Exception 2.0")} + {:id "GPL-2.0-only" :type :concluded :confidence :high :strategy :regex-matching :source (list "GNU General Public License, version 2.0 with the Classpath Exception 2.0" "GNU General Public License, version 2.0")} + {:id "Classpath-exception-2.0" :type :concluded :confidence :high :strategy :regex-matching :source (list "GNU General Public License, version 2.0 with the Classpath Exception 2.0" "the Classpath Exception 2.0" "Classpath Exception 2.0")})} + (name->expressions-info "GNU General Public License, version 2.0 with the Classpath Exception 2.0")))) + (testing "Multiple expressions" + (is (valid-info= {"BSD-4-Clause" (list {:id "BSD-4-Clause" :type :concluded :confidence :low :strategy :regex-matching :source (list "MIT / BSD" "BSD")}) + "MIT" (list {:id "MIT" :type :concluded :confidence :high :strategy :regex-matching :source (list "MIT / BSD" "MIT")})} + (name->expressions-info "MIT / BSD")))) + (testing "Some names from Clojars" + (is (valid-info= {"BSD-3-Clause" (list {:id "BSD-3-Clause" :type :concluded :confidence :high :strategy :spdx-listed-uri :source (list "https://opensource.org/licenses/BSD-3-Clause")})} + (name->expressions-info "https://opensource.org/licenses/BSD-3-Clause"))) + (is (valid-info= {"EPL-2.0" (list {:id "EPL-2.0" :type :concluded :confidence :high :strategy :regex-matching :source (list "Eclipse Public License - v 2.0")})} + (name->expressions-info "Eclipse Public License - v 2.0"))))) + +(deftest uri->ids-tests + (testing "Nil, empty or blank uri" + (is (nil? 
(uri->ids nil))) + (is (nil? (uri->ids ""))) + (is (nil? (uri->ids " "))) + (is (nil? (uri->ids "\n"))) + (is (nil? (uri->ids "\t")))) + (testing "URIs that appear verbatim in the SPDX license or exception lists" + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids " http://www.apache.org/licenses/LICENSE-2.0.html "))) ; Test whitespace + (is (= #{"AGPL-3.0-or-later"} (uri->ids "https://www.gnu.org/licenses/agpl.txt"))) + (is (= #{"CC-BY-SA-4.0"} (uri->ids "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) + (is (= #{"Classpath-exception-2.0"} (uri->ids "https://www.gnu.org/software/classpath/license.html")))) + (testing "URI variations that should be handled identically" + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0.html"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt"))) + (is (= #{"Apache-2.0"} (uri->ids "http://apache.org/licenses/LICENSE-2.0.pdf")))) + (testing "URIs that appear in licensey things, but aren't in the SPDX license list as shown" + (is (= #{"Apache-2.0"} (uri->ids "http://www.apache.org/licenses/LICENSE-2.0"))) + (is (= #{"Apache-2.0"} (uri->ids "https://www.apache.org/licenses/LICENSE-2.0.txt")))) + (testing "URIs that aren't in the SPDX license list, but do match via retrieval and full text matching" + (is (= #{"Apache-2.0"} (uri->ids "https://raw.githubusercontent.com/pmonks/lice-comb/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "https://github.com/pmonks/lice-comb/blob/main/LICENSE"))) + (is (= #{"Apache-2.0"} (uri->ids "HTTPS://GITHUB.COM/pmonks/lice-comb/blob/main/LICENSE"))))) diff --git a/test/lice_comb/maven_test.clj b/test/lice_comb/maven_test.clj index df7287b..1478dfa 100644 --- a/test/lice_comb/maven_test.clj +++ b/test/lice_comb/maven_test.clj @@ -18,41 +18,47 @@ (ns lice-comb.maven-test 
(:require [clojure.test :refer [deftest testing is use-fixtures]] - [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.maven :refer [pom->ids]])) + [lice-comb.test-boilerplate :refer [fixture valid=]] + [lice-comb.impl.spdx :as lcis] + [lice-comb.maven :refer [init! pom->expressions]])) (use-fixtures :once fixture) (def test-data-path "./test/lice_comb/data") -(deftest pom->ids-tests +(deftest init!-tests + (testing "Nil response" + (is (nil? (init!))))) + +(deftest pom->expressions-tests (testing "Nil pom" - (is (nil? (pom->ids nil)))) + (is (nil? (pom->expressions nil)))) (testing "Invalid filenames" - (is (thrown? java.io.FileNotFoundException (pom->ids ""))) - (is (thrown? java.io.FileNotFoundException (pom->ids " "))) - (is (thrown? java.io.FileNotFoundException (pom->ids "\t"))) - (is (thrown? java.io.FileNotFoundException (pom->ids "\n"))) - (is (thrown? java.io.FileNotFoundException (pom->ids "this-file-doesnt-exist.pom"))) - (is (thrown? java.io.FileNotFoundException (pom->ids "./this/path/and/file/doesnt/exist.pom")))) + (is (thrown? java.io.FileNotFoundException (pom->expressions ""))) + (is (thrown? java.io.FileNotFoundException (pom->expressions " "))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "\t"))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "\n"))) + (is (thrown? java.io.FileNotFoundException (pom->expressions "this-file-doesnt-exist.pom"))) + (is (thrown? 
java.io.FileNotFoundException (pom->expressions "./this/path/and/file/doesnt/exist.pom")))) (testing "Synthetic pom files" - (is (= #{"Apache-2.0"} (pom->ids (str test-data-path "/simple.pom")))) - (is (= #{"BSD-3-Clause"} (pom->ids (str test-data-path "/no-xml-ns.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/simple.pom")))) + (is (valid= #{"BSD-3-Clause"} (pom->expressions (str test-data-path "/no-xml-ns.pom")))) + (is (valid= #{"Apache-2.0" "MIT" "GPL-2.0-only WITH Classpath-exception-2.0" "BSD-3-Clause" "Unlicense AND CC0-1.0"} (pom->expressions (str test-data-path "/complex.pom"))))) (testing "Real pom files - local" - (is (= #{"Apache-2.0"} (pom->ids (str test-data-path "/asf-cat-1.0.12.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/asf-cat-1.0.12.pom"))))) (testing "Real pom files - remote" - (is (= #{"Apache-2.0"} (pom->ids "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) - (is (= #{"NON-SPDX-Public-Domain"} (pom->ids "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX - (is (= #{"EPL-1.0"} (pom->ids "https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) - (is (= #{"Apache-2.0"} (pom->ids "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) - (is (= #{"Apache-2.0"} (pom->ids "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) - (is (nil? 
(pom->ids "https://repo.clojars.org/borkdude/sci.impl.reflector/0.0.1/sci.impl.reflector-0.0.1.pom"))) ; This project has no license information in its pom - (is (= #{"CDDL-1.0"} (pom->ids "https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.pom"))) - (is (= #{"Plexus"} (pom->ids "https://repo1.maven.org/maven2/org/jdom/jdom2/2.0.6.1/jdom2-2.0.6.1.pom"))) ; See https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (is (= #{"GPL-3.0"} (pom->ids "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/software/amazon/ion/ion-java/1.0.2/ion-java-1.0.2.pom"))) + (is (valid= #{(lcis/public-domain)} (pom->expressions "https://repo1.maven.org/maven2/aopalliance/aopalliance/1.0/aopalliance-1.0.pom"))) ; Note: non-SPDX + (is (valid= #{"EPL-1.0"} (pom->expressions "https://repo.clojars.org/org/clojure/clojure/1.4.0/clojure-1.4.0.pom"))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/com/github/pmonks/asf-cat/1.0.12/asf-cat-1.0.12.pom"))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo.clojars.org/http-kit/http-kit/2.5.3/http-kit-2.5.3.pom"))) + (is (nil? 
(pom->expressions "https://repo.clojars.org/borkdude/sci.impl.reflector/0.0.1/sci.impl.reflector-0.0.1.pom"))) ; This project has no license information in its pom + (is (valid= #{"CDDL-1.0"} (pom->expressions "https://repo1.maven.org/maven2/javax/activation/activation/1.1.1/activation-1.1.1.pom"))) + (is (valid= #{"Plexus"} (pom->expressions "https://repo1.maven.org/maven2/org/jdom/jdom2/2.0.6.1/jdom2-2.0.6.1.pom"))) ; See https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html + (is (valid= #{"GPL-3.0-only"} (pom->expressions "https://repo1.maven.org/maven2/org/activecomponents/jadex/jadex-kernel-component/3.0.117/jadex-kernel-component-3.0.117.pom")))) (testing "Real pom files - remote - dual-licensed" - (is (= #{"GPL-2.0-with-classpath-exception" "MIT"} (pom->ids "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) + (is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0" "MIT"} (pom->expressions "https://repo1.maven.org/maven2/org/checkerframework/checker-compat-qual/2.5.5/checker-compat-qual-2.5.5.pom")))) (testing "Synthetic pom files with licenses in parent - local" - (is (= #{"Apache-2.0"} (pom->ids (str test-data-path "/with-parent.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions (str test-data-path "/with-parent.pom"))))) (testing "Real pom files with licenses in parent - remote" - (is (= #{"Apache-2.0"} (pom->ids "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.12.69/aws-java-sdk-core-1.12.69.pom"))))) + (is (valid= #{"Apache-2.0"} (pom->expressions "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.12.69/aws-java-sdk-core-1.12.69.pom"))))) diff --git a/test/lice_comb/spdx_test.clj b/test/lice_comb/spdx_test.clj deleted file mode 100644 index fab4bf6..0000000 --- a/test/lice_comb/spdx_test.clj +++ /dev/null @@ -1,153 +0,0 @@ -; -; Copyright © 2021 Peter Monks -; -; Licensed under the Apache License, Version 2.0 (the 
"License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; SPDX-License-Identifier: Apache-2.0 -; - -(ns lice-comb.spdx-test - (:require [clojure.test :refer [deftest testing is use-fixtures]] - [clojure.java.io :as io] - [lice-comb.test-boilerplate :refer [fixture]] - [lice-comb.spdx :refer [name->ids uri->id text->ids]])) - -(use-fixtures :once fixture) - -; Note: these tests should be extended indefinitely, as it exercises the guts of the matching algorithm -(deftest name->ids-tests - (testing "Nil, empty or blank names" - (is (nil? (name->ids nil))) - (is (nil? (name->ids ""))) - (is (nil? (name->ids " "))) - (is (nil? (name->ids "\n"))) - (is (nil? 
(name->ids "\t")))) - (testing "Names that are SPDX license ids" - (is (= #{"AGPL-3.0"} (name->ids "AGPL-3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "AGPL-3.0-only"))) - (is (= #{"Apache-2.0"} (name->ids " Apache-2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->ids "Apache-2.0"))) - (is (= #{"CC-BY-SA-4.0"} (name->ids "CC-BY-SA-4.0"))) - (is (= #{"GPL-2.0"} (name->ids "GPL-2.0"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GPL-2.0-with-classpath-exception")))) - (testing "Names" - (is (= #{"AGPL-3.0"} (name->ids "GNU Affero General Public License (AGPL) version 3.0"))) - (is (= #{"AGPL-3.0"} (name->ids "GNU Affero General Public License v3.0"))) - (is (= #{"AGPL-3.0-only"} (name->ids "GNU Affero General Public License v3.0 only"))) - (is (= #{"Apache-1.0"} (name->ids "Apache Software License"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License 1"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License Version 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache License, Version 1.0"))) - (is (= #{"Apache-1.0"} (name->ids "Apache Software License - Version 1.0"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache License, Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "Apache Software License - Version 1.1"))) - (is (= #{"Apache-1.1"} (name->ids "The MX4J License, version 1.0"))) - (is (= #{"Apache-2.0"} (name->ids " Apache Software License, Version 2.0 "))) ; Test whitespace - (is (= #{"Apache-2.0"} (name->ids "Apache 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License - Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software 
License 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License v2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache Software License, Version 2.0"))) - (is (= #{"Apache-2.0"} (name->ids "Apache v2"))) - (is (= #{"Apache-2.0"} (name->ids "The Apache Software License, Version 2.0"))) - (is (= #{"MIT"} (name->ids "Bouncy Castle Licence"))) ; Note spelling of "licence" - (is (= #{"BSD-3-Clause"} (name->ids "3-Clause BSD License"))) - (is (= #{"BSD-3-Clause"} (name->ids "BSD 3-Clause License"))) - (is (= #{"BSD-3-Clause"} (name->ids "The BSD 3-Clause License (BSD3)"))) - (is (= #{"BSD-3-Clause-Attribution"} (name->ids "BSD 3-Clause Attribution"))) - (is (= #{"CC-BY-3.0"} (name->ids "Attribution 3.0 Unported"))) - (is (= #{"CC-BY-3.0"} (name->ids "Creative Commons Legal Code Attribution 3.0 Unported"))) - (is (= #{"CC-BY-4.0"} (name->ids "Attribution 4.0 International"))) - (is (= #{"CC-BY-SA-4.0"} (name->ids "Creative Commons Attribution Share Alike 4.0 International"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0"))) - (is (= #{"CDDL-1.0"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0"))) - (is (= #{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.1"))) - (is (= #{"CDDL-1.1"} (name->ids "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License (EPL)"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License - v 1.0"))) - (is (= #{"EPL-1.0"} (name->ids 
"Eclipse Public License 1.0"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License"))) - (is (= #{"EPL-1.0"} (name->ids "Eclipse Public License, Version 1.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License 2.0"))) - (is (= #{"EPL-2.0"} (name->ids "Eclipse Public License version 2"))) - (is (= #{"GPL-2.0"} (name->ids "GNU General Public License, version 2"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GNU General Public License, version 2 (GPL2), with the classpath exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GNU General Public License, version 2 with the GNU Classpath Exception"))) - (is (= #{"GPL-2.0-with-classpath-exception"} (name->ids "GNU General Public License v2.0 w/Classpath exception"))) - (is (= #{"JSON"} (name->ids "JSON License"))) - (is (= #{"LGPL-2.0"} (name->ids "GNU Library General Public License"))) - (is (= #{"LGPL-2.1"} (name->ids "GNU Lesser General Public License (LGPL)"))) - (is (= #{"LGPL-2.1"} (name->ids "GNU Lesser General Public License"))) - (is (= #{"MIT"} (name->ids "MIT License"))) - (is (= #{"MIT"} (name->ids "MIT license"))) ; Test capitalisation - (is (= #{"MIT"} (name->ids "The MIT License"))) - (is (= #{"MPL-1.0"} (name->ids "Mozilla Public License"))) - (is (= #{"MPL-2.0"} (name->ids "Mozilla Public License Version 2.0"))) - (is (= #{"Plexus"} (name->ids "Similar to Apache License but with the acknowledgment clause removed")))) ; JDOM - see https://lists.linuxfoundation.org/pipermail/spdx-legal/2014-December/001280.html - (testing "Names that appear in licensey things, but are ambiguous" - (is (nil? (name->ids "BSD")))) - (testing "Names that appear in licensey things, but aren't in the SPDX license list, and don't have identified SPDX identifiers" - (is (= #{"NON-SPDX-Public-Domain"} (name->ids "Public Domain"))) - (is (= #{"NON-SPDX-Public-Domain"} (name->ids "Public domain"))))) - -(deftest uri->id-tests - (testing "Nil, empty or blank uri" - (is (nil? 
(uri->id nil))) - (is (nil? (uri->id ""))) - (is (nil? (uri->id " "))) - (is (nil? (uri->id "\n"))) - (is (nil? (uri->id "\t")))) - (testing "URIs that appear verbatim in the SPDX license list" - (is (= "Apache-2.0" (uri->id "https://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (uri->id "http://www.apache.org/licenses/LICENSE-2.0.html"))) - (is (= "Apache-2.0" (uri->id "https://apache.org/licenses/LICENSE-2.0.txt"))) - (is (= "Apache-2.0" (uri->id " https://www.apache.org/licenses/LICENSE-2.0 "))) ; Test whitespace - (is (let [license-id (uri->id "https://www.gnu.org/licenses/agpl.txt")] - (or (= "AGPL-3.0" license-id) - (= "AGPL-3.0-only" license-id)))) - (is (= "CC-BY-SA-4.0" (uri->id "https://creativecommons.org/licenses/by-sa/4.0/legalcode"))) - (is (= "GPL-2.0-with-classpath-exception" (uri->id "https://www.gnu.org/software/classpath/license.html")))) - (testing "URIs that appear in licensey things, but aren't in the SPDX license list" - (is (= "Apache-2.0" (uri->id "http://www.apache.org/licenses/LICENSE-2.0"))) - (is (= "Apache-2.0" (uri->id "https://www.apache.org/licenses/LICENSE-2.0.txt"))))) - -(defn- string-text->ids - [s] - (with-open [is (io/input-stream (.getBytes s "UTF-8"))] - (text->ids is))) - -(deftest text->ids-tests - (testing "Nil, empty or blank text" - (is (nil? (text->ids nil))) - (is (thrown? java.io.FileNotFoundException (text->ids ""))) - (is (thrown? java.io.FileNotFoundException (text->ids " "))) - (is (thrown? java.io.FileNotFoundException (text->ids "\n"))) - (is (thrown? 
java.io.FileNotFoundException (text->ids "\t")))) - (testing "Text" - (is (= #{"Apache-2.0"} (string-text->ids "Apache License\nVersion 2.0, January 2004"))) - (is (= #{"Apache-2.0"} (string-text->ids " Apache License\n Version 2.0, January 2004 "))) - (is (= #{"AGPL-3.0"} (string-text->ids "GNU AFFERO GENERAL PUBLIC LICENSE\nVersion 3, 19 November 2007"))) - (is (= #{"CC-BY-SA-4.0"} (string-text->ids "Creative Commons Attribution-ShareAlike\n4.0 International Public License"))) - (is (= #{"JSON"} (string-text->ids "Copyright (c) 2002 JSON.org"))))) diff --git a/test/lice_comb/test_boilerplate.clj b/test/lice_comb/test_boilerplate.clj index 4bb5e82..d231f12 100644 --- a/test/lice_comb/test_boilerplate.clj +++ b/test/lice_comb/test_boilerplate.clj @@ -17,21 +17,90 @@ ; (ns lice-comb.test-boilerplate - (:require [clojure.spec.alpha :as spec])) + (:require [clojure.spec.alpha :as spec] + [spdx.expressions :as sexp])) + +(println "\n☔️ Running tests on Clojure" (clojure-version) "/ JVM" (System/getProperty "java.version") (str "(" (System/getProperty "java.vm.name") " v" (System/getProperty "java.vm.version") ")\n")) ; Here we hack up a "global once" function -(def ^:private global-setup (memoize (fn [] - ; Because java.util.logging is a hot mess - (org.slf4j.bridge.SLF4JBridgeHandler/removeHandlersForRootLogger) - (org.slf4j.bridge.SLF4JBridgeHandler/install) +(def ^:private global-setup (delay + ; Because java.util.logging is a hot mess + (org.slf4j.bridge.SLF4JBridgeHandler/removeHandlersForRootLogger) + (org.slf4j.bridge.SLF4JBridgeHandler/install) - ; Enable spec validation - (spec/check-asserts true) + ; Enable spec validation + (spec/check-asserts true) - (println "\n☔️ Running tests on Clojure" (clojure-version) "/ JVM" (System/getProperty "java.version") (str "(" (System/getProperty "java.vm.name") " v" (System/getProperty "java.vm.version") ")\n")) - ))) + nil)) (defn fixture [f] - (global-setup) + @global-setup (f)) + +(def not-nil? 
(complement nil?)) + +(defn when-pred + [val pred then] + (if (pred val) + (then val) + val)) + +(defmacro testing-with-data + "A form of `clojure.test/testing` that generates multiple `clojure.test/is` + clauses, based on applying f to the keys in m, and comparing to the associated + value in m." + [name f m] + `(clojure.test/testing ~name + ~@(map #(list `clojure.test/is `(= (~f ~(key %)) ~(when-pred (val %) list? (partial list 'quote)))) + (if (isa? (type m) clojure.lang.Symbol) + @(resolve m) + m)))) + +(defn valid= + "Returns true if all of the following are true: + * actual is a set + * actual equals expected + * everything in actual is a valid SPDX license expression + + Also prints (to stdout) which of the above is not true, in the event that any + of them are not true." + [expected actual] + (let [is-a-set? (or (nil? actual) (set? actual)) + is-equal? (= (set expected) actual) + all-valid-expressions? (and is-a-set? (every? true? (map sexp/valid? actual))) + result (and is-a-set? + is-equal? + all-valid-expressions?)] + ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message + (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) + (when-not is-a-set? (print "\n* Not a set:" (type actual))) + (when-not is-equal? (print "\n* Not equal to expected")) + (when-not all-valid-expressions? (print "\n* Not all valid SPDX expressions")) + result)) + +(defn valid-info= + "Returns true if all of the following are true: + * actual is a map + * the keys in actual are identical to expected-keys + * all vals in actual are lists + * every key in actual is a valid SPDX license expression + + Also prints (to stdout) which of the above is not true, in the event that any + of them are not true." + [expected actual] + (let [is-a-map? (or (nil? actual) (map? actual)) + is-equal? (= expected actual) + values-are-sequentials? (or (nil? actual) (every? sequential? (vals actual))) + all-valid-expressions? 
(and is-a-map? (every? true? (map sexp/valid? (keys actual)))) + result (and values-are-sequentials? + is-a-map? + is-equal? + all-valid-expressions?)] + ; Yes print here is deliberate, to ensure the output lines are grouped with the associated test failure message + (when-not result (print "\n☔️☔️☔️ Invalid result produced:")) + (when-not is-a-map? (print "\n* Not a map:" (type actual))) + (when-not is-equal? (print "\n* Not equal to expected")) + (when-not values-are-sequentials? (print "\n* Not all values are sequential:" (pr-str (map type (vals actual))))) + (when-not all-valid-expressions? (print "\n* Not all keys are valid SPDX expressions")) + result))