Skip to content

Commit

Permalink
Merge pull request #26 from pmonks/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
pmonks authored Sep 24, 2023
2 parents b0bcc11 + 97cf443 commit 2a8cf24
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 104 deletions.
2 changes: 1 addition & 1 deletion pbr.clj
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
(def lib 'com.github.pmonks/lice-comb)

#_{:clj-kondo/ignore [:unresolved-namespace]}
(def version (format "2.0.%s-RC2" (b/git-count-revs nil)))
(def version (format "2.0.%s-RC3" (b/git-count-revs nil)))

(defn set-opts
[opts]
Expand Down
4 changes: 2 additions & 2 deletions src/lice_comb/files.clj
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@
(lciei/prepend-source filepath
(cond (= lfname "pom.xml") (lcmvn/pom->expressions-info f fname)
(s/ends-with? lfname ".pom") (lcmvn/pom->expressions-info f fname)
(instance? java.io.InputStream f) (doall (lcmtch/text->ids-info f))
:else (with-open [is (io/input-stream f)] (doall (lcmtch/text->ids-info is))))))))) ; Default is to assume it's a plain text file containing license text(s)
(instance? java.io.InputStream f) (doall (lcmtch/text->expressions-info f))
:else (with-open [is (io/input-stream f)] (doall (lcmtch/text->expressions-info is))))))))) ; Default is to assume it's a plain text file containing license text(s)

(defn file->expressions
"Returns a set of SPDX expressions (Strings) for the given file (an
Expand Down
103 changes: 57 additions & 46 deletions src/lice_comb/impl/matching.clj
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
(:require [clojure.string :as s]
[clojure.set :as set]
[clojure.java.io :as io]
[spdx.licenses :as sl]
[spdx.exceptions :as se]
[spdx.matching :as sm]
[lice-comb.impl.spdx :as lcis]
Expand All @@ -34,18 +35,6 @@

(def ^:private cursed-names-d (delay (lcid/load-edn-resource "lice_comb/names.edn")))

(def ^:private direct-replacements-map {
#{"GPL-2.0-only" "Classpath-exception-2.0"} #{"GPL-2.0-only WITH Classpath-exception-2.0"}
#{"GPL-2.0-or-later" "Classpath-exception-2.0"} #{"GPL-2.0-or-later WITH Classpath-exception-2.0"}
#{"GPL-3.0-only" "Classpath-exception-2.0"} #{"GPL-3.0-only WITH Classpath-exception-2.0"}
#{"GPL-3.0-or-later" "Classpath-exception-2.0"} #{"GPL-3.0-or-later WITH Classpath-exception-2.0"}
})

(defn- direct-replacements
"Self-evident direct replacements."
[ids]
(get direct-replacements-map ids ids))

(def ^:private gpl-ids-with-only-or-later #{"AGPL-1.0"
"AGPL-3.0"
"GFDL-1.1"
Expand All @@ -65,10 +54,10 @@
(map? associative) (apply dissoc associative ks)))

(defn- fix-gpl-only-or-later
"If the keys of ids includes both an 'only' and an 'or-later' variant of the
same underlying GNU family identifier, remove the 'only' variant."
[ids]
(loop [result ids
"If the keys of expressions includes both an 'only' and an 'or-later' variant
of the same underlying GNU family identifier, remove the 'only' variant."
[expressions]
(loop [result expressions
f (first gpl-ids-with-only-or-later)
r (rest gpl-ids-with-only-or-later)]
(if f
Expand All @@ -81,40 +70,62 @@
result)))

(defn- fix-public-domain-cc0
"If the keys of ids includes both CC0-1.0 and lice-comb's public domain
LicenseRef, remove the LicenseRef as it's redundant."
[ids]
(if (and (contains? ids (lcis/public-domain))
(contains? ids "CC0-1.0"))
(dis ids (lcis/public-domain))
ids))
"If the keys of expressions includes both CC0-1.0 and lice-comb's public
domain LicenseRef, remove the LicenseRef as it's redundant."
[expressions]
(if (and (contains? expressions (lcis/public-domain))
(contains? expressions "CC0-1.0"))
(dis expressions (lcis/public-domain))
expressions))

(defn- fix-mpl-2
"If the keys of ids includes both MPL-2.0 and MPL-2.0-no-copyleft-exception,
remove the MPL-2.0-no-copyleft-exception as it's redundant."
[ids]
(if (and (contains? ids "MPL-2.0")
(contains? ids "MPL-2.0-no-copyleft-exception"))
(dis ids "MPL-2.0-no-copyleft-exception")
ids))
"If the keys of expressions includes both MPL-2.0 and
MPL-2.0-no-copyleft-exception, remove MPL-2.0-no-copyleft-exception as it's
redundant."
[expressions]
(if (and (contains? expressions "MPL-2.0")
(contains? expressions "MPL-2.0-no-copyleft-exception"))
(dis expressions "MPL-2.0-no-copyleft-exception")
expressions))

(defn- fix-license-id-with-exception-id
  "Collapses a two-entry collection of expressions into a single
  '<license> WITH <exception>' entry.

  expressions is either a set of expression strings, or a map keyed by
  expression string.  The collapse only happens when the collection has exactly
  two entries, at least one of which is accepted by spdx.licenses/listed-id?
  and at least one of which is accepted by spdx.exceptions/listed-id?.

  * For a set, the result is a one-element set holding the combined string.
  * For a map, the result is a one-entry map whose key is the combined string
    and whose value is the concatenation of the values of the original entries.

  In every other case expressions is returned untouched."
  [expressions]
  (if-not (= 2 (count expressions))
    ; Only a pair can be a license + exception combination
    expressions
    (let [as-set? (set? expressions)
          ; Anything that isn't a set is treated as a map, so its keys are the candidates
          ids     (if as-set? expressions (keys expressions))
          lic     (first (filter sl/listed-id? ids))
          exc     (first (filter se/listed-id? ids))]
      (cond
        (not (and lic exc)) expressions
        as-set?             #{(str lic " WITH " exc)}
        :else               {(str lic " WITH " exc) (reduce concat (vals expressions))}))))

(defn manual-fixes
"Manually fix certain invalid combinations of license identifiers in a set or
map."
[ids]
(some-> ids
direct-replacements
map of expressions."
[expressions]
(some-> expressions
fix-gpl-only-or-later
fix-public-domain-cc0
fix-mpl-2))
fix-mpl-2
fix-license-id-with-exception-id))

(defmulti text->ids
(defmulti text->expressions
"Returns an expressions-map for the given license text, or nil if no matches
are found."
{:arglists '([text])}
type)

(defmethod text->ids java.lang.String
(defmethod text->expressions java.lang.String
[s]
; These clj-spdx APIs are *expensive*, so we parallelise them
(let [f-lic (future (sm/licenses-within-text s @lcis/license-ids-d))
Expand All @@ -123,23 +134,23 @@
(when ids
(manual-fixes (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-text-matching})) ids))))))

(defmethod text->ids java.io.Reader
(defmethod text->expressions java.io.Reader
[r]
(let [sw (java.io.StringWriter.)]
(io/copy r sw)
(text->ids (str sw))))
(text->expressions (str sw))))

(defmethod text->ids java.io.InputStream
(defmethod text->expressions java.io.InputStream
[is]
(text->ids (io/reader is)))
(text->expressions (io/reader is)))

(defmethod text->ids :default
(defmethod text->expressions :default
[src]
(when src
(with-open [r (io/reader src)]
(text->ids r))))
(text->expressions r))))

(defn uri->ids
(defn uri->expressions
"Returns an expressions-map for the given license uri, or nil if no matches
are found."
[uri]
Expand All @@ -153,7 +164,7 @@

; 2. attempt to retrieve the text/plain contents of the uri and perform license text matching on it
(when-let [license-text (lcihttp/get-text uri)]
(when-let [ids (text->ids license-text)]
(when-let [ids (text->expressions license-text)]
ids))))))))

(defn- string->ids-info
Expand All @@ -178,7 +189,7 @@
(map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-listed-name :source (list s)})) ids))

; 3. Might it be a URI? (this is to handle some dumb corner cases that exist in pom.xml files hosted on Clojars & Maven Central)
(when-let [ids (uri->ids s)]
(when-let [ids (uri->expressions s)]
(map #(hash-map (key %) (val %)) ids))

; 4. Attempt regex name matching
Expand Down
3 changes: 2 additions & 1 deletion src/lice_comb/impl/spdx.clj
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@
(defn proprietary-commercial?
"Is the given id lice-comb's custom 'proprietary / commercial' LicenseRef?"
[id]
(= (s/lower-case id) (s/lower-case proprietary-commercial-license-ref)))
(when id
(= (s/lower-case id) (s/lower-case proprietary-commercial-license-ref))))

(def ^{:doc "Constructs a valid SPDX id (a LicenseRef specific to lice-comb)
representing a proprietary / commercial license."
Expand Down
41 changes: 15 additions & 26 deletions src/lice_comb/matching.clj
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
(unlisted? id) (lcis/unlisted->name id)
:else id)))

(defn text->ids-info
(defn text->expressions-info
"Returns an expressions-info map for the given license text (a String, Reader,
InputStream, or something that is accepted by clojure.java.io/reader - File,
URL, URI, Socket, etc.), or nil if no expressions were found.
Expand All @@ -111,9 +111,9 @@
* you cannot pass a String representation of a filename to this method - you
should pass filenames through clojure.java.io/file (or similar) first"
[text]
(lcim/text->ids text))
(lcim/text->expressions text))

(defn text->ids
(defn text->expressions
"Returns a set of SPDX expressions (Strings) for the given license text (a
String, Reader, InputStream, or something that is accepted by
clojure.java.io/reader - File, URL, URI, Socket, etc.), or nil if no
Expand All @@ -127,23 +127,15 @@
* you cannot pass a String representation of a filename to this method - you
should pass filenames through clojure.java.io/file (or similar) first"
[text]
(some-> (text->ids-info text)
(some-> (text->expressions-info text)
keys
set))

(defn uri->ids-info
(defn uri->expressions-info
"Returns an expressions-info map for the given license uri (a String, URL, or
URI).
Notes:
* This is done
Returns the SPDX license and/or exception identifiers (a map) for the given
uri, or nil if there aren't any. It does this via two steps:
1. Seeing if the given URI is in the license or exception list, and returning
the ids of the associated licenses and/or exceptions if so
URI), or nil if no expressions were found. It does this via two steps:
1. Seeing if the given URI is in the SPDX license or exception lists, and
returning the ids of the associated licenses and/or exceptions if so
2. Attempting to retrieve the plain text content of the given URI and
performing full SPDX license matching on the result if there was one
Expand All @@ -154,14 +146,11 @@
ignoring extensions representing MIME types (.txt vs .html, etc.), etc.
See lice-comb.impl.utils/simplify-uri for exact details.
2. URIs in the SPDX license and exception lists are not unique - the same URI
may represent multiple licenses and/or exceptions.
The keys in the maps are the detected SPDX license and exception identifiers,
and each value contains information about how that identifiers was determined."
may represent multiple licenses and/or exceptions."
[uri]
(lcim/uri->ids uri))
(lcim/uri->expressions uri))

(defn uri->ids
(defn uri->expressions
"Returns the SPDX license and/or exception identifiers (a set of Strings) for
the given uri, or nil if there aren't any. It does this via two steps:
1. Seeing if the given URI is in the license or exception list, and returning
Expand All @@ -178,7 +167,7 @@
2. URIs in the SPDX license and exception lists are not unique - the same URI
may represent multiple licenses and/or exceptions."
[uri]
(some-> (uri->ids-info uri)
(some-> (uri->expressions-info uri)
keys
set))

Expand All @@ -188,7 +177,7 @@
1. Determining whether the name is a valid SPDX license expression, and if so
normalising it (see clj-spdx's spdx.expressions/normalise fn)
2. Checking if the name is actually a URI, and if so performing URL matching
on it (as per url->ids-info)
on it (as per uri->expressions-info)
3. attempting to construct one or more SPDX license expressions from the
name
Expand All @@ -202,7 +191,7 @@
{normalised-expression (list {:type :declared :strategy :spdx-expression :source (list name)})}
; 2. If it's a URI, use URI matching (this is to handle messed up real world cases where license names in POMs contain a URI)
(if (lciu/valid-http-uri? name)
(if-let [ids (uri->ids-info name)]
(if-let [ids (uri->expressions-info name)]
ids
{(lcis/name->unlisted name) (list {:type :concluded :confidence :low :strategy :unlisted :source (list name)})}) ; It was a URL, but we weren't able to resolve it to any ids, so return it as unlisted
; 3. Attempt to build SPDX expression(s) from the name
Expand All @@ -214,7 +203,7 @@
1. Determining whether the name is a valid SPDX license expression, and if so
normalising (see clj-spdx's spdx.expressions/normalise fn) and returning it
2. Checking if the name is actually a URI, and if so performing URL matching
on it (as per url->ids)
on it (as per uri->expressions)
3. attempting to construct one or more SPDX license expressions from the
name"
[name]
Expand Down
8 changes: 4 additions & 4 deletions src/lice_comb/maven.clj
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
[lice-comb.impl.http :as lcihttp]
[lice-comb.impl.utils :as lciu]))

(xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0")

(defn- licenses-from-pair
"Attempts to determine the license(s) (a map) from a POM license name/URL
pair. Returns nil if no matches were found."
Expand All @@ -37,10 +39,8 @@
(if-let [name-expressions (lciei/prepend-source "<name>" (lcmtch/name->expressions-info name))]
name-expressions
; 2. If the names didn't give us any licenses, look in the url field(s) (this tends to be slower and less accurate)
(when-let [uri-ids (lciei/prepend-source "<url>" (lcmtch/uri->ids-info url))]
uri-ids)))

(xml/alias-uri 'pom "http://maven.apache.org/POM/4.0.0")
(when-let [uri-expressions (lciei/prepend-source "<url>" (lcmtch/uri->expressions-info url))]
uri-expressions)))

(defn- xml-find-all-alts
"As for xi/find-all, but supports an alternative fallback set of tags (to
Expand Down
Loading

0 comments on commit 2a8cf24

Please sign in to comment.