Skip to content

Commit 0dc92fa

Browse files
authored
Merge pull request #43 from pmonks/dev
Release 2.0.272
2 parents feabcc7 + 51715b8 commit 0dc92fa

File tree

8 files changed

+92
-30
lines changed

8 files changed

+92
-30
lines changed

src/lice_comb/impl/matching.clj

+47-11
Original file line numberDiff line numberDiff line change
@@ -236,12 +236,41 @@
236236
[s]
237237
(when-not (s/blank? s)
238238
(->> (s/split (s/trim s) #"(?i)\band[/-\\]+or\b")
239-
(map-split-and-interpose #"(?i)(\band\b|\&)(?!\s+(distribution|all\s+rights\s+reserved))" :and)
240-
(map-split-and-interpose #"(?i)\bor\b(?!\s*(-?(any\s+)?later|(any\s+)?lator|(any\s+)?newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?|([\"']?(Revised|Modified)[\"']?)))" :or)
241-
(map-split-and-interpose #"(?i)\b(with\b|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)" :with)
239+
(map-split-and-interpose #"(?i)(\band\b|\&)(?!\s+(distribution|all\s+rights\s+reserved))"
240+
:and)
241+
(map-split-and-interpose #"(?i)\bor\b(?!\s*(-?(greater|(any\s+)?later|(any\s+)?lator|(any\s+)?newer|lesser|library|\(?at\s+your\s+(option|discretion)\)?|([\"']?(Revised|Modified)[\"']?))))"
242+
:or)
243+
(map-split-and-interpose #"(?i)\b(with\b|w/)(?!\s+the\s+acknowledgment\s+clause\s+removed)"
244+
:with)
242245
filter-blanks
243246
(map #(if (string? %) (s/trim %) %)))))
244247

248+
(defn- collapse-unlisted-exceptions
  "Collapses exception fragments with a LicenseRef on the right side (which is
  not valid in SPDX v2.3), returning a single LicenseRef for the entire fragment
  instead.

  l is walked with a sliding window of three elements (left, op, right).
  Whenever op is :with, left and right are both maps, and the first key of
  right is an unidentified LicenseRef, the three elements are replaced by one
  new unidentified entry whose name is \"<first :source of left> with
  <original name of right>\".  Every other element is passed through unchanged
  and in order.

  Note: this will need to change substantially as part of https://github.com/pmonks/lice-comb/issues/42"
  [l]
  (loop [f      (take 3 l)
         r      (rest l)
         result nil]
    (if (< (count f) 3)
      ; Fewer than three elements left - nothing more can be collapsed
      (concat result f)
      (let [[left op right] f]
        (if (and (= :with op)
                 ; Guard against operator keywords on either side of :with,
                 ; (keys/vals would throw on a keyword)
                 (map? left)
                 (map? right)
                 (lcis/unidentified? (first (keys right))))
          (let [skip2      (rest (rest r))   ; r already excludes left, so this drops op and right as well
                left-name  (first (:source (first (first (vals left)))))
                right-name (lcis/unidentified->name (first (keys right)))
                new-name   (str left-name " with " right-name)
                new-id     (lcis/name->unidentified new-name)]
            (recur (take 3 skip2)
                   (rest skip2)
                   (concat result (list {new-id (list {:id new-id :type :concluded :confidence :low :strategy :unidentified :source (list new-name)})}))))
          ; No collapse at this position: emit left and slide the window by one
          (recur (take 3 r)
                 (rest r)
                 (concat result (list left))))))))
273+
245274
(def ^:private push conj) ; With lists-as-stacks conj == push
246275

247276
(defn- calculate-confidence-for-expression
@@ -308,14 +337,21 @@
308337
(get @cursed-names-d name)
309338

310339
; 2. Construct an expressions-info map from the name
311-
(some->> (split-on-operators name)
312-
(drop-while keyword?)
313-
(lc3/rdrop-while keyword?)
314-
(map #(if (keyword? %) % (string->ids-info %)))
315-
flatten
316-
seq
317-
build-expressions-info-map
318-
(lciu/mapfonk sexp/normalise)))))))
340+
(let [partial-result (some->> (split-on-operators name)
341+
(drop-while keyword?)
342+
(lc3/rdrop-while keyword?)
343+
(map #(if (keyword? %) % (string->ids-info %)))
344+
flatten
345+
collapse-unlisted-exceptions
346+
seq)
347+
ids-only (seq (mapcat keys (filter map? partial-result)))]
348+
; Check whether all we have are unidentified LicenseRefs, and if so just return the entire thing as a single unidentified LicenseRef
349+
(if (every? lcis/unidentified? ids-only)
350+
(let [id (lcis/name->unidentified (s/trim name))]
351+
{id (list {:id id :type :concluded :confidence :low :strategy :unidentified :source (list)})})
352+
(some->> partial-result
353+
build-expressions-info-map
354+
(lciu/mapfonk sexp/normalise)))))))))
319355

320356
(defn init!
321357
"Initialises this namespace upon first call (and does nothing on subsequent

src/lice_comb/impl/regex_matching.clj

+5-2
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@
215215
(def lgpl-re #"(?<lgpl>L\s?GPL|GNU\s+(Library|Lesser)|(Library|Lesser)\s+(L?GPL|General\s+Public\s+Licen[cs]e))(\s+or\s+Lesser)?(\s+General)?(\s+Pub?lic)?(\s+Licen[cs]e)?(\s+\(?LGPL\)?)?")
216216
(def gpl-re #"(?<!(Affero|Lesser|Library)\s+)(?<gpl>GNU(?!\s+Classpath)|(?<!(L|A)\s*)GPL|General\s+Public\s+Licen[cs]e)(?!\s+(Affero|Library|Lesser|General\s+Lesser|General\s+Library|LGPL|AGPL))((\s+General)?(?!\s+(Affero|Lesser|Library))\s+Public\s+Licen[cs]e)?(\s+\(?GPL\)?)?")
217217
(def version-re #"[\s,-]*(_?V(ersion)?)?[\s\._]*(?<version>\d+([\._]\d+)?)?")
218-
(def only-or-later-re #"[\s,-]*((?<only>\(?only\)?)|(\(?or(\s+\(?at\s+your\s+(option|discretion)\)?)?(\s+any)?)?([\s-]*(?<orLater>later|lator|newer|\+)))?")
218+
(def only-or-later-re #"[\s,-]*((?<only>\(?only\)?)|(\(?or(\s+\(?at\s+your\s+(option|discretion)\)?)?(\s+any)?)?([\s-]*(?<orLater>lat[eo]r|newer|greater|\+)))?")
219219
(def gnu-re (lciu/re-concat "(?x)(?i)\\b(\n# Alternative 1: AGPL\n"
220220
agpl-re
221221
"\n# Alternative 2: LGPL\n|"
@@ -284,7 +284,7 @@
284284
:pad-ver? true
285285
:latest-ver "1.0"}
286286
{:id "Creative commons family"
287-
:regex #"(?i)(\bCC[\s-]BY|Creative[\s-]+Commons(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?<!BSD[\s-]+(\d|two|three|four)[\s-]+Clause\s+)Attribution)(\s+Licen[cs]e)?([\s,-]*((?<noncommercial>Non\s*Commercial|NC)|(?<noderivatives>No[\s-]*Deriv(ative)?s?|ND)|(?<sharealike>Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?<version>\d+(\.\d+)?)?\s*(?<region>Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?"
287+
:regex #"(?i)(\bCC[\s-]BY|Creative[\s-]+Commons(?![\s-]+CC0)(?!([\s-]+Legal[\s-]+Code)?[\s-]+Attribution)|(Creative[\s-]+Commons[\s-]+([\s-]+Legal[\s-]+Code)?)?(?<!BSD[\s-]+(\d|two|three|four)[\s-]+Clause\s+)Attribution)(\s+Licen[cs]e)?([\s,-]*((?<noncommercial>Non\s*Commercial|NC)|(?<noderivatives>No[\s-]*Deriv(ative)?s?|ND)|(?<sharealike>Share[\s-]*Alike|SA)))*(V(ersion)?)?\s*(?<version>\d+(\.\d+)?)?\s*(?<region>Australia|Austria|England((\s+and|\&)?\s+Wales)?|France|Germany|IGO|Japan|Netherlands|UK|United\s+States|USA?)?"
288288
:fn cc-id-constructor
289289
:pad-ver? true
290290
:latest-ver "4.0"}
@@ -348,6 +348,9 @@
348348
{:id "Unlicense"
349349
:regex #"(?i)\bUnlicen[cs]e\b"
350350
:fn (constantly ["Unlicense" :high])}
351+
{:id "UPL"
352+
:regex #"(?i)\bUniversal\s+Permissive(\s+Licen[cs]e)?([\s,-]+(V(ersion)?)?\s*(?<version>\d+(\.\d+)?)?)?\b"
353+
:fn (constantly ["UPL-1.0" :high])} ; There are no other listed versions of this license
351354
{:id "WTFPL"
352355
:regex #"(?i)\b(WTFPL|DO-WTF-U-WANT-2|Do\s+What\s+The\s+Fuck\s+You\s+Want\s+To(\s+Public)?(\s+Licen[cs]e)?)\b"
353356
:fn (constantly ["WTFPL" :high])}

src/lice_comb/impl/spdx.clj

+16-5
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,22 @@
115115
nil or is not a lice-comb unidentified LicenseRef."
116116
[id]
117117
(when (unidentified? id)
118-
(str "Unidentified ("
119-
(if (> (count id) (count unidentified-license-ref-prefix))
120-
(lciu/base62-decode (subs id (inc (count unidentified-license-ref-prefix))))
121-
"-original name not available-")
122-
")")))
118+
(if (> (count id) (count unidentified-license-ref-prefix))
119+
(lciu/base62-decode (subs id (inc (count unidentified-license-ref-prefix))))
120+
"")))
121+
122+
(defn unidentified->human-readable-name
  "Returns the string 'Unidentified' with the original name of the given
  unidentified license in parens, or a placeholder in the parens if the
  original name is blank. Returns nil if id is nil or is not a lice-comb
  unidentified LicenseRef."
  [id]
  (when (unidentified? id)
    (let [original-name (unidentified->name id)]
      (str "Unidentified ("
           ; Both branches belong inside the if: exactly one of the placeholder
           ; or the original name is emitted
           (if (s/blank? original-name)
             "-original name not available-"
             original-name)
           ")"))))
123134

124135
(defn init!
125136
"Initialises this namespace upon first call (and does nothing on subsequent

src/lice_comb/impl/utils.clj

+3-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@
111111
"Decodes the given Base62/UTF-8 string."
112112
[^String s]
113113
(when s
114-
(java.lang.String. ^bytes (base62/decode s) (java.nio.charset.StandardCharsets/UTF_8))))
114+
(if (re-matches #"\p{Alnum}*" s)
115+
(java.lang.String. ^bytes (base62/decode s) (java.nio.charset.StandardCharsets/UTF_8))
116+
(throw (ex-info (str "Invalid BASE62 value provided: " s) {}))))) ; Because clj-base62 has crappy error messages
115117

116118
(defn valid-http-uri?
117119
"Returns true if given string is a valid HTTP or HTTPS URI."

src/lice_comb/matching.clj

+4-3
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,11 @@
7979
(lcis/unidentified? id))
8080

8181
(defn unidentified->name
82-
"Get the original name of the given unidentified license. Returns nil if id is nil
83-
or is not a lice-comb unidentified LicenseRef."
82+
"Returns the string 'Unidentified' with the original name of the given
83+
unidentified license in parens. Returns nil if id is nil or is not a
84+
lice-comb unidentified LicenseRef."
8485
[id]
85-
(lcis/unidentified->name id))
86+
(lcis/unidentified->human-readable-name id))
8687

8788
(defn id->name
8889
"Returns the human readable name of the given license or exception identifier;

test/lice_comb/impl/matching_test.clj

+8-6
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,11 @@
6161
(is (= '("Apache" :with "MIT") (split-on-operators "Apache w/ MIT")))
6262
(is (= '("Apache" :with "MIT") (split-on-operators "Apache w/MIT"))))
6363
(testing "Complex non-splits"
64-
(is (= '("COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0") (split-on-operators "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0")))
65-
(is (= '("Copyright & all rights reserved Lean Pixel") (split-on-operators "Copyright & all rights reserved Lean Pixel")))
66-
(is (= '("GNU General Public License v3.0 or later") (split-on-operators "GNU General Public License v3.0 or later")))
67-
(is (= '("GNU General Public License, Version 3 (or later)") (split-on-operators "GNU General Public License, Version 3 (or later)")))
68-
(is (= '("GNU Lesser General Public License, version 2.1 or newer") (split-on-operators "GNU Lesser General Public License, version 2.1 or newer")))
69-
(is (= '("LGPL-3.0-or-later") (split-on-operators "LGPL-3.0-or-later")))))
64+
(is (= '("COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0") (split-on-operators "COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0")))
65+
(is (= '("Copyright & all rights reserved Lean Pixel") (split-on-operators "Copyright & all rights reserved Lean Pixel")))
66+
(is (= '("GNU General Public License v3.0 or later") (split-on-operators "GNU General Public License v3.0 or later")))
67+
(is (= '("GNU General Public License, Version 3 (or later)") (split-on-operators "GNU General Public License, Version 3 (or later)")))
68+
(is (= '("GNU Lesser General Public License, version 2.1 or newer") (split-on-operators "GNU Lesser General Public License, version 2.1 or newer")))
69+
(is (= '("GNU General Public License, v2.0 or greater") (split-on-operators "GNU General Public License, v2.0 or greater")))
70+
(is (= '("GNU General Public License, version 3.0 or any later version") (split-on-operators "GNU General Public License, version 3.0 or any later version")))
71+
(is (= '("LGPL-3.0-or-later") (split-on-operators "LGPL-3.0-or-later")))))

test/lice_comb/impl/regex_matching_test.clj

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
"GNU AFFERO GENERAL PUBLIC LICENSE Version 3" '("AGPL-3.0-only")
4343
"GNU AFFERO GENERAL PUBLIC LICENSE, Version 3" '("AGPL-3.0-only")
4444
"GNU AGPL-V3 or later" '("AGPL-3.0-or-later")
45+
"GNU AGPL-V3 or greater" '("AGPL-3.0-or-later")
4546
"GNU AGPLv3" '("AGPL-3.0-only")
4647
"GNU Affero General Public Licence" '("AGPL-3.0-only")
4748
"GNU Affero General Public License (AGPL)" '("AGPL-3.0-only")
@@ -86,6 +87,7 @@
8687
"GNU Lesser General Public License, v. 3 or later" '("LGPL-3.0-or-later")
8788
"GNU Lesser General Public License, version 2.1 or newer" '("LGPL-2.1-or-later")
8889
"GNU Lesser General Public License, version 3 or later" '("LGPL-3.0-or-later")
90+
"GNU Lesser General Public License, version 3 or greater" '("LGPL-3.0-or-later")
8991
"GNU Lesser General Public License, version 3.0 or (at your option) any later version" '("LGPL-3.0-or-later")
9092
"GNU Lesser General Pulic License v2.1" '("LGPL-2.1-only")
9193
"GNU Lesser Genereal Public License" '("LGPL-3.0-only")
@@ -137,6 +139,7 @@
137139
"GNU General Public License v3" '("GPL-3.0-only")
138140
"GNU General Public License v3.0" '("GPL-3.0-only")
139141
"GNU General Public License v3.0 or later" '("GPL-3.0-or-later")
142+
"GNU General Public License v3.0 or greater" '("GPL-3.0-or-later")
140143
"GNU General Public License, Version 2" '("GPL-2.0-only")
141144
"GNU General Public License, Version 3" '("GPL-3.0-only")
142145
"GNU General Public License, Version 3 (or later)" '("GPL-3.0-or-later")

test/lice_comb/matching_test.clj

+6-2
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,11 @@
198198
(is (valid= #{"GPL-2.0-only WITH Classpath-exception-2.0"} (name->expressions "GPL2 w/ CPE"))) ; One of two licenses in javax.xml.bind/jaxb-api@2.4.0-b180830.0359 (via parent)
199199
(is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU General Public License (GPL) version 2, or any later version"))) ; One of three licenses in org.bytedeco/javacpp-platform@1.5.10
200200
(is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU General Public License (GPL) version 2, or any lator version"))) ; Spelling variation on the previous test
201-
(is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU General Public License (GPL) version 2, or any newer version")))) ; Ditto
201+
(is (valid= #{"GPL-2.0-or-later"} (name->expressions "GNU General Public License (GPL) version 2, or any newer version"))) ; Ditto
202+
(is (valid= #{"UPL-1.0"} (name->expressions "Universal Permissive License, Version 1.0")))
203+
(is (valid= #{"CC0-1.0"} (name->expressions "Public Domain, per Creative Commons CC0")))
204+
(is (valid= #{"LicenseRef-lice-comb-UNIDENTIFIED-210UC7nlCWUwBBse5ma6Ntey1j3a0v0J3kvJVbZ38z7UIQnaj"} (name->expressions "provided without support or warranty"))) ; A nasty corner case because of the "or"
205+
(is (valid= #{(str (lcis/name->unidentified "CC Attribution 4.0 International with exception for binary distribution") " OR Apache-2.0")} (name->expressions "CC Attribution 4.0 International with exception for binary distribution or apache 2.0")))) ; A nasty corner case of a non-standard exception in an otherwise valid expression
202206
(testing "All names seen in POMs on Clojars as of 2023-07-13"
203207
(is (valid= #{"AFL-3.0"} (name->expressions "Academic Free License 3.0")))
204208
(is (valid= #{"AGPL-3.0-only" (lcis/proprietary-commercial)} (name->expressions "GNU Affero General Public License Version 3; Other commercial licenses available.")))
@@ -368,7 +372,7 @@
368372
(is (valid= #{"Beerware"} (name->expressions "THE BEER-WARE LICENSE")))
369373
(is (valid= #{"CC-BY-2.5"} (name->expressions "Creative Commons Attribution 2.5 License")))
370374
(is (valid= #{"CC-BY-3.0"} (name->expressions "Creative Commons 3.0")))
371-
(is (valid= #{"CC-BY-4.0" (lcis/name->unidentified "exception for binary distribution")} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) ; The exception in this case doesn't map to any listed SPDX identifier (including CC-BY variants)
375+
(is (valid= #{(lcis/name->unidentified "CC Attribution 4.0 International with exception for binary distribution")} (name->expressions "CC Attribution 4.0 International with exception for binary distribution"))) ; The exception in this case doesn't map to any listed SPDX identifier (including CC-BY variants), and as of SPDX 2.0 unidentified exceptions are to be "rolled in" with the associated license identifier, and reported as a (single) LicenseRef. In SPDX v3.0+ this will change with the addition of the AdditionRef construct.
372376
(is (valid= #{"CC-BY-4.0"} (name->expressions "CC-BY-4.0")))
373377
(is (valid= #{"CC-BY-4.0"} (name->expressions "Creative Commons Attribution License"))) ; Listed license missing version - we assume the latest
374378
(is (valid= #{"CC-BY-NC-3.0"} (name->expressions "Creative Commons Attribution-NonCommercial 3.0")))

0 commit comments

Comments
 (0)