Skip to content

Commit 9fdcb84

Browse files
committed
🚧 WIP on replace-first algorithm - refine GNU family regexes
1 parent dccbad3 commit 9fdcb84

File tree

1 file changed

+24
-15
lines changed
  • src/lice_comb/impl/substitutions

1 file changed

+24
-15
lines changed

src/lice_comb/impl/substitutions/gnu.clj

+24-15
Original file line numberDiff line numberDiff line change
@@ -119,64 +119,73 @@
119119
(when confidence-explanations {:confidence-explanations confidence-explanations}))))
120120

121121
; Generic GNU family word regexes
122-
(def ^:private gnu-words [#"\(?The" #"GNU" #"GPL" #"Genere?al" #"Pub?lic" #"Licen[cs]ed?(?:[\s\-–—]+Under)?" #"Open[\s\-–—]+Source" #"FOSS" #"OSS"])
122+
; Filler-word regexes shared by the AGPL, LGPL and GPL word lists in this file.
; Several patterns deliberately tolerate variants: "Genere?al" also accepts
; "Genereal", "Pub?lic" also accepts "Pulic", and "Licen[cs]ed?" covers
; licence/license/licenced/licensed, optionally followed by "Under".
; Separators between words may be whitespace, a hyphen, or an en/em dash.
(def ^:private gnu-words [#"The" #"GNU" #"GPL" #"Genere?al" #"Pub?lic" #"Licen[cs]ed?(?:[\s\-–—]+Under)?" #"Open[\s\-–—]+Source" #"FOSS" #"OSS"])
123123

124124
; AGPL regexes
125-
(def ^:private fre-agpl-words (re/grp (apply re/alt (concat gnu-words [#"\(?AGPL[\s\-–—v\d\.]*\)?" "Affero"]))))
125+
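; TODO: test the split of fre-agpl-words into -before/-after variants (previous definition kept commented out below)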
126+
;(def ^:private fre-agpl-words (re/grp (apply re/alt (concat gnu-words [#"\(?AGPL[\s\-–—v\d\.]*\)?" "Affero"]))))
127+
; AGPL word salad allowed *before* the version: gnu-words plus "Affero" and an
; optionally parenthesised "AGPL" followed only by whitespace/dash separators.
; The AGPL token here does not accept "v", digits or dots - presumably so that
; it cannot swallow the version text ahead of the version group; confirm.
; Combined with re/alt and re/grp (presumably an alternation wrapped in a group).
(def ^:private fre-agpl-words-before (re/grp (apply re/alt (concat gnu-words [#"\(?AGPL[\s\-–—]*\)?" "Affero"]))))
128+
; AGPL word salad allowed *after* the version: gnu-words plus "Affero" and an
; optionally parenthesised "AGPL" that may also be followed by "v", digits and
; dots (e.g. "(AGPLv3)"), in addition to whitespace/dash separators.
(def ^:private fre-agpl-words-after (re/grp (apply re/alt (concat gnu-words [#"\(?AGPL[\s\-–—v\d\.]*\)?" "Affero"])))) ; Only include version variants *after* the actual version
126129
(def re-agpl (re/join #"(?iuUx)(?<!\w)" ; Only public for ease of testing
127130
"\n\n#### Leading word salad ####\n"
128-
(re/zom-grp fre-agpl-words lcir/fre-mws)
131+
(re/zom-grp fre-agpl-words-before lcir/fre-mws)
129132
"\n\n#### Matching words ####\n"
130-
(re/ncg "agpl" #"(?:A\s?GPL|Affero)")
133+
(re/ncg "agpl" #"(?:A[\s\-–—]*GPL|Affero)")
131134
"\n\n#### Pre-version word salad ####\n"
132-
(re/zom-grp lcir/fre-mws fre-agpl-words)
135+
(re/zom-grp lcir/fre-mws fre-agpl-words-before)
133136
"\n\n#### Version and version qualifier ####\n"
134137
(re/opt-grp lcir/fre-ows lcir/fre-version)
135138
(re/opt-grp lcir/fre-ows lcir/fre-only-or-later)
136139
"\n\n#### Post-version word salad ####\n"
137-
(re/zom-grp lcir/fre-mws fre-agpl-words)
140+
(re/zom-grp lcir/fre-mws fre-agpl-words-after)
138141
"\n\n#### Date ####\n"
139142
(re/opt-grp lcir/fre-mws lcir/fre-date)
140143
"\n\n#### Coda ####\n"
141144
#"(?!\w)"))
142145

143146
; LGPL regexes
144147
; Fragment for the "Lesser" / "Library" part of LGPL names, built from the two
; words and an "or" joiner surrounded by lcir/fre-mws.
; NOTE(review): presumably re/or-grp matches either word alone or both joined
; by the third argument (e.g. "Lesser or Library") - confirm in the re namespace.
(def ^:private fre-lesser-or-library (re/or-grp "Lesser" "Library" (re/join lcir/fre-mws "or" lcir/fre-mws)))
145-
(def ^:private fre-lgpl-words (re/grp (apply re/alt (concat gnu-words [#"\(?LGPL[\s\-–—v\d\.]*\)?" fre-lesser-or-library]))))
148+
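; TODO: test the split of fre-lgpl-words into -before/-after variants (previous definition kept commented out below)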
149+
;(def ^:private fre-lgpl-words (re/grp (apply re/alt (concat gnu-words [#"\(?LGPL[\s\-–—v\d\.]*\)?" fre-lesser-or-library]))))
150+
; LGPL word salad allowed *before* the version: gnu-words plus the
; fre-lesser-or-library fragment and an optionally parenthesised "LGPL"
; followed only by whitespace/dash separators.
; The LGPL token here does not accept "v", digits or dots - presumably so that
; it cannot swallow the version text ahead of the version group; confirm.
(def ^:private fre-lgpl-words-before (re/grp (apply re/alt (concat gnu-words [#"\(?LGPL[\s\-–—]*\)?" fre-lesser-or-library]))))
151+
; LGPL word salad allowed *after* the version: gnu-words plus the
; fre-lesser-or-library fragment and an optionally parenthesised "LGPL" that
; may also be followed by "v", digits and dots (e.g. "(LGPLv2.1)"), in
; addition to whitespace/dash separators.
(def ^:private fre-lgpl-words-after (re/grp (apply re/alt (concat gnu-words [#"\(?LGPL[\s\-–—v\d\.]*\)?" fre-lesser-or-library])))) ; Only include version variants *after* the actual version
146152
(def re-lgpl (re/join #"(?iuUx)(?<!\w)" ; Only public for ease of testing
147153
"\n\n#### Leading word salad ####\n"
148-
(re/zom-grp fre-lgpl-words lcir/fre-mws)
154+
(re/zom-grp fre-lgpl-words-before lcir/fre-mws)
149155
"\n\n#### Matching words ####\n"
150156
(re/ncg "lgpl"
151-
(re/alt #"L\s*GPL"
157+
(re/alt #"L[\s\-–—]*GPL"
152158
(re/join #"(?:GNU|GPL)" lcir/fre-mws fre-lesser-or-library)
153159
(re/join fre-lesser-or-library lcir/fre-mws #"(?:GNU|GPL|General)")))
154160
"\n\n#### Pre-version word salad ####\n"
155-
(re/zom-grp lcir/fre-mws fre-lgpl-words)
161+
(re/zom-grp lcir/fre-mws fre-lgpl-words-before)
156162
"\n\n#### Version and version qualifier ####\n"
157163
(re/opt-grp lcir/fre-ows lcir/fre-version)
158164
(re/opt-grp lcir/fre-ows lcir/fre-only-or-later)
159165
"\n\n#### Post-version word salad ####\n"
160-
(re/zom-grp lcir/fre-mws fre-lgpl-words)
166+
(re/zom-grp lcir/fre-mws fre-lgpl-words-after)
161167
"\n\n#### Date ####\n"
162168
(re/opt-grp lcir/fre-mws lcir/fre-date)
163169
"\n\n#### Coda ####\n"
164170
#"(?!\w)"))
165171

166172
; GPL regexes
167-
(def ^:private fre-gpl-words (re/grp (apply re/alt (concat gnu-words [#"\(?GPL[\s\-–—v\d\.]*\)?"]))))
173+
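; TODO: test the split of fre-gpl-words into -before/-after variants (previous definition kept commented out below)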
174+
;(def ^:private fre-gpl-words (re/grp (apply re/alt (concat gnu-words [#"\(?GPL[\s\-–—v\d\.]*\)?"]))))
175+
; GPL word salad allowed *before* the version: gnu-words plus an optionally
; parenthesised "GPL" followed only by whitespace/dash separators.
; The GPL token here does not accept "v", digits or dots - presumably so that
; it cannot swallow the version text ahead of the version group; confirm.
(def ^:private fre-gpl-words-before (re/grp (apply re/alt (concat gnu-words [#"\(?GPL[\s\-–—]*\)?"]))))
176+
; GPL word salad allowed *after* the version: gnu-words plus an optionally
; parenthesised "GPL" that may also be followed by "v", digits and dots
; (e.g. "(GPLv2)"), in addition to whitespace/dash separators.
(def ^:private fre-gpl-words-after (re/grp (apply re/alt (concat gnu-words [#"\(?GPL[\s\-–—v\d\.]*\)?"])))) ; Only include version variants *after* the actual version
168177
(def re-gpl (re/join #"(?iuUx)(?<!\w)" ; Only public for ease of testing
169178
"\n\n#### Leading word salad ####\n"
170-
(re/zom-grp fre-gpl-words lcir/fre-mws)
179+
(re/zom-grp fre-gpl-words-before lcir/fre-mws)
171180
"\n\n#### Matching words ####\n"
172181
(re/ncg "gpl" #"(?:GNU|GPL|(?:Genere?al(?:[\s\-–—]+Pub?lic)?(?:[\s\-–—]+Licen[cs]e)?))")
173182
"\n\n#### Pre-version word salad ####\n"
174-
(re/zom-grp lcir/fre-mws fre-gpl-words)
183+
(re/zom-grp lcir/fre-mws fre-gpl-words-before)
175184
"\n\n#### Version and version qualifier ####\n"
176185
(re/opt-grp lcir/fre-ows lcir/fre-version)
177186
(re/opt-grp lcir/fre-ows lcir/fre-only-or-later)
178187
"\n\n#### Post-version word salad ####\n"
179-
(re/zom-grp lcir/fre-mws fre-gpl-words)
188+
(re/zom-grp lcir/fre-mws fre-gpl-words-after)
180189
"\n\n#### Date ####\n"
181190
(re/opt-grp lcir/fre-mws lcir/fre-date)
182191
"\n\n#### Coda ####\n"

0 commit comments

Comments
 (0)