Skip to content

Commit 405a223

Browse files
Release/550 (#1567)
* spark-nlp-jsl==5.5.0 requirements (#1544) * bump versions * implemented spark-nlp-jsl==550 requirements --------- Co-authored-by: C-K-Loan <christian.kasim.loan@gmail.com> * bump pydantic support to > 2.X, various refactors and deprecation fixes (#1560) * 550 BugFix (#1563) * fix listener config if nlp=False * update conditions * fix deprecated @validator decorator (#1565) * Pydantic2 migration fix env var model parsing (#1566) * fix deprecated @validator decorator * fix env var pydantic model creation bug * bump versions --------- Co-authored-by: Mehmet Butgul <109360261+mehmetbutgul@users.noreply.github.com>
1 parent a34ce3f commit 405a223

File tree

12 files changed

+138
-47
lines changed

12 files changed

+138
-47
lines changed

docs/en/jsl/jsl_release_notes.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,27 @@ sidebar:
1616
See [Github Releases](https://github.com/JohnSnowLabs/johnsnowlabs/releases) for detailed information on Release History and Features
1717

1818

19+
20+
## 5.5.0
21+
Release date: 10-23-2024
22+
23+
The John Snow Labs 5.5.0 Library was released with the following pre-installed and recommended dependencies
24+
25+
{:.table-model-big}
26+
| Library | Version |
27+
|-----------------------------------------------------------------------------------------|------------|
28+
| [Visual NLP](https://nlp.johnsnowlabs.com/docs/en/spark_ocr_versions/ocr_release_notes) | `5.4.1` |
29+
| [Enterprise NLP](https://nlp.johnsnowlabs.com/docs/en/licensed_annotators) | `5.5.0` |
30+
| [Finance NLP](https://nlp.johnsnowlabs.com/docs/en/financial_release_notes) | `1.X.X` |
31+
| [Legal NLP](https://nlp.johnsnowlabs.com/docs/en/legal_release_notes) | `1.X.X` |
32+
| [NLU](https://github.com/JohnSnowLabs/nlu/releases) | `5.4.1` |
33+
| [Spark-NLP-Display](https://sparknlp.org/docs/en/display) | `5.0` |
34+
| [Spark-NLP](https://github.com/JohnSnowLabs/spark-nlp/releases/) | `5.5.0` |
35+
| [Pyspark](https://spark.apache.org/docs/latest/api/python/) | `3.4.0` |
36+
37+
38+
39+
1940
## 5.4.5
2041
Release date: 9-27-2024
2142

johnsnowlabs/auto_install/jsl_home.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,15 @@ def download_deps_and_create_info(
4848
overwrite=False,
4949
):
5050
"""Download a list of deps to the given lib_dir folder and create info_file at info_file_path."""
51-
info, old_info = {}, {}
51+
info, old_info = {}, None
5252
if os.path.exists(info_file_path):
5353
# keep old infos, we assume they are up-to-date and compatible
54-
old_info = InstallFolder.parse_file(info_file_path)
54+
if os.path.join("java_installs","info.json") in info_file_path:
55+
old_info = InstallFolder.java_folder_from_home()
56+
elif os.path.join("py_installs","info.json") in info_file_path:
57+
old_info = InstallFolder.py_folder_from_home()
5558

5659
for p in deps:
57-
5860
# print_prefix = Software.for_name(p.product_name).logo
5961
print_prefix = ProductLogo.from_name(p.product_name.name).value
6062
if p.dependency_type in JvmHardwareTarget:
@@ -88,12 +90,19 @@ def download_deps_and_create_info(
8890
install_type=p.dependency_type.value,
8991
product_version=p.dependency_version.as_str(),
9092
)
93+
info[p.file_name].compatible_spark_version = p.spark_version.value.as_str()
94+
info[p.file_name].product_version = p.dependency_version.as_str()
95+
9196
if info:
9297
info = InstallFolder(**{"infos": info})
9398
if old_info:
9499
info.infos.update(old_info.infos)
95-
info.write(info_file_path, indent=4)
100+
with open(info_file_path, "w") as f:
101+
for k, v in info.infos.items():
102+
v.product_version = str(v.product_version)
103+
v.compatible_spark_version = str(v.compatible_spark_version)
96104

105+
f.write(info.model_dump_json())
97106

98107
def setup_jsl_home(
99108
secrets: Optional[JslSecrets] = None,
@@ -183,9 +192,10 @@ def setup_jsl_home(
183192
java_deps, settings.java_dir, settings.java_info_file, overwrite
184193
)
185194

186-
RootInfo(version=settings.raw_version_jsl_lib, run_from=sys.executable).write(
187-
settings.root_info_file, indent=4
188-
)
195+
root_info = RootInfo(version=settings.raw_version_jsl_lib, run_from=sys.executable)
196+
root_info.version = root_info.version.as_str()
197+
with open(settings.root_info_file, "w") as f:
198+
f.write(root_info.model_dump_json())
189199
print(f"🙆 JSL Home setup in {settings.root_dir}")
190200

191201
return
@@ -258,9 +268,8 @@ def get_install_suite_from_jsl_home(
258268
if os.path.exists(settings.py_info_file):
259269
py_folder = InstallFolder.py_folder_from_home()
260270

261-
info = RootInfo.parse_file(settings.root_info_file)
271+
info = RootInfo.get_from_jsl_home()
262272
# Read all dependencies from local ~/.johnsnowlabs folder
263-
264273
suite = InstallSuite(
265274
nlp=LocalPy4JLib(
266275
java_lib=java_folder.get_product_entry(ProductName.nlp, jvm_hardware_target)

johnsnowlabs/finance.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@
123123
LargeFewShotClassifierModel,
124124
Mapper2Chunk,
125125
DocumentFiltererByNER,
126+
REChunkMerger,
127+
ContextualEntityFilterer,
126128
)
127129

128130
from sparknlp_jsl.modelTracer import ModelTracer

johnsnowlabs/legal.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@
121121
LargeFewShotClassifierModel,
122122
Mapper2Chunk,
123123
DocumentFiltererByNER,
124+
REChunkMerger,
125+
ContextualEntityFilterer,
124126
)
125127
from sparknlp_jsl.modelTracer import ModelTracer
126128
from sparknlp_jsl.pipeline_tracer import PipelineTracer

johnsnowlabs/medical.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@
9898
LargeFewShotClassifierModel,
9999
Mapper2Chunk,
100100
DocumentFiltererByNER,
101+
REChunkMerger,
102+
ContextualEntityFilterer,
101103
)
102104
from sparknlp_jsl.structured_deidentification import StructuredDeidentification
103105
from sparknlp_jsl.text_to_documents_columns import TextToDocumentsColumns
@@ -130,6 +132,7 @@
130132
TextMatcherInternalModel as TextMatcherModel,
131133
RegexMatcherInternal as RegexMatcher,
132134
RegexMatcherInternalModel as RegexMatcherModel,
135+
MedicalLLM as AutoGGUFModel,
133136
)
134137
from sparknlp_jsl.compatibility import Compatibility
135138
from sparknlp_jsl.pretrained import InternalResourceDownloader

johnsnowlabs/py_models/install_info.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from johnsnowlabs.py_models.lib_version import LibVersion
1010
from johnsnowlabs.utils.enums import JvmHardwareTarget, ProductName, PyInstallTypes
1111

12-
12+
import json
1313
class InstallFileInfoBase(WritableBaseModel):
1414
file_name: str
1515
product: ProductName
@@ -71,7 +71,12 @@ def __init__(self, *args, **kwargs):
7171

7272
@staticmethod
7373
def get_from_jsl_home():
74-
return RootInfo.parse_file(settings.root_info_file)
74+
import json
75+
if os.path.exists(settings.root_info_file):
76+
with open(settings.root_info_file, "r") as f:
77+
json_data = json.loads(f.read())
78+
return RootInfo(run_from=json_data["run_from"],
79+
version=json_data["version"])
7580

7681

7782
class InstallFolder(WritableBaseModel):
@@ -93,13 +98,26 @@ def get_product_entry(
9398
@staticmethod
9499
def java_folder_from_home():
95100
if os.path.exists(settings.java_info_file):
96-
return InstallFolder.parse_file(settings.java_info_file)
101+
with open(settings.java_info_file, "r") as f:
102+
json_data = json.loads(f.read())
103+
infos = {}
104+
for k,v in json_data['infos'].items():
105+
if k.endswith(".jar"):
106+
infos[k] = JvmInstallInfo(**v)
107+
return InstallFolder(infos=infos)
97108
return False
98109

99110
@staticmethod
100111
def py_folder_from_home():
101112
if os.path.exists(settings.py_info_file):
102-
return InstallFolder.parse_file(settings.py_info_file)
113+
with open(settings.py_info_file, "r") as f:
114+
json_data = json.loads(f.read())
115+
infos = {}
116+
for k,v in json_data['infos'].items():
117+
if k.endswith(".whl") or k.endswith(".tar.gz"):
118+
infos[k] = PyInstallInfo(**v)
119+
120+
return InstallFolder(infos=infos)
103121
return False
104122

105123

johnsnowlabs/py_models/jsl_secrets.py

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pathlib import Path
55
from typing import Dict, List, Optional, Union
66

7-
from pydantic import validator
7+
from pydantic import field_validator
88

99
from johnsnowlabs import settings
1010
from johnsnowlabs.abstract_base.pydantic_model import WritableBaseModel
@@ -62,17 +62,17 @@ class JslSecrets(WritableBaseModel):
6262
methods for reading/storing found_secrets and managing .jslhome folder
6363
"""
6464

65-
HC_SECRET: Secret = None
66-
HC_LICENSE: Secret = None
65+
HC_SECRET: Optional[str] = None
66+
HC_LICENSE: Optional[str] = None
6767
HC_VERSION: Optional[LibVersionIdentifier] = None
68-
OCR_SECRET: Secret = None
69-
OCR_LICENSE: Secret = None
68+
OCR_SECRET: Optional[str] = None
69+
OCR_LICENSE: Optional[str] = None
7070
OCR_VERSION: Optional[LibVersionIdentifier] = None
71-
AWS_ACCESS_KEY_ID: Secret = None
72-
AWS_SECRET_ACCESS_KEY: Secret = None
71+
AWS_ACCESS_KEY_ID: Optional[str] = None
72+
AWS_SECRET_ACCESS_KEY: Optional[str] = None
7373
NLP_VERSION: Optional[LibVersionIdentifier] = None
74-
JSL_LEGAL_LICENSE: Secret = None
75-
JSL_FINANCE_LICENSE: Secret = None
74+
JSL_LEGAL_LICENSE: Optional[str] = None
75+
JSL_FINANCE_LICENSE: Optional[str] = None
7676

7777
@staticmethod
7878
def raise_invalid_version():
@@ -82,7 +82,7 @@ def raise_invalid_version():
8282
)
8383
raise ValueError("Invalid secrets")
8484

85-
@validator("HC_SECRET")
85+
@field_validator("HC_SECRET")
8686
def hc_version_check(cls, HC_SECRET):
8787
global hc_validation_logged
8888
try:
@@ -114,7 +114,7 @@ def is_ocr_secret_correct_version(ocr_secret: Optional[str]) -> bool:
114114
def is_hc_secret_correct_version(hc_secret: Optional[str]) -> bool:
115115
return hc_secret and hc_secret.split("-")[0] == settings.raw_version_medical
116116

117-
@validator("OCR_SECRET")
117+
@field_validator("OCR_SECRET")
118118
def ocr_version_check(cls, OCR_SECRET):
119119
global ocr_validation_logged
120120
try:
@@ -123,6 +123,8 @@ def ocr_version_check(cls, OCR_SECRET):
123123
and not ocr_validation_logged
124124
):
125125
ocr_validation_logged = True
126+
if not OCR_SECRET:
127+
return OCR_SECRET
126128
print(
127129
f"🚨 Outdated OCR Secrets in license file. Version={(OCR_SECRET.split('-')[0] if OCR_SECRET else None)} but should be Version={settings.raw_version_ocr}"
128130
)
@@ -424,6 +426,13 @@ def search_env_vars() -> Union["JslSecrets", bool]:
424426
]
425427
):
426428
print("👌 License detected in Environment Variables")
429+
if isinstance(hc_version,str):
430+
hc_version = LibVersionIdentifier(hc_version)
431+
if isinstance(ocr_version,str):
432+
ocr_version = LibVersionIdentifier(ocr_version)
433+
if isinstance(nlp_version,str):
434+
nlp_version = LibVersionIdentifier(nlp_version)
435+
427436
return JslSecrets(
428437
HC_SECRET=hc_secret,
429438
HC_LICENSE=hc_license,
@@ -631,6 +640,13 @@ def from_json_dict(secrets, secrets_metadata: Optional = None) -> "JslSecrets":
631640
secrets["JSL_FINANCE_LICENSE"] if "JSL_FINANCE_LICENSE" in secrets else None
632641
)
633642

643+
if isinstance(hc_version,str):
644+
hc_version = LibVersionIdentifier(hc_version)
645+
if isinstance(ocr_version,str):
646+
ocr_version = LibVersionIdentifier(ocr_version)
647+
if isinstance(nlp_version,str):
648+
nlp_version = LibVersionIdentifier(nlp_version)
649+
634650
return JslSecrets(
635651
HC_SECRET=hc_secret,
636652
HC_LICENSE=hc_license,
@@ -659,8 +675,9 @@ def from_jsl_home(
659675
return False
660676

661677
try:
678+
662679
# Try/Catch in case we get validation errors from outdated files
663-
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
680+
license_infos = LicenseInfos.from_home()
664681
if log and not already_logged:
665682
already_logged = True
666683
print(
@@ -692,7 +709,7 @@ def update_outdated_lib_secrets(
692709
for license in os.listdir(settings.license_dir):
693710
if license == "info.json":
694711
continue
695-
secrets = JslSecrets.parse_file(os.path.join(settings.license_dir, license))
712+
secrets = JslSecrets.from_json_file_path(os.path.join(settings.license_dir, license))
696713
if (
697714
secrets.HC_SECRET
698715
and hc_secrets
@@ -768,7 +785,7 @@ def are_credentials_known(found_secrets: "JslSecrets") -> bool:
768785
# Return True, if secrets are already stored in JSL-Home, otherwise False
769786
Path(settings.py_dir).mkdir(parents=True, exist_ok=True)
770787
if os.path.exists(settings.creds_info_file):
771-
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
788+
license_infos = LicenseInfos.from_home()
772789
else:
773790
# If license dir did not exist yet, secrets are certainly new
774791
return False
@@ -786,13 +803,13 @@ def are_lib_secrets_an_upgrade(found_secrets: "JslSecrets") -> bool:
786803
# Return True, if lib are newer than existing ones, if yes upgrade locally stored secrets
787804
Path(settings.py_dir).mkdir(parents=True, exist_ok=True)
788805
if os.path.exists(settings.creds_info_file):
789-
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
806+
license_infos = LicenseInfos.from_home()
790807
else:
791808
# If license dir did not exist yet, secrets are certainly new
792809
return False
793810

794811
# if any stored secrets equal to found_secrets, then we already know them
795-
# check OCR secrets
812+
796813
if found_secrets.HC_SECRET:
797814
if any(
798815
map(
@@ -837,7 +854,7 @@ def store_in_jsl_home_if_new(secrets: "JslSecrets") -> None:
837854
file_name = file_name + "_".join(products) + f".json"
838855

839856
if os.path.exists(settings.creds_info_file):
840-
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
857+
license_infos = LicenseInfos.from_home()
841858
file_name = file_name.format(number=str(len(license_infos.infos)))
842859
license_info = LicenseInfo(
843860
jsl_secrets=secrets, products=products, id=str(len(license_infos.infos))
@@ -848,13 +865,16 @@ def store_in_jsl_home_if_new(secrets: "JslSecrets") -> None:
848865
secrets.write(out_dir)
849866
print(f"📋 Stored new John Snow Labs License in {out_dir}")
850867
else:
851-
file_name = file_name.format(number="0")
852868
license_info = LicenseInfo(jsl_secrets=secrets, products=products, id="0")
853-
LicenseInfos(infos={file_name: license_info}).write(
854-
settings.creds_info_file
855-
)
869+
license_infos = LicenseInfos(infos={file_name: license_info})
870+
with open(settings.creds_info_file, "w") as f:
871+
f.write(license_infos.model_dump_json())
872+
873+
file_name = file_name.format(number="0")
856874
out_dir = os.path.join(settings.license_dir, file_name)
857-
secrets.write(out_dir)
875+
with open(out_dir, "w") as f:
876+
f.write(secrets.model_dump_json())
877+
#secrets.write(out_dir)
858878
print(f"📋 Stored John Snow Labs License in {out_dir}")
859879
# We might load again JSL-Secrets from local
860880
already_logged = True
@@ -877,6 +897,7 @@ class LicenseInfo(WritableBaseModel):
877897
products: List[ProductName]
878898

879899

900+
880901
class LicenseInfos(WritableBaseModel):
881902
"""Representation of a LicenseInfo in ~/.johnsnowlabs/licenses/info.json
882903
Maps file_name to LicenseInfo
@@ -886,6 +907,15 @@ class LicenseInfos(WritableBaseModel):
886907

887908
@staticmethod
888909
def from_home() -> Optional["LicenseInfos"]:
889-
if os.path.exists(settings.creds_info_file):
890-
return LicenseInfos.parse_file(settings.creds_info_file)
891-
return None
910+
if not os.path.exists(settings.creds_info_file):
911+
return None
912+
data = json.load(open(settings.creds_info_file))
913+
infos = {}
914+
for info in data['infos']:
915+
secret = JslSecrets.from_json_dict(data['infos'][info]['jsl_secrets'])
916+
i = LicenseInfo(id=info, jsl_secrets=secret,
917+
products=data['infos'][info]['products'],
918+
)
919+
infos[info] = i
920+
license_infos = LicenseInfos(infos=infos)
921+
return license_infos

johnsnowlabs/py_models/lib_version.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,6 @@ def as_str(self) -> str:
133133
"""Return LibVersion object as canonical str representation"""
134134
# We filter out all values != None so version checks match up
135135
return ".".join(filter(lambda x: x, [self.major, self.minor, self.patch]))
136+
137+
def __str__(self):
138+
return self.as_str()

0 commit comments

Comments
 (0)