Skip to content

Commit

Permalink
make already trained models forward compatible
Browse files: browse the repository at this point in the history
  • Loading branch information
baixiac committed Aug 5, 2024
1 parent 3a4b646 commit b6f0725
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 0 deletions.
1 change: 1 addition & 0 deletions app/model_services/medcat_model_deid.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def init_model(self) -> None:
self._model = self.load_model(self._model_pack_path)
self._model._addl_ner[0].tokenizer.hf_tokenizer._in_target_context_manager = getattr(self._model._addl_ner[0].tokenizer.hf_tokenizer, "_in_target_context_manager", False)
self._model._addl_ner[0].tokenizer.hf_tokenizer.clean_up_tokenization_spaces = getattr(self._model._addl_ner[0].tokenizer.hf_tokenizer, "clean_up_tokenization_spaces", None)
self._model._addl_ner[0].tokenizer.hf_tokenizer.split_special_tokens = getattr(self._model._addl_ner[0].tokenizer.hf_tokenizer, "split_special_tokens", False)
if (self._config.DEVICE.startswith("cuda") and torch.cuda.is_available()) or \
(self._config.DEVICE.startswith("mps") and torch.backends.mps.is_available()) or \
(self._config.DEVICE.startswith("cpu")):
Expand Down
2 changes: 2 additions & 0 deletions app/trainers/medcat_deid_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def run(trainer: "MedcatDeIdentificationSupervisedTrainer",
ner = model._addl_ner[0]
ner.tokenizer.hf_tokenizer._in_target_context_manager = getattr(ner.tokenizer.hf_tokenizer, "_in_target_context_manager", False)
ner.tokenizer.hf_tokenizer.clean_up_tokenization_spaces = getattr(ner.tokenizer.hf_tokenizer, "clean_up_tokenization_spaces", None)
ner.tokenizer.hf_tokenizer.split_special_tokens = getattr(ner.tokenizer.hf_tokenizer, "split_special_tokens", False)
_save_pretrained = ner.model.save_pretrained
if ("safe_serialization" in inspect.signature(_save_pretrained).parameters):
ner.model.save_pretrained = partial(_save_pretrained, safe_serialization=(trainer._config.TRAINING_SAFE_MODEL_SERIALISATION == "true"))
Expand Down Expand Up @@ -223,6 +224,7 @@ def run(trainer: "MedcatDeIdentificationSupervisedTrainer",
ner = trainer._model_service._model._addl_ner[0]
ner.tokenizer.hf_tokenizer._in_target_context_manager = getattr(ner.tokenizer.hf_tokenizer, "_in_target_context_manager", False)
ner.tokenizer.hf_tokenizer.clean_up_tokenization_spaces = getattr(ner.tokenizer.hf_tokenizer, "clean_up_tokenization_spaces", None)
ner.tokenizer.hf_tokenizer.split_special_tokens = getattr(ner.tokenizer.hf_tokenizer, "split_special_tokens", False)
eval_results, examples = ner.eval(data_file.name)
cui2names = {}
eval_results.sort_values(by=["cui"])
Expand Down

0 comments on commit b6f0725

Please sign in to comment.