Skip to content

Commit 0387e04

Browse files
authored
loosened up the huggingface dependencies (#89)
1 parent 3c4a053 commit 0387e04

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

pyproject.toml

+7-7
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ classifiers = [
2828
dependencies = [
2929
"dask==2024.1.1",
3030
"dask[dataframe]==2024.1.1",
31-
"datasets==2.16.1",
31+
"datasets>=2.16.1",
3232
"evaluate==0.4.1",
3333
"fast-ml==3.68",
3434
"femr==0.2.0",
@@ -39,7 +39,7 @@ dependencies = [
3939
"numpy==1.24.3",
4040
"packaging==23.2",
4141
"pandas==2.2.0",
42-
"peft==0.10.0",
42+
"peft>=0.10.0",
4343
"Pillow==10.3.0",
4444
"pyarrow==15.0.0",
4545
"pydantic==2.6.0",
@@ -50,13 +50,13 @@ dependencies = [
5050
"tensorflow==2.15.0",
5151
"tensorflow-metal==1.1.0; sys_platform == 'darwin'", # macOS only
5252
"tensorflow-datasets==4.5.2",
53-
"tqdm==4.66.1",
53+
"tqdm>=4.66.1",
5454
"torch==2.4.0",
55-
"tokenizers==0.19.0",
56-
"transformers==4.40.0",
57-
"accelerate==0.31.0",
55+
"tokenizers>=0.19.0",
56+
"transformers>=4.40.0",
57+
"accelerate>=0.31.0",
5858
"Werkzeug==3.0.1",
59-
"wandb==0.17.8",
59+
"wandb>=0.17.8",
6060
"xgboost==2.0.3",
6161
"cehrbert_data==0.0.5"
6262
]

src/cehrbert/runners/hf_cehrbert_finetune_runner.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ def main():
126126
LOG.exception(e)
127127
dataset = create_dataset_from_meds_reader(data_args, is_pretraining=False)
128128
if not data_args.streaming:
129-
dataset.save_to_disk(meds_extension_path)
129+
dataset.save_to_disk(str(meds_extension_path))
130130
train_set = dataset["train"]
131131
validation_set = dataset["validation"]
132132
test_set = dataset["test"]
@@ -249,7 +249,7 @@ def assign_split(example):
249249
)
250250

251251
if not data_args.streaming:
252-
processed_dataset.save_to_disk(prepared_ds_path)
252+
processed_dataset.save_to_disk(str(prepared_ds_path))
253253

254254
collator = CehrBertDataCollator(tokenizer, model_args.max_position_embeddings, is_pretraining=False)
255255

src/cehrbert/runners/hf_cehrbert_pretrain_runner.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,7 @@ def main():
189189
LOG.exception(e)
190190
dataset = create_dataset_from_meds_reader(data_args, is_pretraining=True)
191191
if not data_args.streaming:
192-
dataset.save_to_disk(meds_extension_path)
192+
dataset.save_to_disk(str(meds_extension_path))
193193
else:
194194
# Load the dataset from the parquet files
195195
dataset = load_parquet_as_dataset(
@@ -225,7 +225,7 @@ def main():
225225
)
226226
# only save the data to the disk if it is not streaming
227227
if not data_args.streaming:
228-
processed_dataset.save_to_disk(prepared_ds_path)
228+
processed_dataset.save_to_disk(str(prepared_ds_path))
229229

230230
def filter_func(examples):
231231
return [_ >= data_args.min_num_tokens for _ in examples["num_of_concepts"]]

0 commit comments

Comments
 (0)