Skip to content

Commit 88089e8

Browse files
committed
Add ability to pass 'name' argument to load_dataset
1 parent 168a7a0 commit 88089e8

File tree

1 file changed

+13
-14
lines changed

1 file changed

+13
-14
lines changed

Diff for: src/axolotl/utils/data.py

+13 -14
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,7 @@ def load_tokenized_prepared_datasets(
9494
try:
9595
load_dataset(
9696
d.path,
97+
name=d.name,
9798
streaming=True,
9899
use_auth_token=use_auth_token,
99100
)
@@ -107,13 +108,15 @@ def load_tokenized_prepared_datasets(
107108
if local_path.is_dir():
108109
ds = load_dataset(
109110
d.path,
111+
name=d.name,
110112
data_files=d.data_files,
111113
streaming=False,
112114
split=None,
113115
)
114116
elif local_path.is_file():
115117
ds = load_dataset(
116118
"json",
119+
name=d.name,
117120
data_files=d.path,
118121
streaming=False,
119122
split=None,
@@ -123,26 +126,22 @@ def load_tokenized_prepared_datasets(
123126
"unhandled dataset load: local path exists, but is neither a directory or a file"
124127
)
125128
elif ds_from_hub:
126-
if d.data_files:
127-
ds = load_dataset(
128-
d.path,
129-
streaming=False,
130-
data_files=d.data_files,
131-
use_auth_token=use_auth_token,
132-
)
133-
else:
134-
ds = load_dataset(
135-
d.path,
136-
streaming=False,
137-
use_auth_token=use_auth_token,
138-
)
129+
ds = load_dataset(
130+
d.path,
131+
name=d.name,
132+
streaming=False,
133+
data_files=d.data_files,
134+
use_auth_token=use_auth_token,
135+
)
139136
else:
140137
fp = hf_hub_download(
141138
repo_id=d.path,
142139
repo_type="dataset",
143140
filename=d.data_files,
144141
)
145-
ds = load_dataset("json", data_files=fp, streaming=False, split=None)
142+
ds = load_dataset(
143+
"json", name=d.name, data_files=fp, streaming=False, split=None
144+
)
146145
if not ds:
147146
raise ValueError("unhandled dataset load")
148147
# support for using a subset of the data

0 commit comments

Comments
 (0)