10
10
11
11
from .utility import table_filter , pd_table_value , file_hash , file_storage_size
12
12
from .mapping import bids2openminds_instance
13
- from . import globals
14
13
15
14
16
15
def create_techniques(layout_df):
    """Collect the openMINDS technique instances for the suffixes in a BIDS layout table.

    Parameters
    ----------
    layout_df : pandas.DataFrame
        BIDS layout table; the "suffix" column is consulted.

    Returns
    -------
    list or None
        openMINDS technique instances, or None when no technique was found.
    """
    techniques = []
    # Suffixes that mark metadata/auxiliary files rather than acquisition techniques.
    non_technique_suffixes = ["description", "participants", "events"]
    for suffix in layout_df["suffix"].unique().tolist():
        # Skip missing suffixes and the known non-technique ones.
        if pd.isna(suffix) or suffix in non_technique_suffixes:
            continue
        mapped = bids2openminds_instance(suffix, "MAP_2_TECHNIQUES")
        # Suffixes absent from the mapping library come back as NA; only
        # mapped techniques are kept, everything else is reported once.
        # NOTE(review): pd.isna on a multi-element list yields an array and
        # would make this condition ambiguous — presumably the mapper returns
        # None or a single-element list; confirm against bids2openminds_instance.
        if not pd.isna(mapped):
            techniques.extend(mapped)
        else:
            warn(
                f"The {suffix} suffix is currently considered an auxiliary file for already existing techniques or a non technique file.")

    return techniques or None
31
32
@@ -35,12 +36,13 @@ def create_approaches(layout_df):
35
36
approaches = set ([])
36
37
for datatype in datatypes :
37
38
if not (pd .isna (datatype )):
38
- approaches .update (bids2openminds_instance (datatype , "MAP_2_EXPERIMENTAL_APPROACHES" ))
39
+ approaches .update (bids2openminds_instance (
40
+ datatype , "MAP_2_EXPERIMENTAL_APPROACHES" ))
39
41
40
42
return list (approaches ) or None
41
43
42
44
43
- def create_dataset_version (bids_layout , dataset_description , layout_df , studied_specimens , file_repository ):
45
+ def create_dataset_version (bids_layout , dataset_description , layout_df , studied_specimens , file_repository , collection ):
44
46
45
47
# Fetch the dataset type from dataset description file
46
48
@@ -49,7 +51,8 @@ def create_dataset_version(bids_layout, dataset_description, layout_df, studied_
49
51
# Fetch the digitalIdentifier from dataset description file
50
52
51
53
if "DatasetDOI" in dataset_description :
52
- digital_identifier = omcore .DOI (identifier = dataset_description ["DatasetDOI" ])
54
+ digital_identifier = omcore .DOI (
55
+ identifier = dataset_description ["DatasetDOI" ])
53
56
else :
54
57
digital_identifier = None
55
58
@@ -92,32 +95,34 @@ def create_dataset_version(bids_layout, dataset_description, layout_df, studied_
92
95
techniques = techniques ,
93
96
how_to_cite = how_to_cite ,
94
97
repository = file_repository ,
95
- #other_contributions=other_contribution # needs to be a Contribution object
98
+ # other_contributions=other_contribution # needs to be a Contribution object
96
99
# version_identifier
97
100
)
98
101
99
- globals . collection .add (dataset_version )
102
+ collection .add (dataset_version )
100
103
101
104
return dataset_version
102
105
103
106
104
def create_dataset(dataset_description, dataset_version, collection):
    """Build an openMINDS Dataset from a BIDS dataset description and register it.

    Parameters
    ----------
    dataset_description : dict
        Parsed contents of the BIDS dataset_description.json file.
    dataset_version : object
        The DatasetVersion instance(s) linked via `has_versions`.
    collection : object
        openMINDS collection the new Dataset is added to.

    Returns
    -------
    omcore.Dataset
        The newly created dataset instance.
    """
    # A DOI is only attached when the description file provides one.
    digital_identifier = None
    if "DatasetDOI" in dataset_description:
        digital_identifier = omcore.DOI(
            identifier=dataset_description["DatasetDOI"])

    dataset = omcore.Dataset(
        digital_identifier=digital_identifier,
        full_name=dataset_description["Name"],
        has_versions=dataset_version,
    )
    collection.add(dataset)

    return dataset
118
123
119
124
120
- def create_subjects (subject_id , layout_df , layout ):
125
+ def create_subjects (subject_id , layout_df , layout , collection ):
121
126
122
127
sessions = layout .get_sessions ()
123
128
subjects_dict = {}
@@ -126,30 +131,33 @@ def create_subjects(subject_id, layout_df, layout):
126
131
127
132
# Find the participants files in the files table
128
133
participants_paths = table_filter (layout_df , "participants" )
129
- if participants_paths .empty :
130
- #creating emphty subjects just based on file structure
134
+ if participants_paths .empty :
135
+ # creating emphty subjects just based on file structure
131
136
for subject in subject_id :
132
137
subject_name = f"sub-{ subject } "
133
138
state_cache_dict = {}
134
139
state_cache = []
135
- #dealing with condition that have no seasion
140
+ # dealing with condition that have no seasion
136
141
if not sessions :
137
142
state = omcore .SubjectState (
138
- internal_identifier = f"Studied state { subject_name } " .strip (),
143
+ internal_identifier = f"Studied state { subject_name } " .strip (
144
+ ),
139
145
lookup_label = f"Studied state { subject_name } " .strip ()
140
146
)
141
- globals . collection .add (state )
147
+ collection .add (state )
142
148
state_cache_dict ["" ] = state
143
149
state_cache .append (state )
144
150
else :
145
- #create a subject state for each state
151
+ # create a subject state for each state
146
152
for session in sessions :
147
- if not (table_filter (table_filter (layout_df ,session ,"session" ),subject ,"subject" ).empty ):
153
+ if not (table_filter (table_filter (layout_df , session , "session" ), subject , "subject" ).empty ):
148
154
state = omcore .SubjectState (
149
- internal_identifier = f"Studied state { subject_name } { session } " .strip (),
150
- lookup_label = f"Studied state { subject_name } { session } " .strip ()
155
+ internal_identifier = f"Studied state { subject_name } { session } " .strip (
156
+ ),
157
+ lookup_label = f"Studied state { subject_name } { session } " .strip (
158
+ )
151
159
)
152
- globals . collection .add (state )
160
+ collection .add (state )
153
161
state_cache_dict [f"{ session } " ] = state
154
162
state_cache .append (state )
155
163
subject_state_dict [f"{ subject } " ] = state_cache_dict
@@ -159,20 +167,21 @@ def create_subjects(subject_id, layout_df, layout):
159
167
)
160
168
subjects_dict [f"{ subject } " ] = subject_cache
161
169
subjects_list .append (subject_cache )
162
- globals .collection .add (subject_cache )
163
-
170
+ collection .add (subject_cache )
164
171
165
172
return subjects_dict , subject_state_dict , subjects_list
166
173
167
-
168
174
# Select the tsv file of the table
169
- participants_path_tsv = pd_table_value (table_filter (participants_paths , ".tsv" , "extension" ), "path" )
170
- participants_path_json = pd_table_value (table_filter (participants_paths , ".json" , "extension" ), "path" )
175
+ participants_path_tsv = pd_table_value (table_filter (
176
+ participants_paths , ".tsv" , "extension" ), "path" )
177
+ participants_path_json = pd_table_value (table_filter (
178
+ participants_paths , ".json" , "extension" ), "path" )
171
179
172
180
participants_table = pd .read_csv (participants_path_tsv , sep = "\t " , header = 0 )
173
181
for subject in subject_id :
174
182
subject_name = f"sub-{ subject } "
175
- data_subject = table_filter (participants_table , subject_name , "participant_id" )
183
+ data_subject = table_filter (
184
+ participants_table , subject_name , "participant_id" )
176
185
state_cache_dict = {}
177
186
state_cache = []
178
187
if not sessions :
@@ -181,74 +190,84 @@ def create_subjects(subject_id, layout_df, layout):
181
190
value = pd_table_value (data_subject , "age" ),
182
191
unit = controlled_terms .UnitOfMeasurement .year
183
192
),
184
- handedness = bids2openminds_instance (pd_table_value (data_subject , "handedness" ), "MAP_2_HANDEDNESS" , is_list = False ),
193
+ handedness = bids2openminds_instance (pd_table_value (
194
+ data_subject , "handedness" ), "MAP_2_HANDEDNESS" , is_list = False ),
185
195
internal_identifier = f"Studied state { subject_name } " .strip (),
186
196
lookup_label = f"Studied state { subject_name } " .strip ()
187
- )
188
- globals . collection .add (state )
197
+ )
198
+ collection .add (state )
189
199
state_cache_dict ["" ] = state
190
200
state_cache .append (state )
191
201
else :
192
202
for session in sessions :
193
- if not (table_filter (table_filter (layout_df ,session ,"session" ),subject ,"subject" ).empty ):
203
+ if not (table_filter (table_filter (layout_df , session , "session" ), subject , "subject" ).empty ):
194
204
state = omcore .SubjectState (
195
205
age = omcore .QuantitativeValue (
196
206
value = pd_table_value (data_subject , "age" ),
197
207
unit = controlled_terms .UnitOfMeasurement .year
198
208
),
199
- handedness = bids2openminds_instance (pd_table_value (data_subject , "handedness" ), "MAP_2_HANDEDNESS" , is_list = False ),
200
- internal_identifier = f"Studied state { subject_name } { session } " .strip (),
201
- lookup_label = f"Studied state { subject_name } { session } " .strip ()
209
+ handedness = bids2openminds_instance (pd_table_value (
210
+ data_subject , "handedness" ), "MAP_2_HANDEDNESS" , is_list = False ),
211
+ internal_identifier = f"Studied state { subject_name } { session } " .strip (
212
+ ),
213
+ lookup_label = f"Studied state { subject_name } { session } " .strip (
214
+ )
202
215
)
203
- globals . collection .add (state )
216
+ collection .add (state )
204
217
state_cache_dict [f"{ session } " ] = state
205
218
state_cache .append (state )
206
219
subject_state_dict [f"{ subject } " ] = state_cache_dict
207
220
subject_cache = omcore .Subject (
208
- biological_sex = bids2openminds_instance (pd_table_value (data_subject , "sex" ), "MAP_2_SEX" , is_list = False ),
221
+ biological_sex = bids2openminds_instance (pd_table_value (
222
+ data_subject , "sex" ), "MAP_2_SEX" , is_list = False ),
209
223
lookup_label = f"{ subject_name } " ,
210
224
internal_identifier = f"{ subject_name } " ,
211
225
# TODO species should default to homo sapiens
212
- species = bids2openminds_instance (pd_table_value (data_subject , "species" ), "MAP_2_SPECIES" , is_list = False ),
226
+ species = bids2openminds_instance (pd_table_value (
227
+ data_subject , "species" ), "MAP_2_SPECIES" , is_list = False ),
213
228
studied_states = state_cache
214
229
)
215
230
subjects_dict [f"{ subject } " ] = subject_cache
216
231
subjects_list .append (subject_cache )
217
- globals . collection .add (subject_cache )
232
+ collection .add (subject_cache )
218
233
219
234
return subjects_dict , subject_state_dict , subjects_list
220
235
221
236
222
- def create_file (layout_df , BIDS_path ):
237
+ def create_file (layout_df , BIDS_path , collection ):
223
238
224
239
BIDS_directory_path = os .path .dirname (BIDS_path )
225
240
file_repository = omcore .FileRepository ()
226
- globals . collection .add (file_repository )
241
+ collection .add (file_repository )
227
242
files_list = []
228
243
for index , file in layout_df .iterrows ():
229
244
file_format = None
230
245
content_description = None
231
246
data_types = None
232
247
extension = file ["extension" ]
233
248
path = file ["path" ]
234
- iri = IRI (pathlib .Path (path ).as_uri ())
235
- name = os .path .basename (path )
249
+ iri = IRI (pathlib .Path (path ).as_uri ())
250
+ name = os .path .basename (path )
236
251
hashes = file_hash (path )
237
252
storage_size = file_storage_size (path )
238
253
if pd .isna (file ["subject" ]):
239
254
if file ["suffix" ] == "participants" :
240
255
if extension == ".json" :
241
256
content_description = f"A JSON metadata file of participants TSV."
242
- data_types = controlled_terms .DataType .by_name ("associative array" )
243
- file_format = omcore .ContentType .by_name ("application/json" )
257
+ data_types = controlled_terms .DataType .by_name (
258
+ "associative array" )
259
+ file_format = omcore .ContentType .by_name (
260
+ "application/json" )
244
261
elif extension == [".tsv" ]:
245
262
content_description = f"A metadata table for participants."
246
263
data_types = controlled_terms .DataType .by_name ("table" )
247
- file_format = omcore .ContentType .by_name ("text/tab-separated-values" )
264
+ file_format = omcore .ContentType .by_name (
265
+ "text/tab-separated-values" )
248
266
else :
249
267
if extension == ".json" :
250
268
content_description = f"A JSON metadata file for { file ['suffix' ]} of subject { file ['subject' ]} "
251
- data_types = controlled_terms .DataType .by_name ("associative array" )
269
+ data_types = controlled_terms .DataType .by_name (
270
+ "associative array" )
252
271
file_format = omcore .ContentType .by_name ("application/json" )
253
272
elif extension in [".nii" , ".nii.gz" ]:
254
273
content_description = f"Data file for { file ['suffix' ]} of subject { file ['subject' ]} "
@@ -257,8 +276,10 @@ def create_file(layout_df, BIDS_path):
257
276
elif extension == [".tsv" ]:
258
277
if file ["suffix" ] == "events" :
259
278
content_description = f"Event file for { file ['suffix' ]} of subject { file ['subject' ]} "
260
- data_types = controlled_terms .DataType .by_name ("event sequence" )
261
- file_format = omcore .ContentType .by_name ("text/tab-separated-values" )
279
+ data_types = controlled_terms .DataType .by_name (
280
+ "event sequence" )
281
+ file_format = omcore .ContentType .by_name (
282
+ "text/tab-separated-values" )
262
283
file = omcore .File (
263
284
iri = iri ,
264
285
content_description = content_description ,
@@ -271,7 +292,7 @@ def create_file(layout_df, BIDS_path):
271
292
# special_usage_role
272
293
storage_size = storage_size ,
273
294
)
274
- globals . collection .add (file )
295
+ collection .add (file )
275
296
files_list .append (file )
276
297
277
298
return files_list , file_repository
0 commit comments