10
10
import edu .kit .datamanager .ro_crate .entities .contextual .ContextualEntity ;
11
11
import edu .kit .datamanager .ro_crate .entities .data .DataEntity ;
12
12
import edu .kit .datamanager .ro_crate .entities .data .RootDataEntity ;
13
+ import edu .kit .datamanager .ro_crate .special .IdentifierUtils ;
13
14
import edu .kit .datamanager .ro_crate .special .JsonUtilFunctions ;
14
- import static edu .kit .datamanager .ro_crate .special .IdentifierUtils .decode ;
15
- import static edu .kit .datamanager .ro_crate .special .IdentifierUtils .isUrl ;
16
15
17
16
import edu .kit .datamanager .ro_crate .validation .JsonSchemaValidation ;
18
17
import edu .kit .datamanager .ro_crate .validation .Validator ;
19
- import org .apache .commons .io .FileUtils ;
20
18
21
19
import java .io .File ;
22
- import java .nio .file .Path ;
23
20
import java .util .*;
24
21
import java .util .stream .Collectors ;
22
+ import java .util .stream .Stream ;
25
23
import java .util .stream .StreamSupport ;
26
24
27
25
/**
28
26
* This class allows reading crates from the outside into the library in order
29
27
* to inspect or modify it.
30
- *
28
+ * <p>
31
29
* The class takes a strategy to support different ways of importing the crates.
32
30
* (from zip, folder, etc.)
33
31
*/
34
32
public class RoCrateReader {
35
33
34
+ /**
35
+ * This is a private inner class that shall not be exposed.
36
+ * **Do not make it public or protected.** It serves only the
37
+ * purpose of unsafe operations while reading a crate and
38
+ * may be specific to this implementation.
39
+ */
40
+ private static class RoCrateUnsafe extends RoCrate {
41
+ public void addDataEntityWithoutRootHasPart (DataEntity entity ) {
42
+ this .metadataContext .checkEntity (entity );
43
+ this .roCratePayload .addDataEntity (entity );
44
+ }
45
+ }
46
+
36
47
/**
37
48
* If the number of JSON entities in the crate is larger than this number,
38
49
* parallelization will be used.
@@ -80,34 +91,37 @@ public RoCrate readCrate(String location) {
80
91
JsonNode context = metadataJson .get (PROP_CONTEXT );
81
92
82
93
CrateMetadataContext crateContext = new RoCrateMetadataContext (context );
83
- RoCrate crate = new RoCrate ();
94
+ RoCrateUnsafe crate = new RoCrateUnsafe ();
84
95
crate .setMetadataContext (crateContext );
85
96
JsonNode graph = metadataJson .get (PROP_GRAPH );
86
97
87
98
if (graph .isArray ()) {
88
-
89
99
moveRootEntitiesFromGraphToCrate (crate , (ArrayNode ) graph );
90
- for (JsonNode node : graph ) {
91
- // if the id is in the root hasPart list, we know this entity is a data entity
92
- RootDataEntity root = crate .getRootDataEntity ();
93
- if (root != null && root .hasInHasPart (node .get (PROP_ID ).asText ())) {
94
- // data entity
95
- DataEntity .DataEntityBuilder dataEntity = new DataEntity .DataEntityBuilder ()
96
- .setAll (node .deepCopy ());
97
-
98
- // Handle data entities with corresponding file
99
- File loc = checkFolderHasFile (node .get (PROP_ID ).asText (), files );
100
- if (loc != null ) {
101
- usedFiles .add (loc .getPath ());
102
- dataEntity .setLocationWithExceptions (loc .toPath ())
103
- .setId (loc .getName ());
104
- }
100
+ RootDataEntity root = crate .getRootDataEntity ();
101
+ if (root != null ) {
102
+ Set <String > dataEntityIds = getDataEntityIds (root , graph );
103
+ for (JsonNode entityJson : graph ) {
104
+ String eId = unpackId (entityJson );
105
+ if (dataEntityIds .contains (eId )) {
106
+ // data entity
107
+ DataEntity .DataEntityBuilder dataEntity = new DataEntity .DataEntityBuilder ()
108
+ .setAll (entityJson .deepCopy ());
109
+
110
+ // Handle data entities with corresponding file
111
+ checkFolderHasFile (entityJson .get (PROP_ID ).asText (), files ).ifPresent (file -> {
112
+ usedFiles .add (file .getPath ());
113
+ dataEntity .setLocationWithExceptions (file .toPath ())
114
+ .setId (file .getName ());
115
+ });
105
116
106
- crate .addDataEntity (dataEntity .build (), false );
107
- } else {
108
- // contextual entity
109
- crate .addContextualEntity (
110
- new ContextualEntity .ContextualEntityBuilder ().setAll (node .deepCopy ()).build ());
117
+ crate .addDataEntityWithoutRootHasPart (dataEntity .build ());
118
+ } else {
119
+ // contextual entity
120
+ crate .addContextualEntity (
121
+ new ContextualEntity .ContextualEntityBuilder ()
122
+ .setAll (entityJson .deepCopy ())
123
+ .build ());
124
+ }
111
125
}
112
126
}
113
127
}
@@ -123,18 +137,79 @@ public RoCrate readCrate(String location) {
123
137
return crate ;
124
138
}
125
139
126
- protected File checkFolderHasFile (String id , File file ) {
127
- if (isUrl (id )) return null ;
128
- Path path = file .toPath ().resolve (decode (id ).get ());
129
- if (path .toFile ().exists ()) {
130
- return path .toFile ();
140
+ /**
141
+ * Extracts graph connections from top to bottom.
142
+ * <p>
143
+ * Example: (connections.get(parent) -> children)
144
+ *
145
+ * @param graph the ArrayNode with all Entities.
146
+ * @return the graph connections.
147
+ */
148
+ protected Map <String , Set <String >> makeEntityGraph (JsonNode graph ) {
149
+ Map <String , Set <String >> connections = new HashMap <>();
150
+
151
+ Map <String , JsonNode > idToNodes = new HashMap <>();
152
+ StreamSupport .stream (graph .spliterator (), false )
153
+ .forEach (jsonNode -> idToNodes .put (unpackId (jsonNode ), jsonNode ));
154
+
155
+ for (JsonNode entityNode : graph ) {
156
+ String currentId = unpackId (entityNode );
157
+ StreamSupport .stream (entityNode .path ("hasPart" ).spliterator (), false )
158
+ .map (this ::unpackId )
159
+ .map (s -> idToNodes .getOrDefault (s , null ))
160
+ .filter (Objects ::nonNull )
161
+ .forEach (child -> connections .computeIfAbsent (currentId , key -> new HashSet <>())
162
+ .add (unpackId (child )));
163
+ StreamSupport .stream (entityNode .path ("isPartOf" ).spliterator (), false )
164
+ .map (this ::unpackId )
165
+ .map (s -> idToNodes .getOrDefault (s , null ))
166
+ .filter (Objects ::nonNull )
167
+ .forEach (parent -> connections .computeIfAbsent (unpackId (parent ), key -> new HashSet <>())
168
+ .add (currentId ));
131
169
}
132
- return null ;
170
+ return connections ;
171
+ }
172
+
173
+ protected Set <String > getDataEntityIds (RootDataEntity root , JsonNode graph ) {
174
+ if (root == null ) { return Set .of (); }
175
+ Map <String , Set <String >> network = makeEntityGraph (graph );
176
+ Set <String > directDataEntities = new HashSet <>(root .hasPart );
177
+ return Stream .concat (
178
+ directDataEntities .stream (),
179
+ directDataEntities .stream ().flatMap (entity -> getDataEntityIdsRecursive (entity , network ))
180
+ ).collect (Collectors .toSet ());
181
+ }
182
+
183
+ protected Stream <String > getDataEntityIdsRecursive (
184
+ String parent ,
185
+ Map <String , Set <String >> network
186
+ ) {
187
+ return Stream .concat (
188
+ Stream .of (parent ),
189
+ network .getOrDefault (parent , new HashSet <>()).stream ()
190
+ .flatMap (s -> getDataEntityIdsRecursive (s , network ))
191
+ .filter (Objects ::nonNull )
192
+ );
193
+ }
194
+
195
+ protected String unpackId (JsonNode node ) {
196
+ if (node .isTextual ()) {
197
+ return node .asText ();
198
+ } else /*if (node.isObject())*/ {
199
+ return node .path (PROP_ID ).asText ();
200
+ }
201
+ }
202
+
203
+ protected Optional <File > checkFolderHasFile (String filepathOrId , File folder ) {
204
+ if (IdentifierUtils .isUrl (filepathOrId )) { return Optional .empty (); }
205
+ return IdentifierUtils .decode (filepathOrId )
206
+ .map (decoded -> folder .toPath ().resolve (decoded ).toFile ())
207
+ .filter (File ::exists );
133
208
}
134
209
135
210
/**
136
211
* Moves the descriptor and the root entity from the graph to the crate.
137
- *
212
+ * <p>
138
213
* Extracts the root data entity and the Metadata File Descriptor from the graph
139
214
* and inserts them into the crate object. It also deletes it from the graph.
140
215
* We will need the root dataset to distinguish between data entities and
@@ -170,7 +245,7 @@ protected void moveRootEntitiesFromGraphToCrate(RoCrate crate, ArrayNode graph)
170
245
171
246
/**
172
247
* Find the metadata descriptor.
173
- *
248
+ * <p>
174
249
* Currently prefers algorithm of version 1.1 over the one of 1.2-DRAFT.
175
250
*
176
251
* @param graph the graph to search the descriptor in.
@@ -200,10 +275,12 @@ protected Optional<JsonNode> getMetadataDescriptor(ArrayNode graph) {
200
275
/**
201
276
* Extracts the root entity from the graph, using the information from the
202
277
* descriptor.
203
- *
278
+ * <p>
204
279
* Basically implements step 5 of the algorithm described here:
205
- * https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#finding-the-root-data-entity
206
- *
280
+ * <a href="https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#finding-the-root-data-entity">
281
+ * https://www.researchobject.org/ro-crate/1.1/root-data-entity.html#finding-the-root-data-entity
282
+ * </a>
283
+ *
207
284
* @param graph the graph from the metadata JSON-LD file
208
285
* @param descriptor the RO-Crate descriptor
209
286
* @return the root entity, if found
0 commit comments