35
35
* Class to handle loading data from MARC records
36
36
*/
37
37
public class MarcUtil {
38
- private static HashMap <String , Set <String >> marcRecordFieldListCache = new HashMap <>();
38
+ private static final HashMap <String , Set <String >> marcRecordFieldListCache = new HashMap <>();
39
39
private static long lastRecordHashCode ;
40
40
/**
41
41
* Get Set of Strings as indicated by tagStr. For each field spec in the
42
- * tagStr that is NOT about bytes (i.e. not a 008[7-12] type fieldspec ), the
42
+ * tagStr that is NOT about bytes (i.e. not a 008[7-12] type field spec ), the
43
43
* result string is the concatenation of all the specific subfields.
44
44
*
45
45
* @param record
@@ -87,24 +87,24 @@ public static Set<String> getFieldList(org.marc4j.marc.Record record, String tag
87
87
// Process Subfields
88
88
String subfield = tag1 .substring (3 );
89
89
boolean havePattern = false ;
90
- int subend = 0 ;
90
+ int subfieldEnd = 0 ;
91
91
// brackets indicate parsing for individual characters or as pattern
92
92
int bracket = tag1 .indexOf ('[' );
93
93
if (bracket != -1 ) {
94
- String [] sub = tag1 .substring (bracket + 1 ).split ("[\\ ]\\ [\\ -, ]+" );
94
+ String [] sub = tag1 .substring (bracket + 1 ).split ("[]\\ [\\ -, ]+" );
95
95
try {
96
96
// if bracket expression is digits, expression is treated as character
97
97
// positions
98
- int substart = Integer .parseInt (sub [0 ]);
99
- subend = (sub .length > 1 ) ? Integer .parseInt (sub [1 ]) + 1 : substart + 1 ;
100
- String subfieldWObracket = subfield .substring (0 , bracket - 3 );
101
- result .addAll (getSubfieldDataAsSet (record , tag , subfieldWObracket , substart , subend ));
98
+ int subfieldStart = Integer .parseInt (sub [0 ]);
99
+ subfieldEnd = (sub .length > 1 ) ? Integer .parseInt (sub [1 ]) + 1 : subfieldStart + 1 ;
100
+ String subfieldWithoutBracket = subfield .substring (0 , bracket - 3 );
101
+ result .addAll (getSubfieldDataAsSet (record , tag , subfieldWithoutBracket , subfieldStart , subfieldEnd ));
102
102
} catch (NumberFormatException e ) {
103
103
// assume brackets expression is a pattern such as [a-z]
104
104
havePattern = true ;
105
105
}
106
106
}
107
- if (subend == 0 ) // don't want specific characters.
107
+ if (subfieldEnd == 0 ) // don't want specific characters.
108
108
{
109
109
String separator = null ;
110
110
if (subfield .indexOf ('\'' ) != -1 ) {
@@ -162,17 +162,16 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
162
162
}
163
163
164
164
// Loop through Data and Control Fields
165
- List <VariableField > varFlds = record .getVariableFields (fldTag );
166
- for (VariableField vf : varFlds ) {
167
- if (!isControlField (fldTag ) && subfield != null ) {
168
- // Data Field
169
- DataField dfield = (DataField ) vf ;
170
- resultSet .addAll (dfield .getSubfieldDataAsSet (subfield , beginIx , endIx ));
171
- } else // Control Field
172
- {
173
- String cfldData = ((ControlField ) vf ).getData ();
174
- if (cfldData .length () >= endIx )
175
- resultSet .add (cfldData .substring (beginIx , endIx ));
165
+ List <VariableField > variableFields = record .getVariableFields (fldTag );
166
+ for (VariableField vf : variableFields ) {
167
+ if (isControlField (fldTag ) && subfield != null ) {
168
+ String controlFieldData = ((ControlField ) vf ).getData ();
169
+ if (controlFieldData .length () >= endIx ) {
170
+ resultSet .add (controlFieldData .substring (beginIx , endIx ));
171
+ }
172
+ } else {
173
+ DataField dataField = (DataField ) vf ;
174
+ resultSet .addAll (dataField .getSubfieldDataAsSet (subfield , beginIx , endIx ));
176
175
}
177
176
}
178
177
return resultSet ;
@@ -187,8 +186,7 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
187
186
* @param subfieldsStr
188
187
* - the string containing the desired subfields
189
188
* @param separator
190
- * - the separator string to insert between subfield items (if null,
191
- * a " " will be used)
189
+ * - the separator string to insert between subfield items (if null, a " " will be used)
192
190
* @return a Set of String, where each string is the concatenated contents of
193
191
* all the desired subfield values from a single instance of the
194
192
* fldTag
@@ -210,15 +208,15 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
210
208
211
209
// Loop through Data and Control Fields
212
210
// int iTag = new Integer(fldTag).intValue();
213
- List <VariableField > varFlds = record .getVariableFields (fldTag );
214
- if (varFlds == null ){
211
+ List <VariableField > variableFields = record .getVariableFields (fldTag );
212
+ if (variableFields == null ){
215
213
return resultSet ;
216
214
}
217
- for (VariableField vf : varFlds ) {
215
+ for (VariableField vf : variableFields ) {
218
216
if (!isControlField (fldTag ) && subfieldsStr != null ) {
219
217
// DataField
220
- DataField dfield = (DataField ) vf ;
221
- resultSet .addAll (dfield .getSubfieldDataAsSet (subfieldsStr , separator ));
218
+ DataField dataField = (DataField ) vf ;
219
+ resultSet .addAll (dataField .getSubfieldDataAsSet (subfieldsStr , separator ));
222
220
} else {
223
221
// Control Field
224
222
resultSet .add (((ControlField ) vf ).getData ().trim ());
@@ -227,16 +225,11 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
227
225
return resultSet ;
228
226
}
229
227
230
- private static Pattern controlFieldPattern = Pattern .compile ("00[0-9]" );
231
- private static boolean isControlField (String fieldTag ) {
232
- return controlFieldPattern .matcher (fieldTag ).matches ();
233
- }
234
-
235
228
private static boolean isControlField (int fieldTag ) {
236
229
return fieldTag <= 9 ;
237
230
}
238
231
239
- private static HashMap <String , Pattern > subfieldPatterns = new HashMap <>();
232
+ private static final HashMap <String , Pattern > subfieldPatterns = new HashMap <>();
240
233
/**
241
234
* Given a tag for a field, and a list (or regex) of one or more subfields get
242
235
* any linked 880 fields and include the appropriate subfields as a String
@@ -252,8 +245,7 @@ private static boolean isControlField(int fieldTag) {
252
245
* be interpreted as particular bytes, NOT a pattern. 100abcd denotes
253
246
* subfields a, b, c, d are desired from the linked 880.
254
247
* @param separator
255
- * - the separator string to insert between subfield items (if null,
256
- * a " " will be used)
248
+ * - the separator string to insert between subfield items (if null, a " " will be used)
257
249
*
258
250
* @return set of Strings containing the values of the designated 880
259
251
* field(s)/subfield(s)
@@ -270,10 +262,10 @@ private static Set<String> getLinkedFieldValue(Record record, String tag, String
270
262
}
271
263
}
272
264
List <DataField > fields = record .getDataFields (880 );
273
- for (DataField dfield : fields ) {
274
- Subfield link = dfield .getSubfield ('6' );
265
+ for (DataField dataField : fields ) {
266
+ Subfield link = dataField .getSubfield ('6' );
275
267
if (link != null && link .getData ().startsWith (tag )) {
276
- List <Subfield > subList = dfield .getSubfields ();
268
+ List <Subfield > subList = dataField .getSubfields ();
277
269
StringBuilder buf = new StringBuilder ();
278
270
for (Subfield subF : subList ) {
279
271
boolean addIt = false ;
@@ -332,13 +324,13 @@ public static Set<String> getAllSubfields(Record record, String fieldSpec, Strin
332
324
String fldTag = fldTag1 .substring (0 , 3 );
333
325
int fldTagAsInt = Integer .parseInt (fldTag );
334
326
335
- String subfldTags = fldTag1 .substring (3 );
327
+ String subfieldTags = fldTag1 .substring (3 );
336
328
337
329
List <DataField > marcFieldList = record .getDataFields (fldTagAsInt );
338
330
if (!marcFieldList .isEmpty ()) {
339
331
for (DataField marcField : marcFieldList ) {
340
332
341
- StringBuilder buffer = getSpecifiedSubfieldsAsString (marcField , subfldTags , separator );
333
+ StringBuilder buffer = getSpecifiedSubfieldsAsString (marcField , subfieldTags , separator );
342
334
if (buffer .length () > 0 ) {
343
335
result .add (AspenStringUtils .cleanDataForSolr (buffer .toString ()));
344
336
}
@@ -353,7 +345,7 @@ public static StringBuilder getSpecifiedSubfieldsAsString(DataField marcField, S
353
345
StringBuilder buffer = new StringBuilder ();
354
346
List <Subfield > subFields = marcField .getSubfields ();
355
347
for (Subfield subfield : subFields ) {
356
- if (validSubfields .length () == 0 || validSubfields .contains ("" + subfield .getCode ())){
348
+ if (validSubfields .isEmpty () || validSubfields .contains ("" + subfield .getCode ())){
357
349
if (buffer .length () > 0 ) {
358
350
buffer .append (separator != null ? separator : " " );
359
351
}
@@ -382,28 +374,24 @@ public static List<DataField> getDataFields(Record marcRecord, int[] tags) {
382
374
public static ControlField getControlField (Record marcRecord , String tag ){
383
375
List <ControlField > variableFields = marcRecord .getControlFields (tag );
384
376
ControlField variableFieldReturn = null ;
385
- for (Object variableField : variableFields ){
386
- if (variableField instanceof ControlField ){
387
- variableFieldReturn = (ControlField )variableField ;
388
- }
377
+ for (ControlField variableField : variableFields ){
378
+ variableFieldReturn = variableField ;
389
379
}
390
380
return variableFieldReturn ;
391
381
}
392
382
393
383
public static ControlField getControlField (Record marcRecord , int tag ){
394
384
List <ControlField > variableFields = marcRecord .getControlFields (tag );
395
385
ControlField variableFieldReturn = null ;
396
- for (Object variableField : variableFields ){
397
- if (variableField instanceof ControlField ){
398
- variableFieldReturn = (ControlField )variableField ;
399
- }
386
+ for (ControlField variableField : variableFields ){
387
+ variableFieldReturn = variableField ;
400
388
}
401
389
return variableFieldReturn ;
402
390
}
403
391
404
392
/**
405
- * Loops through all datafields and creates a field for "keywords"
406
- * searching. Shameless stolen from Vufind Indexer Custom Code
393
+ * Loops through all data fields and creates a field for "keywords"
394
+ * searching. Shamelessly stolen from VuFind Indexer Custom Code
407
395
*
408
396
* @param lowerBound
409
397
* - the "lowest" marc field to include (e.g. 100)
@@ -414,12 +402,12 @@ public static ControlField getControlField(Record marcRecord, int tag){
414
402
* range indicated by the bound string arguments.
415
403
*/
416
404
public static String getAllSearchableFields (Record record , int lowerBound , int upperBound ) {
417
- StringBuilder buffer = new StringBuilder ("" );
405
+ StringBuilder buffer = new StringBuilder ();
418
406
List <DataField > fields = record .getDataFields ();
419
407
for (DataField field : fields ) {
420
408
// Get all fields starting with the 100 and ending with the 839
421
409
// This will ignore any "code" fields and only use textual fields
422
- int tag = localParseInt (field .getTag (), - 1 );
410
+ int tag = localParseInt (field .getTag ());
423
411
if ((tag >= lowerBound ) && (tag < upperBound )) {
424
412
// Loop through subfields
425
413
List <Subfield > subfields = field .getSubfields ();
@@ -439,7 +427,7 @@ public static String getCustomSearchableFields(Record record, String customMarcF
439
427
440
428
public static String getFirstFieldVal (Record record , String fieldSpec ) {
441
429
Set <String > result = MarcUtil .getFieldList (record , fieldSpec );
442
- if (result .size () == 0 ){
430
+ if (result .isEmpty () ){
443
431
return null ;
444
432
}else {
445
433
return result .iterator ().next ();
@@ -450,11 +438,9 @@ public static String getFirstFieldVal(Record record, String fieldSpec) {
450
438
* return an int for the passed string
451
439
*
452
440
* @param str The String value of the integer to parse
453
- * @param defValue
454
- * - default value, if string doesn't parse into int
455
441
*/
456
- private static int localParseInt (String str , int defValue ) {
457
- int value = defValue ;
442
+ private static int localParseInt (String str ) {
443
+ int value = - 1 ;
458
444
try {
459
445
value = Integer .parseInt (str );
460
446
} catch (NumberFormatException nfe ) {
@@ -464,7 +450,7 @@ private static int localParseInt(String str, int defValue) {
464
450
return (value );
465
451
}
466
452
467
- private static Pattern specialCharPattern = Pattern .compile ("\\ p{C}" );
453
+ private static final Pattern specialCharPattern = Pattern .compile ("\\ p{C}" );
468
454
public static long getChecksum (Record marcRecord ) {
469
455
CRC32 crc32 = new CRC32 ();
470
456
String marcRecordContents = marcRecord .toString ();
@@ -489,9 +475,9 @@ public static void outputMarcRecord(Record marcRecord, File individualFile, Logg
489
475
writer2 .close ();
490
476
}
491
477
492
- private static SimpleDateFormat oo8DateFormat = new SimpleDateFormat ("yyMMdd" );
493
- private static SimpleDateFormat oo5DateFormat = new SimpleDateFormat ("yyyyMMdd" );
494
- public synchronized static Long getDateAddedForRecord (Record marcRecord , String recordNumber , String source , File individualFile , Logger logger ) {
478
+ private static final SimpleDateFormat oo8DateFormat = new SimpleDateFormat ("yyMMdd" );
479
+ private static final SimpleDateFormat oo5DateFormat = new SimpleDateFormat ("yyyyMMdd" );
480
+ public synchronized static Long getDateAddedForRecord (Record marcRecord , File individualFile , Logger logger ) {
495
481
//Set first detection date based on the creation date of the file
496
482
Long timeAdded = null ;
497
483
if (individualFile .exists ()){
@@ -556,15 +542,15 @@ public static Record readMarcRecordFromFile(File marcFile, BaseIndexingLogEntry
556
542
}
557
543
marcFileStream .close ();
558
544
}catch (FileNotFoundException fne ){
559
- //These will now show up in the suppression so we don't need to add them to notes.
545
+ //These will now show up in the suppression, so we don't need to add them to notes.
560
546
//logEntry.addNote("Could not find marcFile " + marcFile.getAbsolutePath());
561
547
return null ;
562
548
}catch (Exception e ){
563
549
//This happens if the file has too many items. Ignore and read with permissive handler.
564
550
//logEntry.addNote("Could not read marc file, loading permissive " + marcFile.getAbsolutePath() + e.toString());
565
551
}
566
552
567
- //If we got here, it didn't read successfully. Try again using the Permissinve Reader
553
+ //If we got here, it didn't read successfully. Try again using the Permissive Reader
568
554
//The Permissive Reader allows reading large files.
569
555
return readMarcRecordFromFilePermissive (marcFile , logEntry );
570
556
}
@@ -597,20 +583,11 @@ public static Record readJsonFormattedRecord(String identifier, String marcConte
597
583
try {
598
584
Record marcRecord = streamReader .next ();
599
585
marcFileStream .close ();
600
- streamReader = null ;
601
586
return marcRecord ;
602
- }catch (JSONException jse ){
603
- }catch (JsonParser .Escape jse ){
587
+ }catch (JSONException | JsonParser .Escape | MarcException | NullPointerException jse ){
604
588
logEntry .incInvalidRecords (identifier );
605
589
logEntry .addNote (jse .getMessage ());
606
- }catch (MarcException me ){
607
- logEntry .incInvalidRecords (identifier );
608
- logEntry .addNote (me .getMessage ());
609
- }catch (NullPointerException npe ){
610
- logEntry .incInvalidRecords (identifier );
611
- logEntry .addNote (npe .getMessage ());
612
590
}
613
- streamReader = null ;
614
591
marcFileStream .close ();
615
592
}catch (Exception e ){
616
593
logEntry .incErrors ("Could not parse marc in json format for " + identifier , e );
0 commit comments