Skip to content

Commit 931311a

Browse files
committed
General cleanup
- Move more settings to indexing profile and settings - Remove unused imports and functions - Clean up warnings - Remove unused code
1 parent c348b51 commit 931311a

File tree

11 files changed

+69
-238
lines changed

11 files changed

+69
-238
lines changed

code/events_indexer/src/com/turning_leaf_technologies/events/CommunicoIndexer.java

-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import java.text.ParseException;
3434
import java.text.SimpleDateFormat;
3535
import java.time.LocalDate;
36-
import java.time.temporal.TemporalAdjusters;
3736
import java.util.*;
3837
import java.util.Date;
3938
import java.util.zip.CRC32;

code/java_shared_libraries/src/com/turning_leaf_technologies/indexing/BaseIndexingSettings.java

+9
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ public class BaseIndexingSettings {
2727
String treatUnknownLanguageAs;
2828
String treatUndeterminedLanguageAs;
2929
String customMarcFieldsToIndexAsKeyword;
30+
boolean includePersonalAndCorporateNamesInTopics;
3031

3132
static char getCharFromRecordSet(ResultSet indexingProfilesRS, String fieldName) throws SQLException {
3233
String subfieldString = indexingProfilesRS.getString(fieldName);
@@ -124,4 +125,12 @@ public String getTreatUndeterminedLanguageAs() {
124125
public void setTreatUndeterminedLanguageAs(String treatUndeterminedLanguageAs) {
125126
this.treatUndeterminedLanguageAs = treatUndeterminedLanguageAs;
126127
}
128+
129+
public boolean isIncludePersonalAndCorporateNamesInTopics() {
130+
return includePersonalAndCorporateNamesInTopics;
131+
}
132+
133+
public void setIncludePersonalAndCorporateNamesInTopics(boolean includePersonalAndCorporateNamesInTopics) {
134+
this.includePersonalAndCorporateNamesInTopics = includePersonalAndCorporateNamesInTopics;
135+
}
127136
}

code/java_shared_libraries/src/com/turning_leaf_technologies/indexing/IndexingProfile.java

+2
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ public IndexingProfile(ResultSet indexingProfileRS, BaseIndexingLogEntry logEntr
220220
this.hideUnknownLiteraryForm = indexingProfileRS.getBoolean("hideUnknownLiteraryForm");
221221
this.hideNotCodedLiteraryForm = indexingProfileRS.getBoolean("hideNotCodedLiteraryForm");
222222

223+
this.includePersonalAndCorporateNamesInTopics = indexingProfileRS.getBoolean("includePersonalAndCorporateNamesInTopics");
224+
223225
this.setNoteSubfield(getCharFromRecordSet(indexingProfileRS, "noteSubfield"));
224226

225227
this.setLastUpdateOfChangedRecords(indexingProfileRS.getLong("lastUpdateOfChangedRecords"));

code/java_shared_libraries/src/com/turning_leaf_technologies/indexing/SideLoadSettings.java

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public SideLoadSettings(ResultSet settings) throws SQLException {
2828
this.specifiedFormatCategory = settings.getString("specifiedFormatCategory");
2929
this.specifiedFormatBoost = settings.getInt("specifiedFormatBoost");
3030
this.treatUnknownLanguageAs = settings.getString("treatUnknownLanguageAs");
31+
this.includePersonalAndCorporateNamesInTopics = settings.getBoolean("includePersonalAndCorporateNamesInTopics");
3132

3233
String deletedIdString = settings.getString("deletedRecordsIds");
3334
if (deletedIdString != null && deletedIdString.trim().length() > 0) {

code/java_shared_libraries/src/com/turning_leaf_technologies/marc/MarcUtil.java

+51-74
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@
3535
* Class to handle loading data from MARC records
3636
*/
3737
public class MarcUtil {
38-
private static HashMap<String, Set<String>> marcRecordFieldListCache = new HashMap<>();
38+
private static final HashMap<String, Set<String>> marcRecordFieldListCache = new HashMap<>();
3939
private static long lastRecordHashCode;
4040
/**
4141
* Get Set of Strings as indicated by tagStr. For each field spec in the
42-
* tagStr that is NOT about bytes (i.e. not a 008[7-12] type fieldspec), the
42+
* tagStr that is NOT about bytes (i.e. not a 008[7-12] type field spec), the
4343
* result string is the concatenation of all the specific subfields.
4444
*
4545
* @param record
@@ -87,24 +87,24 @@ public static Set<String> getFieldList(org.marc4j.marc.Record record, String tag
8787
// Process Subfields
8888
String subfield = tag1.substring(3);
8989
boolean havePattern = false;
90-
int subend = 0;
90+
int subfieldEnd = 0;
9191
// brackets indicate parsing for individual characters or as pattern
9292
int bracket = tag1.indexOf('[');
9393
if (bracket != -1) {
94-
String[] sub = tag1.substring(bracket + 1).split("[\\]\\[\\-, ]+");
94+
String[] sub = tag1.substring(bracket + 1).split("[]\\[\\-, ]+");
9595
try {
9696
// if bracket expression is digits, expression is treated as character
9797
// positions
98-
int substart = Integer.parseInt(sub[0]);
99-
subend = (sub.length > 1) ? Integer.parseInt(sub[1]) + 1 : substart + 1;
100-
String subfieldWObracket = subfield.substring(0, bracket - 3);
101-
result.addAll(getSubfieldDataAsSet(record, tag, subfieldWObracket, substart, subend));
98+
int subfieldStart = Integer.parseInt(sub[0]);
99+
subfieldEnd = (sub.length > 1) ? Integer.parseInt(sub[1]) + 1 : subfieldStart + 1;
100+
String subfieldWithoutBracket = subfield.substring(0, bracket - 3);
101+
result.addAll(getSubfieldDataAsSet(record, tag, subfieldWithoutBracket, subfieldStart, subfieldEnd));
102102
} catch (NumberFormatException e) {
103103
// assume brackets expression is a pattern such as [a-z]
104104
havePattern = true;
105105
}
106106
}
107-
if (subend == 0) // don't want specific characters.
107+
if (subfieldEnd == 0) // don't want specific characters.
108108
{
109109
String separator = null;
110110
if (subfield.indexOf('\'') != -1) {
@@ -162,17 +162,16 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
162162
}
163163

164164
// Loop through Data and Control Fields
165-
List<VariableField> varFlds = record.getVariableFields(fldTag);
166-
for (VariableField vf : varFlds) {
167-
if (!isControlField(fldTag) && subfield != null) {
168-
// Data Field
169-
DataField dfield = (DataField) vf;
170-
resultSet.addAll(dfield.getSubfieldDataAsSet(subfield, beginIx, endIx));
171-
} else // Control Field
172-
{
173-
String cfldData = ((ControlField) vf).getData();
174-
if (cfldData.length() >= endIx)
175-
resultSet.add(cfldData.substring(beginIx, endIx));
165+
List<VariableField> variableFields = record.getVariableFields(fldTag);
166+
for (VariableField vf : variableFields) {
167+
if (isControlField(fldTag) && subfield != null) {
168+
String controlFieldData = ((ControlField) vf).getData();
169+
if (controlFieldData.length() >= endIx) {
170+
resultSet.add(controlFieldData.substring(beginIx, endIx));
171+
}
172+
} else {
173+
DataField dataField = (DataField) vf;
174+
resultSet.addAll(dataField.getSubfieldDataAsSet(subfield, beginIx, endIx));
176175
}
177176
}
178177
return resultSet;
@@ -187,8 +186,7 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
187186
* @param subfieldsStr
188187
* - the string containing the desired subfields
189188
* @param separator
190-
* - the separator string to insert between subfield items (if null,
191-
* a " " will be used)
189+
* - the separator string to insert between subfield items (if null, a " " will be used)
192190
* @return a Set of String, where each string is the concatenated contents of
193191
* all the desired subfield values from a single instance of the
194192
* fldTag
@@ -210,15 +208,15 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
210208

211209
// Loop through Data and Control Fields
212210
// int iTag = new Integer(fldTag).intValue();
213-
List<VariableField> varFlds = record.getVariableFields(fldTag);
214-
if (varFlds == null){
211+
List<VariableField> variableFields = record.getVariableFields(fldTag);
212+
if (variableFields == null){
215213
return resultSet;
216214
}
217-
for (VariableField vf : varFlds) {
215+
for (VariableField vf : variableFields) {
218216
if (!isControlField(fldTag) && subfieldsStr != null) {
219217
// DataField
220-
DataField dfield = (DataField) vf;
221-
resultSet.addAll(dfield.getSubfieldDataAsSet(subfieldsStr, separator));
218+
DataField dataField = (DataField) vf;
219+
resultSet.addAll(dataField.getSubfieldDataAsSet(subfieldsStr, separator));
222220
} else {
223221
// Control Field
224222
resultSet.add(((ControlField) vf).getData().trim());
@@ -227,16 +225,11 @@ private static Set<String> getSubfieldDataAsSet(org.marc4j.marc.Record record, i
227225
return resultSet;
228226
}
229227

230-
private static Pattern controlFieldPattern = Pattern.compile("00[0-9]");
231-
private static boolean isControlField(String fieldTag) {
232-
return controlFieldPattern.matcher(fieldTag).matches();
233-
}
234-
235228
private static boolean isControlField(int fieldTag) {
236229
return fieldTag <= 9;
237230
}
238231

239-
private static HashMap<String, Pattern> subfieldPatterns = new HashMap<>();
232+
private static final HashMap<String, Pattern> subfieldPatterns = new HashMap<>();
240233
/**
241234
* Given a tag for a field, and a list (or regex) of one or more subfields get
242235
* any linked 880 fields and include the appropriate subfields as a String
@@ -252,8 +245,7 @@ private static boolean isControlField(int fieldTag) {
252245
* be interpreted as particular bytes, NOT a pattern. 100abcd denotes that
253246
* subfields a, b, c, d are desired from the linked 880.
254247
* @param separator
255-
* - the separator string to insert between subfield items (if null,
256-
* a " " will be used)
248+
* - the separator string to insert between subfield items (if null, a " " will be used)
257249
*
258250
* @return set of Strings containing the values of the designated 880
259251
* field(s)/subfield(s)
@@ -270,10 +262,10 @@ private static Set<String> getLinkedFieldValue(Record record, String tag, String
270262
}
271263
}
272264
List<DataField> fields = record.getDataFields(880);
273-
for (DataField dfield : fields) {
274-
Subfield link = dfield.getSubfield('6');
265+
for (DataField dataField : fields) {
266+
Subfield link = dataField.getSubfield('6');
275267
if (link != null && link.getData().startsWith(tag)) {
276-
List<Subfield> subList = dfield.getSubfields();
268+
List<Subfield> subList = dataField.getSubfields();
277269
StringBuilder buf = new StringBuilder();
278270
for (Subfield subF : subList) {
279271
boolean addIt = false;
@@ -332,13 +324,13 @@ public static Set<String> getAllSubfields(Record record, String fieldSpec, Strin
332324
String fldTag = fldTag1.substring(0, 3);
333325
int fldTagAsInt = Integer.parseInt(fldTag);
334326

335-
String subfldTags = fldTag1.substring(3);
327+
String subfieldTags = fldTag1.substring(3);
336328

337329
List<DataField> marcFieldList = record.getDataFields(fldTagAsInt);
338330
if (!marcFieldList.isEmpty()) {
339331
for (DataField marcField : marcFieldList) {
340332

341-
StringBuilder buffer = getSpecifiedSubfieldsAsString(marcField, subfldTags, separator);
333+
StringBuilder buffer = getSpecifiedSubfieldsAsString(marcField, subfieldTags, separator);
342334
if (buffer.length() > 0) {
343335
result.add(AspenStringUtils.cleanDataForSolr(buffer.toString()));
344336
}
@@ -353,7 +345,7 @@ public static StringBuilder getSpecifiedSubfieldsAsString(DataField marcField, S
353345
StringBuilder buffer = new StringBuilder();
354346
List<Subfield> subFields = marcField.getSubfields();
355347
for (Subfield subfield : subFields) {
356-
if (validSubfields.length() == 0 || validSubfields.contains("" + subfield.getCode())){
348+
if (validSubfields.isEmpty() || validSubfields.contains("" + subfield.getCode())){
357349
if (buffer.length() > 0) {
358350
buffer.append(separator != null ? separator : " ");
359351
}
@@ -382,28 +374,24 @@ public static List<DataField> getDataFields(Record marcRecord, int[] tags) {
382374
public static ControlField getControlField(Record marcRecord, String tag){
383375
List<ControlField> variableFields = marcRecord.getControlFields(tag);
384376
ControlField variableFieldReturn = null;
385-
for (Object variableField : variableFields){
386-
if (variableField instanceof ControlField){
387-
variableFieldReturn = (ControlField)variableField;
388-
}
377+
for (ControlField variableField : variableFields){
378+
variableFieldReturn = variableField;
389379
}
390380
return variableFieldReturn;
391381
}
392382

393383
public static ControlField getControlField(Record marcRecord, int tag){
394384
List<ControlField> variableFields = marcRecord.getControlFields(tag);
395385
ControlField variableFieldReturn = null;
396-
for (Object variableField : variableFields){
397-
if (variableField instanceof ControlField){
398-
variableFieldReturn = (ControlField)variableField;
399-
}
386+
for (ControlField variableField : variableFields){
387+
variableFieldReturn = variableField;
400388
}
401389
return variableFieldReturn;
402390
}
403391

404392
/**
405-
* Loops through all datafields and creates a field for "keywords"
406-
* searching. Shameless stolen from Vufind Indexer Custom Code
393+
* Loops through all data fields and creates a field for "keywords"
394+
* searching. Shamelessly stolen from VuFind Indexer Custom Code
407395
*
408396
* @param lowerBound
409397
* - the "lowest" marc field to include (e.g. 100)
@@ -414,12 +402,12 @@ public static ControlField getControlField(Record marcRecord, int tag){
414402
* range indicated by the bound string arguments.
415403
*/
416404
public static String getAllSearchableFields(Record record, int lowerBound, int upperBound) {
417-
StringBuilder buffer = new StringBuilder("");
405+
StringBuilder buffer = new StringBuilder();
418406
List<DataField> fields = record.getDataFields();
419407
for (DataField field : fields) {
420408
// Get all fields starting with the 100 and ending with the 839
421409
// This will ignore any "code" fields and only use textual fields
422-
int tag = localParseInt(field.getTag(), -1);
410+
int tag = localParseInt(field.getTag());
423411
if ((tag >= lowerBound) && (tag < upperBound)) {
424412
// Loop through subfields
425413
List<Subfield> subfields = field.getSubfields();
@@ -439,7 +427,7 @@ public static String getCustomSearchableFields(Record record, String customMarcF
439427

440428
public static String getFirstFieldVal(Record record, String fieldSpec) {
441429
Set<String> result = MarcUtil.getFieldList(record, fieldSpec);
442-
if (result.size() == 0){
430+
if (result.isEmpty()){
443431
return null;
444432
}else{
445433
return result.iterator().next();
@@ -450,11 +438,9 @@ public static String getFirstFieldVal(Record record, String fieldSpec) {
450438
* return an int for the passed string
451439
*
452440
* @param str The String value of the integer to parse
453-
* @param defValue
454-
* - default value, if string doesn't parse into int
455441
*/
456-
private static int localParseInt(String str, int defValue) {
457-
int value = defValue;
442+
private static int localParseInt(String str) {
443+
int value = -1;
458444
try {
459445
value = Integer.parseInt(str);
460446
} catch (NumberFormatException nfe) {
@@ -464,7 +450,7 @@ private static int localParseInt(String str, int defValue) {
464450
return (value);
465451
}
466452

467-
private static Pattern specialCharPattern = Pattern.compile("\\p{C}");
453+
private static final Pattern specialCharPattern = Pattern.compile("\\p{C}");
468454
public static long getChecksum(Record marcRecord) {
469455
CRC32 crc32 = new CRC32();
470456
String marcRecordContents = marcRecord.toString();
@@ -489,9 +475,9 @@ public static void outputMarcRecord(Record marcRecord, File individualFile, Logg
489475
writer2.close();
490476
}
491477

492-
private static SimpleDateFormat oo8DateFormat = new SimpleDateFormat("yyMMdd");
493-
private static SimpleDateFormat oo5DateFormat = new SimpleDateFormat("yyyyMMdd");
494-
public synchronized static Long getDateAddedForRecord(Record marcRecord, String recordNumber, String source, File individualFile, Logger logger) {
478+
private static final SimpleDateFormat oo8DateFormat = new SimpleDateFormat("yyMMdd");
479+
private static final SimpleDateFormat oo5DateFormat = new SimpleDateFormat("yyyyMMdd");
480+
public synchronized static Long getDateAddedForRecord(Record marcRecord, File individualFile, Logger logger) {
495481
//Set first detection date based on the creation date of the file
496482
Long timeAdded = null;
497483
if (individualFile.exists()){
@@ -556,15 +542,15 @@ public static Record readMarcRecordFromFile(File marcFile, BaseIndexingLogEntry
556542
}
557543
marcFileStream.close();
558544
}catch (FileNotFoundException fne){
559-
//These will now show up in the suppression so we don't need to add them to notes.
545+
//These will now show up in the suppression, so we don't need to add them to notes.
560546
//logEntry.addNote("Could not find marcFile " + marcFile.getAbsolutePath());
561547
return null;
562548
}catch (Exception e){
563549
//This happens if the file has too many items. Ignore and read with permissive handler.
564550
//logEntry.addNote("Could not read marc file, loading permissive " + marcFile.getAbsolutePath() + e.toString());
565551
}
566552

567-
//If we got here, it didn't read successfully. Try again using the Permissinve Reader
553+
//If we got here, it didn't read successfully. Try again using the Permissive Reader
568554
//The Permissive Reader allows reading large files.
569555
return readMarcRecordFromFilePermissive(marcFile, logEntry);
570556
}
@@ -597,20 +583,11 @@ public static Record readJsonFormattedRecord(String identifier, String marcConte
597583
try{
598584
Record marcRecord = streamReader.next();
599585
marcFileStream.close();
600-
streamReader = null;
601586
return marcRecord;
602-
}catch (JSONException jse){
603-
}catch (JsonParser.Escape jse){
587+
}catch (JSONException | JsonParser.Escape | MarcException | NullPointerException jse){
604588
logEntry.incInvalidRecords(identifier);
605589
logEntry.addNote(jse.getMessage());
606-
}catch (MarcException me){
607-
logEntry.incInvalidRecords(identifier);
608-
logEntry.addNote(me.getMessage());
609-
}catch (NullPointerException npe){
610-
logEntry.incInvalidRecords(identifier);
611-
logEntry.addNote(npe.getMessage());
612590
}
613-
streamReader = null;
614591
marcFileStream.close();
615592
}catch (Exception e){
616593
logEntry.incErrors("Could not parse marc in json format for " + identifier, e);

code/reindexer/src/com/turning_leaf_technologies/reindexer/IlsRecordProcessor.java

-2
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,6 @@ abstract class IlsRecordProcessor extends MarcRecordProcessor {
9595
this.suppressRecordsWithUrlsMatching = Pattern.compile(suppressRecordsWithUrlsMatching, Pattern.CASE_INSENSITIVE);
9696
}
9797

98-
includePersonalAndCorporateNamesInTopics = indexingProfileRS.getBoolean("includePersonalAndCorporateNamesInTopics");
99-
10098
loadHoldsStmt = dbConn.prepareStatement("SELECT ilsId, numHolds from ils_hold_summary where ilsId = ?", ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
10199
addTranslationMapValueStmt = dbConn.prepareStatement("INSERT INTO translation_map_values (translationMapId, value, translation) VALUES (?, ?, ?)");
102100
updateRecordSuppressionReasonStmt = dbConn.prepareStatement("UPDATE ils_records set suppressed=?, suppressionNotes=? where source=? and ilsId=?");

0 commit comments

Comments
 (0)