@@ -13,7 +13,6 @@ import java.util.zip.GZIPInputStream
13
13
* @author Adam
14
14
*/
15
15
@Slf4j
16
- @CompileStatic
17
16
class Records {
18
17
19
18
// private static final Logger logger = log.getLogger(Records.class);
@@ -36,8 +35,8 @@ class Records {
36
35
init(biocache_service_url, q, bbox, filename, region, " names_and_lsid" )
37
36
}
38
37
39
/**
 * Creates a Records instance by delegating directly to init(...) with the
 * supplied arguments.
 *
 * @param biocache_service_url forwarded unchanged to init
 * @param q                    forwarded unchanged to init
 * @param bbox                 forwarded unchanged to init
 * @param filename             forwarded unchanged to init
 * @param region               forwarded unchanged to init
 * @param facetField           forwarded unchanged to init
 * @param includeYear          forwarded unchanged to init; defaults to true so
 *                             existing six-argument callers keep working
 * @throws IOException declared here because init declares it
 */
Records(String biocache_service_url, String q, double[] bbox, String filename, SimpleRegion region, String facetField, Boolean includeYear = true) throws IOException {
    init(biocache_service_url, q, bbox, filename, region, facetField, includeYear)
}
42
41
43
42
Records (String filename ) throws IOException {
@@ -307,10 +306,12 @@ class Records {
307
306
}
308
307
}
309
308
310
- void init (String biocache_service_url , String q , double [] bbox , String filename , SimpleRegion region , String facetField ) throws IOException {
309
+ void init (String biocache_service_url , String q , double [] bbox , String filename , SimpleRegion region , String facetField , Boolean includeYear = true ) throws IOException {
311
310
int speciesEstimate = 250000
312
311
int recordsEstimate = 26000000
313
- int pageSize = 50000
312
+ int pageSize = 300000000 // Use a large number as a workaround for paging not working. Paging was added
313
+ // for an introduced nginx timeout value so this could be considered a revert.
314
+ // The biocache-service endpoint is streaming now, except for old sandbox instances.
314
315
315
316
String bboxTerm = null
316
317
if (bbox != null ) {
@@ -336,107 +337,104 @@ class Records {
336
337
if (facetField == null ) {
337
338
facetFieldTerm = ' '
338
339
}
339
- while (start < 300000000 ) {
340
- String url = biocache_service_url + " /webportal/occurrences.gz?q=" + q. replace(" " , " %20" ) + bboxTerm + " &pageSize=" + pageSize + " &fq=year%3A*&start=" + start + " &fl=longitude,latitude" + facetFieldTerm + " ,year"
341
-
342
- int tryCount = 0
343
- InputStream is = null
344
- CSVReader csv = null
345
- int maxTrys = 4
346
- while (tryCount < maxTrys && csv == null ) {
347
- tryCount++
348
- try {
349
- is = getUrlStream(url)
350
- csv = new CSVReader (new InputStreamReader (new GZIPInputStream (is)))
351
- } catch (Exception e) {
352
- log. error(" failed try " + tryCount + " of " + maxTrys + " : " + url, e)
353
- }
354
- }
355
340
356
- if (csv == null ) {
357
- throw new IOException (" failed to get records from biocache." )
341
+ String yearFq = includeYear ? ' &fq=year%3A*' : ' '
342
+
343
+ // no longer using paging
344
+ String url = biocache_service_url + " /webportal/occurrences.gz?q=" + q. replace(" " , " %20" ) + bboxTerm + " &pageSize=" + pageSize + yearFq + " &start=" + start + " &fl=longitude,latitude" + facetFieldTerm + (includeYear ? " ,year" : " " )
345
+
346
+ int tryCount = 0
347
+ InputStream is = null
348
+ CSVReader csv = null
349
+ int maxTrys = 4
350
+ while (tryCount < maxTrys && csv == null ) {
351
+ tryCount++
352
+ try {
353
+ is = getUrlStream(url)
354
+ csv = new CSVReader (new InputStreamReader (new GZIPInputStream (is)))
355
+ } catch (Exception e) {
356
+ log. error(" failed try " + tryCount + " of " + maxTrys + " : " + url, e)
358
357
}
358
+ }
359
359
360
- String [] line
361
- int [] header = new int [4 ] //to contain [0 ]=lsid, [1]=longitude, [2]=latitude, [3]=year
362
- int row = start
363
- int currentCount = 0
364
- while ((line = csv. readNext()) != null ) {
365
- if (raf != null ) {
366
- for (int i = 0 ; i < line. length; i++ ) {
367
- if (i > 0 ) {
368
- raf. write(" ," . bytes)
369
- }
370
- raf. write(line[i]. bytes)
360
+ if (csv == null ) {
361
+ throw new IOException (" failed to get records from biocache." )
362
+ }
363
+
364
+ String [] line
365
+ int [] header = new int [4 ] //to contain [0 ]=lsid, [1]=longitude, [2]=latitude, [3]=year
366
+ int row = start
367
+ int currentCount = 0
368
+ while ((line = csv. readNext()) != null ) {
369
+ if (raf != null ) {
370
+ for (int i = 0 ; i < line. length; i++ ) {
371
+ if (i > 0 ) {
372
+ raf. write(" ," . bytes)
371
373
}
372
- raf. write(" \n " . bytes)
374
+ raf. write(line[i] . bytes)
373
375
}
374
- currentCount++
375
- if (currentCount == 1 ) {
376
- // determine header
377
- for (int i = 0 ; i < line. length; i++ ) {
378
- if (line[i] == facetField) {
379
- header[0 ] = i
380
- }
381
- if (line[i] == " longitude" ) {
382
- header[1 ] = i
383
- }
384
- if (line[i] == " latitude" ) {
385
- header[2 ] = i
386
- }
387
- if (line[i] == " year" ) {
388
- header[3 ] = i
389
- }
376
+ raf. write(" \n " . bytes)
377
+ }
378
+ currentCount++
379
+ if (currentCount == 1 ) {
380
+ // determine header
381
+ for (int i = 0 ; i < line. length; i++ ) {
382
+ if (line[i] == facetField) {
383
+ header[0 ] = i
390
384
}
391
- log. debug(" header info:" + header[0 ] + " ," + header[1 ] + " ," + header[2 ] + " ," + header[3 ])
392
- } else {
393
- if (line. length >= 3 ) {
394
- try {
395
- double longitude = Double . parseDouble(line[header[1 ]])
396
- double latitude = Double . parseDouble(line[header[2 ]])
397
- if (region == null || region. isWithin_EPSG900913(longitude, latitude)) {
398
- points. add(longitude)
399
- points. add(latitude)
400
- String species = facetField == null ? ' species' : line[header[0 ]]
401
- Integer idx = lsidMap. get(species)
402
- if (idx == null ) {
403
- idx = lsidMap. size()
404
- lsidMap. put(species, idx)
405
- }
406
- lsidIdx. add(idx)
407
- years. add(Short . parseShort(line[header[3 ]]))
385
+ if (line[i] == " longitude" ) {
386
+ header[1 ] = i
387
+ }
388
+ if (line[i] == " latitude" ) {
389
+ header[2 ] = i
390
+ }
391
+ if (line[i] == " year" ) {
392
+ header[3 ] = i
393
+ }
394
+ }
395
+ log. debug(" header info:" + header[0 ] + " ," + header[1 ] + " ," + header[2 ] + " ," + header[3 ])
396
+ } else {
397
+ if (line. length >= 3 ) {
398
+ try {
399
+ double longitude = Double . parseDouble(line[header[1 ]])
400
+ double latitude = Double . parseDouble(line[header[2 ]])
401
+ if (region == null || region. isWithin_EPSG900913(longitude, latitude)) {
402
+ points. add(longitude)
403
+ points. add(latitude)
404
+ String species = facetField == null ? ' species' : line[header[0 ]]
405
+ Integer idx = lsidMap. get(species)
406
+ if (idx == null ) {
407
+ idx = lsidMap. size()
408
+ lsidMap. put(species, idx)
408
409
}
409
- } catch (Exception ignored) {
410
-
411
- } finally {
412
- if (lsidIdx. size() * 2 < points. size()) {
413
- points. remove(points. size() - 1 )
414
- points. remove(points. size() - 1 )
415
- } else if (years. size() < lsidIdx. size()) {
416
- years. add((short ) 0 )
410
+ lsidIdx. add(idx)
411
+ if (includeYear) {
412
+ years. add(Short . parseShort(line[header[3 ]]))
417
413
}
418
414
}
415
+ } catch (Exception ignored) {
416
+
417
+ } finally {
418
+ if (lsidIdx. size() * 2 < points. size()) {
419
+ points. remove(points. size() - 1 )
420
+ points. remove(points. size() - 1 )
421
+ } else if (years. size() < lsidIdx. size()) {
422
+ years. add((short ) 0 )
423
+ }
419
424
}
420
425
}
421
- row++
422
- }
423
- if (start == 0 ) {
424
- start = row - 1 // offset for header
425
426
}
427
+ row++
428
+ }
426
429
427
- csv. close()
428
- is. close()
430
+ csv. close()
431
+ is. close()
429
432
430
- if (is != null ) {
431
- try {
432
- is. close()
433
- } catch (Exception e) {
434
- log. error(e. getMessage(), e)
435
- }
436
- }
437
-
438
- if (currentCount == 0 || currentCount < pageSize) {
439
- break
433
+ if (is != null ) {
434
+ try {
435
+ is. close()
436
+ } catch (Exception e) {
437
+ log. error(e. getMessage(), e)
440
438
}
441
439
}
442
440
@@ -511,15 +509,15 @@ class Records {
511
509
512
510
/**
 * Orders two indices into points by the grid row each value falls in.
 * The row is computed as h - 1 - round((value - mLat) / res).
 *
 * NOTE(review): presumably o1/o2 index latitude entries of points and the
 * row is counted from the top of the grid - confirm against the caller.
 *
 * @param o1 index into points of the first value
 * @param o2 index into points of the second value
 * @return negative, zero or positive as the first row is less than, equal
 *         to, or greater than the second row
 */
@Override
int compare(Integer o1, Integer o2) {
    // Math.round on a double yields a long; keep the rows as long and compare
    // them explicitly instead of subtracting, so nothing is narrowed to int
    // and the difference cannot overflow.
    long firstRow = h - 1 - Math.round((points.get(o1) - mLat) / res)
    long secondRow = h - 1 - Math.round((points.get(o2) - mLat) / res)
    return Long.compare(firstRow, secondRow)
}
516
514
})
517
515
518
516
// get row starts
519
517
int [] rowStarts = new int [height]
520
518
int row = 0
521
519
for (int i = 0 ; i < sortOrder. length; i++ ) {
522
- int thisRow = (h - 1 - (int ) ((points. get(sortOrder[i]) - mLat) / res))
520
+ int thisRow = (h - 1 - (int ) Math . round ((points. get(sortOrder[i]) - mLat) / res))
523
521
524
522
// handle overflow
525
523
if (thisRow >= height) {
@@ -578,10 +576,10 @@ class Records {
578
576
579
577
@Override
580
578
int compare (Integer o1 , Integer o2 ) {
581
- int v = (( int ) (( points. get(o1) - mLat) / res))-((int) ((points.get(o2) - mLat) / res))
579
+ int v = (int ) (Math . round(( points. get(o1) - mLat) / res) - Math.round ((points.get(o2) - mLat) / res))
582
580
583
581
if (v == 0 ) {
584
- return (( int ) (( points. get(o1 - 1 ) - mLong) / res))-((int) ((points.get(o2 - 1) - mLong) / res))
582
+ return (int ) (Math . round(( points. get(o1 - 1 ) - mLong) / res) - Math.round ((points.get(o2 - 1) - mLong) / res))
585
583
} else {
586
584
return v
587
585
}
0 commit comments