5
5
import java .math .BigInteger ;
6
6
import java .nio .charset .Charset ;
7
7
import java .nio .charset .StandardCharsets ;
8
- import java .util .*;
8
+ import java .util .ArrayList ;
9
+ import java .util .Arrays ;
10
+ import java .util .Stack ;
9
11
10
12
import com .fasterxml .jackson .core .*;
11
13
import com .fasterxml .jackson .core .base .ParserMinimalBase ;
12
14
import com .fasterxml .jackson .core .io .IOContext ;
13
15
import com .fasterxml .jackson .core .io .NumberInput ;
14
16
import com .fasterxml .jackson .core .json .DupDetector ;
15
17
import com .fasterxml .jackson .core .sym .ByteQuadsCanonicalizer ;
16
- import com .fasterxml .jackson .core .util .*;
18
+ import com .fasterxml .jackson .core .util .ByteArrayBuilder ;
19
+ import com .fasterxml .jackson .core .util .JacksonFeatureSet ;
20
+ import com .fasterxml .jackson .core .util .TextBuffer ;
17
21
18
22
import static com .fasterxml .jackson .dataformat .cbor .CBORConstants .*;
19
23
@@ -2289,10 +2293,9 @@ protected void _finishToken() throws IOException
2289
2293
2290
2294
if ((available >= len )
2291
2295
// if not, could we read? NOTE: we do not require it, just attempt to read
2292
- || ((_inputBuffer .length >= len )
2293
- && _tryToLoadToHaveAtLeast (len ))) {
2294
- _finishShortText (len );
2295
- return ;
2296
+ || _tryToLoadToHaveAtLeast (len )) {
2297
+ _finishShortText (len );
2298
+ return ;
2296
2299
}
2297
2300
// If not enough space, need handling similar to chunked
2298
2301
_finishLongText (len );
@@ -2331,11 +2334,9 @@ protected String _finishTextToken(int ch) throws IOException
2331
2334
// due to inputBuffer never being even close to that big).
2332
2335
2333
2336
final int available = _inputEnd - _inputPtr ;
2334
-
2335
2337
if ((available >= len )
2336
2338
// if not, could we read? NOTE: we do not require it, just attempt to read
2337
- || ((_inputBuffer .length >= len )
2338
- && _tryToLoadToHaveAtLeast (len ))) {
2339
+ || _tryToLoadToHaveAtLeast (len )) {
2339
2340
return _finishShortText (len );
2340
2341
}
2341
2342
// If not enough space, need handling similar to chunked
@@ -2364,19 +2365,22 @@ private final String _finishShortText(int len) throws IOException
2364
2365
2365
2366
// Let's actually do a tight loop for ASCII first:
2366
2367
final int end = _inputPtr ;
2367
-
2368
- int i ;
2369
- while (( i = inputBuf [inPtr ]) >= 0 ) {
2368
+ int i = 0 ;
2369
+ while ( inPtr < end && i >= 0 ) {
2370
+ i = inputBuf [inPtr ++];
2370
2371
outBuf [outPtr ++] = (char ) i ;
2371
- if (++inPtr == end ) {
2372
- String str = _textBuffer .setCurrentAndReturn (outPtr );
2373
- if (stringRefs != null ) {
2374
- stringRefs .stringRefs .add (str );
2375
- _sharedString = str ;
2376
- }
2377
- return str ;
2372
+ }
2373
+ if (inPtr == end && i >= 0 ) {
2374
+ String str = _textBuffer .setCurrentAndReturn (outPtr );
2375
+ if (stringRefs != null ) {
2376
+ stringRefs .stringRefs .add (str );
2377
+ _sharedString = str ;
2378
2378
}
2379
+ return str ;
2379
2380
}
2381
+ // Correct extra increments
2382
+ outPtr -= 1 ;
2383
+ inPtr -= 1 ;
2380
2384
final int [] codes = UTF8_UNIT_CODES ;
2381
2385
do {
2382
2386
i = inputBuf [inPtr ++] & 0xFF ;
@@ -2443,10 +2447,17 @@ private final String _finishShortText(int len) throws IOException
2443
2447
2444
2448
private final String _finishLongText (int len ) throws IOException
2445
2449
{
2446
- char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
2447
- int outPtr = 0 ;
2448
- final int [] codes = UTF8_UNIT_CODES ;
2450
+ StringRefList stringRefs = null ;
2451
+ if (!_stringRefs .empty () &&
2452
+ shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2453
+ stringRefs = _stringRefs .peek ();
2454
+ }
2455
+ // First a tight loop for ASCII.
2456
+ len = _finishLongTextAscii (len );
2457
+ char [] outBuf = _textBuffer .getBufferWithoutReset ();
2458
+ int outPtr = _textBuffer .getCurrentSegmentSize ();
2449
2459
int outEnd = outBuf .length ;
2460
+ final int [] codes = UTF8_UNIT_CODES ;
2450
2461
2451
2462
while (--len >= 0 ) {
2452
2463
int c = _nextByte () & 0xFF ;
@@ -2500,14 +2511,51 @@ private final String _finishLongText(int len) throws IOException
2500
2511
outBuf [outPtr ++] = (char ) c ;
2501
2512
}
2502
2513
String str = _textBuffer .setCurrentAndReturn (outPtr );
2503
- if (!_stringRefs .empty () &&
2504
- shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2505
- _stringRefs .peek ().stringRefs .add (str );
2514
+ if (stringRefs != null ) {
2515
+ stringRefs .stringRefs .add (str );
2506
2516
_sharedString = str ;
2507
2517
}
2508
2518
return str ;
2509
2519
}
2510
2520
2521
+ /**
2522
+ * Consumes as many ascii chars as possible in a tight loop. Returns the amount of bytes remaining.
2523
+ */
2524
+ private final int _finishLongTextAscii (int len ) throws IOException
2525
+ {
2526
+ char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
2527
+ final byte [] input = _inputBuffer ;
2528
+ while (len > 0 ) {
2529
+ // load as much input as possible
2530
+ int size = Math .min (len , Math .min (outBuf .length , input .length ));
2531
+ if (!_tryToLoadToHaveAtLeast (size )) {
2532
+ return len ;
2533
+ }
2534
+ int outEnd = size ;
2535
+ int outPtr = 0 ;
2536
+ int inPtr = _inputPtr ;
2537
+ int i = 0 ;
2538
+ // Tight loop to copy into the output buffer, bail if a non-ascii char is found
2539
+ while (outPtr < outEnd && i >= 0 ) {
2540
+ i = input [inPtr ++];
2541
+ outBuf [outPtr ++] = (char ) i ;
2542
+ }
2543
+ // Found a non-ascii char, correct pointers and return to the caller.
2544
+ if (i < 0 ) {
2545
+ --outPtr ;
2546
+ _inputPtr = inPtr - 1 ;
2547
+ _textBuffer .setCurrentLength (outPtr );
2548
+ return len - outPtr ;
2549
+ }
2550
+ _inputPtr = inPtr ;
2551
+ if (outPtr >= outBuf .length ) {
2552
+ outBuf = _textBuffer .finishCurrentSegment ();
2553
+ }
2554
+ len -= size ;
2555
+ }
2556
+ return len ;
2557
+ }
2558
+
2511
2559
private final void _finishChunkedText () throws IOException
2512
2560
{
2513
2561
char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
@@ -2532,7 +2580,6 @@ private final void _finishChunkedText() throws IOException
2532
2580
}
2533
2581
break ;
2534
2582
}
2535
- _chunkLeft = len ;
2536
2583
int end = _inputPtr + len ;
2537
2584
if (end <= _inputEnd ) { // all within buffer
2538
2585
_chunkLeft = 0 ;
@@ -2541,19 +2588,22 @@ private final void _finishChunkedText() throws IOException
2541
2588
_chunkLeft = (end - _inputEnd );
2542
2589
_chunkEnd = _inputEnd ;
2543
2590
}
2544
- }
2545
- // besides of which just need to ensure there's content
2546
- if (_inputPtr >= _inputEnd ) { // end of buffer, but not necessarily chunk
2547
- loadMoreGuaranteed ();
2548
- int end = _inputPtr + _chunkLeft ;
2549
- if (end <= _inputEnd ) { // all within buffer
2550
- _chunkLeft = 0 ;
2551
- _chunkEnd = end ;
2552
- } else { // stretches beyond
2553
- _chunkLeft = (end - _inputEnd );
2554
- _chunkEnd = _inputEnd ;
2591
+ // start of a new chunk
2592
+ // First a tight loop for ASCII.
2593
+ _textBuffer .setCurrentLength (outPtr );
2594
+ if (_finishChunkedTextAscii ()) {
2595
+ // chunk fully consumed, let's get the next one
2596
+ outBuf = _textBuffer .getBufferWithoutReset ();
2597
+ outPtr = _textBuffer .getCurrentSegmentSize ();
2598
+ outEnd = outBuf .length ;
2599
+ continue ;
2555
2600
}
2601
+ outBuf = _textBuffer .getBufferWithoutReset ();
2602
+ outPtr = _textBuffer .getCurrentSegmentSize ();
2603
+ outEnd = outBuf .length ;
2556
2604
}
2605
+ // besides of which just need to ensure there's content
2606
+ _loadMoreForChunkIfNeeded ();
2557
2607
}
2558
2608
int c = input [_inputPtr ++] & 0xFF ;
2559
2609
int code = codes [c ];
@@ -2563,9 +2613,9 @@ private final void _finishChunkedText() throws IOException
2563
2613
}
2564
2614
2565
2615
switch (code ) {
2566
- case 0 :
2567
- break ;
2568
- case 1 : // 2-byte UTF
2616
+ case 0 :
2617
+ break ;
2618
+ case 1 : // 2-byte UTF
2569
2619
{
2570
2620
int d = _nextChunkedByte ();
2571
2621
if ((d & 0xC0 ) != 0x080 ) {
@@ -2574,24 +2624,24 @@ private final void _finishChunkedText() throws IOException
2574
2624
c = ((c & 0x1F ) << 6 ) | (d & 0x3F );
2575
2625
}
2576
2626
break ;
2577
- case 2 : // 3-byte UTF
2578
- c = _decodeChunkedUTF8_3 (c );
2579
- break ;
2580
- case 3 : // 4-byte UTF
2581
- c = _decodeChunkedUTF8_4 (c );
2582
- // Let's add first part right away:
2583
- if (outPtr >= outBuf .length ) {
2584
- outBuf = _textBuffer .finishCurrentSegment ();
2585
- outPtr = 0 ;
2586
- outEnd = outBuf .length ;
2587
- }
2588
- outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2589
- c = 0xDC00 | (c & 0x3FF );
2590
- // And let the other char output down below
2591
- break ;
2592
- default :
2593
- // Is this good enough error message?
2594
- _reportInvalidInitial (c );
2627
+ case 2 : // 3-byte UTF
2628
+ c = _decodeChunkedUTF8_3 (c );
2629
+ break ;
2630
+ case 3 : // 4-byte UTF
2631
+ c = _decodeChunkedUTF8_4 (c );
2632
+ // Let's add first part right away:
2633
+ if (outPtr >= outBuf .length ) {
2634
+ outBuf = _textBuffer .finishCurrentSegment ();
2635
+ outPtr = 0 ;
2636
+ outEnd = outBuf .length ;
2637
+ }
2638
+ outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2639
+ c = 0xDC00 | (c & 0x3FF );
2640
+ // And let the other char output down below
2641
+ break ;
2642
+ default :
2643
+ // Is this good enough error message?
2644
+ _reportInvalidInitial (c );
2595
2645
}
2596
2646
// Need more room?
2597
2647
if (outPtr >= outEnd ) {
@@ -2602,9 +2652,75 @@ private final void _finishChunkedText() throws IOException
2602
2652
// Ok, let's add char to output:
2603
2653
outBuf [outPtr ++] = (char ) c ;
2604
2654
}
2655
+
2605
2656
_textBuffer .setCurrentLength (outPtr );
2606
2657
}
2607
2658
2659
+ /**
2660
+ * Reads in a tight loop ASCII text until a non-ASCII char is found. If any, then it returns false to signal the
2661
+ * caller that the chunk wasn't finished. The caller will keep adding to the _outBuf at the _outPtr position to
2662
+ * finish the current text buffer segment
2663
+ */
2664
+ private final boolean _finishChunkedTextAscii () throws IOException
2665
+ {
2666
+ final byte [] input = _inputBuffer ;
2667
+ int outPtr = _textBuffer .getCurrentSegmentSize ();
2668
+ char [] outBuf = _textBuffer .getBufferWithoutReset ();
2669
+ int outEnd = outBuf .length ;
2670
+ while (true ) {
2671
+ // besides of which just need to ensure there's content
2672
+ _loadMoreForChunkIfNeeded ();
2673
+
2674
+ // Find the size of the loop
2675
+ int inSize = _chunkEnd - _inputPtr ;
2676
+ int outSize = outEnd - outPtr ;
2677
+ int inputPtr = _inputPtr ;
2678
+ int inputPtrEnd = _inputPtr + Math .min (inSize , outSize );
2679
+ int i = 0 ;
2680
+ // loop with copying what we can.
2681
+ while (inputPtr < inputPtrEnd && i >= 0 ) {
2682
+ i = input [inputPtr ++];
2683
+ char val = (char ) i ;
2684
+ outBuf [outPtr ++] = val ;
2685
+ }
2686
+ _inputPtr = inputPtr ;
2687
+
2688
+ if (i < 0 ) {
2689
+ // Found a non-ascii char, correct pointers and return to the caller.
2690
+ _inputPtr -= 1 ;
2691
+ _textBuffer .setCurrentLength (outPtr - 1 );
2692
+ // return false to signal this to the calling code to allow the multi-byte code-path to kick.
2693
+ return false ;
2694
+ }
2695
+ // Need more room?
2696
+ if (outPtr >= outEnd ) {
2697
+ outBuf = _textBuffer .finishCurrentSegment ();
2698
+ outPtr = 0 ;
2699
+ outEnd = outBuf .length ;
2700
+ }
2701
+ if (_inputPtr < _chunkEnd || _chunkLeft > 0 ) {
2702
+ continue ;
2703
+ }
2704
+ _textBuffer .setCurrentLength (outPtr );
2705
+ return true ;
2706
+ }
2707
+ }
2708
+
2709
+ private final void _loadMoreForChunkIfNeeded () throws IOException
2710
+ {
2711
+ if (_inputPtr >= _inputEnd ) { // end of buffer, but not necessarily chunk
2712
+ loadMoreGuaranteed ();
2713
+ int end = _inputPtr + _chunkLeft ;
2714
+ if (end <= _inputEnd ) { // all within buffer
2715
+ _chunkLeft = 0 ;
2716
+ _chunkEnd = end ;
2717
+ } else { // stretches beyond
2718
+ _chunkLeft = (end - _inputEnd );
2719
+ _chunkEnd = _inputEnd ;
2720
+ }
2721
+ }
2722
+ }
2723
+
2608
2724
private final int _nextByte () throws IOException {
2609
2725
int inPtr = _inputPtr ;
2610
2726
if (inPtr < _inputEnd ) {
@@ -3716,6 +3832,10 @@ protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws IOExcep
3716
3832
if (_inputStream == null ) {
3717
3833
return false ;
3718
3834
}
3835
+ // The code below assumes this is true, so we check it here.
3836
+ if (_inputBuffer .length < minAvailable ) {
3837
+ return false ;
3838
+ }
3719
3839
// Need to move remaining data in front?
3720
3840
int amount = _inputEnd - _inputPtr ;
3721
3841
if (amount > 0 && _inputPtr > 0 ) {
0 commit comments