Skip to content

Commit fcfbab5

Browse files
committed
Faster promotional object parser - avoids the call to RoaringBitmap.getCardinality by tracking a running count of missing values.
1 parent 8bc6c5b commit fcfbab5

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

src/tech/v3/dataset/io/column_parsers.clj

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -146,12 +146,13 @@
146146

147147

148148
(defn add-missing-values!
149-
[^IMutList container ^RoaringBitmap missing
149+
^long [^IMutList container ^RoaringBitmap missing
150150
missing-value ^long idx]
151151
(let [n-elems (.size container)]
152152
(when (< n-elems idx)
153153
(.add missing (long n-elems) idx)
154-
(.addAllReducible container (hamf/repeat (- idx n-elems) missing-value)))))
154+
(.addAllReducible container (hamf/repeat (- idx n-elems) missing-value)))
155+
(- idx n-elems)))
155156

156157

157158
(defn finalize-parser-data!
@@ -483,6 +484,7 @@
483484
column-name
484485
^:unsynchronized-mutable ^long last-idx
485486
^:unsynchronized-mutable ^long max-idx
487+
^:unsynchronized-mutable ^long mc
486488
options]
487489
dtype-proto/PECount
488490
(ecount [_this] (inc max-idx))
@@ -504,7 +506,7 @@
504506
(let [;;Avoid the pack call if possible
505507
packed-dtype (packing/pack-datatype val-dtype)
506508
container-ecount (.size container)
507-
logical-ecount (- container-ecount (.getCardinality missing))]
509+
logical-ecount (- container-ecount mc)]
508510
;;Setup container
509511
(if (== 0 logical-ecount)
510512
(do
@@ -529,7 +531,7 @@
529531
(set! missing-value (column-base/datatype->missing-value
530532
widest-datatype))))))))
531533
(when (> (- idx last-idx) 1)
532-
(add-missing-values! container missing missing-value idx))
534+
(set! mc (+ mc (add-missing-values! container missing missing-value idx))))
533535
(set! last-idx idx)
534536
(.add container value))))
535537
(finalize [_p rowcount]
@@ -546,4 +548,5 @@
546548
column-name
547549
-1
548550
-1
551+
0
549552
options))

0 commit comments

Comments
 (0)