106
106
[ham-fisted.api :as hamf]
107
107
[ham-fisted.reduce :as hamf-rf]
108
108
[ham-fisted.lazy-noncaching :as lznc]
109
+ [ham-fisted.function :as hamf-fn]
109
110
[ham-fisted.set :as set]
110
111
[ham-fisted.protocols :as hamf-proto])
111
112
(:import [ham_fisted ArrayLists]
@@ -1459,7 +1460,7 @@ Dependent block frames are not supported!!")
1459
1460
(fn parse-boolean-field
1460
1461
[decompressor]
1461
1462
; ;nil subtype means null column
1462
- (if
1463
+ (if nil-subtype?
1463
1464
(col-impl/new-column
1464
1465
(:name field)
1465
1466
(dtype/const-reader false n-elems)
@@ -1830,33 +1831,32 @@ Dependent block frames are not supported!!")
1830
1831
(deftype NextDatasetIter [schema ^Iterator messages fname
1831
1832
^{:unsynchronized-mutable true
1832
1833
:tag long} idx
1833
- ^:unsynchronized-mutable dict-map
1834
+ ^Map dict-map
1834
1835
options]
1835
1836
Iterator
1836
1837
(hasNext [this] (.hasNext messages))
1837
1838
(next [this]
1838
- (loop [msg (.next messages)
1839
- dicts dict-map]
1839
+ (loop [msg (.next messages)]
1840
1840
(if (identical? :dictionary-batch (get msg :message-type ))
1841
- (recur (maybe-next messages)
1842
- (update dicts (get msg :id )
1843
- (fn [old-val]
1844
- (delay
1845
- (let [new-val (dictionary->strings msg)]
1846
- (if (and old-val (new-val :delta? ))
1847
- (update new-val :strings
1848
- (fn [new-strs]
1849
- (let [old-strs (get @old-val :strings )
1850
- new-ec (+ (count new-strs) (count old-strs))
1851
- rv (hamf/wrap-array-growable
1852
- (make-array String new-ec)
1853
- 0 )]
1854
- (.addAllReducible rv old-strs)
1855
- (.addAllReducible rv new-strs)
1856
- rv))))
1857
- new-val)))))
1841
+ (do
1842
+ (.compute dict-map (get msg :id )
1843
+ (hamf-fn/bi-function
1844
+ k old-val
1845
+ (delay
1846
+ (let [new-val (dictionary->strings msg)]
1847
+ (if (and old-val (new-val :delta? ))
1848
+ (let [old-strs (get @old-val :strings )
1849
+ new-strs (get new-val :strings )
1850
+ new-ec (+ (count new-strs) (count old-strs))
1851
+ rv (hamf/wrap-array-growable
1852
+ (make-array String new-ec)
1853
+ 0 )]
1854
+ (.addAllReducible rv old-strs)
1855
+ (.addAllReducible rv new-strs)
1856
+ (assoc new-val :strings rv))
1857
+ new-val)))))
1858
+ (recur (maybe-next messages)))
1858
1859
(let [cur-idx idx]
1859
- (set! dict-map dicts)
1860
1860
(set! idx (inc cur-idx))
1861
1861
(-> (records->ds schema dict-map msg options)
1862
1862
(ds-base/set-dataset-name (format " %s-%03d" fname cur-idx))))))))
@@ -1881,7 +1881,7 @@ Dependent block frames are not supported!!")
1881
1881
(get options :close-input-stream? true ))
1882
1882
(.close ^InputStream input))
1883
1883
(throw (Exception. " Initial message is not a schema message." )))
1884
- (NextDatasetIter. schema messages fname 0 {} options)))
1884
+ (NextDatasetIter. schema messages fname 0 ( hamf/java-hashmap ) options)))
1885
1885
1886
1886
(defn stream->dataset-iterable
1887
1887
" Loads data up to and including the first data record. Returns the a lazy
@@ -1924,9 +1924,9 @@ Dependent block frames are not supported!!")
1924
1924
:arrow-file
1925
1925
(next-dataset-iter (discard input 8 ) fname options)
1926
1926
:arrow-ipc
1927
- (next-dataset-iter input options)
1927
+ (next-dataset-iter input fname options)
1928
1928
:feather-v1
1929
- [( feather->ds input options)] )))))
1929
+ ( iter ( hamf/vector ( feather->ds input options))) )))))
1930
1930
1931
1931
(defn ^:no-doc stream->dataset-seq
1932
1932
" see docs for [[stream->dataset-iterable]]"
0 commit comments