Skip to content

Commit 8e08a40

Browse files
committed
7.045
1 parent 807fda1 commit 8e08a40

40 files changed

+73
-47
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# Changelog
2+
# 7.045
3+
* Added a bulk add-constant! method - `(.add container idx count value)` - which is now used when adding missing values.
4+
25
# 7.044
36
* Initial support for clearing dataset parsers: this resets their row count but does not reset the schema. Use tech.v3.dataset.protocols/ds-clear.
47

deps.edn

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{:paths ["src" "resources" "target/classes"]
22
:deps {;;org.clojure/clojure {:mvn/version "1.11.1"}
3-
cnuernber/dtype-next {:mvn/version "10.131"}
3+
cnuernber/dtype-next {:mvn/version "10.133"}
44
techascent/tech.io {:mvn/version "4.31"
55
:exclusions [org.apache.commons/commons-compress]}
66
org.apache.datasketches/datasketches-java {:mvn/version "4.2.0"}
@@ -14,7 +14,7 @@
1414
:exec-fn codox.main/-main
1515
:exec-args {:group-id "techascent"
1616
:artifact-id "tech.ml.dataset"
17-
:version "7.043"
17+
:version "7.045"
1818
:name "TMD"
1919
:description "A Clojure high performance data processing system"
2020
:metadata {:doc/format :markdown}

docs/000-getting-started.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/100-walkthrough.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/200-quick-reference.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/columns-readers-and-datatypes.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/index.html

Lines changed: 2 additions & 2 deletions
Large diffs are not rendered by default.

docs/nippy-serialization-rocks.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/supported-datatypes.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.categorical.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.clipboard.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.column-filters.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.column.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.io.csv.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.io.datetime.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.io.string-row-parser.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.io.univocity.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.join.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.math.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.metamorph.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.modelling.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.print.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.reductions.apache-data-sketch.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.reductions.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.rolling.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.set.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.tensor.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.dataset.zip.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.libs.arrow.html

Lines changed: 5 additions & 5 deletions
Large diffs are not rendered by default.

docs/tech.v3.libs.clj-transit.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.libs.fastexcel.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.libs.guava.cache.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.libs.parquet.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.libs.poi.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docs/tech.v3.libs.tribuo.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/tech/v3/dataset/dynamic_int_list.clj

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
[tech.v3.parallel.for :as parallel-for]
1212
[clj-commons.primitive-math :as pmath])
1313
(:import [tech.v3.datatype LongBuffer]
14-
[ham_fisted IMutList]))
14+
[ham_fisted IMutList Casts]))
1515

1616
(set! *warn-on-reflection* true)
1717
(set! *unchecked-math* :warn-on-boxed)
@@ -68,6 +68,12 @@
6868
(set! backing-store (dtype/make-list :int32 backing-store))
6969
(set! int-width 32)))
7070
(.addLong backing-store value))
71+
(add [this idx ct value]
72+
(when-not (== idx (.size this))
73+
(throw (RuntimeException. "Insertion other than at end is not supported.")))
74+
(let [lv (Casts/longCast value)]
75+
(dotimes [c ct]
76+
(.addLong this lv))))
7177
(getLong [_this idx]
7278
(.getLong backing-store idx))
7379
(readLong [_this idx]

src/tech/v3/dataset/io/column_parsers.clj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,11 @@
148148

149149
(defn add-missing-values!
150150
^long [^IMutList container ^RoaringBitmap missing
151-
missing-value ^long idx]
151+
missing-value ^long idx]
152152
(let [n-elems (.size container)]
153153
(when (< n-elems idx)
154154
(.add missing (long n-elems) idx)
155-
(.addAllReducible container (hamf/repeat (- idx n-elems) missing-value)))
155+
(.add container n-elems (- idx n-elems) missing-value))
156156
(- idx n-elems)))
157157

158158

src/tech/v3/dataset/io/context.clj

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,15 @@
9090
colparser-compute-fn (reify Function
9191
(apply [this col-idx]
9292
(let [colname (col-idx->colname col-idx)
93-
colname (if (empty? colname)
94-
(make-colname col-idx)
95-
(utils/remove-zero-width-spaces colname))
93+
colname (cond
94+
(nil? colname) (make-colname col-idx)
95+
(string? colname) (if (empty? colname)
96+
(make-colname col-idx)
97+
(utils/remove-zero-width-spaces colname))
98+
(sequential? colname) (if (empty? colname)
99+
(make-colname colname)
100+
colname)
101+
:else colname)
96102
colname (if (and ensure-unique-column-names?
97103
(get colname->idx colname))
98104
(unique-column-name-fn col-idx colname)

src/tech/v3/dataset/string_table.clj

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,17 @@
7878
retval)))))]
7979
(.addLong data item-idx))
8080
true)
81+
(add [this idx ct value]
82+
(let [value (or value "")
83+
item-idx (int (.computeIfAbsent
84+
str->int
85+
value
86+
(reify Function
87+
(apply [this keyval]
88+
(let [retval (.size int->str)]
89+
(.add int->str keyval)
90+
retval)))))]
91+
(.add data (.size data) ct item-idx)))
8192
(readObject [_this idx]
8293
(.get int->str (.getLong data idx)))
8394
(writeObject [this idx value]

0 commit comments

Comments
 (0)