Fixing unit tests

cnuernber · cnuernber · commit c917c2dc348c · 2024-09-12T11:31:34.000-06:00
diff --git a/deps.edn b/deps.edn
@@ -97,7 +97,7 @@
     org.apache.spark/spark-mllib_2.12 {:mvn/version "3.0.1"}
     org.apache.spark/spark-sql_2.12 {:mvn/version "3.0.1"}
     org.apache.spark/spark-streaming_2.12 {:mvn/version "3.0.1"}
-    org.tribuo/tribuo-all {:mvn/version "4.2.0" :extension "pom"}
+    org.tribuo/tribuo-all {:mvn/version "4.3.1" :extension "pom"}
     }
    :extra-paths ["neanderthal" "test"]}
 
diff --git a/src/tech/v3/dataset.clj b/src/tech/v3/dataset.clj
@@ -73,8 +73,6 @@
   - `:disable-comment-skipping?` - As default, the `#` character is recognised as a
      line comment when found in the beginning of a line of text in a CSV file,
      and the row will be ignored. Set `true` to disable this behavior.
-  - `:disable-na-as-missing?` - As default, the string \"NA\" is (case-insensitively)
-     parsed as `nil`. Set `false` to disable this behavior.
   - `:max-chars-per-column` - Defaults to 4096.  Columns with more characters than this
      will result in an exception.
   - `:max-num-columns` - Defaults to 8192.  CSV,TSV files with more columns than this
@@ -477,8 +475,8 @@ null [6 3]:
 
 
 (defn concat
-  "Concatenate datasets in place using a copying-concatenation.
-  See also concat-inplace as it may be more efficient for your use case if you have
+  "Concatenate datasets using a copying-concatenation.
+  See also [[concat-inplace]] as it may be more efficient for your use case if you have
   a small number (like less than 3) of datasets."
   ([dataset & args]
   (apply tech.v3.dataset.base/concat dataset args))
@@ -644,9 +642,9 @@ null [6 3]:
 
 
 (defn group-by
-  "Produce a map of key-fn-value->dataset.  The argument to key-fn 
+  "Produce a map of key-fn-value->dataset.  The argument to key-fn
   is a map of colname->column-value representing a row in dataset.
-  Each dataset in the resulting map contains all and only rows 
+  Each dataset in the resulting map contains all and only rows
   that produce the same key-fn-value.
 
   Options - options are passed into dtype arggroup:
diff --git a/src/tech/v3/dataset/metamorph.clj b/src/tech/v3/dataset/metamorph.clj
@@ -318,8 +318,8 @@ null [6 3]:
 
 
 (defn concat
-  "Concatenate datasets in place using a copying-concatenation.
-  See also concat-inplace as it may be more efficient for your use case if you have
+  "Concatenate datasets using a copying-concatenation.
+  See also [[concat-inplace]] as it may be more efficient for your use case if you have
   a small number (like less than 3) of datasets."
   ([& args]
   (apply tech.v3.dataset.metamorph-api/concat args))
@@ -489,9 +489,9 @@ null [6 3]:
 
 
 (defn group-by
-  "Produce a map of key-fn-value->dataset.  The argument to key-fn 
+  "Produce a map of key-fn-value->dataset.  The argument to key-fn
   is a map of colname->column-value representing a row in dataset.
-  Each dataset in the resulting map contains all and only rows 
+  Each dataset in the resulting map contains all and only rows
   that produce the same key-fn-value.
 
   Options - options are passed into dtype arggroup:
diff --git a/test/tech/v3/dataset/mapseq_test.clj b/test/tech/v3/dataset/mapseq_test.clj
@@ -80,8 +80,7 @@
 
     ;;forward map from input value to encoded value.
     ;;After ETL, column values are all doubles
-    (let [apple-value (-> (get (ds-mod/inference-target-label-map dataset) :apple)
-                          double)]
+    (let [apple-value (get (ds-mod/inference-target-label-map dataset) :apple)]
       (is (= #{:apple}
              (as-> dataset ds
                  (ds/filter ds #(= apple-value (:fruit-name %)))
@@ -172,7 +171,7 @@
                :apple :fruit-name-apple,
                :lemon :fruit-name-lemon},
               :src-column :fruit-name,
-              :result-datatype :float64}
+              :result-datatype :int64}
              (into {} (first (ds-cat/dataset->one-hot-maps dataset)))))
       (is (= #{:mass :fruit-name-orange :fruit-name-mandarin :width :fruit-name-apple :color-score
 	     :fruit-name-lemon :height}

Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,7 @@`
`97`	`97`	`org.apache.spark/spark-mllib_2.12 {:mvn/version "3.0.1"}`
`98`	`98`	`org.apache.spark/spark-sql_2.12 {:mvn/version "3.0.1"}`
`99`	`99`	`org.apache.spark/spark-streaming_2.12 {:mvn/version "3.0.1"}`
`100`		`- org.tribuo/tribuo-all {:mvn/version "4.2.0" :extension "pom"}`
	`100`	`+ org.tribuo/tribuo-all {:mvn/version "4.3.1" :extension "pom"}`
`101`	`101`	`}`
`102`	`102`	`:extra-paths ["neanderthal" "test"]}`
`103`	`103`