Skip to content

Commit 4afcf93

Browse files
authored
removed spark RDD and geni (#455)
1 parent 1cc7d1d commit 4afcf93

File tree

3 files changed

+25
-107
lines changed

3 files changed

+25
-107
lines changed

deps.edn

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,7 @@
8282
org.slf4j/slf4j-api]}
8383
org.lz4/lz4-java {:mvn/version "1.8.0"}
8484
com.cnuernber/jarrow {:mvn/version "1.000"}
85-
zero.one/geni {:mvn/version "0.0.34"
86-
:exclusions [commons-codec/commons-codec]}
87-
org.apache.spark/spark-avro_2.12 {:mvn/version "3.0.1"}
88-
org.apache.spark/spark-core_2.12 {:mvn/version "3.0.1"
89-
:exclusions [org.slf4j/slf4j-log4j12]}
90-
org.apache.spark/spark-hive_2.12 {:mvn/version "3.0.1"}
91-
org.apache.spark/spark-mllib_2.12 {:mvn/version "3.0.1"}
92-
org.apache.spark/spark-sql_2.12 {:mvn/version "3.0.1"}
93-
org.apache.spark/spark-streaming_2.12 {:mvn/version "3.0.1"}
85+
9486
org.tribuo/tribuo-all {:mvn/version "4.3.1" :extension "pom"}
9587
}
9688
:extra-paths ["neanderthal" "test"]}

java/tech/v3/dataset/SimpleRDD.java

Lines changed: 0 additions & 65 deletions
This file was deleted.

src/tech/v3/libs/spark.clj

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
[org.apache.spark.sql.types StructType StructField
1616
DataTypes DataType]
1717
[tech.v3.datatype ObjectReader]
18-
[tech.v3.dataset SimpleRDD]
1918
[java.time LocalDate Instant]
2019
[java.util List]))
2120

@@ -167,40 +166,8 @@
167166
(ds->spark-dataset ds session nil)))
168167

169168

170-
(defn default-ds-fn
171-
[src]
172-
(-> (ds-io/->dataset src)
173-
(prepare-ds-for-spark)
174-
(dataset->row-list)))
175169

176170

177-
(defn ds-src-data->rdd
178-
"Given a session, a full namespaced name that resolves to an IFn,
179-
and a list of serializable data produce an RDD."
180-
(^Dataset [^SparkSession spark-session
181-
^String ds-fn-name
182-
ds-src-data]
183-
(SimpleRDD. (.sparkContext spark-session)
184-
(vec ds-src-data)
185-
ds-fn-name)))
186-
187-
188-
(comment
189-
(require '[zero-one.geni.core :as g])
190-
(require '[zero-one.geni.defaults :as geni-defaults])
191-
(def dataframe (g/read-csv! "test/data/stocks.csv"))
192-
(require '[tech.v3.dataset :as ds])
193-
(def stocks (ds/->dataset "test/data/stocks.csv"))
194-
(def session @geni-defaults/spark)
195-
(def schema (-> (ds/->dataset "test/data/stocks.csv")
196-
(prepare-ds-for-spark)
197-
(ds-schema)))
198-
(def rdd (ds-src-data->dataset @geni-defaults/spark
199-
schema
200-
"tech.v3.libs.spark/default-ds-fn"
201-
[[{:a 1} {:a 2}]]))
202-
)
203-
204171

205172
(defn collect-spark-dataset->ds
206173
[^Dataset dataset]
@@ -227,3 +194,27 @@
227194
nil
228195
[]))))
229196
(ds-impl/new-dataset))))
197+
198+
199+
200+
(comment
201+
;; databricks-connect specific classes
202+
;; should work similarly for spark-connect
203+
204+
;;Tested with these deps
205+
;;org.scala-lang/scala-reflect {:mvn/version "2.12.18"}
206+
;;com.databricks/databricks-connect {:mvn/version "16.1.0"}
207+
208+
209+
(import
210+
'[com.databricks.connect DatabricksSession]
211+
'[com.databricks.sdk.core DatabricksConfig])
212+
213+
(def config (.. (DatabricksConfig.) (setProfile "adb-xxxxx")))
214+
(def spark (.. (DatabricksSession/builder) (sdkConfig config) getOrCreate))
215+
216+
(->
217+
(.sql spark "show catalogs;")
218+
collect-spark-dataset->ds)
219+
220+
)

0 commit comments

Comments
 (0)