From ea706b767fefd476d68f80299eb0ff22a8c71989 Mon Sep 17 00:00:00 2001 From: Serge Klochkov <3175289+slvrtrn@users.noreply.github.com> Date: Thu, 23 Mar 2023 13:54:07 +0100 Subject: [PATCH] Exclude .inner MV tables, set Dictionary types for Maps (#150) * Exclude inner MV tables * Set Map type to Dictionary * Add CHANGELOG * Add helper texts for the connection options --- CHANGELOG.md | 8 +- resources/metabase-plugin.yaml | 7 +- src/metabase/driver/clickhouse.clj | 50 ++++++----- test/metabase/driver/clickhouse_test.clj | 86 +++++++++++-------- .../metabase/driver/clickhouse_test_utils.clj | 21 ++++- test/metabase/test/data/clickhouse.clj | 2 +- 6 files changed, 112 insertions(+), 62 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e9d3c4..e21652c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,10 @@ -# 1.1.4 +# 1.1.3 -### New features +### New features -* Database name can now contain multiple schemas in UI field, which tells the driver to scan selected databases. (separator can be set in `metabase.driver.clickhouse/SEPARATOR`) +* Hide `.inner` tables of Materialized Views. +* Resolve `Map` base type to `type/Dictionary`. +* Database name can now contain multiple schemas in the UI field (space-separated by default), which tells the driver to scan selected databases. Separator can be set in `metabase.driver.clickhouse/SEPARATOR`. (@veschin) # 1.1.2 diff --git a/resources/metabase-plugin.yaml b/resources/metabase-plugin.yaml index 2582f3e..8e46867 100644 --- a/resources/metabase-plugin.yaml +++ b/resources/metabase-plugin.yaml @@ -1,6 +1,6 @@ info: name: Metabase ClickHouse Driver - version: 1.1.2 + version: 1.1.3 description: Allows Metabase to connect to ClickHouse databases. contact-info: name: ClickHouse @@ -18,12 +18,15 @@ driver: - user - password - name: dbname - display-name: Databases Names + display-name: Databases placeholder: default + helper-text: "To specify multiple databases, separate them by the space symbol. For example: default data logs" - name: scan-all-databases display-name: Scan all databases type: boolean default: false + description: Scan all tables from all available ClickHouse databases except the system ones. + - ssl - ssh-tunnel - advanced-options-start diff --git a/src/metabase/driver/clickhouse.clj b/src/metabase/driver/clickhouse.clj index 9cc7cbb..f12c537 100644 --- a/src/metabase/driver/clickhouse.clj +++ b/src/metabase/driver/clickhouse.clj @@ -53,6 +53,7 @@ [#"Int64" :type/BigInteger] [#"IPv4" :type/IPAddress] [#"IPv6" :type/IPAddress] + [#"Map" :type/Dictionary] [#"String" :type/Text] [#"Tuple" :type/*] [#"UInt8" :type/Integer] @@ -64,9 +65,14 @@ (defmethod sql-jdbc.sync/database-type->base-type :clickhouse [_ database-type] (let [base-type (database-type->base-type - (str/replace (name database-type) - #"(?:Nullable|LowCardinality)\((\S+)\)" - "$1"))] + (let [normalized ;; extract the type from Nullable or LowCardinality first + (str/replace (name database-type) + #"(?:Nullable|LowCardinality)\((\S+)\)" + "$1")] + (cond + (str/starts-with? normalized "Array(") "Array" + (str/starts-with? normalized "Map(") "Map" + :else normalized)))] base-type)) (def ^:private excluded-schemas #{"system" "information_schema" "INFORMATION_SCHEMA"}) @@ -74,7 +80,7 @@ (def ^:private default-connection-details {:user "default", :password "", :dbname "default", :host "localhost", :port "8123"}) -(def ^:private product-name "metabase/1.1.2") +(def ^:private product-name "metabase/1.1.3") (defmethod sql-jdbc.conn/connection-details->spec :clickhouse [_ details] @@ -117,6 +123,10 @@ "%" ; tablePattern "%" = match all tables allowed-table-types)) +(defn ^:private not-inner-mv-table? + [table] + (not (str/starts-with? (:table_name table) ".inner"))) + (defn- ->spec [db] (if (u/id db) @@ -128,9 +138,9 @@ (->> (get-tables-from-metadata metadata "%") (jdbc/metadata-result) (vec) - (filter #(->> (get % :table_schem) - (contains? excluded-schemas) - (not))) + (filter #(and + (not (contains? excluded-schemas (:table_schem %))) + (not-inner-mv-table? %))) (tables-set)))) ;; Strangely enough, the tests only work with :db keyword, @@ -140,29 +150,27 @@ (or (get-in db [:details :dbname]) (get-in db [:details :db]))) -;; NOTE: option for collection tables from many schemas -(def ^:private SEPARATOR #" ") -(defn- get-multiple-tables [db] - (->> (for [schema (as-> (or (get-db-name db) "default") schemas - (str/split schemas SEPARATOR) - (remove empty? schemas) - (map (comp #(ddl.i/format-name :clickhouse %) str/trim) schemas))] - (jdbc/with-db-metadata [metadata (->spec db)] +(def ^:private db-names-separator #" ") +(defn- get-tables-in-dbs [db-or-dbs] + (->> (for [db (as-> (or (get-db-name db-or-dbs) "default") dbs + (str/split dbs db-names-separator) + (remove empty? dbs) + (map (comp #(ddl.i/format-name :clickhouse %) str/trim) dbs))] + (jdbc/with-db-metadata [metadata (->spec db-or-dbs)] (jdbc/metadata-result - (get-tables-from-metadata metadata schema)))) + (get-tables-from-metadata metadata db)))) (apply concat) + (filter not-inner-mv-table?) (tables-set))) (defmethod driver/describe-database :clickhouse [_ {{:keys [scan-all-databases]} :details :as db}] {:tables - (cond - scan-all-databases + (if + (boolean scan-all-databases) (get-all-tables db) - - :selected-schemas - (get-multiple-tables db))}) + (get-tables-in-dbs db))}) (defmethod driver/describe-table :clickhouse [_ database table] diff --git a/test/metabase/driver/clickhouse_test.clj b/test/metabase/driver/clickhouse_test.clj index 8144354..b4eb18b 100644 --- a/test/metabase/driver/clickhouse_test.clj +++ b/test/metabase/driver/clickhouse_test.clj @@ -5,6 +5,7 @@ [cljc.java-time.local-date :as local-date] [cljc.java-time.offset-date-time :as offset-date-time] [cljc.java-time.temporal.chrono-unit :as chrono-unit] + [clojure.string :as str] [clojure.test :refer :all] [metabase.driver :as driver] [metabase.driver.clickhouse-test-utils :as ctu] @@ -438,6 +439,21 @@ {:filter [:= $ipvfour "127.0.0.1"] :aggregation [:count]}))))))))) +(defn- map-as-string [^java.util.LinkedHashMap m] (.toString m)) +(deftest clickhouse-simple-map-test + (mt/test-driver + :clickhouse + (is (= [["{key1=1, key2=10}"] ["{key1=2, key2=20}"] ["{key1=3, key2=30}"]] + (qp.test/formatted-rows + [map-as-string] + :format-nil-values + (ctu/do-with-metabase-test-db + (fn [db] + (data/with-db db + (data/run-mbql-query + maps_test + {}))))))))) + (deftest clickhouse-connection-string (testing "connection with no additional options" (is (= default-connection-params @@ -481,8 +497,9 @@ (deftest clickhouse-boolean-tabledef-metadata (mt/test-driver :clickhouse - (let [table_md (ctu/do-with-metabase-test-db - (fn [db] (metabase.driver/describe-table :clickhouse db {:name "boolean_test"}))) + (let [table_md (ctu/do-with-metabase-test-db + (fn [db] + (metabase.driver/describe-table :clickhouse db {:name "boolean_test"}))) colmap (->> (.get table_md :fields) (filter #(= (:name %) "b1")) first) database-type (.get colmap :database-type) @@ -544,19 +561,28 @@ {}))))))))))) (deftest clickhouse-describe-database - (let [[agg-fn-table boolean-table enum-table ipaddress-table] - [{:description nil, - :name "aggregate_functions_filter_test", - :schema "metabase_test"} - {:description nil, - :name "boolean_test", - :schema "metabase_test"} - {:description nil, - :name "enums_test", - :schema "metabase_test"} - {:description nil, - :name "ipaddress_test", - :schema "metabase_test"}]] + (let [test-tables + #{{:description nil, + :name "aggregate_functions_filter_test", + :schema "metabase_test"} + {:description nil, + :name "boolean_test", + :schema "metabase_test"} + {:description nil, + :name "enums_test", + :schema "metabase_test"} + {:description nil, + :name "ipaddress_test", + :schema "metabase_test"} + {:description nil, + :name "wikistat", + :schema "metabase_test"} + {:description nil, + :name "wikistat_mv", + :schema "metabase_test"} + {:description nil, + :name "maps_test", + :schema "metabase_test"}}] (testing "scanning a single database" (mt/with-temp Database [db {:engine :clickhouse @@ -564,8 +590,7 @@ :scan-all-databases nil}}] (let [describe-result (driver/describe-database :clickhouse db)] (is (= - {:tables - #{agg-fn-table boolean-table enum-table ipaddress-table}} + {:tables test-tables} describe-result))))) (testing "scanning all databases" (mt/with-temp Database @@ -574,20 +599,15 @@ :scan-all-databases true}}] (let [describe-result (driver/describe-database :clickhouse db)] ;; check the existence of at least some test tables here - (is (contains? (:tables describe-result) - agg-fn-table)) - (is (contains? (:tables describe-result) - boolean-table)) - (is (contains? (:tables describe-result) - enum-table)) - (is (contains? (:tables describe-result) - ipaddress-table)) + (doseq [table test-tables] + (is (contains? (:tables describe-result) + table))) ;; should not contain any ClickHouse system tables - (is (not (some #(= (get % :schema) "system") + (is (not (some #(= (:schema %) "system") (:tables describe-result)))) - (is (not (some #(= (get % :schema) "information_schema") + (is (not (some #(= (:schema %) "information_schema") (:tables describe-result)))) - (is (not (some #(= (get % :schema) "INFORMATION_SCHEMA") + (is (not (some #(= (:schema %) "INFORMATION_SCHEMA") (:tables describe-result))))))) (testing "scanning multiple databases" (mt/with-temp Database @@ -602,12 +622,10 @@ :description nil :schema "information_schema"}] - ;; NOTE: tables from `metabase_test` - (is (contains? tables agg-fn-table)) - (is (contains? tables boolean-table)) - (is (contains? tables enum-table)) - (is (contains? tables ipaddress-table)) + ;; tables from `metabase_test` + (doseq [table test-tables] + (is (contains? tables table))) - ;; NOTE: tables from `information_schema` + ;; tables from `information_schema` (is (contains? tables tables-table)) (is (contains? tables columns-table))))))) diff --git a/test/metabase/driver/clickhouse_test_utils.clj b/test/metabase/driver/clickhouse_test_utils.clj index 2ae424e..e99b394 100644 --- a/test/metabase/driver/clickhouse_test_utils.clj +++ b/test/metabase/driver/clickhouse_test_utils.clj @@ -56,12 +56,31 @@ " (1, true, true)," " (2, false, true)," " (3, true, false);") + (str "CREATE TABLE `metabase_test`.`maps_test`" + " (m Map(String, UInt64)) ENGINE = Memory;") + (str "INSERT INTO `metabase_test`.`maps_test` VALUES" + " ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});") + ;; Used for testing that AggregateFunction columns are excluded, + ;; while SimpleAggregateFunction columns are preserved (str "CREATE TABLE `metabase_test`.`aggregate_functions_filter_test` (" " idx UInt8, a AggregateFunction(uniq, String), lowest_value SimpleAggregateFunction(min, UInt8)," " count SimpleAggregateFunction(sum, Int64)" ") ENGINE Memory;") (str "INSERT INTO `metabase_test`.`aggregate_functions_filter_test`" - " (idx, lowest_value, count) VALUES (42, 144, 255255);")]] + " (idx, lowest_value, count) VALUES (42, 144, 255255);") + ;; Materialized views (testing .inner tables exclusion) + (str "CREATE TABLE `metabase_test`.`wikistat` (" + " `date` Date," + " `project` LowCardinality(String)," + " `hits` UInt32" + ") ENGINE = Memory;") + (str "CREATE MATERIALIZED VIEW `metabase_test`.`wikistat_mv` ENGINE=Memory AS" + " SELECT date, project, sum(hits) AS hits FROM `metabase_test`.`wikistat`" + " GROUP BY date, project;") + (str "INSERT INTO `metabase_test`.`wikistat` VALUES" + " (now(), 'foo', 10)," + " (now(), 'bar', 10)," + " (now(), 'bar', 20);")]] (jdbc/execute! conn [sql])))) (defn do-with-metabase-test-db diff --git a/test/metabase/test/data/clickhouse.clj b/test/metabase/test/data/clickhouse.clj index 68bcc04..9381d77 100644 --- a/test/metabase/test/data/clickhouse.clj +++ b/test/metabase/test/data/clickhouse.clj @@ -21,7 +21,7 @@ :ssl false :use_no_proxy false :use_server_time_zone_for_dates true - :product_name "metabase/1.1.2"}) + :product_name "metabase/1.1.3"}) (defmethod sql.tx/field-base-type->sql-type [:clickhouse :type/Boolean] [_ _] "Boolean") (defmethod sql.tx/field-base-type->sql-type [:clickhouse :type/BigInteger] [_ _] "Int64")