Skip to content

Commit c307b4d

Browse files
committed
[DH-5723] Add redshift support
1 parent a11d1f1 commit c307b4d

File tree

5 files changed

+56
-6
lines changed

5 files changed

+56
-6
lines changed

Diff for: README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ Once the engine is running, you will want to use it by:
160160
3. Querying the data in natural language
161161

162162
### Connecting to your data warehouses
163-
We currently support connections to Postgres, DuckDB, BigQuery, ClickHouse, Databricks, Snowflake and AWS Athena. You can create connections to these warehouses through the API or at application start-up using the envars.
163+
We currently support connections to Postgres, DuckDB, BigQuery, ClickHouse, Databricks, Snowflake, MySQL/MariaDB, MS SQL Server, Redshift and AWS Athena. You can create connections to these warehouses through the API or at application start-up using environment variables.
164164

165165
#### Connecting through the API
166166

Diff for: dataherald/db_scanner/services/redshift_scanner.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import sqlalchemy
2+
from overrides import override
3+
from sqlalchemy.sql import func
4+
from sqlalchemy.sql.schema import Column
5+
6+
from dataherald.db_scanner.models.types import QueryHistory
7+
from dataherald.db_scanner.services.abstract_scanner import AbstractScanner
8+
from dataherald.sql_database.base import SQLDatabase
9+
10+
# Bounds on a column's estimated distinct-value count: cardinality_values()
# only lists a column's values when MIN_CATEGORY_VALUE < estimate <= MAX_CATEGORY_VALUE.
MIN_CATEGORY_VALUE = 1
11+
MAX_CATEGORY_VALUE = 100
12+
# NOTE(review): not referenced in the visible part of this module; presumably a
# cap on the number of query-history rows to fetch -- confirm before relying on it.
MAX_LOGS = 5_000
13+
14+
15+
class RedshiftScanner(AbstractScanner):
    """Scanner service used for engines whose dialect is Redshift.

    Provides the Redshift-specific way of detecting low-cardinality
    ("categorical") columns. Query-history retrieval is not implemented and
    always yields an empty list.
    """

    @override
    def cardinality_values(self, column: Column, db_engine: SQLDatabase) -> list | None:
        """Return the distinct values of ``column`` if it looks categorical.

        The distinct count is first estimated with Redshift's ``HLL``
        aggregate. Only when that estimate is greater than
        ``MIN_CATEGORY_VALUE`` and at most ``MAX_CATEGORY_VALUE`` are the
        actual distinct values fetched.

        Args:
            column: The SQLAlchemy column to profile.
            db_engine: Database wrapper whose ``engine`` executes the queries.

        Returns:
            The distinct values converted to ``str``, or ``None`` when the
            estimate is missing or outside the categorical range.
        """
        estimate_query = sqlalchemy.select([func.HLL(column)])
        estimate_rows = db_engine.engine.execute(estimate_query).fetchall()

        # No row / no column in the result: nothing to base a decision on.
        if not estimate_rows or len(estimate_rows[0]) == 0:
            return None

        estimate = estimate_rows[0][0]
        # A NULL estimate would make the chained comparison below raise
        # TypeError, so treat it as "not categorical".
        if estimate is None:
            return None
        if not (MIN_CATEGORY_VALUE < estimate <= MAX_CATEGORY_VALUE):
            return None

        # The estimate is approximate, so cap the fetch at one row past the
        # maximum instead of trusting it to bound the result size.
        distinct_query = sqlalchemy.select([func.distinct(column)]).limit(
            MAX_CATEGORY_VALUE + 1
        )
        distinct_rows = db_engine.engine.execute(distinct_query).fetchall()
        return [str(row[0]) for row in distinct_rows]

    @override
    def get_logs(
        self, table: str, db_engine: SQLDatabase, db_connection_id: str  # noqa: ARG002
    ) -> list[QueryHistory]:
        """Return the query history for ``table``.

        Not implemented for Redshift: all arguments are ignored and an empty
        list is always returned.
        """
        return []

Diff for: dataherald/db_scanner/sqlalchemy.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from dataherald.db_scanner.services.big_query_scanner import BigQueryScanner
2323
from dataherald.db_scanner.services.click_house_scanner import ClickHouseScanner
2424
from dataherald.db_scanner.services.postgre_sql_scanner import PostgreSqlScanner
25+
from dataherald.db_scanner.services.redshift_scanner import RedshiftScanner
2526
from dataherald.db_scanner.services.snowflake_scanner import SnowflakeScanner
2627
from dataherald.db_scanner.services.sql_server_scanner import SqlServerScanner
2728
from dataherald.sql_database.base import SQLDatabase
@@ -283,13 +284,14 @@ def scan(
283284
services = {
284285
"snowflake": SnowflakeScanner,
285286
"bigquery": BigQueryScanner,
286-
"psycopg2": PostgreSqlScanner,
287-
"pymssql": SqlServerScanner,
288-
"http": ClickHouseScanner,
287+
"postgresql": PostgreSqlScanner,
288+
"mssql": SqlServerScanner,
289+
"clickhouse": ClickHouseScanner,
290+
"redshift": RedshiftScanner,
289291
}
290292
scanner_service = BaseScanner()
291-
if db_engine.engine.driver in services.keys():
292-
scanner_service = services[db_engine.engine.driver]()
293+
if db_engine.engine.dialect.name in services.keys():
294+
scanner_service = services[db_engine.engine.dialect.name]()
293295

294296
inspector = inspect(db_engine.engine)
295297
meta = MetaData(bind=db_engine.engine)

Diff for: docs/api.create_database_connection.rst

+11
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,17 @@ Example::
254254

255255
"connection_uri": snowflake://jon:123456@foo-bar/my-database/public
256256

257+
Redshift
258+
^^^^^^^^^^^^
259+
260+
Uri structure::
261+
262+
"connection_uri": redshift+psycopg2://<user>:<password>@<host>:<port>/<database>
263+
264+
Example::
265+
266+
"connection_uri": redshift+psycopg2://jon:123456@host.amazonaws.com:5439/my-database
267+
257268
ClickHouse
258269
^^^^^^^^^^^^
259270

Diff for: requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,4 @@ PyMySQL==1.1.0
4343
clickhouse-sqlalchemy==0.2.5
4444
astrapy==0.7.6
4545
pymssql==2.2.11
46+
sqlalchemy-redshift==0.8.14

0 commit comments

Comments
 (0)