From 9762dc3798c889a1168c3471a9fdddb2e62feb9f Mon Sep 17 00:00:00 2001 From: Igor Dykhta Date: Fri, 28 Feb 2025 18:47:03 +0200 Subject: [PATCH] [feat] DuckDb plugin: drag and drop file directly as table (#2952) - DuckDB plugin: drag and drop file directly as table --------- Signed-off-by: Ihor Dykhta --- src/duckdb/src/components/schema-panel.tsx | 70 +++++++++++-- src/duckdb/src/components/sql-panel.tsx | 96 ++++++++++++++++-- src/duckdb/src/table/duckdb-table-utils.ts | 109 ++++++++++++++++++++- 3 files changed, 261 insertions(+), 14 deletions(-) diff --git a/src/duckdb/src/components/schema-panel.tsx b/src/duckdb/src/components/schema-panel.tsx index 06d4808d5b..e5bbd47ce6 100644 --- a/src/duckdb/src/components/schema-panel.tsx +++ b/src/duckdb/src/components/schema-panel.tsx @@ -6,6 +6,7 @@ import {useSelector} from 'react-redux'; import styled from 'styled-components'; import {AsyncDuckDBConnection} from '@duckdb/duckdb-wasm'; +import {LoadingSpinner, Icons} from '@kepler.gl/components'; import {arrowSchemaToFields} from '@kepler.gl/processors'; import {VisState} from '@kepler.gl/schemas'; @@ -29,6 +30,14 @@ const StyledSchemaPanel = styled.div` font-size: 12px; padding: 12px; font-family: ${props => props.theme.fontFamily}; + height: 100%; +`; + +const StyledLoadingSpinnerWrapper = styled.div` + display: flex; + justify-content: center; + align-items: center; + height: 100%; `; async function getColumnSchema(connection: AsyncDuckDBConnection, tableName: string) { @@ -57,16 +66,61 @@ async function getColumnSchema(connection: AsyncDuckDBConnection, tableName: str }; } -function getSchemaSuggestion(result) { +export type SchemaSuggestion = {column_name: string; table_name: string}; + +function getSchemaSuggestion(result: {key: string; children: {key: string}[]}[]) { return result.reduce((accu, data) => { const columns = data.children.map(child => ({ column_name: child.key, table_name: data.key })); return accu.concat(columns); - }, []); + }, [] as SchemaSuggestion[]); } -export const SchemaPanel = ({setTableSchema}) => { + +type SchemaPanelProps = { + setTableSchema: (tableSchema: SchemaSuggestion[]) => void; + droppedFile: File | null; +}; + +const StyledSchemaPanelDropMessage = styled.div` + display: flex; + justify-content: center; + align-items: center; + height: 100%; + flex-direction: column; + text-align: center; + + div { + margin: 5px; + } + .header { + font-size: 15px; + } + .bold { + font-weight: 700; + } +`; + +const StyledAddIcon = styled(Icons.Add)` + display: inline; + margin-top: -3px; +`; + +export const SchemaPanelDropMessage = () => { + return ( + +
+ Add files to DuckDB +
+
Supported formats:
+
.csv, .json, .geojson, .parquet, .arrow
+
Files you add will stay local to your browser.
+
+ ); +}; + +export const SchemaPanel = ({setTableSchema, droppedFile}: SchemaPanelProps) => { const [columnSchemas, setColumnSchemas] = useState[]>([]); const datasets = useSelector((state: State) => state?.demo?.keplerGl?.map?.visState.datasets); @@ -76,7 +130,7 @@ export const SchemaPanel = ({setTableSchema}) => { const tableResult = await c.query('SHOW TABLES;'); - const tableNames = tableResult.getChildAt(0)?.toJSON(); + const tableNames: string[] | undefined = tableResult.getChildAt(0)?.toJSON(); const result = await Promise.all((tableNames || [])?.map(name => getColumnSchema(c, name))); const tableSchema = getSchemaSuggestion(result); @@ -88,7 +142,7 @@ export const SchemaPanel = ({setTableSchema}) => { useEffect(() => { getTableSchema(); - }, [datasets, getTableSchema]); + }, [datasets, droppedFile, getTableSchema]); return ( @@ -107,8 +161,12 @@ export const SchemaPanel = ({setTableSchema}) => { }} /> )) + ) : droppedFile ? ( + + + ) : ( -
No tables found
+ )}
); diff --git a/src/duckdb/src/components/sql-panel.tsx b/src/duckdb/src/components/sql-panel.tsx index 4a2788851a..5a05184221 100644 --- a/src/duckdb/src/components/sql-panel.tsx +++ b/src/duckdb/src/components/sql-panel.tsx @@ -2,20 +2,20 @@ // Copyright contributors to the kepler.gl project import * as arrow from 'apache-arrow'; -import React, {useCallback, useState, useEffect} from 'react'; +import React, {useCallback, useState, useEffect, useRef} from 'react'; import {useDispatch} from 'react-redux'; import styled from 'styled-components'; import {Panel, PanelGroup, PanelResizeHandle} from 'react-resizable-panels'; import {addDataToMap} from '@kepler.gl/actions'; import {generateHashId} from '@kepler.gl/common-utils'; -import {Button, IconButton, Icons, LoadingSpinner, Tooltip} from '@kepler.gl/components'; +import {Button, FileDrop, IconButton, Icons, LoadingSpinner, Tooltip} from '@kepler.gl/components'; import {arrowSchemaToFields} from '@kepler.gl/processors'; import {sidePanelBg, panelBorderColor} from '@kepler.gl/styles'; import {isAppleDevice} from '@kepler.gl/utils'; import MonacoEditor from './monaco-editor'; -import {SchemaPanel} from './schema-panel'; +import {SchemaPanel, SchemaSuggestion} from './schema-panel'; import {PreviewDataPanel, QueryResult} from './preview-data-panel'; import {getDuckDB} from '../init'; import { @@ -26,7 +26,9 @@ import { setGeoArrowWKBExtension, splitSqlStatements, checkIsSelectQuery, - removeSQLComments + removeSQLComments, + tableFromFile, + SUPPORTED_DUCKDB_DROP_EXTENSIONS } from '../table/duckdb-table-utils'; const StyledSqlPanel = styled.div` @@ -125,6 +127,20 @@ const StyledErrorContainer = styled.pre` overflow: auto; `; +interface StyledDragPanelProps { + dragOver?: boolean; +} + +const StyledFileDropArea = styled(FileDrop)` + height: 100%; + border-width: 1px; + border: 1px ${props => (props.dragOver ? 'solid' : 'dashed')} + ${props => (props.dragOver ? props.theme.subtextColorLT : 'transparent')}; + .file-drop-target { + height: 100%; + } +`; + type SqlPanelProps = { initialSql?: string; }; @@ -136,14 +152,18 @@ export const SqlPanel: React.FC = ({initialSql = ''}) => { const params = new URLSearchParams(window.location.search); return params.get('sql') || initialSql; }); + const [droppedFile, setDroppedFile] = useState(null); + const [dragState, setDragState] = useState(false); const [result, setResult] = useState(null); const [error, setError] = useState(null); const [counter, setCounter] = useState(0); - const [tableSchema, setTableSchema] = useState([]); + const [tableSchema, setTableSchema] = useState([]); const [isRunning, setIsRunning] = useState(false); const [isMac] = useState(() => isAppleDevice()); const dispatch = useDispatch(); + const droppedFileAreaRef = useRef(null); + useEffect(() => { const currentUrl = new URL(window.location.href); if (sql) { @@ -244,11 +264,73 @@ export const SqlPanel: React.FC = ({initialSql = ''}) => { setCounter(counter + 1); }, [result, counter, dispatch]); + const isValidFileType = useCallback(filename => { + const fileExt = SUPPORTED_DUCKDB_DROP_EXTENSIONS.find(ext => filename.endsWith(ext)); + return Boolean(fileExt); + }, []); + + const createTableFromDroppedFile = useCallback(async (droppedFile: File | null) => { + if (droppedFile) { + const error = await tableFromFile(droppedFile); + if (error) { + setError(error); + } else { + setError(null); + } + } + + setDroppedFile(null); + setDragState(false); + }, []); + + useEffect(() => { + createTableFromDroppedFile(droppedFile); + }, [droppedFile, createTableFromDroppedFile]); + + const handleFileInput = useCallback( + (fileList: FileList, event: DragEvent) => { + if (event) { + event.preventDefault(); + event.stopPropagation(); + } + + const files = [...fileList].filter(Boolean); + + const disableExtensionFilter = false; + + const filesToLoad: File[] = []; + const errorFiles: string[] = []; + for (const file of files) { + if (disableExtensionFilter || isValidFileType(file.name)) { + filesToLoad.push(file); + } else { + errorFiles.push(file.name); + } + } + + if (filesToLoad.length > 0) { + setDroppedFile(filesToLoad[0]); + } else if (errorFiles.length > 0) { + setError(new Error(`Unsupported file formats: ${errorFiles.join(', ')}`)); + } + }, + [isValidFileType] + ); + return ( - + setDragState(true)} + onDragLeave={() => setDragState(false)} + frame={droppedFileAreaRef.current || document} + onDrop={handleFileInput} + className="file-uploader__file-drop" + > + + @@ -284,7 +366,7 @@ export const SqlPanel: React.FC = ({initialSql = ''}) => { - + {isRunning ? ( diff --git a/src/duckdb/src/table/duckdb-table-utils.ts b/src/duckdb/src/table/duckdb-table-utils.ts index 1b9aa44df2..5874552b7f 100644 --- a/src/duckdb/src/table/duckdb-table-utils.ts +++ b/src/duckdb/src/table/duckdb-table-utils.ts @@ -11,11 +11,15 @@ import * as arrow from 'apache-arrow'; import {DataType} from 'apache-arrow/type'; -import {AsyncDuckDBConnection} from '@duckdb/duckdb-wasm'; +import {AsyncDuckDBConnection, DuckDBDataProtocol} from '@duckdb/duckdb-wasm'; import {GEOARROW_EXTENSIONS, GEOARROW_METADATA_KEY} from '@kepler.gl/constants'; import {ProtoDatasetField} from '@kepler.gl/types'; +import {getDuckDB} from '../init'; + +export const SUPPORTED_DUCKDB_DROP_EXTENSIONS = ['arrow', 'csv', 'geojson', 'json', 'parquet']; + export type DuckDBColumnDesc = {name: string; type: string}; /** @@ -403,3 +407,106 @@ export const dropTableIfExists = async (connection: AsyncDuckDBConnection, table console.error('Dropping table failed', tableName, error); } }; + +/** + * Imports a file into DuckDB as a table, supporting multiple formats from SUPPORTED_DUCKDB_DROP_EXTENSIONS. + * @param file The file to be imported. + * @returns A promise that resolves when the file has been processed into a DuckDB table. + */ +export async function tableFromFile(file: File | null): Promise { + if (!file) return new Error('File Drag & Drop: No file'); + + const fileExt = SUPPORTED_DUCKDB_DROP_EXTENSIONS.find(ext => file.name.endsWith(ext)); + if (!fileExt) { + return new Error("File Drag & Drop: File extension isn't supported"); + } + + const db = await getDuckDB(); + const c = await db.connect(); + + let error: Error | null = null; + + try { + const tableName = sanitizeDuckDBTableName(file.name); + const sourceName = 'temp_file_handle'; + + c.query(`install spatial; + load spatial;`); + + if (fileExt === 'arrow') { + const arrayBuffer = await file.arrayBuffer(); + const uint8Array = new Uint8Array(arrayBuffer); + const arrowTable = arrow.tableFromIPC(uint8Array); + + await c.insertArrowTable(arrowTable, {name: tableName}); + } else { + await db.registerFileHandle(sourceName, file, DuckDBDataProtocol.BROWSER_FILEREADER, true); + + if (fileExt === 'csv') { + await c.query(` + CREATE TABLE '${tableName}' AS + SELECT * + FROM read_csv('${sourceName}', header = true, auto_detect = true, sample_size = -1); + `); + } else if (fileExt === 'json') { + await c.query(` + CREATE TABLE '${tableName}' AS + SELECT * + FROM read_json_auto('${sourceName}'); + `); + } else if (fileExt === 'geojson') { + await c.query(` + CREATE TABLE '${tableName}' AS + SELECT * + FROM ST_READ('${sourceName}', keep_wkb = TRUE); + `); + } else if (fileExt === 'parquet') { + await c.query(` + CREATE TABLE '${tableName}' AS + SELECT * + FROM read_parquet('${sourceName}') + `); + } + } + } catch (errorData) { + if (errorData instanceof Error) { + const message = errorData.message || ''; + // output more readable errors for known issues + if (message.includes('Arrow Type with extension name: geoarrow')) { + error = new Error( + 'The GeoArrow extensions are not implemented in the connected DuckDB version.' + ); + } else if (message.includes("Geoparquet column 'geometry' does not have geometry types")) { + error = new Error( + `Invalid Input Error: Geoparquet column 'geometry' does not have geometry types. +Possible reasons: + - Old .parquet files that don't match the Parquet format specification. + - Unsupported compression.` + ); + } + } + + if (!error) { + error = errorData as Error; + } + } + + c.close(); + + return error; +} + +/** + * Sanitizes a file name to be a valid DuckDB table name. + * @param fileName The input file name to be sanitized. + * @returns A valid DuckDB table name. + */ +export function sanitizeDuckDBTableName(fileName: string): string { + // Replace invalid characters with underscores + let name = fileName.replace(/[^a-zA-Z0-9_]/g, '_'); + // Ensure it doesn't start with a digit + if (/^\d/.test(name)) { + name = 't_' + name; + } + return name || 'default_table'; +}