Skip to content

enhance Spider benchmarking #154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b4ea953
Add Spider evaluation implementation for text2sql solution
minhyeong112 Jan 6, 2025
cbc1435
feat: Improved SQL schema selection and SQLite connector for Spider e…
minhyeong112 Jan 6, 2025
65a7a90
style: Fix trailing whitespace issues
minhyeong112 Jan 6, 2025
40c0f60
style: Fix JSON formatting in Jupyter notebook
minhyeong112 Jan 6, 2025
623b601
style: Apply black formatting to Python files
minhyeong112 Jan 6, 2025
13d6129
style: Apply Ruff fixes
minhyeong112 Jan 6, 2025
d628225
docs: Update Spider dataset and test suite download instructions
minhyeong112 Jan 6, 2025
36fdd69
style: Fix JSON formatting in notebook
minhyeong112 Jan 6, 2025
7cd4aab
refactor: improve SQL connectors and agents for spider evaluation
minhyeong112 Jan 8, 2025
ec2dd2b
feat: Add spider evaluation changes and schema improvements
minhyeong112 Jan 30, 2025
24ab162
chore: resolve merge conflicts with upstream/main
minhyeong112 Jan 30, 2025
33f016a
style: fix trailing whitespace and formatting issues
minhyeong112 Jan 30, 2025
03131f5
fix: resolve merge conflicts and update code structure
minhyeong112 Jan 30, 2025
62201f6
style: apply black formatting to remaining files
minhyeong112 Jan 30, 2025
787175f
style: apply ruff fixes to improve code quality
minhyeong112 Jan 30, 2025
4e6a379
fix: resolve merge conflicts in uv.lock
minhyeong112 Jan 30, 2025
69a5b2e
chore: update dependencies in uv.lock
minhyeong112 Jan 30, 2025
e107868
feat: Add Spider evaluation for text-to-sql solution
minhyeong112 Jan 31, 2025
e902e14
Merge branch 'main' into spider-eval
BenConstable9 Jan 31, 2025
47fbd83
Add .env.example files, update .gitignore, and restore user_message_r…
minhyeong112 Jan 31, 2025
1e2f381
Implement shared schema cache using Azure Cognitive Search
minhyeong112 Jan 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,9 @@ cython_debug/
# Spider data: https://drive.google.com/file/d/1403EGqzIDoHMdQF4c9Bkyl7dZLZ5Wt6J/view
/text_2_sql/test-suite-sql-eval/
/text_2_sql/spider_data/

# Generated samples data
/text_2_sql/data_dictionary/generated_samples/

# Data Dictionary environment file
/text_2_sql/data_dictionary/.env
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ version = "0.1.0"
description = "This repo accelerates development of RAG applications with rich data sources including SQL Warehouses and documents analysed with Azure Document Intelligence."
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"text-2-sql-core[sqlite]",
]

[dependency-groups]
dev = [
Expand All @@ -20,7 +23,7 @@ dev = [
members = ["text_2_sql/text_2_sql_core", "text_2_sql/autogen", "deploy_ai_search_indexes", "image_processing"]

[tool.uv.sources]
text_2_sql_core = { workspace = true }
autogen_text_2_sql = { workspace = true }
deploy_ai_search_indexes = { workspace = true }
image_processing = { workspace = true }
text-2-sql-core = { workspace = true }
59 changes: 59 additions & 0 deletions text_2_sql/autogen/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# OpenAI Configuration (Common for all environments)
OpenAI__CompletionDeployment=gpt-4o-mini
OpenAI__MiniCompletionDeployment=gpt-4o-mini
OpenAI__GroupChatModel=4o-mini
OpenAI__EmbeddingModel=text-embedding-ada-002
OpenAI__Endpoint=your_openai_endpoint
OpenAI__ApiKey=your_api_key
OpenAI__ApiVersion=2024-08-01-preview

# Authentication (Common for all environments)
IdentityType=key

### ENVIRONMENT-SPECIFIC SETTINGS ###
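# Enable exactly one of the sections below at a time: uncomment its lines and
# keep every other section commented out. The "Enhanced Schema" section is the
# one active by default in this example file.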

## Spider Evaluation Baseline Settings (using tables.json) ##
#Text2Sql__DatabaseEngine=SQLite
#Text2Sql__UseQueryCache=False
#Text2Sql__PreRunQueryCache=False
#Text2Sql__UseColumnValueStore=False
#Text2Sql__UseAISearch=False
#Text2Sql__DatabaseName=
#Text2Sql__DatabaseConnectionString=

## Spider Evaluation with Enhanced Schema Settings ##
Text2Sql__DatabaseEngine=SQLite
Text2Sql__UseQueryCache=False
Text2Sql__PreRunQueryCache=False
Text2Sql__UseColumnValueStore=True
Text2Sql__UseAISearch=True
Text2Sql__DatabaseName=
Text2Sql__DatabaseConnectionString=
AIService__AzureSearchOptions__Endpoint=your_search_endpoint
AIService__AzureSearchOptions__Key=your_search_key
AIService__AzureSearchOptions__Text2SqlSchemaStore__Index=text-2-sql-schema-store-index-spider-test
AIService__AzureSearchOptions__Text2SqlSchemaStore__SemanticConfig=text-2-sql-schema-store-semantic-config-spider-test
AIService__AzureSearchOptions__Text2SqlQueryCache__Index=text-2-sql-query-cache-index-spider-test
AIService__AzureSearchOptions__Text2SqlQueryCache__SemanticConfig=text-2-sql-query-cache-semantic-config-spider-test
AIService__AzureSearchOptions__Text2SqlColumnValueStore__Index=text-2-sql-column-value-store-index-spider-test
AIService__AzureSearchOptions__Text2SqlSchemaCache__Index=text-2-sql-schema-cache-index-spider-test

## Production Settings (Commented Out) ##
#Text2Sql__DatabaseEngine=TSQL
#Text2Sql__UseQueryCache=True
#Text2Sql__PreRunQueryCache=True
#Text2Sql__UseColumnValueStore=True
#Text2Sql__UseAISearch=True
#Text2Sql__DatabaseName=SalesLT
#Text2Sql__DatabaseConnectionString=your_database_connection_string
#AIService__AzureSearchOptions__Endpoint=your_search_endpoint
#AIService__AzureSearchOptions__Key=your_search_key
#AIService__AzureSearchOptions__RagDocuments__Index=
#AIService__AzureSearchOptions__RagDocuments__SemanticConfig=
#AIService__AzureSearchOptions__Text2SqlSchemaStore__Index=text-2-sql-schema-store-index
#AIService__AzureSearchOptions__Text2SqlSchemaStore__SemanticConfig=text-2-sql-schema-store-semantic-config
#AIService__AzureSearchOptions__Text2SqlQueryCache__Index=text-2-sql-query-cache-index
#AIService__AzureSearchOptions__Text2SqlQueryCache__SemanticConfig=text-2-sql-query-cache-semantic-config
#AIService__AzureSearchOptions__Text2SqlColumnValueStore__Index=text-2-sql-column-value-store-index
#AIService__AzureSearchOptions__Text2SqlSchemaCache__Index=text-2-sql-schema-cache-index
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
Expand Down
Loading