diff --git a/.aws/petercat-preview.toml b/.aws/petercat-preview.toml index c1b39016..db6d228a 100644 --- a/.aws/petercat-preview.toml +++ b/.aws/petercat-preview.toml @@ -7,4 +7,6 @@ region = "ap-northeast-1" confirm_changeset = true capabilities = "CAPABILITY_IAM" disable_rollback = true -image_repositories = ["FastAPIFunction=654654285942.dkr.ecr.ap-northeast-1.amazonaws.com/petercatapipreview49199518/fastapifunctionead79d0drepo", "SQSSubscriptionFunction=654654285942.dkr.ecr.ap-northeast-1.amazonaws.com/petercatapipreview49199518/sqssubscriptionfunctiona2fc8b7drepo"] +image_repositories = [ + "FastAPIFunction=654654285942.dkr.ecr.ap-northeast-1.amazonaws.com/petercatapipreview49199518/fastapifunctionead79d0drepo", +] diff --git a/.aws/petercat-prod.toml b/.aws/petercat-prod.toml index 08f8429f..36e6fe42 100644 --- a/.aws/petercat-prod.toml +++ b/.aws/petercat-prod.toml @@ -7,4 +7,6 @@ region = "ap-northeast-1" confirm_changeset = true capabilities = "CAPABILITY_IAM" disable_rollback = true -image_repositories = ["FastAPIFunction=654654285942.dkr.ecr.ap-northeast-1.amazonaws.com/samapp7427b055/fastapifunctionead79d0drepo", "SQSSubscriptionFunction=654654285942.dkr.ecr.ap-northeast-1.amazonaws.com/samapp7427b055/sqssubscriptionfunctiona2fc8b7drepo"] +image_repositories = [ + "FastAPIFunction=654654285942.dkr.ecr.ap-northeast-1.amazonaws.com/samapp7427b055/fastapifunctionead79d0drepo", +] diff --git a/.github/workflows/aws-preview.yml b/.github/workflows/aws-preview.yml index fcd59487..774dafd1 100644 --- a/.github/workflows/aws-preview.yml +++ b/.github/workflows/aws-preview.yml @@ -11,7 +11,6 @@ on: paths: - .github/workflows/aws-preview.yml - server/** - - petercat_utils/** - subscriber/** - template.yml @@ -74,8 +73,9 @@ jobs: ParameterKey=APIIdentifier,ParameterValue=${{ secrets.API_IDENTIFIER }} \ ParameterKey=FastAPISecretKey,ParameterValue=${{ secrets.FASTAPI_SECRET_KEY }} \ ParameterKey=SQSQueueName,ParameterValue=${{ secrets.SQS_QUEUE_NAME }} \ - 
ParameterKey=SQSQueueUrl,ParameterValue=${{ secrets.SQS_QUEUE_URL }} \ ParameterKey=GitHubToken,ParameterValue=${{ secrets.X_GITHUB_TOKEN }} \ ParameterKey=Auth0Domain,ParameterValue=${{ secrets.AUTH0_DOMAIN }} \ ParameterKey=Auth0ClientId,ParameterValue=${{ secrets.AUTH0_CLIENT_ID }} \ - ParameterKey=Auth0ClientSecret,ParameterValue=${{ secrets.AUTH0_CLIENT_SECRET }} + ParameterKey=Auth0ClientSecret,ParameterValue=${{ secrets.AUTH0_CLIENT_SECRET }} \ + ParameterKey=WhiskerApiKey,ParameterValue=${{ secrets.WHISKER_API_KEY }} \ + ParameterKey=WhiskerApiUrl,ParameterValue=${{ secrets.WHISKER_API_URL }} \ No newline at end of file diff --git a/.github/workflows/aws-prod.yml b/.github/workflows/aws-prod.yml index b72deea4..935d25ab 100644 --- a/.github/workflows/aws-prod.yml +++ b/.github/workflows/aws-prod.yml @@ -6,7 +6,6 @@ on: paths: - .github/workflows/aws-prod.yml - server/** - - petercat_utils/** - subscriber/** - template.yml @@ -68,8 +67,9 @@ jobs: ParameterKey=APIIdentifier,ParameterValue=${{ secrets.API_IDENTIFIER }} \ ParameterKey=FastAPISecretKey,ParameterValue=${{ secrets.FASTAPI_SECRET_KEY }} \ ParameterKey=SQSQueueName,ParameterValue=${{ secrets.SQS_QUEUE_NAME }} \ - ParameterKey=SQSQueueUrl,ParameterValue=${{ secrets.SQS_QUEUE_URL }} \ ParameterKey=GitHubToken,ParameterValue=${{ secrets.X_GITHUB_TOKEN }} \ ParameterKey=Auth0Domain,ParameterValue=${{ secrets.AUTH0_DOMAIN }} \ ParameterKey=Auth0ClientId,ParameterValue=${{ secrets.AUTH0_CLIENT_ID }} \ - ParameterKey=Auth0ClientSecret,ParameterValue=${{ secrets.AUTH0_CLIENT_SECRET }} + ParameterKey=Auth0ClientSecret,ParameterValue=${{ secrets.AUTH0_CLIENT_SECRET }} \ + ParameterKey=WhiskerApiKey,ParameterValue=${{ secrets.WHISKER_API_KEY }} \ + ParameterKey=WhiskerApiUrl,ParameterValue=${{ secrets.WHISKER_API_URL }} diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index 06f38658..c06529ef 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -6,7 
+6,6 @@ on: paths: - .github/workflows/** - server/** - - petercat_utils/** - subscriber/** permissions: @@ -52,6 +51,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt + pip install -r requirements-dev.txt pip install ruff pip install pytest pytest-cov diff --git a/.vscode/settings.json b/.vscode/settings.json index a04d963d..6511be99 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,5 +2,10 @@ "prettier.configPath": "./client/.prettierrc.js", "python.analysis.extraPaths": [ "./server" - ] + ], + "python.testing.pytestArgs": [ + "server" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true } diff --git a/README.en-US.md b/README.en-US.md index 80f25e2e..815e9141 100644 --- a/README.en-US.md +++ b/README.en-US.md @@ -110,7 +110,6 @@ The project requires environment variables to be set: | `LLM_TOKEN_PUBLIC_NAME` | Optional | The name of the LLM signing public key managed by AWS. If configured, Petercat will use the RSA algorithm to manage the user's LLM Token. | `prod/petercat/llm/pub` | | `STATIC_KEYPAIR_ID` | Optional | The Key Pair ID for AWS CloudFront. If configured, CloudFront signed URLs will be used to protect your resources. For more information, see the [AWS documentation](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html). 
| `APKxxxxxxxx` | | `S3_TEMP_BUCKET_NAME` | Required | AWS S3 bucket for temporary image files | `xxx-temp` | -| `SQS_QUEUE_URL` | Required | AWS SQS queue URL | `https://sqs.ap-northeast-1.amazonaws.com/xxx/petercat-task-queue` | | **Supabase Related Environment Variables** | | `SUPABASE_URL` | Required | Supabase service URL, found [here](https://supabase.com/dashboard/project/_/settings/database) | `https://***.supabase.co` | | `SUPABASE_SERVICE_KEY` | Required | Supabase service key, found [here](https://supabase.com/dashboard/project/_/settings/database) | `{{SUPABASE_SERVICE_KEY}}` | @@ -132,7 +131,9 @@ The project requires environment variables to be set: | `RATE_LIMIT_ENABLED` | Optional | Whether rate limiting is enabled | `True` | | `RATE_LIMIT_REQUESTS` | Optional | Number of requests for rate limiting | `100` | | `RATE_LIMIT_DURATION` | Optional | Duration for rate limiting (in minutes) | `1` | - +| **RAG server config** | +| `WHISKER_API_URL` | Required | WHISKER RAG Server Path | `http://....` | +| `WHISKER_API_KEY` | Required | WHISKER RAG Server KEY | `sk-xxxx` | ## 🤝 Contributing > PeterCat uses yarn as the package manager. diff --git a/README.ja-JP.md b/README.ja-JP.md index c7b23a43..e8e1dd70 100644 --- a/README.ja-JP.md +++ b/README.ja-JP.md @@ -1,21 +1,14 @@ -![Frame 294](https://github.com/user-attachments/assets/0656ab69-4352-452b-a8f4-1c05cec108d1) -![Frame 292](https://github.com/user-attachments/assets/49db0363-3f89-48a1-ba2b-e30bd5d083b3) +![Frame 294](https://github.com/user-attachments/assets/0656ab69-4352-452b-a8f4-1c05cec108d1) ![Frame 292](https://github.com/user-attachments/assets/49db0363-3f89-48a1-ba2b-e30bd5d083b3) # PeterCat
- [简体中文](./README.md) | [English](./README.en-US.md) | 日本語 + [简体中文](./README.md) | [English](./README.en-US.md) | 日本語 - **コミュニティメンテナと開発者のために特別に設計されたインテリジェントQ&Aボットソリューション。** +**コミュニティメンテナと開発者のために特別に設計されたインテリジェント Q&A ボットソリューション。** - [![npm](https://img.shields.io/npm/dm/@petercatai/assistant.svg)](https://www.npmjs.com/package/@petercatai/assistant) - [![Version](https://img.shields.io/npm/v/petercat-lui/latest.svg)](https://www.npmjs.com/package/petercat-lui) - [![DockerHub Version](https://img.shields.io/docker/v/petercatai/petercat?logo=docker&logoColor=white)](https://hub.docker.com/r/petercatai/petercat) - [![CI Test Status](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml/badge.svg)](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml) - [![codecov](https://codecov.io/github/petercat-ai/petercat/graph/badge.svg?token=2HAO18FB6X)](https://codecov.io/github/petercat-ai/petercat) - [![License](https://img.shields.io/badge/License-MIT%40Peter%20Cat-yellow.svg)](https://github.com/petercat-ai/petercat/blob/master/LICENSE) - +[![npm](https://img.shields.io/npm/dm/@petercatai/assistant.svg)](https://www.npmjs.com/package/@petercatai/assistant) [![Version](https://img.shields.io/npm/v/petercat-lui/latest.svg)](https://www.npmjs.com/package/petercat-lui) [![DockerHub Version](https://img.shields.io/docker/v/petercatai/petercat?logo=docker&logoColor=white)](https://hub.docker.com/r/petercatai/petercat) [![CI Test Status](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml/badge.svg)](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml) [![codecov](https://codecov.io/github/petercat-ai/petercat/graph/badge.svg?token=2HAO18FB6X)](https://codecov.io/github/petercat-ai/petercat) [![License](https://img.shields.io/badge/License-MIT%40Peter%20Cat-yellow.svg)](https://github.com/petercat-ai/petercat/blob/master/LICENSE)
@@ -25,26 +18,26 @@ ## ✨ 特徴 -会話型Q&Aエージェントの構成システム、自ホスト型デプロイメントソリューション、および便利なオールインワンアプリケーションSDKを提供し、GitHubリポジトリのためのインテリジェントQ&Aボットをワンクリックで作成し、さまざまな公式ウェブサイトやプロジェクトに迅速に統合し、コミュニティのためのより効率的な技術サポートエコシステムを提供します。 +会話型 Q&A エージェントの構成システム、自ホスト型デプロイメントソリューション、および便利なオールインワンアプリケーション SDK を提供し、GitHub リポジトリのためのインテリジェント Q&A ボットをワンクリックで作成し、さまざまな公式ウェブサイトやプロジェクトに迅速に統合し、コミュニティのためのより効率的な技術サポートエコシステムを提供します。 ### 会話で作成 -リポジトリのアドレスまたは名前を提供するだけで、Peter Catがボットの作成プロセス全体を自動的に完了します。 +リポジトリのアドレスまたは名前を提供するだけで、Peter Cat がボットの作成プロセス全体を自動的に完了します。 ![Create with Conversations](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*aQ9uRoNZGe8AAAAAAAAAAAAADrPSAQ/original) ### 自動化されたナレッジベース -ボットが作成されると、関連するすべてのGitHubドキュメントと問題が自動的にナレッジベースに追加され、ボットの知識源となります。 +ボットが作成されると、関連するすべての GitHub ドキュメントと問題が自動的にナレッジベースに追加され、ボットの知識源となります。 ![Automated Knowledge Base](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*hkkFSaR1UqgAAAAAAAAAAAAADrPSAQ/original) ### マルチプラットフォーム統合 -公式ウェブサイトへのSDK統合やGitHubリポジトリへのワンクリックインストールなど、さまざまな統合オプションを提供します。 +公式ウェブサイトへの SDK 統合や GitHub リポジトリへのワンクリックインストールなど、さまざまな統合オプションを提供します。 | ![Website](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*REw3QYgdJ44AAAAAAAAAAAAADrPSAQ/original) | ![GitHub](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*jlYzSqlcpRIAAAAAAAAAAAAADrPSAQ/original) | -|:----------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------:| +| :-: | :-: | ## エージェントワークフロー @@ -55,70 +48,71 @@ デプロイメントソリューション: [AWS](https://aws.amazon.com) + [Supabase](https://supabase.com) 完全なガイドはこちらで見つけることができます: + - [Self-Hosting - Start the Service Locally](./docs/guides/self_hosted_local.md) - [Self-Hosting - Deploy to AWS](./docs/guides/self_hosted_aws.md) - ![Deployment Solution](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*0_aUTJpyx1YAAAAAAAAAAAAADrPSAQ/original) [![Self hosted 
Video](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*spdZSbWsVhkAAAAAAAAAAAAADrPSAQ/fmt.webp)](https://www.youtube.com/watch?v=Al6R9Ye5mBY) - ## ⚙️ 環境変数 プロジェクトには環境変数の設定が必要です: ### クライアント -`.env.local` -| 環境変数 | タイプ | 説明 | 例 | -|----------------------------|---------|--------------------------------------------------|---------------------------------------------| -| `NEXT_PUBLIC_API_DOMAIN` | 必須 | バックエンドサービスのAPIドメイン | `https://api.petercat.ai` | +`.env.local` +| 環境変数 | タイプ | 説明 | 例 | +| --- | --- | --- | --- | +| `NEXT_PUBLIC_API_DOMAIN` | 必須 | バックエンドサービスの API ドメイン | `https://api.petercat.ai` | ### サーバー `.env` -| 環境変数 | タイプ | 説明 | 例 | -|----------------------------|--------------------------------------------|-----------------------------------------------|---------------------------------------------| -| **基本アプリケーション環境変数** | -| `API_URL` | 必須 | バックエンドサービスのAPIドメイン | `https://api.petercat.ai` | -| `WEB_URL` | 必須 | フロントエンドウェブサービスのドメイン | `https://petercat.ai` | -| `STATIC_URL` | 必須 | 静的リソースドメイン | `https://static.petercat.ai` | -| **AWS関連環境変数** | -| `X_GITHUB_SECRET_NAME` | 必須 | AWSシークレットファイル名 | `prod/githubapp/petercat/pem` | -| `STATIC_SECRET_NAME` | オプション | AWSが管理するCloudFrontのプライベートキーの名前。設定されている場合、CloudFrontの署名付きURLが使用され、リソースが保護されます。詳細については、[AWSドキュメント](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)を参照してください。 | `prod/petercat/static` | -| `LLM_TOKEN_SECRET_NAME` | オプション | AWSが管理するLLM署名プライベートキーの名前。設定されている場合、PetercatはRSAアルゴリズムを使用してユーザーのLLMトークンを管理します。 | `prod/petercat/llm` | -| `LLM_TOKEN_PUBLIC_NAME` | オプション | AWSが管理するLLM署名公開キーの名前。設定されている場合、PetercatはRSAアルゴリズムを使用してユーザーのLLMトークンを管理します。 | `prod/petercat/llm/pub` | -| `STATIC_KEYPAIR_ID` | オプション | AWS CloudFrontのキーID。設定されている場合、CloudFrontの署名付きURLが使用され、リソースが保護されます。詳細については、[AWSドキュメント](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)を参照してください。 | `APKxxxxxxxx` | -| `S3_TEMP_BUCKET_NAME` | 必須 | 一時的な画像ファイル用のAWS 
S3バケット | `xxx-temp` | -| `SQS_QUEUE_URL` | 必須 | AWS SQSキューURL | `https://sqs.ap-northeast-1.amazonaws.com/xxx/petercat-task-queue` | -| **Supabase関連環境変数** | -| `SUPABASE_URL` | 必須 | SupabaseサービスURL、[こちら](https://supabase.com/dashboard/project/_/settings/database)で見つけることができます | `https://***.supabase.co` | -| `SUPABASE_SERVICE_KEY` | 必須 | Supabaseサービスキー、[こちら](https://supabase.com/dashboard/project/_/settings/database)で見つけることができます | `{{SUPABASE_SERVICE_KEY}}` | -| **Auth0関連環境変数** | -| `AUTH0_DOMAIN` | 必須 | Auth0ドメイン、Auth0 / Application / Basic Informationから取得 | `petercat.us.auth0.com` | -| `AUTH0_CLIENT_ID` | 必須 | Auth0クライアントID、Auth0 / Application / Basic Informationから取得 | `artfiUxxxx` | -| `AUTH0_CLIENT_SECRET` | 必須 | Auth0クライアントシークレット、Auth0 / Application / Basic Informationから取得 | `xxxx-xxxx-xxx` | -| `API_IDENTIFIER` | 必須 | Auth0 API識別子 | `https://petercat.us.auth0.com/api/v2/` | -| **LLM関連環境変数** | -| `OPENAI_API_KEY` | 必須 | OpenAI APIキー | `sk-xxxx` | -| `OPENAI_BASE_URL` | オプション | APIリクエストのためのベースURL。プロキシやサービスエミュレーターを使用する場合のみ指定します。 | `https://api.openai.com/v1` | -| `GEMINI_API_KEY` | オプション | Gemini APIキー | `xxxx` | -| `TAVILY_API_KEY` | オプション | Tavily APIキー | `tvly-xxxxx` | -| **GitHub App登録環境変数** | -| `X_GITHUB_APP_ID` | オプション | GitHub App ID | `123456` | -| `X_GITHUB_APPS_CLIENT_ID` | オプション | GitHub AppクライアントID | `Iv1.xxxxxxx` | -| `X_GITHUB_APPS_CLIENT_SECRET` | オプション | GitHub Appクライアントシークレット | `xxxxxxxx` | -| **レート制限構成** | -| `RATE_LIMIT_ENABLED` | オプション | レート制限が有効かどうか | `True` | -| `RATE_LIMIT_REQUESTS` | オプション | レート制限のリクエスト数 | `100` | -| `RATE_LIMIT_DURATION` | オプション | レート制限の期間(分単位) | `1` | +| 環境変数 | タイプ | 説明 | 例 | +| --- | --- | --- | --- | +| **基本アプリケーション環境変数** | +| `API_URL` | 必須 | バックエンドサービスの API ドメイン | `https://api.petercat.ai` | +| `WEB_URL` | 必須 | フロントエンドウェブサービスのドメイン | `https://petercat.ai` | +| `STATIC_URL` | 必須 | 静的リソースドメイン | `https://static.petercat.ai` | +| **AWS 関連環境変数** | +| `X_GITHUB_SECRET_NAME` | 必須 | AWS シークレットファイル名 | 
`prod/githubapp/petercat/pem` | +| `STATIC_SECRET_NAME` | オプション | AWS が管理する CloudFront のプライベートキーの名前。設定されている場合、CloudFront の署名付き URL が使用され、リソースが保護されます。詳細については、[AWS ドキュメント](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)を参照してください。 | `prod/petercat/static` | +| `LLM_TOKEN_SECRET_NAME` | オプション | AWS が管理する LLM 署名プライベートキーの名前。設定されている場合、Petercat は RSA アルゴリズムを使用してユーザーの LLM トークンを管理します。 | `prod/petercat/llm` | +| `LLM_TOKEN_PUBLIC_NAME` | オプション | AWS が管理する LLM 署名公開キーの名前。設定されている場合、Petercat は RSA アルゴリズムを使用してユーザーの LLM トークンを管理します。 | `prod/petercat/llm/pub` | +| `STATIC_KEYPAIR_ID` | オプション | AWS CloudFront のキー ID。設定されている場合、CloudFront の署名付き URL が使用され、リソースが保護されます。詳細については、[AWS ドキュメント](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)を参照してください。 | `APKxxxxxxxx` | +| `S3_TEMP_BUCKET_NAME` | 必須 | 一時的な画像ファイル用の AWS S3 バケット | `xxx-temp` | +| **Supabase 関連環境変数** | +| `SUPABASE_URL` | 必須 | Supabase サービス URL、[こちら](https://supabase.com/dashboard/project/_/settings/database)で見つけることができます | `https://***.supabase.co` | +| `SUPABASE_SERVICE_KEY` | 必須 | Supabase サービスキー、[こちら](https://supabase.com/dashboard/project/_/settings/database)で見つけることができます | `{{SUPABASE_SERVICE_KEY}}` | +| **Auth0 関連環境変数** | +| `AUTH0_DOMAIN` | 必須 | Auth0 ドメイン、Auth0 / Application / Basic Information から取得 | `petercat.us.auth0.com` | +| `AUTH0_CLIENT_ID` | 必須 | Auth0 クライアント ID、Auth0 / Application / Basic Information から取得 | `artfiUxxxx` | +| `AUTH0_CLIENT_SECRET` | 必須 | Auth0 クライアントシークレット、Auth0 / Application / Basic Information から取得 | `xxxx-xxxx-xxx` | +| `API_IDENTIFIER` | 必須 | Auth0 API 識別子 | `https://petercat.us.auth0.com/api/v2/` | +| **LLM 関連環境変数** | +| `OPENAI_API_KEY` | 必須 | OpenAI API キー | `sk-xxxx` | +| `OPENAI_BASE_URL` | オプション | API リクエストのためのベース URL。プロキシやサービスエミュレーターを使用する場合のみ指定します。 | `https://api.openai.com/v1` | +| `GEMINI_API_KEY` | オプション | Gemini API キー | `xxxx` | +| `TAVILY_API_KEY` | オプション | Tavily API キー | 
`tvly-xxxxx` | +| **GitHub App 登録環境変数** | +| `X_GITHUB_APP_ID` | オプション | GitHub App ID | `123456` | +| `X_GITHUB_APPS_CLIENT_ID` | オプション | GitHub App クライアント ID | `Iv1.xxxxxxx` | +| `X_GITHUB_APPS_CLIENT_SECRET` | オプション | GitHub App クライアントシークレット | `xxxxxxxx` | +| **レート制限構成** | +| `RATE_LIMIT_ENABLED` | オプション | レート制限が有効かどうか | `True` | +| `RATE_LIMIT_REQUESTS` | オプション | レート制限のリクエスト数 | `100` | +| `RATE_LIMIT_DURATION` | オプション | レート制限の期間(分単位) | `1` | +| **RAG サーバー構成** | +| `WHISKER_API_URL` | 必須 | WHISKER RAG サーバーの URL | `http://....` | +| `WHISKER_API_KEY` | 必須 | WHISKER RAG サーバーの API キー | `sk-xxxx` | ## 🤝 貢献 -> Peter Catはyarnをパッケージマネージャーとして使用しています。 +> Peter Cat は yarn をパッケージマネージャーとして使用しています。 ```bash git clone https://github.com/petercat-ai/petercat.git @@ -154,17 +148,16 @@ yarn run build:pypi yarn run publish:pypi ``` - ## 💼 エンタープライズ統合 -プロジェクトのアドレス、使用シナリオ、使用頻度などの情報を[petercat.assistant@gmail.com ](petercat.assistant@gmail.com )に送信してください。 +プロジェクトのアドレス、使用シナリオ、使用頻度などの情報を[petercat.assistant@gmail.com](mailto:petercat.assistant@gmail.com)に送信してください。 ## 📧 問題の報告 -Peter Catはまだ成長段階にあり、時折「癇癪」を起こすことがあります。以下のチャンネルを通じて問題を報告してください: +Peter Cat はまだ成長段階にあり、時折「癇癪」を起こすことがあります。以下のチャンネルを通じて問題を報告してください: -* [Submit an Issue(https://github.com/petercat-ai/petercat/issues/new/choose) -* [Discussions](https://github.com/petercat-ai/petercat/discussions) +- [Submit an Issue](https://github.com/petercat-ai/petercat/issues/new/choose) +- [Discussions](https://github.com/petercat-ai/petercat/discussions) 👬 貢献者 diff --git a/README.md b/README.md index bf0bb5e8..d91e9be1 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,20 @@ -![Frame 303](https://github.com/user-attachments/assets/01cf3bb2-650a-47a4-88b3-a63f961ae5e2) -![image](https://github.com/user-attachments/assets/77670023-4fab-4ac9-bbe1-c7ea36363e02) +![Frame 303](https://github.com/user-attachments/assets/01cf3bb2-650a-47a4-88b3-a63f961ae5e2) ![image](https://github.com/user-attachments/assets/77670023-4fab-4ac9-bbe1-c7ea36363e02)

PeterCat

- 简体中文 | [English](./README.en-US.md) | [日本語](./README.ja-JP.md) - - **专为社区维护者和开发者打造的智能答疑机器人解决方案。** - - [![npm](https://img.shields.io/npm/dm/@petercatai/assistant.svg)](https://www.npmjs.com/package/@petercatai/assistant) - [![Version](https://img.shields.io/npm/v/petercat-lui/latest.svg)](https://www.npmjs.com/package/petercat-lui) - [![DockerHub Version](https://img.shields.io/docker/v/petercatai/petercat?logo=docker&logoColor=white)](https://hub.docker.com/r/petercatai/petercat) - [![CI Test Status](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml/badge.svg)](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml) - [![codecov](https://codecov.io/github/petercat-ai/petercat/graph/badge.svg?token=2HAO18FB6X)](https://codecov.io/github/petercat-ai/petercat) - [![License](https://img.shields.io/badge/License-MIT%40Peter%20Cat-yellow.svg)](https://github.com/petercat-ai/petercat/blob/master/LICENSE) - + 简体中文 | [English](./README.en-US.md) | [日本語](./README.ja-JP.md) + +**专为社区维护者和开发者打造的智能答疑机器人解决方案。** + +[![npm](https://img.shields.io/npm/dm/@petercatai/assistant.svg)](https://www.npmjs.com/package/@petercatai/assistant) [![Version](https://img.shields.io/npm/v/petercat-lui/latest.svg)](https://www.npmjs.com/package/petercat-lui) [![DockerHub Version](https://img.shields.io/docker/v/petercatai/petercat?logo=docker&logoColor=white)](https://hub.docker.com/r/petercatai/petercat) [![CI Test Status](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml/badge.svg)](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml) [![codecov](https://codecov.io/github/petercat-ai/petercat/graph/badge.svg?token=2HAO18FB6X)](https://codecov.io/github/petercat-ai/petercat) [![License](https://img.shields.io/badge/License-MIT%40Peter%20Cat-yellow.svg)](https://github.com/petercat-ai/petercat/blob/master/LICENSE) +
- ## 🏠 主页 -[🐱窝: petercat.ai](https://petercat.ai) - +[🐱 窝: petercat.ai](https://petercat.ai) ## ✨ 特性 @@ -34,40 +26,37 @@ ![对话即创造](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*aQ9uRoNZGe8AAAAAAAAAAAAADrPSAQ/original) - ### 知识自动入库 -机器人创建后,所有相关Github 文档和 issue 将自动入库,作为机器人的知识依据 +机器人创建后,所有相关 Github 文档和 issue 将自动入库,作为机器人的知识依据 ![知识自动入库](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*hkkFSaR1UqgAAAAAAAAAAAAADrPSAQ/original) ### 多平台集成 -多种集成方式自由选择,如对话应用 SDK 集成至官网,Github APP一键安装至 Github 仓库等 +多种集成方式自由选择,如对话应用 SDK 集成至官网,Github APP 一键安装至 Github 仓库等 | ![官网](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*REw3QYgdJ44AAAAAAAAAAAAADrPSAQ/original) | ![GitHub](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*jlYzSqlcpRIAAAAAAAAAAAAADrPSAQ/original) | -|:--------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------:| +| :-: | :-: | [完整演示视频](https://www.bilibili.com/video/BV1KiHUeFE4p) ### 不止是 QA 机器人 -| 项目信息查询 | 回复 Discussion | -| ----------------------------------------- | --------------------------------------- | -| ![search_repo](https://github.com/user-attachments/assets/a7e6d37b-4674-4fd0-a89b-678e10ec01c8) | ![ discussion replay](https://github.com/user-attachments/assets/e28a3ded-dc6c-4ba5-9543-05c41bbff331)| +| 项目信息查询 | 回复 Discussion | +| --- | --- | +| ![search_repo](https://github.com/user-attachments/assets/a7e6d37b-4674-4fd0-a89b-678e10ec01c8) | ![ discussion replay](https://github.com/user-attachments/assets/e28a3ded-dc6c-4ba5-9543-05c41bbff331) | +| PR Summary | Code Review | +| --- | --- | +| ![image](https://github.com/user-attachments/assets/28bd546b-0c00-48a2-a57e-982448d37ef2) | ![image](https://github.com/user-attachments/assets/a39c4d71-1368-4508-bca4-018a00549528) | | -| PR Summary | Code Review | -| ----------------------------------------- | 
--------------------------------------- | -| ![image](https://github.com/user-attachments/assets/28bd546b-0c00-48a2-a57e-982448d37ef2)| ![image](https://github.com/user-attachments/assets/a39c4d71-1368-4508-bca4-018a00549528) | - -| 查 Issue | 提 Issue | 回 Issue | -| ----------------------------------------- | --------------------------------------- | --------------------------------------- | -| ![image](https://github.com/user-attachments/assets/501c6ba0-20c4-480f-97ff-1f20d0a99136)| ![image](https://github.com/user-attachments/assets/d020b03d-74cd-49d2-a199-5d21154b7793)| ![image](https://github.com/user-attachments/assets/f6093cb1-b089-4ac9-ad2c-f1c8126fb86b) | +| 查 Issue | 提 Issue | 回 Issue | +| --- | --- | --- | +| ![image](https://github.com/user-attachments/assets/501c6ba0-20c4-480f-97ff-1f20d0a99136) | ![image](https://github.com/user-attachments/assets/d020b03d-74cd-49d2-a199-5d21154b7793) | ![image](https://github.com/user-attachments/assets/f6093cb1-b089-4ac9-ad2c-f1c8126fb86b) | [完整演示视频](https://www.bilibili.com/video/BV12eHUe8EkT/) - ## Agent 工作流 我们为猫猫预置了一个创建机器人的机器人,当得到用户 GitHub 仓库地址或名称时,它会使用创建工具,生成该仓库答疑机器人的各项配置(Prompt,、名字、 头像、开场白、引导语、工具集……),同时触发 Issue 和 Markdown 的入库任务。这些任务会拆分为多个子任务,将该仓库的所有已解决 issue 、高票回复以及所有 Markdown 文件内容经过 load -> split -> embed -> store 的加工过程进行知识库构建,作为机器人的回复知识依据。 @@ -79,6 +68,7 @@ 部署方案:[AWS](https://aws.amazon.com) + [Supabase](https://supabase.com) 你可以在这里看到完整方案: + - [私有化部署 - 本地启动服务](./docs/guides/self_hosted_local_cn.md) - [私有化部署 - 部署到 AWS ](./docs/guides/self_hosted_aws_cn.md) @@ -86,61 +76,59 @@ [![演示视频](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*spdZSbWsVhkAAAAAAAAAAAAADrPSAQ/fmt.webp)](https://www.youtube.com/watch?v=Al6R9Ye5mBY) - - ## ⚙️ 环境变量 本项目需要进行环境变量进行设置: ### Client -`.env.local` +`.env.local` -| 环境变量 | 类型 | 描述 | 示例 | -| ------------------- | ---- | -------------------------------------------- | ------------------------------------------------------------------------------------------------------ | -| 
`NEXT_PUBLIC_API_DOMAIN` | 必选 | 后端服务的 API 域名。 | `https://api.petercat.ai` | - +| 环境变量 | 类型 | 描述 | 示例 | +| --- | --- | --- | --- | +| `NEXT_PUBLIC_API_DOMAIN` | 必选 | 后端服务的 API 域名。 | `https://api.petercat.ai` | ### Server `.env` - -| 环境变量 | 类型          | 描述 | 示例 | -| ------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | -| **应用基础环境变量** | -| `API_URL` | 必选 | 后端服务的 API 域名 | `https://api.petercat.ai` -| `WEB_URL` | 必选 | 前端 Web 服务的域名 | `https://petercat.ai` -| `STATIC_URL` | 必选 | 静态资源域名 | `https://static.petercat.ai` -| **AWS 相关环境变量** | -| `X_GITHUB_SECRET_NAME` | 必选 | AWS 托管的 Github 私钥文件名 | `prod/githubapp/petercat/pem` +| 环境变量 | 类型          | 描述 | 示例 | +| --- | --- | --- | --- | +| **应用基础环境变量** | +| `API_URL` | 必选 | 后端服务的 API 域名 | `https://api.petercat.ai` | +| `WEB_URL` | 必选 | 前端 Web 服务的域名 | `https://petercat.ai` | +| `STATIC_URL` | 必选 | 静态资源域名 | `https://static.petercat.ai` | +| **AWS 相关环境变量** | +| `X_GITHUB_SECRET_NAME` | 必选 | AWS 托管的 Github 私钥文件名 | `prod/githubapp/petercat/pem` | | `STATIC_SECRET_NAME` | 可选 | AWS 托管的 CloudFront 签名私钥名称。如果配置了该项,将使用 CloudFront 签名 URL 来保护你的资源。更多信息请参阅 [AWS 文档](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)。 | `prod/petercat/static` | | `LLM_TOKEN_SECRET_NAME` | 可选 | AWS 托管的 llm 签名私钥名称。如果配置了该项,petercat 将使用 RSA 算法托管用户的 LLM Token | `prod/petercat/llm` | | `LLM_TOKEN_PUBLIC_NAME` | 可选 | AWS 托管的 llm 签名公钥名称。如果配置了该项,petercat 将使用 RSA 算法托管用户的 LLM Token | `prod/petercat/llm/pub` | | `STATIC_KEYPAIR_ID` | 可选 | AWS CloudFront 的 Key Pair ID。如果配置了该项,将使用 CloudFront 签名 URL 来保护你的资源。更多信息请参阅 [AWS 文档](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)。 | `APKxxxxxxxx` | -| `S3_TEMP_BUCKET_NAME` | 可选 | 用于托管 AWS 临时图片文件 S3 的 bucket | 
`xxx-temp` -| `SQS_QUEUE_URL`| 必选 | AWS SQS 消息队列 URL | `https://sqs.ap-northeast-1.amazonaws.com/xxx/petercat-task-queue` +| `S3_TEMP_BUCKET_NAME` | 可选 | 用于托管 AWS 临时图片文件 S3 的 bucket | `xxx-temp` | | **SUPABASE 相关 env** | -| `SUPABASE_URL` | 必选 | supabase 服务的 URL,可以在[这里](https://supabase.com/dashboard/project/_/settings/database)找到 | `https://***.supabase.co` | -| `SUPABASE_SERVICE_KEY` | 必选 | supabase 服务密钥,可以在[这里](https://supabase.com/dashboard/project/_/settings/database)找到 | `{{SUPABASE_SERVICE_KEY}}` | -| **Auth0 相关 env**| -| `AUTH0_DOMAIN` | 必选 | auth0 服务域名,从 auth0 / Application / Basic Information 下获取 | `petercat.us.auth0.com` -| `AUTH0_CLIENT_ID` | 必选 | auth0 客户端 ID,从 auth0 / Application / Basic Information 下获取 | `artfiUxxxx` -| `AUTH0_CLIENT_SECRET` | 必选 | auth0 客户端密钥, 从 auth0 / Application / Basic Information 下获取 | `xxxx-xxxx-xxx` -| `API_IDENTIFIER` | 必选 | auth0 的 API Identifier | `https://petercat.us.auth0.com/api/v2/` -| **LLM相关的 env** | -| `OPENAI_API_KEY` | 必选 | OpenAI 的密钥 | `sk-xxxx` -| `OPENAI_BASE_URL` | 可选 | API 请求的基础 URL。仅在使用代理或服务模拟器时指定。| `https://api.openai.com/v1` -| `GEMINI_API_KEY` | 可选 | Gemini 的密钥 | `xxxx` -| `TAVILY_API_KEY` | 必选 | Tavily 的密钥 | `tvly-xxxxx` -| **注册为 Github App 的 env** | -| `X_GITHUB_APP_ID` | 可选 | 注册为 Github App 时,APPID | `123456` -| `X_GITHUB_APPS_CLIENT_ID` | 可选 | 注册为 Github App 时,APP 的 Client ID | `Iv1.xxxxxxx` -| `X_GITHUB_APPS_CLIENT_SECRET` | 可选 | 注册为 Github App 时,APP 的 Client 密钥 | `xxxxxxxx` -| **限流配置** | -| `RATE_LIMIT_ENABLED` | 可选 | 限流配置是否开启 | `True` -| `RATE_LIMIT_REQUESTS` | 可选 | 限流的请求数量 | `100` -| `RATE_LIMIT_DURATION` | 可选 | 限流的统计时长,单位为分钟 | `1` +| `SUPABASE_URL` | 必选 | supabase 服务的 URL,可以在[这里](https://supabase.com/dashboard/project/_/settings/database)找到 | `https://***.supabase.co` | +| `SUPABASE_SERVICE_KEY` | 必选 | supabase 服务密钥,可以在[这里](https://supabase.com/dashboard/project/_/settings/database)找到 | `{{SUPABASE_SERVICE_KEY}}` | +| **Auth0 相关 env** | +| `AUTH0_DOMAIN` | 必选 | auth0 服务域名,从 auth0 / Application / 
Basic Information 下获取 | `petercat.us.auth0.com` | +| `AUTH0_CLIENT_ID` | 必选 | auth0 客户端 ID,从 auth0 / Application / Basic Information 下获取 | `artfiUxxxx` | +| `AUTH0_CLIENT_SECRET` | 必选 | auth0 客户端密钥, 从 auth0 / Application / Basic Information 下获取 | `xxxx-xxxx-xxx` | +| `API_IDENTIFIER` | 必选 | auth0 的 API Identifier | `https://petercat.us.auth0.com/api/v2/` | +| **LLM 相关的 env** | +| `OPENAI_API_KEY` | 必选 | OpenAI 的密钥 | `sk-xxxx` | +| `OPENAI_BASE_URL` | 可选 | API 请求的基础 URL。仅在使用代理或服务模拟器时指定。 | `https://api.openai.com/v1` | +| `GEMINI_API_KEY` | 可选 | Gemini 的密钥 | `xxxx` | +| `TAVILY_API_KEY` | 必选 | Tavily 的密钥 | `tvly-xxxxx` | +| **注册为 Github App 的 env** | +| `X_GITHUB_APP_ID` | 可选 | 注册为 Github App 时,APPID | `123456` | +| `X_GITHUB_APPS_CLIENT_ID` | 可选 | 注册为 Github App 时,APP 的 Client ID | `Iv1.xxxxxxx` | +| `X_GITHUB_APPS_CLIENT_SECRET` | 可选 | 注册为 Github App 时,APP 的 Client 密钥 | `xxxxxxxx` | +| **限流配置** | +| `RATE_LIMIT_ENABLED` | 可选 | 限流配置是否开启 | `True` | +| `RATE_LIMIT_REQUESTS` | 可选 | 限流的请求数量 | `100` | +| `RATE_LIMIT_DURATION` | 可选 | 限流的统计时长,单位为分钟 | `1` | +| **RAG 服务配置** | +| `WHISKER_API_URL` | 必选 | WHISKER RAG 服务地址 | `http://....` | +| `WHISKER_API_KEY` | 必选 | WHISKER RAG 服务的 KEY | `sk-xxxx` | ## 🤝 参与贡献 @@ -181,19 +169,16 @@ yarn run publish:pypi ``` - ## 💼 企业版接入 -请把您的项目地址,使用场景,使用频率等信息发送至 [petercat.assistant@gmail.com ](petercat.assistant@gmail.com) - +请把您的项目地址,使用场景,使用频率等信息发送至 [petercat.assistant@gmail.com](mailto:petercat.assistant@gmail.com) ## 📧 反馈问题 猫猫还在养成阶段,难免有些 “小脾气”,遇到问题请对它宽容一些,可以通过以下两种途径告知铲屎官: - -* [提交 Issue](https://github.com/petercat-ai/petercat/issues/new/choose) -* [Discussions](https://github.com/petercat-ai/petercat/discussions) 提问 +- [提交 Issue](https://github.com/petercat-ai/petercat/issues/new/choose) +- [Discussions](https://github.com/petercat-ai/petercat/discussions) 提问 ## 👬 Contributors diff --git a/client/.kiwi/en/chunk.ts b/client/.kiwi/en/chunk.ts new file mode 100644 index 00000000..c53700ed --- /dev/null +++ b/client/.kiwi/en/chunk.ts @@ -0,0 +1,5 @@
+export default { + page: { + fanHui: 'Back', + }, +}; diff --git a/client/.kiwi/en/components.ts b/client/.kiwi/en/components.ts index 5e757c95..28dff3b0 100644 --- a/client/.kiwi/en/components.ts +++ b/client/.kiwi/en/components.ts @@ -92,4 +92,33 @@ export default { tOKEN: 'Token Management', dengLu: 'Login', }, + Header: { + gengXinDaiMaCang: 'Update Code Repository', + zhiShiLieBiao: 'Knowledge List', + bianJiJiQiRen: 'Edit Bot', + gengXinDaiMaCang2: 'Failed to update code repository {val1}', + chengGongGengXinDai: 'Successfully updated code repository', + }, + KnowledgeList: { + keYongZhuangTai: 'Available Status', + daXiao: 'Size', + xiangLiangHuaMoXing: 'Vector Model', + zhiShiLaiYuan: 'Knowledge Source', + leiXing: 'Type', + zhiShiMing: 'Knowledge Name', + }, + TaskButton: { + chaKanRenWu: 'View Task', + chongXinZhiXing: 'Retry', + shiBai: 'Failed', + ruKuZhong: 'Running', + dengDaiZhong: 'Waiting', + chongShiSuoXuanRen: 'Retry selected tasks', + }, + TaskList: { + meiYouChaXunDao: 'No tasks found', + }, + ChunkList: { + fenKuaiNeiRong: 'Chunk content', + } }; diff --git a/client/.kiwi/en/index.ts b/client/.kiwi/en/index.ts index 1bd359a1..97388780 100644 --- a/client/.kiwi/en/index.ts +++ b/client/.kiwi/en/index.ts @@ -1,6 +1,7 @@ import components from './components'; import edit from './edit'; import utils from './utils'; +import chunk from './chunk' import app from './app'; import release from './release'; import DeployBotModal from './DeployBotModal'; @@ -12,6 +13,7 @@ export default Object.assign( app, utils, edit, + chunk, release, DeployBotModal, }, diff --git a/client/.kiwi/ja/chunk.ts b/client/.kiwi/ja/chunk.ts new file mode 100644 index 00000000..fe45536f --- /dev/null +++ b/client/.kiwi/ja/chunk.ts @@ -0,0 +1,5 @@ +export default { + page: { + fanHui: '返回', + }, +}; diff --git a/client/.kiwi/ja/components.ts b/client/.kiwi/ja/components.ts index f1210268..9418dfeb 100644 --- a/client/.kiwi/ja/components.ts +++ b/client/.kiwi/ja/components.ts @@ 
-91,4 +91,33 @@ export default {
     tOKEN: 'トークン管理',
     dengLu: 'ログイン',
   },
+  Header: {
+    gengXinDaiMaCang: 'コードリポジトリを更新',
+    zhiShiLieBiao: '知識リスト',
+    bianJiJiQiRen: 'ボットを編集',
+    gengXinDaiMaCang2: 'コードリポジトリの更新に失敗しました{val1}',
+    chengGongGengXinDai: 'コードリポジトリを正常に更新しました',
+  },
+  KnowledgeList: {
+    keYongZhuangTai: '利用可能状態',
+    daXiao: 'サイズ',
+    xiangLiangHuaMoXing: 'ベクトルモデル',
+    zhiShiLaiYuan: '知識ソース',
+    leiXing: 'タイプ',
+    zhiShiMing: '知識名',
+  },
+  TaskButton: {
+    chaKanRenWu: 'タスク表示',
+    chongXinZhiXing: '再実行',
+    shiBai: '失敗',
+    ruKuZhong: 'インポート中',
+    dengDaiZhong: '待機中',
+    chongShiSuoXuanRen: '選択したタスクを再試行',
+  },
+  TaskList: {
+    meiYouChaXunDao: 'タスクが見つかりません',
+  },
+  ChunkList: {
+    fenKuaiNeiRong: 'チャンクコンテンツ',
+  },
 };
diff --git a/client/.kiwi/ja/index.ts b/client/.kiwi/ja/index.ts
index 1bd359a1..d1ad9165 100644
--- a/client/.kiwi/ja/index.ts
+++ b/client/.kiwi/ja/index.ts
@@ -1,6 +1,7 @@
 import components from './components';
 import edit from './edit';
 import utils from './utils';
+import chunk from './chunk';
 import app from './app';
 import release from './release';
 import DeployBotModal from './DeployBotModal';
@@ -8,6 +9,7 @@ import DeployBotModal from './DeployBotModal';
 export default Object.assign(
   {},
   {
+    chunk,
     components,
     app,
     utils,
diff --git a/client/.kiwi/ko/chunk.ts b/client/.kiwi/ko/chunk.ts
new file mode 100644
index 00000000..09e23710
--- /dev/null
+++ b/client/.kiwi/ko/chunk.ts
@@ -0,0 +1,5 @@
+export default {
+  page: {
+    fanHui: '돌아가기',
+  },
+};
diff --git a/client/.kiwi/ko/components.ts b/client/.kiwi/ko/components.ts
index ef23e3cc..5f0c43c9 100644
--- a/client/.kiwi/ko/components.ts
+++ b/client/.kiwi/ko/components.ts
@@ -91,4 +91,33 @@ export default {
     tOKEN: '토큰 관리',
     dengLu: '로그인',
   },
+  Header: {
+    gengXinDaiMaCang: '코드 저장소 업데이트',
+    zhiShiLieBiao: '지식 목록',
+    bianJiJiQiRen: '봇 편집',
+    gengXinDaiMaCang2: '코드 저장소 업데이트 실패 {val1}',
+    chengGongGengXinDai: '코드 저장소가 성공적으로 업데이트되었습니다',
+  },
+  KnowledgeList: {
+    keYongZhuangTai: '사용 가능 상태',
+    daXiao: '크기',
+    xiangLiangHuaMoXing: '벡터 모델',
+    zhiShiLaiYuan: '지식 출처',
+    leiXing: '유형',
+    zhiShiMing: '지식명',
+  },
+  TaskButton: {
+    chaKanRenWu: '작업 보기',
+    chongXinZhiXing: '재실행',
+    shiBai: '실패',
+    ruKuZhong: '가져오는 중',
+    dengDaiZhong: '대기 중',
+    chongShiSuoXuanRen: '선택한 작업 재시도',
+  },
+  TaskList: {
+    meiYouChaXunDao: '작업을 찾을 수 없음',
+  },
+  ChunkList: {
+    fenKuaiNeiRong: '청크 내용',
+  },
 };
diff --git a/client/.kiwi/ko/index.ts b/client/.kiwi/ko/index.ts
index 1bd359a1..a29f6627 100644
--- a/client/.kiwi/ko/index.ts
+++ b/client/.kiwi/ko/index.ts
@@ -2,12 +2,14 @@ import components from './components';
 import edit from './edit';
 import utils from './utils';
 import app from './app';
+import chunk from './chunk';
 import release from './release';
 import DeployBotModal from './DeployBotModal';
 
 export default Object.assign(
   {},
   {
+    chunk,
     components,
     app,
     utils,
diff --git a/client/.kiwi/zh-CN/chunk.ts b/client/.kiwi/zh-CN/chunk.ts
new file mode 100644
index 00000000..fe45536f
--- /dev/null
+++ b/client/.kiwi/zh-CN/chunk.ts
@@ -0,0 +1,5 @@
+export default {
+  page: {
+    fanHui: '返回',
+  },
+};
diff --git a/client/.kiwi/zh-CN/components.ts b/client/.kiwi/zh-CN/components.ts
index 11abc14f..05d0faa4 100644
--- a/client/.kiwi/zh-CN/components.ts
+++ b/client/.kiwi/zh-CN/components.ts
@@ -91,4 +91,33 @@ export default {
     tOKEN: 'Token 管理',
     dengLu: '登录',
   },
+  Header: {
+    gengXinDaiMaCang: '更新代码仓库',
+    zhiShiLieBiao: '知识列表',
+    bianJiJiQiRen: '编辑机器人',
+    gengXinDaiMaCang2: '更新代码仓库失败{val1}',
+    chengGongGengXinDai: '成功更新代码仓库',
+  },
+  KnowledgeList: {
+    keYongZhuangTai: '可用状态',
+    daXiao: '大小',
+    xiangLiangHuaMoXing: '向量化模型',
+    zhiShiLaiYuan: '知识来源',
+    leiXing: '类型',
+    zhiShiMing: '知识名',
+  },
+  TaskButton: {
+    chaKanRenWu: '查看任务',
+    chongXinZhiXing: '重新执行',
+    shiBai: '失败',
+    ruKuZhong: '入库中',
+    dengDaiZhong: '等待中',
+    chongShiSuoXuanRen: '重试所选任务',
+  },
+  TaskList: {
+    meiYouChaXunDao: '没有查询到任务',
+  },
+  ChunkList: {
+    fenKuaiNeiRong: '分块内容',
+  },
 };
diff --git
a/client/.kiwi/zh-CN/index.ts b/client/.kiwi/zh-CN/index.ts
index 32243bc2..24cac3aa 100644
--- a/client/.kiwi/zh-CN/index.ts
+++ b/client/.kiwi/zh-CN/index.ts
@@ -1,4 +1,5 @@
 import components from './components';
+import chunk from './chunk';
 import release from './release';
 import DeployBotModal from './DeployBotModal';
 import edit from './edit';
@@ -14,5 +15,6 @@ export default Object.assign(
     edit,
     DeployBotModal,
     release,
+    chunk,
   },
 );
diff --git a/client/.kiwi/zh-TW/chunk.ts b/client/.kiwi/zh-TW/chunk.ts
new file mode 100644
index 00000000..fe45536f
--- /dev/null
+++ b/client/.kiwi/zh-TW/chunk.ts
@@ -0,0 +1,5 @@
+export default {
+  page: {
+    fanHui: '返回',
+  },
+};
diff --git a/client/.kiwi/zh-TW/components.ts b/client/.kiwi/zh-TW/components.ts
index 13c68ad3..daa6f0bc 100644
--- a/client/.kiwi/zh-TW/components.ts
+++ b/client/.kiwi/zh-TW/components.ts
@@ -91,4 +91,33 @@ export default {
     tOKEN: 'Token 管理',
     dengLu: '登入',
   },
+  Header: {
+    gengXinDaiMaCang: '更新程式碼庫',
+    zhiShiLieBiao: '知識列表',
+    bianJiJiQiRen: '編輯機器人',
+    gengXinDaiMaCang2: '更新程式碼儲存庫失敗{val1}',
+    chengGongGengXinDai: '成功更新程式碼儲存庫',
+  },
+  KnowledgeList: {
+    keYongZhuangTai: '可用狀態',
+    daXiao: '大小',
+    xiangLiangHuaMoXing: '向量化模型',
+    zhiShiLaiYuan: '知識來源',
+    leiXing: '類型',
+    zhiShiMing: '知識名稱',
+  },
+  TaskButton: {
+    chaKanRenWu: '查看任務',
+    chongXinZhiXing: '重新執行',
+    shiBai: '失敗',
+    ruKuZhong: '匯入中',
+    dengDaiZhong: '等待中',
+    chongShiSuoXuanRen: '重試所選任務',
+  },
+  TaskList: {
+    meiYouChaXunDao: '沒有查詢到任務',
+  },
+  ChunkList: {
+    fenKuaiNeiRong: '分塊內容',
+  },
 };
diff --git a/client/.kiwi/zh-TW/index.ts b/client/.kiwi/zh-TW/index.ts
index 1bd359a1..030e1468 100644
--- a/client/.kiwi/zh-TW/index.ts
+++ b/client/.kiwi/zh-TW/index.ts
@@ -1,6 +1,7 @@
 import components from './components';
 import edit from './edit';
 import utils from './utils';
+import chunk from './chunk';
 import app from './app';
 import release from './release';
 import DeployBotModal from './DeployBotModal';
@@ -8,6 +9,7 @@ import DeployBotModal from
'./DeployBotModal'; export default Object.assign( {}, { + chunk, components, app, utils, diff --git a/client/app/contexts/GlobalContext.tsx b/client/app/contexts/GlobalContext.tsx index 94eb3d1f..e26ba378 100644 --- a/client/app/contexts/GlobalContext.tsx +++ b/client/app/contexts/GlobalContext.tsx @@ -33,6 +33,7 @@ export const GlobalProvider: React.FC<{ children: React.ReactNode }> = ({ const [language, setLanguage] = useState('zh-CN'); const searchParams = useSearchParams(); const router = useRouter(); + const updateLanguage = useCallback( (newLang: string) => { setLanguage(newLang); @@ -64,17 +65,17 @@ export const GlobalProvider: React.FC<{ children: React.ReactNode }> = ({ defaultLang; updateLanguage(matchingLang); } - }, [searchParams]); + }, [searchParams, updateLanguage]); useEffect(() => { if (I18N.setLang) { I18N.setLang(language); } // Update URL query parameters - const newSearchParams = new URLSearchParams(searchParams); + const newSearchParams = new URLSearchParams(searchParams.toString()); newSearchParams.set('lang', language); - router.push(`?${newSearchParams.toString()}`); - }, [language]); + router.replace(`?${newSearchParams.toString()}`); + }, [language, router, searchParams]); return ( void; -}; -const ChunkCard = ({ update_timestamp, content, file_path }: RAGDoc) => { - const { isOpen, onOpen, onOpenChange } = useDisclosure(); - return ( - <> -
-
-

{content}

-
-
-

- {file_path} -

- - {content?.length} {I18N.components.Knowledge.ziFu} - -
-

- {I18N.components.Knowledge.gengXinYu} - {convertToLocalTime(update_timestamp ?? '')} -

-
- - - {() => ( - <> - - {file_path} - - -
{content}
-
- - )} -
-
- - ); -}; - -const ChunkList = ({ data }: { data: RAGDoc[] }) => { - return ( -
- {data.map((card, index) => ( - - ))} -
- ); -}; - -export default function Knowledge({ repoName, goBack }: IProps) { - const { botProfile } = useBot(); - const [pageSize, setPageSize] = React.useState(12); - const [pageNumber, setPageNumber] = React.useState(1); - const { taskProfile } = useBotTask(); - const { data: RagDocData, isFetching } = useBotRAGChunkList( - repoName, - pageSize, - pageNumber, - true, - taskProfile.running, - ); - const list = React.useMemo(() => { - return RagDocData?.rows ?? []; - }, [RagDocData]); - return ( -
-
-
- { - e.preventDefault(); - goBack(); - }} - > - - {botProfile.name} - - / - {I18N.components.Knowledge.zhiShiKuFenDuan} -
-
- {}} - mode={'pageHeader'} - /> -
-
-
- - {list.length > 0 || isFetching ? ( - - ) : ( -
- {taskProfile.running ? ( -

{I18N.components.Knowledge.zhiShiKuGengXin}

- ) : ( -

{I18N.components.Knowledge.zhiShiKuWeiKong}

- )} -
- )} -
- {RagDocData?.total && RagDocData?.total > 12 ? ( - setPageNumber(page)} - classNames={{ - cursor: 'bg-gray-700', - }} - /> - ) : null} -
-
- ); -} diff --git a/client/app/factory/edit/components/KnowledgeBtn.tsx b/client/app/factory/edit/components/KnowledgeBtn.tsx deleted file mode 100644 index c1e13750..00000000 --- a/client/app/factory/edit/components/KnowledgeBtn.tsx +++ /dev/null @@ -1,128 +0,0 @@ -'use client'; -import I18N from '@/app/utils/I18N'; -import React, { useEffect } from 'react'; -import 'react-toastify/dist/ReactToastify.css'; -import { Button, Tooltip } from '@nextui-org/react'; -import { useGetBotRagTask } from '@/app/hooks/useBot'; -import { convertToLocalTime } from '@/app/utils/time'; -import BookIcon from '@/public/icons/BookIcon'; -import { TaskStatus } from '@/types/task'; -import RefreshIcon from '@/public/icons/RefreshIcon'; -import { useBotTask } from './TaskContext'; - -type IProps = { - repoName: string; - onClick: () => void; - mode: 'configItem' | 'pageHeader'; -}; - -const KnowledgeBtn = (props: IProps) => { - const { onClick, repoName, mode } = props; - const { setTaskProfile } = useBotTask(); - const [shouldGetTask, setShouldGetTask] = React.useState(); - const [taskLoading, setTaskLoading] = React.useState(true); - const [allowShowChunkList, setAllowShowChunkList] = - React.useState(false); - - const { data: taskList } = useGetBotRagTask( - repoName, - // if task is running, query every 5s - // if task is completed, query once - taskLoading, - ); - - const taskCnt = taskList?.length ?? 0; - // compute task running status by taskList - useEffect(() => { - if (!taskList) return; - let completeTaskCnt = 0; - taskList.forEach((item) => { - if ( - [TaskStatus.CANCELLED, TaskStatus.COMPLETED, TaskStatus.ERROR].includes( - item.status as TaskStatus, - ) - ) { - completeTaskCnt++; - } - }); - if (completeTaskCnt > 0) { - setAllowShowChunkList(true); - } - const isTaskRunning = taskList?.length === completeTaskCnt ? 
false : true; - setTaskLoading(isTaskRunning); - setTaskProfile({ running: isTaskRunning }); - }, [taskList]); - - // close the interval query - useEffect(() => { - return () => { - setShouldGetTask(false); - }; - }, []); - - if (mode === 'pageHeader') { - return ( - <> - {taskList && taskList?.length > 0 ? ( - - {I18N.components.KnowledgeBtn.zuiJinGengXinYu} - {convertToLocalTime(taskList[taskCnt - 1]?.created_at ?? '')} - - ) : null} - - - ); - } - if (mode === 'configItem') { - return ( - <> - - {taskList && taskList?.length > 0 ? ( - - {I18N.components.KnowledgeBtn.zuiJinGengXinYu} - {convertToLocalTime(taskList[taskCnt - 1]?.created_at ?? '')} - - ) : null} - - ); - } - return <>; -}; - -export default KnowledgeBtn; diff --git a/client/app/factory/edit/page.tsx b/client/app/factory/edit/page.tsx index 4b574510..518f15e9 100644 --- a/client/app/factory/edit/page.tsx +++ b/client/app/factory/edit/page.tsx @@ -30,7 +30,7 @@ import { } from '@/app/hooks/useBot'; import { useAgreement, useAgreementStatus } from '@/app/hooks/useAgreement'; import FullPageSkeleton from '@/components/FullPageSkeleton'; -import { isEmpty, map, size } from 'lodash'; +import { isEmpty, map } from 'lodash'; import { Chat } from '@petercatai/assistant'; import AIBtnIcon from '@/public/icons/AIBtnIcon'; import ChatIcon from '@/public/icons/ChatIcon'; @@ -38,9 +38,7 @@ import ConfigIcon from '@/public/icons/ConfigIcon'; import SaveIcon from '@/public/icons/SaveIcon'; import { useBot } from '@/app/contexts/BotContext'; import { useUser, useUserRepos } from '@/app/hooks/useUser'; -import Knowledge from './components/Knowledge'; import { useGlobal } from '@/app/contexts/GlobalContext'; -import KnowledgeBtn from './components/KnowledgeBtn'; import { BotTaskProvider } from './components/TaskContext'; import { useSearchParams } from 'next/navigation'; import { extractFullRepoNameFromGitHubUrl } from '@/app/utils/tools'; @@ -53,6 +51,7 @@ import AgreementKO from '../../../.kiwi/ko/agreement.md'; 
import AgreementZhTW from '../../../.kiwi/zh-TW/agreement.md'; import 'react-toastify/dist/ReactToastify.css'; +import BookIcon from '@/public/icons/BookIcon'; const API_HOST = process.env.NEXT_PUBLIC_API_DOMAIN; enum VisibleTypeEnum { @@ -434,13 +433,18 @@ export default function Edit() { {isEdit && activeTab === ConfigTypeEnum.MANUAL_CONFIG && botProfile.repoName ? ( - { - setVisibleType(VisibleTypeEnum.KNOWLEDGE_DETAIL); + ) : null} @@ -641,16 +645,6 @@ export default function Edit() { ) : ( <> )} - {visibleType === VisibleTypeEnum.KNOWLEDGE_DETAIL ? ( - { - setVisibleType(VisibleTypeEnum.BOT_CONFIG); - }} - > - ) : ( - <> - )} { const { bot, userId } = props; const router = useRouter(); const { deleteBot, isLoading, isSuccess } = useBotDelete(); - const { data: taskInfo } = useGetBotRagTask(bot.repo_name!, false); useEffect(() => { if (isSuccess) { @@ -90,30 +84,6 @@ const BotCard = (props: { bot: BotInfo; userId: string }) => { const onDelete = (id: string) => { deleteBot(id); }; - const renderTaskStatusIcon = (taskList: RagTask[]) => { - const status = taskList.find((task) => task.status === TaskStatus.ERROR) - ? TaskStatus.ERROR - : taskList.every((task) => - [ - TaskStatus.CANCELLED, - TaskStatus.COMPLETED, - TaskStatus.ERROR, - ].includes(task.status as TaskStatus), - ) - ? TaskStatus.COMPLETED - : 'others'; - if (status === TaskStatus.COMPLETED) { - return ; - } - if (status === TaskStatus.ERROR) { - return ; - } - return ( - - - - ); - }; return ( <> @@ -211,14 +181,14 @@ const BotCard = (props: { bot: BotInfo; userId: string }) => { { src="../images/refresh.svg" alt={I18N.components.BotCard.gengXinZhiShi} className="z-10 cursor-pointer" + onClick={() => + router.push( + `/knowledge?repo_name=${bot.repo_name}&bot_id=${bot.id}`, + ) + } /> @@ -261,10 +236,6 @@ const BotCard = (props: { bot: BotInfo; userId: string }) => { src="/images/statistic.svg" /> - -
- {renderTaskStatusIcon(taskInfo ?? [])} -
diff --git a/client/app/hooks/useBot.ts b/client/app/hooks/useBot.ts index bb986c80..e1e89ca5 100644 --- a/client/app/hooks/useBot.ts +++ b/client/app/hooks/useBot.ts @@ -9,16 +9,13 @@ import { getBotDetail, getBotInfoByRepoName, getBotList, - getChunkList, getGitAvatarByRepoName, - getRagTask, getUserPeterCatAppRepos, publicBot, unPublicBot, updateBot, } from '@/app/services/BotsController'; import { - keepPreviousData, useMutation, useQuery, useQueryClient, @@ -127,37 +124,6 @@ export function useBotConfigGenerator() { }; } -export const useBotRAGChunkList = ( - repoName: string, - page_size: number, - page_number: number, - enabled: boolean = true, - refetchInterval: boolean = false, -) => { - return useQuery({ - queryKey: [`rag.chunk.list`, page_number, repoName], - queryFn: async () => getChunkList(repoName, page_size, page_number), - select: (data) => data, - enabled, - retry: true, - placeholderData: keepPreviousData, - refetchInterval: refetchInterval ? 5 * 1000 : undefined, - }); -}; - -export const useGetBotRagTask = ( - repoName: string, - refetchInterval: boolean = true, -) => { - return useQuery({ - queryKey: [`rag.task`, repoName], - queryFn: async () => getRagTask(repoName), - select: (data) => data, - enabled: !!repoName, - retry: true, - refetchInterval: refetchInterval ? 
3 * 1000 : undefined, - }); -}; export function usePublicBot() { const mutation = useMutation({ diff --git a/client/app/hooks/useRAG.ts b/client/app/hooks/useRAG.ts new file mode 100644 index 00000000..5f78fb2f --- /dev/null +++ b/client/app/hooks/useRAG.ts @@ -0,0 +1,85 @@ +import { + getTaskList, + PageResponse, + RAGKnowledge, + PageParams, + RAGTask, + getKnowledgeList, + getChunkList, + RAGChunk, + reloadRepo, + restartTask +} from '@/app/services/RAGController'; +import { + useQuery, + UseQueryOptions, +} from '@tanstack/react-query'; +import { useMutation, UseMutationOptions } from '@tanstack/react-query'; + + + + +export function useKnowledgeList( + params: PageParams, + options?: Omit, Error>, 'queryKey' | 'queryFn'> +) { + return useQuery({ + queryKey: ['knowledge', params], + queryFn: () => getKnowledgeList(params), + refetchOnWindowFocus: true, + ...options + }); +} + +export function useTaskList( + params: PageParams, + enabled:boolean, + options?: Omit, Error>, 'queryKey' | 'queryFn'> +) { + return useQuery({ + queryKey: ['task', params], + queryFn: () => getTaskList(params), + refetchOnWindowFocus: true, + enabled:enabled, + ...options + }); +} + + +export function useChunkList( + params: PageParams, + options?: Omit, Error>, 'queryKey' | 'queryFn'> +) { + return useQuery({ + queryKey: ['chunk', params], + queryFn: () => getChunkList(params), + refetchOnWindowFocus: true, + ...options + }); +} + +export function useReloadRepo() { + const mutation = useMutation({ + mutationFn: reloadRepo, + }); + return { + data: mutation.data, + reloadRepo: mutation.mutate, + isLoading: mutation.isPending, + error: mutation.error, + isSuccess: mutation.isSuccess, + }; +} + +export function useRestartTask() { + const mutation = useMutation({ + mutationFn: restartTask, + }); + return { + data: mutation.data, + restartTask: mutation.mutate, + isLoading: mutation.isPending, + error: mutation.error, + isSuccess: mutation.isSuccess, + }; +} diff --git 
a/client/app/knowledge/chunk/page.tsx b/client/app/knowledge/chunk/page.tsx new file mode 100644 index 00000000..e516ffd8 --- /dev/null +++ b/client/app/knowledge/chunk/page.tsx @@ -0,0 +1,42 @@ +'use client'; + +import I18N from '@/app/utils/I18N'; +import { useRouter, useSearchParams } from 'next/navigation'; +import React from 'react'; +import ChunkList from '../components/ChunkList'; + +export default function ChunkPage() { + const searchParams = useSearchParams(); + const knowledge_id = searchParams.get('knowledge_id'); + const router = useRouter(); + if (!knowledge_id) { + router.push('/'); + } + return ( +
+
{ + window.history.back(); + }} + className="p-2 flex gap-2 cursor-pointer" + > + + + + {I18N.chunk.page.fanHui} +
+ +
+ ); +} diff --git a/client/app/knowledge/components/ChunkList.tsx b/client/app/knowledge/components/ChunkList.tsx new file mode 100644 index 00000000..efc099a4 --- /dev/null +++ b/client/app/knowledge/components/ChunkList.tsx @@ -0,0 +1,170 @@ +import I18N from '@/app/utils/I18N'; +import { useState } from 'react'; +import { + Modal, + ModalBody, + ModalContent, + ModalHeader, + Pagination, + useDisclosure, +} from '@nextui-org/react'; +import { + PageParams, + RAGChunk, + RAGKnowledge, +} from '@/app/services/RAGController'; +import { useChunkList } from '@/app/hooks/useRAG'; +import MySpinner from '@/components/Spinner'; + +const DeleteIcon = () => { + return ( +
+ + + +
+ ); +}; + +const EditIcon = () => { + return ( +
+ + + + +
+ ); +}; + +const ChunkCard = ({ chunk }: { chunk: RAGChunk }) => { + const { isOpen, onOpen, onClose } = useDisclosure(); + + return ( + <> +
+
+ {chunk.context} +
+
+
+
+
+ {chunk.chunk_id} +
+
+
+ {chunk.context?.length} {I18N.components.Knowledge.ziFu} +
+
+
+
+
+ {I18N.components.Knowledge.gengXinYu} + {new Date(chunk.updated_at).toLocaleString()} +
+
+
+ + + + {(onClose) => ( + <> + + + {I18N.components.ChunkList.fenKuaiNeiRong} + + + +
{chunk.context}
+
+ + )} +
+
+ + ); +}; +export default function ChunkList({ knowledge_id }: { knowledge_id: string }) { + const [pageParams, setPageParams] = useState>({ + page: 1, + page_size: 12, + order_direction: 'asc', + eq_conditions: { + knowledge_id: knowledge_id, + }, + }); + const { data, isLoading } = useChunkList(pageParams); + return ( +
+ +
+ {(data?.items ?? []).map((item, index) => ( + + ))} +
+
+ {data && ( +
+ { + setPageParams((prevParams) => ({ + ...prevParams, + page: page, + })); + }} + className="flex justify-center" + initialPage={1} + size="lg" + classNames={{ + cursor: 'bg-gray-700', + }} + /> +
+ )} +
+ ); +} diff --git a/client/app/knowledge/components/Header.tsx b/client/app/knowledge/components/Header.tsx new file mode 100644 index 00000000..808162d1 --- /dev/null +++ b/client/app/knowledge/components/Header.tsx @@ -0,0 +1,87 @@ +import I18N from '@/app/utils/I18N'; + +import { useEffect } from 'react'; +import { useReloadRepo } from '@/app/hooks/useRAG'; +import RefreshIcon from '@/public/icons/RefreshIcon'; +import { Button, Tooltip } from '@nextui-org/react'; +import { useRouter } from 'next/navigation'; +import TaskButton from './TaskButton'; +import { toast } from 'react-toastify'; +import LoadingIcon from '@/public/icons/LoadingIcon'; + +export default function KnowledgePageHeader(props: { + repo_name: string; + bot_id: string; +}) { + const { repo_name, bot_id } = props; + const router = useRouter(); + const { data, reloadRepo, error, isLoading, isSuccess } = useReloadRepo(); + + useEffect(() => { + if (isSuccess) { + toast.success(I18N.components.Header.chengGongGengXinDai); + } + }, [isSuccess]); + + useEffect(() => { + if (error) { + toast.error( + I18N.template?.(I18N.components.Header.gengXinDaiMaCang2, { + val1: error.message, + }), + ); + } + }, [error]); + + return ( +
+
+
router.push(`/factory/edit?id=${bot_id}`)} + className="cursor-pointer" + > + + + + + +
+
+
+
+ + {repo_name} + + + / + + + {I18N.components.Header.zhiShiLieBiao} + +
+
+
+
+
+ + +
+
+ ); +} diff --git a/client/app/knowledge/components/KnowledgeList.tsx b/client/app/knowledge/components/KnowledgeList.tsx new file mode 100644 index 00000000..f4425df1 --- /dev/null +++ b/client/app/knowledge/components/KnowledgeList.tsx @@ -0,0 +1,111 @@ +import I18N from '@/app/utils/I18N'; +import { useState } from 'react'; +import { + Table, + TableHeader, + TableColumn, + TableBody, + TableRow, + TableCell, + Pagination, + Chip, +} from '@nextui-org/react'; +import { PageParams, RAGKnowledge } from '@/app/services/RAGController'; +import { useKnowledgeList } from '@/app/hooks/useRAG'; +import MySpinner from '@/components/Spinner'; + +export default function KnowledgeList({ repo_name }: { repo_name: string }) { + const [pageParams, setPageParams] = useState>({ + page: 1, + page_size: 10, + order_direction: 'asc', + eq_conditions: { + space_id: repo_name, + }, + }); + + const { data, isLoading, error, isFetching, refetch } = + useKnowledgeList(pageParams); + return ( +
+ +
+ + + + {I18N.components.KnowledgeList.zhiShiMing} + + {I18N.components.KnowledgeList.leiXing} + + {I18N.components.KnowledgeList.zhiShiLaiYuan} + + + {I18N.components.KnowledgeList.xiangLiangHuaMoXing} + + {I18N.components.KnowledgeList.daXiao} + + {I18N.components.KnowledgeList.keYongZhuangTai} + + + + {!data || !data.items ? ( + <> + ) : ( + data.items + .filter((item) => item.knowledge_type !== 'folder') + .map((item, index) => ( + + + + {item.knowledge_name} + + + {item.knowledge_type} + {item.source_type} + {item.embedding_model_name} + + {item.file_size + ? `${(item.file_size / 1024).toFixed(2)} KB` + : '-'} + + + + {item.enabled ? 'Enabled' : 'Disabled'} + + + + )) + )} + +
+
+ {data && ( +
+ { + setPageParams((prevParams) => ({ + ...prevParams, + page: page, + })); + }} + className="flex justify-center" + initialPage={1} + size="lg" + classNames={{ + cursor: 'bg-gray-700', + }} + /> +
+ )} +
+
+ ); +} diff --git a/client/app/knowledge/components/TaskButton.tsx b/client/app/knowledge/components/TaskButton.tsx new file mode 100644 index 00000000..8db951bc --- /dev/null +++ b/client/app/knowledge/components/TaskButton.tsx @@ -0,0 +1,142 @@ +import I18N from '@/app/utils/I18N'; + +import { + Button, + Checkbox, + Popover, + PopoverContent, + PopoverTrigger, +} from '@nextui-org/react'; +import { useEffect, useState } from 'react'; +import { useRestartTask, useTaskList } from '@/app/hooks/useRAG'; +import { TaskList } from './TaskList'; +import { Pagination } from '@nextui-org/react'; +import { PageParams, RAGTask } from '@/app/services/RAGController'; +import MySpinner from '@/components/Spinner'; + +const statusOptions = [ + { value: 'pending', label: I18N.components.TaskButton.dengDaiZhong }, + { value: 'running', label: I18N.components.TaskButton.ruKuZhong }, + { value: 'failed', label: I18N.components.TaskButton.shiBai }, + { value: 'pending_retry', label: I18N.components.TaskButton.chongXinZhiXing }, +]; + +const TaskButton = ({ space_id }: { space_id: string }) => { + const [isOpen, setIsOpen] = useState(false); + const [pageParams, setPageParams] = useState>({ + page: 1, + page_size: 8, + eq_conditions: { space_id: space_id }, + }); + const { + data, + isLoading: isTaskLoading, + refetch, + } = useTaskList(pageParams, isOpen); + const [selectedTaskIds, setSelectedTaskIds] = useState([]); + const { + isLoading: isRestartLoading, + restartTask, + isSuccess, + } = useRestartTask(); + + useEffect(() => { + if (isSuccess) { + refetch(); + setSelectedTaskIds([]); + } + }, [isSuccess, refetch]); + + return ( + + + + + +
+
+ {statusOptions.map((option) => ( + { + setPageParams((prevParams) => { + const newConditions = { ...prevParams.eq_conditions }; + if (option.value === prevParams.eq_conditions?.status) { + delete newConditions.status; + } else { + newConditions.status = option.value; + } + return { + ...prevParams, + eq_conditions: newConditions, + page: 1, + page_size: 8, + }; + }); + }} + > + {option.label} + + ))} +
+ + { + if (isSelected && status === 'failed') { + setSelectedTaskIds((prevSelected) => [...prevSelected, id]); + } else { + setSelectedTaskIds((prevSelected) => + prevSelected.filter((taskId) => taskId !== id), + ); + } + }} + selectedTaskIds={selectedTaskIds} + /> + + {selectedTaskIds.length > 0 ? ( + + ) : ( + <> + )} + {data && ( + { + setPageParams((prevParams) => ({ + ...prevParams, + page: page, + })); + }} + classNames={{ + cursor: 'bg-gray-700', + }} + /> + )} +
+
+
+ ); +}; + +export default TaskButton; diff --git a/client/app/knowledge/components/TaskList.tsx b/client/app/knowledge/components/TaskList.tsx new file mode 100644 index 00000000..f8c8c507 --- /dev/null +++ b/client/app/knowledge/components/TaskList.tsx @@ -0,0 +1,153 @@ +import I18N from '@/app/utils/I18N'; +import { RAGTask } from '@/app/services/RAGController'; +import LoadingIcon from '@/public/icons/LoadingIcon'; +import { Checkbox } from '@nextui-org/react'; + +const SuccessIcon = () => { + return ( +
+ + + +
+ ); +}; + +interface SubTaskProps { + task_id: string; + datetime: string; + error_message?: string; + status: 'success' | 'failed' | 'running' | 'pending'; + isSelected: boolean; + handleCheckBoxChange?: ( + id: string, + status: 'success' | 'failed' | 'running' | 'pending', + isSelected: boolean, + ) => void; +} +export const SubTask: React.FC = ({ + task_id, + datetime, + error_message, + status, + handleCheckBoxChange, + isSelected, +}) => { + const getStatusIcon = () => { + switch (status) { + case 'success': + return ; + case 'failed': + return ( + { + handleCheckBoxChange?.(task_id, status, isSelected); + }} + isSelected={isSelected} + classNames={{ + base: 'border-red-400', + wrapper: ['before:border-red-400'], + icon: 'border-red-600', + }} + color={'danger'} + /> + ); + case 'running': + return ; + default: + return ; + } + }; + + const getContainerStyle = () => { + if (status === 'failed') { + return 'bg-red-100'; + } + return 'bg-zinc-100'; + }; + + const getTextStyle = () => { + if (status === 'failed') { + return 'text-red-700 opacity-60'; + } + return 'text-gray-800'; + }; + const description = status === 'failed' ? error_message : status; + + return ( +
+
{getStatusIcon()}
+
+
+
+ {task_id} +
+
+ {new Date(datetime).toLocaleString()} +
+
+
+ {description} +
+
+
+ ); +}; + +interface TaskListProps { + tasks: RAGTask[]; + handleCheckBoxChange: ( + id: string, + status: 'success' | 'failed' | 'running' | 'pending', + isSelected: boolean, + ) => void; + selectedTaskIds: string[]; +} + +export const TaskList: React.FC = ({ + tasks, + handleCheckBoxChange, + selectedTaskIds = [], +}) => { + return ( +
+ {tasks.length === 0 ? ( +
+ {I18N.components.TaskList.meiYouChaXunDao} +
+ ) : ( + tasks.map((task) => ( + + )) + )} +
+ ); +}; diff --git a/client/app/knowledge/page.tsx b/client/app/knowledge/page.tsx new file mode 100644 index 00000000..4510d044 --- /dev/null +++ b/client/app/knowledge/page.tsx @@ -0,0 +1,25 @@ +'use client'; + +import { useSearchParams } from 'next/navigation'; +import React from 'react'; +import KnowledgePageHeader from './components/Header'; +import KnowledgeList from './components/KnowledgeList'; +import { ToastContainer } from 'react-toastify'; + +import 'react-toastify/dist/ReactToastify.css'; + +export default function KnowledgePage() { + const searchParams = useSearchParams(); + const repo_name = searchParams.get('repo_name'); + const bot_id = searchParams.get('bot_id'); + return ( +
+ + + +
+ ); +} diff --git a/client/app/services/BotsController.ts b/client/app/services/BotsController.ts index 9660d62f..ee975f72 100644 --- a/client/app/services/BotsController.ts +++ b/client/app/services/BotsController.ts @@ -4,8 +4,6 @@ import axios from 'axios'; import { BotProfile } from '@/app/interface'; export declare type Bot = Tables<'bots'>; -export declare type RAGDoc = Tables<'rag_docs'>; -export declare type RagTask = Tables<'rag_tasks'>; export declare type GithubRepoConfig = Tables<'github_repo_config'>; axios.defaults.withCredentials = true; @@ -78,24 +76,6 @@ export async function getGitAvatarByRepoName(repo_name: string) { return axios.get(`${apiDomain}/api/bot/git/avatar?repo_name=${repo_name}`); } -export async function getChunkList( - repo_name: string, - page_size: number, - page_number: number, -): Promise<{ rows: RAGDoc[]; total: number }> { - const response = await axios.get( - `${apiDomain}/api/rag/chunk/list?repo_name=${repo_name}&page_size=${page_size}&page_number=${page_number}`, - ); - return response.data; -} - -export async function getRagTask(repo_name: string): Promise { - const response = await axios.get( - `${apiDomain}/api/rag/task/latest?repo_name=${repo_name}`, - ); - return response.data.data; -} - export async function getBotApprovalList(bot_id: string, status: string) { const response = await axios.get( `${apiDomain}/api/bot/approval/list?bot_id=${bot_id}&status=${status}`, diff --git a/client/app/services/RAGController.ts b/client/app/services/RAGController.ts new file mode 100644 index 00000000..01307aed --- /dev/null +++ b/client/app/services/RAGController.ts @@ -0,0 +1,86 @@ +import axios from "axios"; + +const apiDomain = process.env.NEXT_PUBLIC_API_DOMAIN; + +export interface RAGKnowledge { + knowledge_id:string; + space_id: string; + knowledge_type: string; + knowledge_name: string; + source_type: string; + embedding_model_name: string; + file_size?: number; + enabled: boolean; + source_config?: string; +} + +export 
interface RAGTask { + task_id: string; + status: "success" | "failed" | "running" | "pending"; + knowledge_id: string; + error_message?: string; + space_id: string; + user_id?: string; + tenant_id: string; + created_at: string; + updated_at: string; +} +export interface RAGChunk { + chunk_id: string; + context: string; + knowledge_id: string; + space_id: string; + tenant_id: string; + created_at: string; + updated_at: string; + embedding?: string; +} + +export interface PageParams { + page: number; + page_size: number; + order_by?: string; + order_direction?: 'asc' | 'desc'; + eq_conditions?: Partial>; +} + + +export interface PageResponse { + items: T[]; + total: number; + page: number; + page_size: number; + total_pages: number; +} + +export async function getKnowledgeList( + params:PageParams +): Promise> { + const response = await axios.post(`${apiDomain}/api/rag/knowledge/list`,params ) + return response.data; +} +export async function getTaskList( + params:PageParams +): Promise> { + const response = await axios.post(`${apiDomain}/api/rag/task/list`,params ) + return response.data; +} + +export async function getChunkList( + params:PageParams +): Promise> { + const response = await axios.post(`${apiDomain}/api/rag/chunk/list`,params ) + return response.data; +} + +export async function reloadRepo(repo_name: string): Promise { + await axios.post(`${apiDomain}/api/rag/knowledge/repo/reload`, { + repo_name: repo_name, + }); +} + +export async function restartTask(task_id_list: string[]): Promise { + await axios.post(`${apiDomain}/api/rag/task/restart`, { + task_id_list: task_id_list, + }); +} diff --git a/client/public/icons/LoadingIcon.tsx b/client/public/icons/LoadingIcon.tsx index e03ae591..696bd126 100644 --- a/client/public/icons/LoadingIcon.tsx +++ b/client/public/icons/LoadingIcon.tsx @@ -1,18 +1,20 @@ -const KnowledgeTaskRunningIcon = () => ( - - - +const KnowledgeTaskRunningIcon = (props: { className?: string }) => ( +
+ + + +
); export default KnowledgeTaskRunningIcon; diff --git a/docker/Dockerfile.subscriber b/docker/Dockerfile.subscriber deleted file mode 100644 index 2e5148f9..00000000 --- a/docker/Dockerfile.subscriber +++ /dev/null @@ -1,16 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.12 - -# Set the working directory inside the container to Lambda's task root -WORKDIR ${LAMBDA_TASK_ROOT} - -# Copy requirements.txt first to leverage Docker's layer caching -COPY requirements.txt . - -# Install the specified packages into the Lambda task root -RUN pip3 install --target "${LAMBDA_TASK_ROOT}" -r requirements.txt --no-cache-dir - -# Copy the rest of the application code to the Lambda task root -COPY . . - -# Set the CMD to the Lambda handler -CMD [ "handler.lambda_handler" ] diff --git a/docs/guides/publish_petercat-utils.md b/docs/guides/publish_petercat-utils.md deleted file mode 100644 index 15adddb8..00000000 --- a/docs/guides/publish_petercat-utils.md +++ /dev/null @@ -1,43 +0,0 @@ -# Maunally - -## Debug locally - -On project root (where `pyproject.toml` located). -```bash -pip install -e $PWD -``` - -Generating distribution archives -The next step is to generate distribution packages for the package. These are archives that are uploaded to the Python Package Index and can be installed by pip. - -Make sure you have the latest version of PyPA’s build installed: - -```bash -python3 -m pip install --upgrade build -``` - -Build petercat_utils: - -```bash -npm run build:pypi -``` - -Make sure your have the latest version of twine installed: - -> Uploading distributions to https://upload.pypi.org/legacy/ -ERROR InvalidDistribution: Metadata is missing required fields: Name, Version. - Make sure the distribution includes the files where those fields are - specified, and is using a supported Metadata-Version: 1.0, 1.1, 1.2, - 2.0, 2.1, 2.2. - -Once the error occurs, ensure that the Twine version is 6.0.1. 
-For more details, refer to the issue here: https://github.com/pypi/warehouse/issues/15611. - -```bash -pip install twine==6.0.1 -``` - -Publish it: -```bash -npm run publish:pypi -``` \ No newline at end of file diff --git a/package.json b/package.json index 669c086e..2d152c33 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,6 @@ "assistant:server": "concurrently \"yarn run server\" \"yarn run assistant\"", "build:docker": "docker build -t petercat .", "build:pypi": "rm -rf dist && python3 -m build", - "publish:test": "python3 -m twine upload --repository petercat-utils dist/* ", "publish:pypi": "python3 -m twine upload --repository pypi dist/* " }, "engines": { diff --git a/petercat_utils/README.md b/petercat_utils/README.md deleted file mode 100644 index 55227e62..00000000 --- a/petercat_utils/README.md +++ /dev/null @@ -1,211 +0,0 @@ -![banner](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*m23lS7sVRDgAAAAAAAAAAAAADrPSAQ/original) - -

PeterCat

- -
- - 简体中文 | [English](./README.en-US.md) | [日本語](./README.ja-JP.md) - - **专为社区维护者和开发者打造的智能答疑机器人解决方案。** - - [![npm](https://img.shields.io/npm/dm/@petercatai/assistant.svg)](https://www.npmjs.com/package/@petercatai/assistant) - [![Version](https://img.shields.io/npm/v/petercat-lui/latest.svg)](https://www.npmjs.com/package/petercat-lui) - [![DockerHub Version](https://img.shields.io/docker/v/petercatai/petercat?logo=docker&logoColor=white)](https://hub.docker.com/r/petercatai/petercat) - [![CI Test Status](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml/badge.svg)](https://github.com/petercat-ai/petercat/actions/workflows/pr-tests.yml) - [![codecov](https://codecov.io/github/petercat-ai/petercat/graph/badge.svg?token=2HAO18FB6X)](https://codecov.io/github/petercat-ai/petercat) - [![License](https://img.shields.io/badge/License-MIT%40Peter%20Cat-yellow.svg)](https://github.com/petercat-ai/petercat/blob/master/LICENSE) - -
- - -## 🏠 主页 - -[🐱窝: petercat.ai](https://petercat.ai) - - -## ✨ 特性 - -我们提供对话式答疑 Agent 配置系统、自托管部署方案和便捷的一体化应用 SDK,让您能够为自己的 GitHub 仓库一键创建智能答疑机器人,并快速集成到各类官网或项目中, 为社区提供更高效的技术支持生态。 - -### 对话即创造 - -仅需要告知你的仓库地址或名称,PeterCat 即可自动完成创建机器人的全部流程 - -![对话即创造](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*aQ9uRoNZGe8AAAAAAAAAAAAADrPSAQ/original) - - -### 知识自动入库 - -机器人创建后,所有相关Github 文档和 issue 将自动入库,作为机器人的知识依据 - -![知识自动入库](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*hkkFSaR1UqgAAAAAAAAAAAAADrPSAQ/original) - -### 多平台集成 - -多种集成方式自由选择,如对话应用 SDK 集成至官网,Github APP一键安装至 Github 仓库等 - -| ![官网](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*REw3QYgdJ44AAAAAAAAAAAAADrPSAQ/original) | ![GitHub](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*jlYzSqlcpRIAAAAAAAAAAAAADrPSAQ/original) | -|:--------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------:| - -[完整演示视频](https://www.bilibili.com/video/BV1KiHUeFE4p) - -### 不止是 QA 机器人 - -| 项目信息查询 | 回复 Discussion | -| ----------------------------------------- | --------------------------------------- | -| ![search_repo](https://github.com/user-attachments/assets/a7e6d37b-4674-4fd0-a89b-678e10ec01c8) | ![ discussion replay](https://github.com/user-attachments/assets/e28a3ded-dc6c-4ba5-9543-05c41bbff331)| - - -| PR Summary | Code Review | -| ----------------------------------------- | --------------------------------------- | -| ![image](https://github.com/user-attachments/assets/28bd546b-0c00-48a2-a57e-982448d37ef2)| ![image](https://github.com/user-attachments/assets/a39c4d71-1368-4508-bca4-018a00549528) | - -| 查 Issue | 提 Issue | 回 Issue | -| ----------------------------------------- | --------------------------------------- | --------------------------------------- | -| ![image](https://github.com/user-attachments/assets/501c6ba0-20c4-480f-97ff-1f20d0a99136)| 
![image](https://github.com/user-attachments/assets/d020b03d-74cd-49d2-a199-5d21154b7793)| ![image](https://github.com/user-attachments/assets/f6093cb1-b089-4ac9-ad2c-f1c8126fb86b) | - -[完整演示视频](https://www.bilibili.com/video/BV12eHUe8EkT/) - - -## Agent 工作流 - -我们为猫猫预置了一个创建机器人的机器人,当得到用户 GitHub 仓库地址或名称时,它会使用创建工具,生成该仓库答疑机器人的各项配置(Prompt,、名字、 头像、开场白、引导语、工具集……),同时触发 Issue 和 Markdown 的入库任务。这些任务会拆分为多个子任务,将该仓库的所有已解决 issue 、高票回复以及所有 Markdown 文件内容经过 load -> split -> embed -> store 的加工过程进行知识库构建,作为机器人的回复知识依据。 - -![Agent workflow](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*m24tTIZpW7cAAAAAAAAAAAAADrPSAQ/original) - -## 📦 私有化部署 - -部署方案:[AWS](https://aws.amazon.com) + [Supabase](https://supabase.com) - -你可以在这里看到完整方案: -- [私有化部署 - 本地启动服务](./docs/guides/self_hosted_local_cn.md) -- [私有化部署 - 部署到 AWS ](./docs/guides/self_hosted_aws_cn.md) - -![架构方案](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*0_aUTJpyx1YAAAAAAAAAAAAADrPSAQ/original) - -[![演示视频](https://mdn.alipayobjects.com/huamei_j8gzmo/afts/img/A*spdZSbWsVhkAAAAAAAAAAAAADrPSAQ/fmt.webp)](https://www.youtube.com/watch?v=Al6R9Ye5mBY) - - - -## ⚙️ 环境变量 - -本项目需要进行环境变量进行设置: - -### Client -`.env.local` - - -| 环境变量 | 类型 | 描述 | 示例 | -| ------------------- | ---- | -------------------------------------------- | ------------------------------------------------------------------------------------------------------ | -| `NEXT_PUBLIC_API_DOMAIN` | 必选 | 后端服务的 API 域名。 | `https://api.petercat.ai` | - - -### Server - -`.env` - - -| 环境变量 | 类型          | 描述 | 示例 | -| ------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | -| **应用基础环境变量** | -| `API_URL` | 必选 | 后端服务的 API 域名 | `https://api.petercat.ai` -| `WEB_URL` | 必选 | 前端 Web 服务的域名 | `https://petercat.ai` -| `STATIC_URL` | 必选 | 静态资源域名 | `https://static.petercat.ai` -| 
**AWS 相关环境变量** | -| `X_GITHUB_SECRET_NAME` | 必选 | AWS 托管的 Github 私钥文件名 | `prod/githubapp/petercat/pem` -| `STATIC_SECRET_NAME` | 可选 | AWS 托管的 CloudFront 签名私钥名称。如果配置了该项,将使用 CloudFront 签名 URL 来保护你的资源。更多信息请参阅 [AWS 文档](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)。 | `prod/petercat/static` | -| `LLM_TOKEN_SECRET_NAME` | 可选 | AWS 托管的 llm 签名私钥名称。如果配置了该项,petercat 将使用 RSA 算法托管用户的 LLM Token | `prod/petercat/llm` | -| `LLM_TOKEN_PUBLIC_NAME` | 可选 | AWS 托管的 llm 签名公钥名称。如果配置了该项,petercat 将使用 RSA 算法托管用户的 LLM Token | `prod/petercat/llm/pub` | -| `STATIC_KEYPAIR_ID` | 可选 | AWS CloudFront 的 Key Pair ID。如果配置了该项,将使用 CloudFront 签名 URL 来保护你的资源。更多信息请参阅 [AWS 文档](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-trusted-signers.html)。 | `APKxxxxxxxx` | -| `S3_TEMP_BUCKET_NAME` | 可选 | 用于托管 AWS 临时图片文件 S3 的 bucket | `xxx-temp` -| `SQS_QUEUE_URL`| 必选 | AWS SQS 消息队列 URL | `https://sqs.ap-northeast-1.amazonaws.com/xxx/petercat-task-queue` -| **SUPABASE 相关 env** | -| `SUPABASE_URL` | 必选 | supabase 服务的 URL,可以在[这里](https://supabase.com/dashboard/project/_/settings/database)找到 | `https://***.supabase.co` | -| `SUPABASE_SERVICE_KEY` | 必选 | supabase 服务密钥,可以在[这里](https://supabase.com/dashboard/project/_/settings/database)找到 | `{{SUPABASE_SERVICE_KEY}}` | -| **Auth0 相关 env**| -| `AUTH0_DOMAIN` | 必选 | auth0 服务域名,从 auth0 / Application / Basic Information 下获取 | `petercat.us.auth0.com` -| `AUTH0_CLIENT_ID` | 必选 | auth0 客户端 ID,从 auth0 / Application / Basic Information 下获取 | `artfiUxxxx` -| `AUTH0_CLIENT_SECRET` | 必选 | auth0 客户端密钥, 从 auth0 / Application / Basic Information 下获取 | `xxxx-xxxx-xxx` -| `API_IDENTIFIER` | 必选 | auth0 的 API Identifier | `https://petercat.us.auth0.com/api/v2/` -| **LLM相关的 env** | -| `OPENAI_API_KEY` | 必选 | OpenAI 的密钥 | `sk-xxxx` -| `OPENAI_BASE_URL` | 可选 | API 请求的基础 URL。仅在使用代理或服务模拟器时指定。| `https://api.openai.com/v1` -| `GEMINI_API_KEY` | 可选 | Gemini 的密钥 | `xxxx` -| `TAVILY_API_KEY` | 必选 | Tavily 
的密钥 | `tvly-xxxxx` -| **注册为 Github App 的 env** | -| `X_GITHUB_APP_ID` | 可选 | 注册为 Github App 时,APPID | `123456` -| `X_GITHUB_APPS_CLIENT_ID` | 可选 | 注册为 Github App 时,APP 的 Client ID | `Iv1.xxxxxxx` -| `X_GITHUB_APPS_CLIENT_SECRET` | 可选 | 注册为 Github App 时,APP 的 Client 密钥 | `xxxxxxxx` -| **限流配置** | -| `RATE_LIMIT_ENABLED` | 可选 | 限流配置是否开启 | `True` -| `RATE_LIMIT_REQUESTS` | 可选 | 限流的请求数量 | `100` -| `RATE_LIMIT_DURATION` | 可选 | 限流的统计时长,单位为分钟 | `1` - -## 🤝 参与贡献 - -> PeterCat 使用 yarn 作为包管理器 - -```bash -git clone https://github.com/petercat-ai/petercat.git - -# 安装依赖 -yarn run bootstrap - -# 调试 client -yarn run client - -# 调试 assistant -yarn run assistant - -# 调试 server -yarn run server - -# 本地启动网站 -yarn run client:server - -# 本地启动 assistant 组件 -yarn run assistant:server - -# assistant 构建 -cd assistant -yarn run build -npm publish - -# docker 构建 -yarn run build:docker - -# pypi 构建 -yarn run build:pypi -yarn run publish:pypi - -``` - - -## 💼 企业版接入 - -请把您的项目地址,使用场景,使用频率等信息发送至 [petercat.assistant@gmail.com](petercat.assistant@gmail.com) -或者扫码加入我们的交流群 - -| ![钉钉群](https://github.com/user-attachments/assets/0b46736c-26d3-49c2-95c0-b033173a3a2d) | ![GitHub](https://github.com/user-attachments/assets/b5d3b1a7-c751-4937-8480-55b95614c057) | -|:--------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------:| - - -## 📧 反馈问题 - -猫猫还在养成阶段,难免有些 “小脾气”,遇到问题请对它宽容一些,可以通过以下两种途径告知铲屎官: - - -* [提交 Issue](https://github.com/petercat-ai/petercat/issues/new/choose) -* [Discussions](https://github.com/petercat-ai/petercat/discussions) 提问 - -## 👬 Contributors - -![https://github.com/petercat-ai/petercat/graphs/contributors](https://contrib.rocks/image?repo=petercat-ai/petercat) - -## 💗 Sponsor - -[Ant Design](https://ant.design/) - -## 📄 License - -MIT@[PeterCat](https://github.com/petercat-ai/petercat/blob/main/LICENSE) diff --git 
a/petercat_utils/__init__.py b/petercat_utils/__init__.py deleted file mode 100644 index 24c24049..00000000 --- a/petercat_utils/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from .db.client.supabase import get_client -from .rag_helper import github_file_loader, retrieval, issue_retrieval, task, git_task, git_issue_task, git_doc_task -from .utils.env import get_env_variable - -__all__ = [ - "get_client", - "get_env_variable", - "github_file_loader", - "retrieval", - "issue_retrieval", - "task", - "git_task", - "git_issue_task", - "git_doc_task" -] diff --git a/petercat_utils/data_class.py b/petercat_utils/data_class.py index 8b0b7a99..c8cf238d 100644 --- a/petercat_utils/data_class.py +++ b/petercat_utils/data_class.py @@ -64,27 +64,6 @@ class S3Config(BaseModel): file_path: Optional[str] = None -class GitDocConfig(BaseModel): - repo_name: str - """File path of the documentation file. eg:'docs/blog/build-ghost.zh-CN.md'""" - file_path: Optional[str] = "" - branch: Optional[str] = "main" - commit_id: Optional[str] = "" - - -class RAGGitDocConfig(GitDocConfig): - bot_id: Optional[str] = "" - - -class GitIssueConfig(BaseModel): - repo_name: str - issue_id: str - - -class RAGGitIssueConfig(GitIssueConfig): - bot_id: str - - class AutoNameEnum(Enum): def _generate_next_value_(name, start, count, last_values): return name @@ -99,11 +78,6 @@ class TaskStatus(AutoNameEnum): ERROR = auto() -class TaskType(AutoNameEnum): - GIT_DOC = auto() - GIT_ISSUE = auto() - - class GitDocTaskNodeType(AutoNameEnum): TREE = auto() BLOB = auto() diff --git a/petercat_utils/rag_helper/git_doc_task.py b/petercat_utils/rag_helper/git_doc_task.py deleted file mode 100644 index 08f03e15..00000000 --- a/petercat_utils/rag_helper/git_doc_task.py +++ /dev/null @@ -1,163 +0,0 @@ -from typing import Optional - -from github import Github, Repository - -from .git_task import GitTask -from ..data_class import RAGGitDocConfig, TaskStatus, TaskType, GitDocTaskNodeType -from ..rag_helper import retrieval - 
-g = Github() - - -def get_path_sha(repo: Repository.Repository, sha: str, path: Optional[str] = None): - if not path: - return sha - else: - tree_data = repo.get_git_tree(sha) - for item in tree_data.tree: - if path.split("/")[0] == item.path: - return get_path_sha(repo, item.sha, "/".join(path.split("/")[1:])) - - -def add_rag_git_doc_task(config: RAGGitDocConfig, extra=None): - if extra is None: - extra = { - "node_type": None, - "from_task_id": None, - } - repo = g.get_repo(config.repo_name) - - commit_id = ( - config.commit_id - if config.commit_id - else repo.get_branch(config.branch).commit.sha - ) - if config.file_path == "" or config.file_path is None: - extra["node_type"] = GitDocTaskNodeType.TREE.value - - if not extra.get("node_type"): - content = repo.get_contents(config.file_path, ref=commit_id) - if isinstance(content, list): - extra["node_type"] = GitDocTaskNodeType.TREE.value - else: - extra["node_type"] = GitDocTaskNodeType.BLOB.value - - sha = get_path_sha(repo, commit_id, config.file_path) - doc_task = GitDocTask( - commit_id=commit_id, - sha=sha, - repo_name=config.repo_name, - node_type=extra["node_type"], - path=config.file_path, - ) - res = doc_task.save() - doc_task.send() - return res - - -class GitDocTask(GitTask): - node_type: GitDocTaskNodeType - - def __init__( - self, - commit_id, - node_type: GitDocTaskNodeType, - sha, - path, - repo_name, - status=TaskStatus.NOT_STARTED, - from_id=None, - id=None, - retry_count=0, - ): - super().__init__( - type=TaskType.GIT_DOC, - from_id=from_id, - id=id, - status=status, - repo_name=repo_name, - retry_count=retry_count, - ) - self.commit_id = commit_id - self.node_type = GitDocTaskNodeType(node_type) - self.sha = sha - self.path = path - - def extra_save_data(self): - data = { - "commit_id": self.commit_id, - "node_type": self.node_type.value, - "path": self.path, - "sha": self.sha, - } - return data - - def handle_tree_node(self): - repo = g.get_repo(self.repo_name) - tree_data = 
repo.get_git_tree(self.sha) - - task_list = list( - filter( - lambda item: item["path"].endswith(".md") - or item["node_type"] == GitDocTaskNodeType.TREE.value, - map( - lambda item: { - "repo_name": self.repo_name, - "commit_id": self.commit_id, - "status": TaskStatus.NOT_STARTED.value, - "node_type": (item.type + "").upper(), - "from_task_id": self.id, - "path": "/".join(filter(lambda s: s, [self.path, item.path])), - "sha": item.sha, - }, - tree_data.tree, - ), - ) - ) - - if len(task_list) > 0: - result = self.get_table().insert(task_list).execute() - - for record in result.data: - doc_task = GitDocTask( - id=record["id"], - commit_id=record["commit_id"], - sha=record["sha"], - repo_name=record["repo_name"], - node_type=record["node_type"], - path=record["path"], - ) - doc_task.send() - - return ( - self.get_table() - .update( - { - "metadata": { - "tree": list(map(lambda item: item.raw_data, tree_data.tree)), - }, - "status": TaskStatus.COMPLETED.value, - } - ) - .eq("id", self.id) - .execute() - ) - - def handle_blob_node(self): - retrieval.add_knowledge_by_doc( - RAGGitDocConfig( - repo_name=self.repo_name, - file_path=self.path, - commit_id=self.commit_id, - ) - ) - return self.update_status(TaskStatus.COMPLETED) - - def handle(self): - self.update_status(TaskStatus.IN_PROGRESS) - if self.node_type == GitDocTaskNodeType.TREE: - return self.handle_tree_node() - elif self.node_type == GitDocTaskNodeType.BLOB: - return self.handle_blob_node() - else: - raise ValueError(f"Unsupported node type [{self.node_type}]") diff --git a/petercat_utils/rag_helper/git_issue_task.py b/petercat_utils/rag_helper/git_issue_task.py deleted file mode 100644 index a8690422..00000000 --- a/petercat_utils/rag_helper/git_issue_task.py +++ /dev/null @@ -1,111 +0,0 @@ -from github import Github - -from .git_task import GitTask -from ..data_class import GitIssueTaskNodeType, TaskStatus, TaskType, RAGGitIssueConfig -from ..rag_helper import issue_retrieval - -g = Github() - - -def 
add_rag_git_issue_task(config: RAGGitIssueConfig): - g.get_repo(config.repo_name) - - issue_task = GitIssueTask( - issue_id="", - node_type=GitIssueTaskNodeType.REPO, - bot_id=config.bot_id, - repo_name=config.repo_name, - ) - res = issue_task.save() - issue_task.send() - - return res - - -class GitIssueTask(GitTask): - issue_id: str - node_type: GitIssueTaskNodeType - - def __init__( - self, - issue_id, - node_type: GitIssueTaskNodeType, - bot_id, - repo_name, - status=TaskStatus.NOT_STARTED, - from_id=None, - id=None, - retry_count=0, - ): - super().__init__( - bot_id=bot_id, - type=TaskType.GIT_ISSUE, - from_id=from_id, - id=id, - status=status, - repo_name=repo_name, - retry_count=retry_count, - ) - self.issue_id = issue_id - self.node_type = GitIssueTaskNodeType(node_type) - - def extra_save_data(self): - return { - "issue_id": self.issue_id, - "node_type": self.node_type.value, - } - - def handle(self): - self.update_status(TaskStatus.IN_PROGRESS) - if self.node_type == GitIssueTaskNodeType.REPO: - return self.handle_repo_node() - elif self.node_type == GitIssueTaskNodeType.ISSUE: - return self.handle_issue_node() - else: - raise ValueError(f"Unsupported node type [{self.node_type}]") - - def handle_repo_node(self): - repo = g.get_repo(self.repo_name) - repo.get_issues() - issues = [issue for issue in repo.get_issues()] - task_list = list( - map( - lambda item: { - "repo_name": self.repo_name, - "issue_id": str(item.number), - "status": TaskStatus.NOT_STARTED.value, - "node_type": GitIssueTaskNodeType.ISSUE.value, - "from_task_id": self.id, - "bot_id": self.bot_id, - }, - issues, - ), - ) - if len(task_list) > 0: - result = self.get_table().insert(task_list).execute() - for record in result.data: - issue_task = GitIssueTask( - id=record["id"], - issue_id=record["issue_id"], - repo_name=record["repo_name"], - node_type=record["node_type"], - bot_id=record["bot_id"], - status=record["status"], - from_id=record["from_task_id"], - ) - issue_task.send() - - return 
( - self.get_table() - .update({"status": TaskStatus.COMPLETED.value}) - .eq("id", self.id) - .execute() - ) - - def handle_issue_node(self): - issue_retrieval.add_knowledge_by_issue( - RAGGitIssueConfig( - repo_name=self.repo_name, bot_id=self.bot_id, issue_id=self.issue_id - ) - ) - return self.update_status(TaskStatus.COMPLETED) diff --git a/petercat_utils/rag_helper/git_task.py b/petercat_utils/rag_helper/git_task.py deleted file mode 100644 index 8b37f152..00000000 --- a/petercat_utils/rag_helper/git_task.py +++ /dev/null @@ -1,100 +0,0 @@ -import json -from abc import ABC, abstractmethod - -import boto3 - -from ..data_class import TaskStatus, TaskType -from ..db.client.supabase import get_client -from ..utils.env import get_env_variable - -sqs = boto3.client("sqs") - -TABLE_NAME_MAP = {TaskType.GIT_DOC: "rag_tasks", TaskType.GIT_ISSUE: "git_issue_tasks"} -SQS_QUEUE_URL = get_env_variable("SQS_QUEUE_URL") - - -# Base GitTask Class -class GitTask(ABC): - type: TaskType - - def __init__( - self, - type, - repo_name, - status=TaskStatus.NOT_STARTED, - from_id=None, - id=None, - retry_count=0, - ): - self.type = type - self.id = id - self.from_id = from_id - self.status = status - self.repo_name = repo_name - self.retry_count = retry_count - - @staticmethod - def get_table_name(type: TaskType): - return TABLE_NAME_MAP[type] - - @property - def table_name(self): - return GitTask.get_table_name(self.type) - - @property - def raw_data(self): - data = { - **self.extra_save_data(), - "repo_name": self.repo_name, - "from_task_id": self.from_id, - "status": self.status.value, - } - return data - - def get_table(self): - supabase = get_client() - return supabase.table(self.table_name) - - def update_status(self, status: TaskStatus): - return ( - self.get_table() - .update({"status": status.value}) - .eq("id", self.id) - .execute() - ) - - def save(self): - res = self.get_table().insert(self.raw_data).execute() - self.id = res.data[0]["id"] - return res - - @abstractmethod 
- def extra_save_data(self): - pass - - @abstractmethod - def handle(self): - pass - - def send(self): - assert self.id, "Task ID needed, save it first" - assert self.type, "Task type needed, set it first" - - response = sqs.send_message( - QueueUrl=SQS_QUEUE_URL, - DelaySeconds=10, - MessageBody=( - json.dumps( - { - "task_id": self.id, - "task_type": self.type.value, - "retry_count": self.retry_count, - } - ) - ), - ) - message_id = response["MessageId"] - print( - f"task_id={self.id}, task_type={self.type.value}, message_id={message_id}, retry_count={self.retry_count}" - ) - return message_id diff --git a/petercat_utils/rag_helper/github_file_loader.py b/petercat_utils/rag_helper/github_file_loader.py deleted file mode 100644 index 59c0eff5..00000000 --- a/petercat_utils/rag_helper/github_file_loader.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -This file was originally sourced from the https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/document_loaders/github.py -and it has been modified based on the requirements provided by petercat. 
-""" - -import base64 -from typing import Callable, Dict, List, Optional -from github import Github -from langchain_core.documents import Document - -class GithubFileLoader: - repo: str - github: Github - """Load GitHub File""" - file_path: str - branch: str = "main" - file_filter: Optional[Callable[[str], bool]] - commit_id: str = None - file_sha: str = None - github_api_url: str = "https://api.github.com" - - def __init__(self, **data: Dict): - self.repo = data["repo"] - self.file_path = data["file_path"] - self.branch = data["branch"] - self.file_filter = data["file_filter"] - self.github = Github() - if "commit_id" in data and data["commit_id"]: - self.commit_id = data["commit_id"] - else: - self.commit_id = self.get_commit_id_by_branch(self.branch) - - def get_commit_id_by_branch(self, branch: str) -> str: - repo = self.github.get_repo(self.repo) - branch_info = repo.get_branch(branch) - return branch_info.commit.sha - - def get_file_content_by_path(self, path: str) -> str: - repo = self.github.get_repo(self.repo) - file_content = repo.get_contents(path, ref=self.commit_id) - self.file_sha = file_content.sha - return base64.b64decode(file_content.content).decode("utf-8") - - def load(self) -> List[Document]: - content = self.get_file_content_by_path(self.file_path) - metadata = { - "path": self.file_path, - "source": f"{self.github_api_url}/{self.repo}/blob/" - f"{self.branch}/{self.file_path}", - } - return [Document(page_content=content, metadata=metadata)] diff --git a/petercat_utils/rag_helper/issue_retrieval.py b/petercat_utils/rag_helper/issue_retrieval.py deleted file mode 100644 index defb843d..00000000 --- a/petercat_utils/rag_helper/issue_retrieval.py +++ /dev/null @@ -1,114 +0,0 @@ -from typing import Any - -from github import Github, IssueComment -from github.Issue import Issue -from langchain_community.vectorstores import SupabaseVectorStore -from langchain_core.documents import Document -from langchain_openai import OpenAIEmbeddings -from 
petercat_utils import get_client -from petercat_utils.data_class import RAGGitIssueConfig - -g = Github() - -TABLE_NAME = "rag_issues" -QUERY_NAME = "match_rag_issues" -CHUNK_SIZE = 2000 -CHUNK_OVERLAP = 200 - -reaction_scores_map = { - '+1': 1, - '-1': -1, - 'confused': -0.5, - 'eyes': 0.5, - 'heart': 2, - 'hooray': 1.5, - 'laugh': 1, - 'rocket': 1, -} - - -def supabase_embedding(documents, **kwargs: Any): - from langchain_text_splitters import CharacterTextSplitter - - try: - text_splitter = CharacterTextSplitter( - chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP - ) - docs = text_splitter.split_documents(documents) - embeddings = OpenAIEmbeddings() - vector_store = SupabaseVectorStore.from_documents( - docs, - embeddings, - client=get_client(), - table_name=TABLE_NAME, - query_name=QUERY_NAME, - chunk_size=CHUNK_SIZE, - **kwargs, - ) - return vector_store - except Exception as e: - print(e) - return None - - -def get_reactions_score(comment: IssueComment): - reactions = comment.reactions - score = 0 - for key in reaction_scores_map: - score += reactions[key] - return score - - -def get_issue_document_list(issue: Issue): - all_comments = sorted([{ - "id": comment.id, - "url": comment.url, - "html_url": comment.html_url, - "content": comment.body, - "reaction_score": get_reactions_score(comment) - } for comment in issue.get_comments()], - key=lambda x: x['reaction_score'], - reverse=True) - document_list = [Document(page_content=comment["content"], - metadata={key: value for key, value in comment.items() if key != "content"} - ) for - comment in all_comments if comment['reaction_score'] > 0] - return document_list - - -def add_knowledge_by_issue(config: RAGGitIssueConfig): - supabase = get_client() - is_added_query = ( - supabase.table(TABLE_NAME) - .select("id, repo_name, issue_id, bot_id") - .eq("repo_name", config.repo_name) - .eq("issue_id", config.issue_id) - .eq("bot_id", config.bot_id) - .eq("comment_id", None) - .execute() - ) - - if not 
is_added_query.data: - issue = g.get_repo(config.repo_name).get_issue(int(config.issue_id)) - document_list = get_issue_document_list(issue) - - issue_store = supabase_embedding( - documents=[Document(page_content=issue.body, - metadata={"id": config.issue_id, "url": issue.url, "html_url": issue.html_url})], - repo_name=config.repo_name, - issue_id=config.issue_id, - bot_id=config.bot_id, - ) - comment_stores = [ - supabase_embedding( - documents=[document], - repo_name=config.repo_name, - issue_id=config.issue_id, - bot_id=config.bot_id, - comment_id=document.metadata["id"] - ) - for document in document_list - ] - return issue_store + comment_stores - else: - return True diff --git a/petercat_utils/rag_helper/retrieval.py b/petercat_utils/rag_helper/retrieval.py deleted file mode 100644 index 1cb889e1..00000000 --- a/petercat_utils/rag_helper/retrieval.py +++ /dev/null @@ -1,210 +0,0 @@ -import json -from typing import Any, Dict - -from langchain_community.vectorstores import SupabaseVectorStore -from langchain_openai import OpenAIEmbeddings - -from .github_file_loader import GithubFileLoader -from ..data_class import GitDocConfig, RAGGitDocConfig, S3Config -from ..db.client.supabase import get_client - -TABLE_NAME = "rag_docs" -QUERY_NAME = "match_embedding_docs" -CHUNK_SIZE = 2000 -CHUNK_OVERLAP = 200 - - -def convert_document_to_dict(document): - return document.page_content - - -def init_retriever(search_kwargs): - embeddings = OpenAIEmbeddings() - vector_store = SupabaseVectorStore( - embedding=embeddings, - client=get_client(), - table_name=TABLE_NAME, - query_name=QUERY_NAME, - chunk_size=CHUNK_SIZE, - ) - - return vector_store.as_retriever(search_kwargs=search_kwargs) - - -def init_s3_Loader(config: S3Config): - from langchain_community.document_loaders import S3DirectoryLoader - - loader = S3DirectoryLoader(config.s3_bucket, prefix=config.file_path) - return loader - - -# TODO init_github_issue_loader -# def init_github_issue_loader(config: 
GitIssueConfig): -# from langchain_community.document_loaders import GitHubIssuesLoader - -# loader = GitHubIssuesLoader( -# repo=config.repo_name, -# access_token=ACCESS_TOKEN, -# page=config.page, -# per_page=config.per_page, -# state=config.state -# ) -# return loader - - -def init_github_file_loader(config: GitDocConfig): - loader = GithubFileLoader( - repo=config.repo_name, - branch=config.branch, - file_path=config.file_path, - file_filter=lambda file_path: file_path.endswith(".md"), - commit_id=config.commit_id, - ) - return loader - - -def supabase_embedding(documents, **kwargs: Any): - from langchain_text_splitters import CharacterTextSplitter - - try: - text_splitter = CharacterTextSplitter( - chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP - ) - docs = text_splitter.split_documents(documents) - embeddings = OpenAIEmbeddings() - vector_store = SupabaseVectorStore.from_documents( - docs, - embeddings, - client=get_client(), - table_name=TABLE_NAME, - query_name=QUERY_NAME, - chunk_size=CHUNK_SIZE, - **kwargs, - ) - return vector_store - except Exception as e: - print(e) - return None - - -# TODO this feature is not implemented yet -# def add_knowledge_by_issues(config: GitIssueConfig): -# try: -# loader = init_github_issue_loader(config) -# documents = loader.load() -# store = supabase_embedding(documents, repo_name=config.repo_name) -# if (store): -# return json.dumps({ -# "success": True, -# "message": "Knowledge added successfully!", -# }) -# else: -# return json.dumps({ -# "success": False, -# "message": "Knowledge not added!" 
-# }) -# except Exception as e: -# return json.dumps({ -# "success": False, -# "message": str(e) -# }) - - -def add_knowledge_by_doc(config: RAGGitDocConfig): - loader = init_github_file_loader(config) - documents = loader.load() - supabase = get_client() - is_doc_added_query = ( - supabase.table(TABLE_NAME) - .select("id") - .eq("repo_name", config.repo_name) - .eq("commit_id", loader.commit_id) - .eq("file_path", config.file_path) - .limit(1) - .execute() - ) - if not is_doc_added_query.data: - is_doc_equal_query = ( - supabase.table(TABLE_NAME).select("id").eq("file_sha", loader.file_sha).limit(1) - ).execute() - if not is_doc_equal_query.data: - # If there is no file with the same file_sha, perform embedding. - store = supabase_embedding( - documents, - repo_name=config.repo_name, - commit_id=loader.commit_id, - file_sha=loader.file_sha, - file_path=config.file_path, - ) - return store - else: - # Prioritize obtaining the minimal set of records to avoid overlapping with the original records. - minimum_repeat_result = supabase.rpc('count_rag_docs_by_sha', {'file_sha_input': loader.file_sha}).execute() - target_filter = minimum_repeat_result.data[0] - # Copy the minimal set - insert_docs = ( - supabase.table(TABLE_NAME) - .select("*") - .eq("repo_name", target_filter['repo_name']) - .eq("file_path", target_filter['file_path']) - .eq("file_sha", target_filter['file_sha']) - .execute() - ) - new_commit_list = [ - { - **{k: v for k, v in item.items() if k != "id"}, - "repo_name": config.repo_name, - "commit_id": loader.commit_id, - "file_path": config.file_path, - } - for item in insert_docs.data - ] - insert_result = supabase.table(TABLE_NAME).insert(new_commit_list).execute() - return insert_result - else: - return True - - -def reload_knowledge(config: RAGGitDocConfig): - loader = init_github_file_loader(config) - documents = loader.load() - # TODO:检查历史的文件会如何处理?是否需要手动删除? 
- store = supabase_embedding( - documents, - repo_name=config.repo_name, - commit_id=loader.commit_id, - file_sha=loader.file_sha, - file_path=config.file_path, - ) - return store - - -def search_knowledge( - query: str, - repo_name: str, - meta_filter: Dict[str, Any] = {}, -): - retriever = init_retriever( - {"filter": {"metadata": meta_filter, "repo_name": repo_name}} - ) - docs = retriever.invoke(query) - documents_as_dicts = [convert_document_to_dict(doc) for doc in docs] - json_output = json.dumps(documents_as_dicts, ensure_ascii=False) - return json_output - - -def get_chunk_list(repo_name: str, page_size: int, page_number: int): - client = get_client() - query = ( - client.table(TABLE_NAME) - .select("id, content, file_path,update_timestamp") - .eq("repo_name", repo_name) - .limit(page_size) - .offset((page_number - 1) * page_size) - .execute() - ) - count_response = ( - client.table(TABLE_NAME).select("id").eq("repo_name", repo_name).execute() - ) - total_count = len(count_response.data) - return {"rows": query.data, "total": total_count} diff --git a/petercat_utils/rag_helper/task.py b/petercat_utils/rag_helper/task.py deleted file mode 100644 index c9e8ab67..00000000 --- a/petercat_utils/rag_helper/task.py +++ /dev/null @@ -1,87 +0,0 @@ -import json -from typing import Optional -from github import Github - -import boto3 - -from .git_doc_task import GitDocTask -from .git_issue_task import GitIssueTask -from .git_task import GitTask - -from ..utils.env import get_env_variable -from ..data_class import TaskStatus, TaskType -from ..db.client.supabase import get_client - -# Create SQS client -sqs = boto3.client("sqs") - - -g = Github() - -TABLE_NAME = "rag_tasks" - -SQS_QUEUE_URL = get_env_variable("SQS_QUEUE_URL") - - -def get_oldest_task(): - supabase = get_client() - - response = ( - supabase.table(TABLE_NAME) - .select("*") - .eq("status", TaskStatus.NOT_STARTED.value) - .order("created_at", desc=False) - .limit(1) - .execute() - ) - - return 
response.data[0] if (len(response.data) > 0) else None - - -def get_task_by_id(task_id): - supabase = get_client() - - response = supabase.table(TABLE_NAME).select("*").eq("id", task_id).execute() - return response.data[0] if (len(response.data) > 0) else None - - -def get_task(task_type: TaskType, task_id: str, retry_count=0) -> GitTask: - supabase = get_client() - response = ( - supabase.table(GitTask.get_table_name(task_type)) - .select("*") - .eq("id", task_id) - .execute() - ) - if len(response.data) > 0: - data = response.data[0] - if task_type == TaskType.GIT_DOC: - return GitDocTask( - id=data["id"], - commit_id=data["commit_id"], - sha=data["sha"], - repo_name=data["repo_name"], - node_type=data["node_type"], - path=data["path"], - status=data["status"], - from_id=data["from_task_id"], - retry_count=retry_count, - ) - if task_type == TaskType.GIT_ISSUE: - return GitIssueTask( - id=data["id"], - issue_id=data["issue_id"], - repo_name=data["repo_name"], - node_type=data["node_type"], - bot_id=data["bot_id"], - status=data["status"], - from_id=data["from_task_id"], - retry_count=retry_count, - ) - - -def trigger_task(task_type: TaskType, task_id: Optional[str], retry_count: int = 0): - task = get_task(task_type, task_id, retry_count) if task_id else get_oldest_task() - if task is None: - return task - return task.handle() diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 40232495..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[tool.poetry] -name = "petercat_utils" -version = "0.1.42" -description = "" -authors = ["raoha.rh "] -readme = "README.md" -packages = [{include = "petercat_utils"}] - -[tool.ruff] -builtins = ["_"] - -[pytest] -testpaths = ["tests"] -pythonpath = "." 
-consider_namespace_packages = "True" -python_files = "test_*.py" -cov="com" -cov-report=["xml","html"] -md_report = true -md_report_verbose = 0 -md_report_color = "auto" - -[tool.poetry.dependencies] -python = "^3.9" -langchain_community = ">=0.3,<0.4" -langchain_openai = "0.3.3" -langchain_core = ">=0.3,<0.4" -langchain = ">=0.3,<0.4" -supabase = "2.6.0" -pydantic = ">=2.7.0,<3" -PyGithub = "2.3.0" -python-dotenv = "1.0.0" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/server/.env.example b/server/.env.example index b5e86f1f..4700ae5d 100644 --- a/server/.env.example +++ b/server/.env.example @@ -37,7 +37,6 @@ AUTH0_CLIENT_ID=auth0_client_id AUTH0_CLIENT_SECRET=auth0_client_secret # OPTIONAL - AWS Configures -SQS_QUEUE_URL=https://sqs.ap-northeast-1.amazonaws.com/{your_aws_user}/{your_aws_sqs_message} X_GITHUB_SECRET_NAME="prod/githubapp/petercat/pem" STATIC_SECRET_NAME="prod/petercat/static" LLM_TOKEN_SECRET_NAME="prod/petercat/llm" diff --git a/server/.env.local.example b/server/.env.local.example index a0354e2d..d8023226 100644 --- a/server/.env.local.example +++ b/server/.env.local.example @@ -38,7 +38,6 @@ AUTH0_CLIENT_ID=auth0_client_id AUTH0_CLIENT_SECRET=auth0_client_secret # OPTIONAL - AWS Configures -SQS_QUEUE_URL=https://sqs.ap-northeast-1.amazonaws.com/{your_aws_user}/{your_aws_sqs_message} X_GITHUB_SECRET_NAME="prod/githubapp/petercat/pem" STATIC_SECRET_NAME="prod/petercat/static" LLM_TOKEN_SECRET_NAME="prod/petercat/llm" diff --git a/server/agent/base.py b/server/agent/base.py index 600215f6..f21b8a90 100644 --- a/server/agent/base.py +++ b/server/agent/base.py @@ -4,7 +4,6 @@ from langchain.agents import AgentExecutor from openai import APIError from agent.llm import BaseLLMClient -from petercat_utils.data_class import ChatData, Message from langchain.agents.format_scratchpad.openai_tools import ( format_to_openai_tool_messages, ) @@ -19,7 +18,8 @@ from langchain_core.prompts import 
ChatPromptTemplate from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper from langchain_community.tools.tavily_search.tool import TavilySearchResults -from petercat_utils import get_env_variable +from core.type_class.data_class import ChatData, Message +from utils.env import get_env_variable TAVILY_API_KEY = get_env_variable("TAVILY_API_KEY") diff --git a/server/agent/bot/bot_builder.py b/server/agent/bot/bot_builder.py index 0c126577..428d7843 100644 --- a/server/agent/bot/bot_builder.py +++ b/server/agent/bot/bot_builder.py @@ -1,6 +1,6 @@ from typing import AsyncIterator from agent.llm.clients.openai import OpenAIClient -from petercat_utils.data_class import ChatData +from core.type_class.data_class import ChatData from agent.base import AgentBuilder, dict_to_sse from agent.prompts.bot_builder import generate_prompt_by_user_id diff --git a/server/agent/bot/get_bot.py b/server/agent/bot/get_bot.py index fee4f8f7..145ffd03 100644 --- a/server/agent/bot/get_bot.py +++ b/server/agent/bot/get_bot.py @@ -1,4 +1,3 @@ - from typing import Annotated from fastapi import Depends @@ -10,7 +9,7 @@ from core.models.user import User from core.service.user_llm_token import UserLLMTokenService, get_llm_token_service -from petercat_utils.data_class import ChatData +from core.type_class.data_class import ChatData def get_bot( input_data: ChatData, @@ -38,4 +37,4 @@ def get_bot_by_id(bot_id: str) -> Bot: bot = bot_dao.get_bot(bot_id) llm_token = llm_token_dao.get_llm_token(bot.llm) - return Bot(bot=bot, llm_token=llm_token) \ No newline at end of file + return Bot(bot=bot, llm_token=llm_token) diff --git a/server/agent/llm/base.py b/server/agent/llm/base.py index b4e3d475..6b5f2b13 100644 --- a/server/agent/llm/base.py +++ b/server/agent/llm/base.py @@ -1,8 +1,7 @@ from abc import abstractmethod from typing import Any, Dict, List, Optional from langchain_core.language_models import BaseChatModel - -from petercat_utils.data_class import MessageContent +from 
core.type_class.data_class import MessageContent class BaseLLMClient: diff --git a/server/agent/llm/clients/deepseekr1_dashscope.py b/server/agent/llm/clients/deepseekr1_dashscope.py index 17445724..d0c24ac7 100644 --- a/server/agent/llm/clients/deepseekr1_dashscope.py +++ b/server/agent/llm/clients/deepseekr1_dashscope.py @@ -1,14 +1,11 @@ - - from typing import Any, List, Optional from langchain_openai import ChatOpenAI from langchain_core.utils.function_calling import convert_to_openai_tool from agent.llm import register_llm_client from agent.llm.base import BaseLLMClient - -from petercat_utils.data_class import MessageContent -from petercat_utils import get_env_variable +from core.type_class.data_class import MessageContent +from utils.env import get_env_variable DASHSCOPE_API_KEY = get_env_variable("DASHSCOPE_API_KEY") @@ -45,4 +42,4 @@ def get_tools(self, tools: List[Any]): return [convert_to_openai_tool(tool) for tool in tools] def parse_content(self, content: List[MessageContent]): - return [c.model_dump() for c in content] \ No newline at end of file + return [c.model_dump() for c in content] diff --git a/server/agent/llm/clients/deepseekv3_dashscope.py b/server/agent/llm/clients/deepseekv3_dashscope.py index 8e8b5fbf..42b6d4c8 100644 --- a/server/agent/llm/clients/deepseekv3_dashscope.py +++ b/server/agent/llm/clients/deepseekv3_dashscope.py @@ -1,5 +1,3 @@ - - from typing import Any, List, Optional from langchain_openai import ChatOpenAI from langchain_core.utils.function_calling import convert_to_openai_tool @@ -7,8 +5,8 @@ from agent.llm import register_llm_client from agent.llm.base import BaseLLMClient -from petercat_utils.data_class import MessageContent -from petercat_utils import get_env_variable +from core.type_class.data_class import MessageContent +from utils.env import get_env_variable DASHSCOPE_API_KEY = get_env_variable("DASHSCOPE_API_KEY") @@ -45,4 +43,4 @@ def get_tools(self, tools: List[Any]): return [convert_to_openai_tool(tool) for tool 
in tools] def parse_content(self, content: List[MessageContent]): - return [c.model_dump() for c in content] \ No newline at end of file + return [c.model_dump() for c in content] diff --git a/server/agent/llm/clients/gemini.py b/server/agent/llm/clients/gemini.py index f0b28830..3978709e 100644 --- a/server/agent/llm/clients/gemini.py +++ b/server/agent/llm/clients/gemini.py @@ -5,8 +5,8 @@ from agent.llm import register_llm_client from agent.llm.base import BaseLLMClient -from petercat_utils.data_class import ImageRawURLContentBlock, MessageContent -from petercat_utils.utils.env import get_env_variable +from core.type_class.data_class import ImageRawURLContentBlock, MessageContent +from utils.env import get_env_variable GEMINI_API_KEY = get_env_variable("GEMINI_API_KEY") diff --git a/server/agent/llm/clients/openai.py b/server/agent/llm/clients/openai.py index 5c722c0e..d428de71 100644 --- a/server/agent/llm/clients/openai.py +++ b/server/agent/llm/clients/openai.py @@ -4,8 +4,8 @@ from agent.llm import register_llm_client from agent.llm.base import BaseLLMClient -from petercat_utils.data_class import MessageContent -from petercat_utils.utils.env import get_env_variable +from core.type_class.data_class import MessageContent +from utils.env import get_env_variable OPEN_API_KEY = get_env_variable("OPENAI_API_KEY") diff --git a/server/agent/qa_chat.py b/server/agent/qa_chat.py index c9b5caac..d152dc9f 100644 --- a/server/agent/qa_chat.py +++ b/server/agent/qa_chat.py @@ -3,7 +3,7 @@ from agent.base import AgentBuilder from agent.bot import Bot -from petercat_utils.data_class import ChatData +from core.type_class.data_class import ChatData from agent.tools import issue, pull_request, auth, sourcecode, knowledge, git_info diff --git a/server/agent/tools/bot_builder.py b/server/agent/tools/bot_builder.py index 50773d6c..4fc812ea 100644 --- a/server/agent/tools/bot_builder.py +++ b/server/agent/tools/bot_builder.py @@ -2,9 +2,9 @@ from fastapi.responses import 
JSONResponse from langchain.tools import tool from github import Github -from petercat_utils import get_client from bot.builder import bot_builder +from utils.supabase import get_client g = Github() diff --git a/server/agent/tools/knowledge.py b/server/agent/tools/knowledge.py index a920ccd3..332c7f70 100644 --- a/server/agent/tools/knowledge.py +++ b/server/agent/tools/knowledge.py @@ -1,13 +1,17 @@ +import json +from httpx import ReadTimeout +from whiskerrag_client import APIClient +from whiskerrag_types.model import RetrievalBySpaceRequest, EmbeddingModelEnum from agent.bot.get_bot import get_bot_by_id from langchain.tools import tool -from petercat_utils import retrieval +from utils.env import get_env_variable def factory(bot_id: str): bot_id = bot_id @tool(parse_docstring=True) - def search_knowledge( + async def search_knowledge( query: str, ) -> str: """Search for information based on the query. When use this tool, do not translate the search query. Use the original query language to search. eg: When user's question is 'Ant Design 有哪些新特性?', the query should be 'Ant Design 有哪些新特性?'. 
@@ -18,7 +22,29 @@ def search_knowledge( try: bot = get_bot_by_id(bot_id) repo_name = bot.repo_name if bot.repo_name else "" - return retrieval.search_knowledge(query, repo_name) + api_client = APIClient( + base_url=get_env_variable("WHISKER_API_URL"), + token=get_env_variable("WHISKER_API_KEY"), + timeout=30, + ) + retrieval_res = await api_client.retrieval.retrieve_space_content( + RetrievalBySpaceRequest( + space_id_list=[bot_id, repo_name], + question=query, + embedding_model_name=EmbeddingModelEnum.OPENAI, + similarity_threshold=0.65, + top=10, + metadata_filter={}, + ) + ) + text = json.dumps( + [chunk.context for chunk in retrieval_res if retrieval_res], + ensure_ascii=False, + ) + return text + except ReadTimeout: + print("TimeoutError: The request timed out.") + return None except Exception as e: print(f"An error occurred: {e}") return None diff --git a/server/auth/clients/__init__.py b/server/auth/clients/__init__.py index a5ad37d3..6f38148b 100644 --- a/server/auth/clients/__init__.py +++ b/server/auth/clients/__init__.py @@ -2,7 +2,7 @@ from auth.clients.base import BaseAuthClient from auth.clients.local import LocalClient -from petercat_utils import get_env_variable +from utils.env import get_env_variable PETERCAT_AUTH0_ENABLED = get_env_variable("PETERCAT_AUTH0_ENABLED", "True") == "True" diff --git a/server/auth/clients/auth0.py b/server/auth/clients/auth0.py index 952050c0..23d1c25c 100644 --- a/server/auth/clients/auth0.py +++ b/server/auth/clients/auth0.py @@ -4,7 +4,7 @@ from fastapi import Request from auth.clients.base import BaseAuthClient -from petercat_utils import get_env_variable +from utils.env import get_env_variable from starlette.config import Config from authlib.integrations.starlette_client import OAuth @@ -106,4 +106,4 @@ async def get_access_token(self, user_id: str, provider="github"): ), None, ) - return identity["access_token"] \ No newline at end of file + return identity["access_token"] diff --git 
a/server/auth/clients/local.py b/server/auth/clients/local.py index 45950960..6ea70fea 100644 --- a/server/auth/clients/local.py +++ b/server/auth/clients/local.py @@ -2,7 +2,7 @@ from fastapi import Request from fastapi.responses import RedirectResponse from core.dao.profilesDAO import ProfilesDAO -from petercat_utils import get_env_variable +from utils.env import get_env_variable from auth.clients.base import BaseAuthClient PETERCAT_LOCAL_UID = get_env_variable("PETERCAT_LOCAL_UID") diff --git a/server/auth/get_user_info.py b/server/auth/get_user_info.py index 168f6c07..2adb53dc 100644 --- a/server/auth/get_user_info.py +++ b/server/auth/get_user_info.py @@ -3,7 +3,7 @@ from auth.clients.base import BaseAuthClient from core.models.user import User -from petercat_utils import get_env_variable +from utils.env import get_env_variable AUTH0_DOMAIN = get_env_variable("AUTH0_DOMAIN") diff --git a/server/auth/middleware.py b/server/auth/middleware.py index 73182eea..69874a77 100644 --- a/server/auth/middleware.py +++ b/server/auth/middleware.py @@ -12,6 +12,7 @@ ALLOW_LIST = [ "/", + "/token", "/favicon.ico", "/api/health_checker", "/api/bot/list", diff --git a/server/auth/rate_limit.py b/server/auth/rate_limit.py index da57de95..77512ccc 100644 --- a/server/auth/rate_limit.py +++ b/server/auth/rate_limit.py @@ -4,15 +4,22 @@ from auth.clients import get_auth_client from auth.clients.base import BaseAuthClient -from petercat_utils import get_client, get_env_variable from auth.get_user_info import get_user_id +from utils.env import get_env_variable +from utils.supabase import get_client -RATE_LIMIT_ENABLED = get_env_variable("RATE_LIMIT_ENABLED", "False") == 'True' +RATE_LIMIT_ENABLED = get_env_variable("RATE_LIMIT_ENABLED", "False") == "True" RATE_LIMIT_REQUESTS = get_env_variable("RATE_LIMIT_REQUESTS") or 100 -RATE_LIMIT_DURATION = timedelta(minutes=int(get_env_variable("RATE_LIMIT_DURATION") or 1)) +RATE_LIMIT_DURATION = timedelta( + 
minutes=int(get_env_variable("RATE_LIMIT_DURATION") or 1) +) -async def verify_rate_limit(user_id: Optional[str] = Depends(get_user_id), auth_client: BaseAuthClient = Depends(get_auth_client)): + +async def verify_rate_limit( + user_id: Optional[str] = Depends(get_user_id), + auth_client: BaseAuthClient = Depends(get_auth_client), +): if not RATE_LIMIT_ENABLED: return @@ -22,36 +29,44 @@ async def verify_rate_limit(user_id: Optional[str] = Depends(get_user_id), auth_ if user is None: raise HTTPException( - status_code=429, + status_code=429, detail="Rate Limit Exceeded, Try It Later", - headers={"Retry-After": "60"} + headers={"Retry-After": "60"}, ) user_id = user["id"] supabase = get_client() table = supabase.table("user_token_usage") - rows = table.select('id, user_id, last_request, request_count').eq('user_id', user_id).execute() + rows = ( + table.select("id, user_id, last_request, request_count") + .eq("user_id", user_id) + .execute() + ) now = datetime.now().isoformat() - user_usage = rows.data[0] if len(rows.data) > 0 else { "user_id": user_id, 'request_count': 0, 'last_request': now } + user_usage = ( + rows.data[0] + if len(rows.data) > 0 + else {"user_id": user_id, "request_count": 0, "last_request": now} + ) # Calculate the time elapsed since the last request elapsed_time = datetime.now() - datetime.fromisoformat(user_usage["last_request"]) - + if elapsed_time > RATE_LIMIT_DURATION: # If the elapsed time is greater than the rate limit duration, reset the count - user_usage['request_count'] = 1 + user_usage["request_count"] = 1 else: - if user_usage['request_count'] >= int(RATE_LIMIT_REQUESTS): + if user_usage["request_count"] >= int(RATE_LIMIT_REQUESTS): # If the request count exceeds the rate limit, return a JSON response with an error message raise HTTPException( - status_code=429, + status_code=429, detail="Rate Limit Exceeded, Try It Later", - headers={"Retry-After": "60"} + headers={"Retry-After": "60"}, ) - user_usage['request_count'] = 
int(user_usage['request_count']) + 1 - - user_usage['last_request'] = datetime.now().isoformat() + user_usage["request_count"] = int(user_usage["request_count"]) + 1 + + user_usage["last_request"] = datetime.now().isoformat() table.upsert(user_usage).execute() diff --git a/server/auth/router.py b/server/auth/router.py index 66ee8ac2..8e95b885 100644 --- a/server/auth/router.py +++ b/server/auth/router.py @@ -9,7 +9,8 @@ from auth.clients.base import BaseAuthClient from auth.get_user_info import get_user_id from core.dao.profilesDAO import ProfilesDAO -from petercat_utils import get_client, get_env_variable +from utils.env import get_env_variable +from utils.supabase import get_client API_URL = get_env_variable("API_URL") WEB_URL = get_env_variable("WEB_URL") @@ -27,28 +28,32 @@ responses={404: {"description": "Not found"}}, ) + @router.get("/login") -async def login(request: Request, auth_client = Depends(get_auth_client)): +async def login(request: Request, auth_client=Depends(get_auth_client)): return await auth_client.login(request) + @router.get("/logout") -async def logout(request: Request, auth_client = Depends(get_auth_client)): +async def logout(request: Request, auth_client=Depends(get_auth_client)): request.session.pop("user", None) redirect = request.query_params.get("redirect") return await auth_client.logout(request, redirect) @router.get("/callback") -async def callback(request: Request, auth_client: BaseAuthClient = Depends(get_auth_client)): +async def callback( + request: Request, auth_client: BaseAuthClient = Depends(get_auth_client) +): user_info = await auth_client.get_user_info(request) profiles_dao = ProfilesDAO() - profile = profiles_dao.get_profile(user_info['id']) + profile = profiles_dao.get_profile(user_info["id"]) if user_info: upsert_user = { **user_info, - 'agreement_accepted': profile['agreement_accepted'] if profile else False, - 'is_admin': profile['is_admin'] if profile else False, + "agreement_accepted": 
profile["agreement_accepted"] if profile else False, + "is_admin": profile["is_admin"] if profile else False, } request.session["user"] = dict(upsert_user) @@ -58,7 +63,9 @@ async def callback(request: Request, auth_client: BaseAuthClient = Depends(get_a @router.get("/userinfo") -async def userinfo(request: Request, auth_client: BaseAuthClient = Depends(get_auth_client)): +async def userinfo( + request: Request, auth_client: BaseAuthClient = Depends(get_auth_client) +): user = request.session.get("user") if not user: data = await auth_client.anonymouseLogin(request) @@ -84,8 +91,8 @@ async def get_agreement_status(user_id: Optional[str] = Depends(get_user_id)): @router.post("/accept/agreement", status_code=200) async def bot_generator( - request: Request, - user_id: Annotated[str | None, Depends(get_user_id)] = None, + request: Request, + user_id: Annotated[str | None, Depends(get_user_id)] = None, ): if not user_id: raise HTTPException(status_code=401, detail="User not found") @@ -102,7 +109,10 @@ async def bot_generator( @router.get("/repos") -async def get_user_repos(user_id: Optional[str] = Depends(get_user_id), auth_client: BaseAuthClient = Depends(get_auth_client)): +async def get_user_repos( + user_id: Optional[str] = Depends(get_user_id), + auth_client: BaseAuthClient = Depends(get_auth_client), +): if not user_id: raise HTTPException(status_code=401, detail="User not found") try: diff --git a/server/aws/constants.py b/server/aws/constants.py index 697862e0..57acd22b 100644 --- a/server/aws/constants.py +++ b/server/aws/constants.py @@ -1,4 +1,5 @@ -from petercat_utils.utils.env import get_env_variable +from utils.env import get_env_variable + SUCCESS_CODE = "UPLOAD_SUCCESS" ERROR_CODES = {"credentials_error": "CREDENTIALS_ERROR", "upload_error": "UPLOAD_ERROR"} diff --git a/server/aws/service.py b/server/aws/service.py index fdc82133..e3a79ceb 100644 --- a/server/aws/service.py +++ b/server/aws/service.py @@ -1,7 +1,7 @@ import base64 import hashlib from 
botocore.signers import CloudFrontSigner -from petercat_utils import get_env_variable +from utils.env import get_env_variable import rsa from datetime import datetime, timedelta diff --git a/server/bot/builder.py b/server/bot/builder.py index 6b96983a..ad6ade1a 100644 --- a/server/bot/builder.py +++ b/server/bot/builder.py @@ -1,11 +1,18 @@ from typing import List, Optional from github import Github -from petercat_utils import get_client -from petercat_utils.data_class import RAGGitDocConfig -from petercat_utils import git_doc_task - +from core.models.user import User +from whiskerrag_client import APIClient +from whiskerrag_types.model import ( + KnowledgeCreate, + KnowledgeSplitConfig, + KnowledgeSourceEnum, + KnowledgeTypeEnum, + GithubRepoSourceConfig, +) from agent.prompts.bot_template import generate_prompt_by_repo_name +from utils.env import get_env_variable +from utils.supabase import get_client g = Github() @@ -44,24 +51,8 @@ async def bot_info_generator( return None -def trigger_rag_task(repo_name: str, bot_id: str): - try: - repo = g.get_repo(repo_name) - default_branch = repo.default_branch - config = RAGGitDocConfig( - repo_name=repo_name, - branch=default_branch, - bot_id=bot_id, - file_path="", - commit_id="", - ) - git_doc_task.add_rag_git_doc_task(config) - except Exception as e: - print(f"trigger_rag_task error: {e}") - - async def bot_builder( - uid: str, + user: User, repo_name: str, starters: Optional[List[str]] = None, hello_message: Optional[str] = None, @@ -75,14 +66,38 @@ async def bot_builder( :param hello_message: The hello message of the bot """ try: - bot_data = await bot_info_generator(uid, repo_name, starters, hello_message) + bot_data = await bot_info_generator( + user.sub, repo_name, starters, hello_message + ) if not bot_data: return None supabase = get_client() response = supabase.table("bots").insert(bot_data).execute() if response: - bot_id = response.data[0]["id"] - trigger_rag_task(repo_name=repo_name, bot_id=bot_id) + try: + 
api_client = APIClient( + base_url=get_env_variable("WHISKER_API_URL"), + token=get_env_variable("WHISKER_API_KEY"), + ) + await api_client.knowledge.add_knowledge( + [ + KnowledgeCreate( + source_type=KnowledgeSourceEnum.GITHUB_REPO, + knowledge_type=KnowledgeTypeEnum.FOLDER, + space_id=repo_name, + knowledge_name=repo_name, + source_config=GithubRepoSourceConfig( + repo_name=repo_name, auth_token=user.access_token + ), + split_config=KnowledgeSplitConfig( + chunk_size=500, + chunk_overlap=100, + ), + ) + ] + ) + except Exception as e: + print(f"Add repo knowledge error: {e}") return response except Exception as e: print(f"An error occurred: {e}") diff --git a/server/bot/list.py b/server/bot/list.py index f4cf368e..b528d662 100644 --- a/server/bot/list.py +++ b/server/bot/list.py @@ -3,7 +3,7 @@ from github import Github, Auth from core.dao.repositoryConfigDAO import RepositoryConfigDAO from core.models.user import User -from petercat_utils import get_client +from utils.supabase import get_client def query_list( diff --git a/server/bot/router.py b/server/bot/router.py index 11963fb0..801cac52 100644 --- a/server/bot/router.py +++ b/server/bot/router.py @@ -9,11 +9,11 @@ from core.dao.repositoryConfigDAO import RepositoryConfigDAO from core.models.bot_approval import ApprovalStatus, BotApproval, TaskType from core.models.user import User -from petercat_utils import get_client from typing import Annotated, Optional from bot.builder import bot_builder, bot_info_generator from core.type_class.bot import BotDeployRequest, BotUpdateRequest, BotCreateRequest +from utils.supabase import get_client router = APIRouter( prefix="/api/bot", @@ -112,7 +112,7 @@ def get_bot_config( async def create_bot( request: Request, bot_data: BotCreateRequest, - user_id: Annotated[str | None, Depends(get_user_id)] = None, + user: Annotated[User | None, Depends(get_user)] = None, ): lang = bot_data.lang or "en" default_starters = [ @@ -127,7 +127,7 @@ async def create_bot( ) try: - res = 
await bot_builder(user_id, bot_data.repo_name, starters, hello_message) + res = await bot_builder(user, bot_data.repo_name, starters, hello_message) if not res: return JSONResponse( content={ diff --git a/server/chat/router.py b/server/chat/router.py index b8f6f7b6..b00eb167 100644 --- a/server/chat/router.py +++ b/server/chat/router.py @@ -7,12 +7,12 @@ from agent.bot.get_bot import get_bot from core.models.user import User from core.service.user_token_usage import create_token_recorder -from petercat_utils.data_class import ChatData from toolz import compose from agent import qa_chat from auth.rate_limit import verify_rate_limit from auth.get_user_info import get_user, get_user_id +from core.type_class.data_class import ChatData router = APIRouter( prefix="/api/chat", @@ -36,7 +36,6 @@ def run_qa_chat( user: Annotated[User | None, Depends(get_user)] = None, bot: Annotated[Bot | None, Depends(get_bot)] = None, ): - auth_token = ( Auth.Token(user.access_token) if getattr(user, "access_token", None) else None ) diff --git a/server/core/dao/botApprovalDAO.py b/server/core/dao/botApprovalDAO.py index a29c02aa..d8ed7381 100644 --- a/server/core/dao/botApprovalDAO.py +++ b/server/core/dao/botApprovalDAO.py @@ -2,7 +2,8 @@ from core.models.bot_approval import BotApproval from supabase.client import Client -from petercat_utils.db.client.supabase import get_client +from utils.supabase import get_client + class BotApprovalDAO(BaseDAO): diff --git a/server/core/dao/botDAO.py b/server/core/dao/botDAO.py index 845fe544..7c4eff6c 100644 --- a/server/core/dao/botDAO.py +++ b/server/core/dao/botDAO.py @@ -2,7 +2,7 @@ from supabase.client import Client from core.models.bot import BotModel -from petercat_utils.db.client.supabase import get_client +from utils.supabase import get_client class BotDAO(BaseDAO): diff --git a/server/core/dao/llmTokenDAO.py b/server/core/dao/llmTokenDAO.py index acee8267..18c73012 100644 --- a/server/core/dao/llmTokenDAO.py +++ 
b/server/core/dao/llmTokenDAO.py @@ -3,7 +3,7 @@ from typing import Optional from core.dao.BaseDAO import BaseDAO from supabase.client import Client -from petercat_utils.db.client.supabase import get_client +from utils.supabase import get_client from core.models.llm_token import LLMToken logger = logging.getLogger() diff --git a/server/core/dao/profilesDAO.py b/server/core/dao/profilesDAO.py index da5ebf0d..20f1c14a 100644 --- a/server/core/dao/profilesDAO.py +++ b/server/core/dao/profilesDAO.py @@ -1,8 +1,7 @@ from core.dao.BaseDAO import BaseDAO from supabase.client import Client -from petercat_utils.db.client.supabase import get_client - +from utils.supabase import get_client class ProfilesDAO(BaseDAO): client: Client diff --git a/server/core/dao/repositoryConfigDAO.py b/server/core/dao/repositoryConfigDAO.py index 8dec8421..ba919b5a 100644 --- a/server/core/dao/repositoryConfigDAO.py +++ b/server/core/dao/repositoryConfigDAO.py @@ -3,7 +3,8 @@ from core.models.bot import RepoBindBotConfigVO from core.models.repository import RepositoryConfig from supabase.client import Client -from petercat_utils.db.client.supabase import get_client + +from utils.supabase import get_client class RepositoryConfigDAO(BaseDAO): diff --git a/server/core/dao/userLLmTokenDAO.py b/server/core/dao/userLLmTokenDAO.py index ccc41e72..978b5683 100644 --- a/server/core/dao/userLLmTokenDAO.py +++ b/server/core/dao/userLLmTokenDAO.py @@ -2,8 +2,8 @@ from core.dao.BaseDAO import BaseDAO from supabase.client import Client -from petercat_utils.db.client.supabase import get_client from core.models.user_llm_token import UserLLMToken +from utils.supabase import get_client logger = logging.getLogger() logger.setLevel("INFO") @@ -52,4 +52,4 @@ def delete(self, llm_token: UserLLMToken): self.client.table("user_llm_tokens") \ .delete() \ .eq("id", llm_token.id) \ - .execute() \ No newline at end of file + .execute() diff --git a/server/core/dao/userTokenUsageDAO.py 
b/server/core/dao/userTokenUsageDAO.py index 3dfaf303..33f782b2 100644 --- a/server/core/dao/userTokenUsageDAO.py +++ b/server/core/dao/userTokenUsageDAO.py @@ -1,9 +1,15 @@ from datetime import datetime from supabase.client import Client -from core.models.user_token_usage import BotTokenUsageRate, BotTokenUsageStats, UserTokenUsage, UserTokenUsageRate, UserTokenUsageStats -from petercat_utils.db.client.supabase import get_client +from core.models.user_token_usage import ( + BotTokenUsageRate, + BotTokenUsageStats, + UserTokenUsage, + UserTokenUsageRate, + UserTokenUsageStats, +) from core.dao.BaseDAO import BaseDAO +from utils.supabase import get_client class UserTokenUsageDAO(BaseDAO): @@ -19,38 +25,48 @@ def create(self, token_usage: UserTokenUsage): .insert(token_usage.model_dump(exclude=["id"])) .execute() ) - - + def stats(self, user_id: str, start_date: datetime, end_date: datetime): - resp = self.client.rpc("get_user_stats", { - "filter_user_id": user_id, - "start_date": start_date.strftime("%Y-%m-%d"), - "end_date": end_date.strftime("%Y-%m-%d"), - }).execute() + resp = self.client.rpc( + "get_user_stats", + { + "filter_user_id": user_id, + "start_date": start_date.strftime("%Y-%m-%d"), + "end_date": end_date.strftime("%Y-%m-%d"), + }, + ).execute() return [UserTokenUsageStats(**stats) for stats in resp.data] def analyze(self, start_date: datetime, end_date: datetime): - resp = self.client.rpc("analyze_user_token_usage", { - "start_date": start_date.strftime("%Y-%m-%d"), - "end_date": end_date.strftime("%Y-%m-%d"), - }).execute() + resp = self.client.rpc( + "analyze_user_token_usage", + { + "start_date": start_date.strftime("%Y-%m-%d"), + "end_date": end_date.strftime("%Y-%m-%d"), + }, + ).execute() return [BotTokenUsageStats(**stats) for stats in resp.data] - + def top_bots(self, start_date: datetime, end_date: datetime): - resp = self.client.rpc("bot_token_usage_rate", { - "start_date": start_date.strftime("%Y-%m-%d"), - "end_date": 
end_date.strftime("%Y-%m-%d"), - }).execute() + resp = self.client.rpc( + "bot_token_usage_rate", + { + "start_date": start_date.strftime("%Y-%m-%d"), + "end_date": end_date.strftime("%Y-%m-%d"), + }, + ).execute() return [BotTokenUsageRate(**stats) for stats in resp.data] - + def top_users(self, start_date: datetime, end_date: datetime): - resp = self.client.rpc("user_token_usage_rate", { - "start_date": start_date.strftime("%Y-%m-%d"), - "end_date": end_date.strftime("%Y-%m-%d"), - }).execute() + resp = self.client.rpc( + "user_token_usage_rate", + { + "start_date": start_date.strftime("%Y-%m-%d"), + "end_date": end_date.strftime("%Y-%m-%d"), + }, + ).execute() return [UserTokenUsageRate(**stats) for stats in resp.data] - \ No newline at end of file diff --git a/server/core/service/user_llm_token.py b/server/core/service/user_llm_token.py index 669696f6..b7d50e7d 100644 --- a/server/core/service/user_llm_token.py +++ b/server/core/service/user_llm_token.py @@ -2,7 +2,7 @@ from typing import Optional from pydantic import BaseModel -from petercat_utils import get_env_variable +from utils.env import get_env_variable from core.dao.userLLmTokenDAO import UserLLMTokenDAO from core.models.user_llm_token import UserLLMToken @@ -19,7 +19,7 @@ class CreateUserLLMTokenVO(BaseModel): slug: Optional[str] = None llm: str token: Optional[str] = None - + class UserLLMTokenVO(CreateUserLLMTokenVO): id: str @@ -67,4 +67,4 @@ def delete_llm_token(self, id: str, user_id: str): return self.llm_token_dao.delete(token_model) def get_llm_token_service(): - return UserLLMTokenService() \ No newline at end of file + return UserLLMTokenService() diff --git a/server/core/type_class/data_class.py b/server/core/type_class/data_class.py new file mode 100644 index 00000000..c8cf238d --- /dev/null +++ b/server/core/type_class/data_class.py @@ -0,0 +1,88 @@ +from enum import Enum, auto +from typing import Literal, Optional, List, TypeAlias +from typing import Union + +from pydantic import 
BaseModel + + +class ImageURL(BaseModel): + url: str + """ + The external URL of the image, must be a supported image types: jpeg, jpg, png, + gif, webp. + """ + + detail: Optional[Literal["auto", "low", "high"]] = None + """Specifies the detail level of the image. + + `low` uses fewer tokens, you can opt in to high resolution using `high`. Default + value is `auto` + """ + + +class ImageURLContentBlock(BaseModel): + image_url: ImageURL + type: Literal["image_url"] + + +class ImageRawURLContentBlock(BaseModel): + image_url: str + type: Literal["image_url"] + + +class TextContentBlock(BaseModel): + text: str + + type: Literal["text"] + """Always `text`.""" + + +MessageContent: TypeAlias = Union[ImageURLContentBlock, TextContentBlock] + + +class Message(BaseModel): + role: str + content: List[MessageContent] = [] + + +class ChatData(BaseModel): + messages: List[Message] = [] + llm: Optional[str] = "openai" + prompt: Optional[str] = None + bot_id: Optional[str] = None + repo_name: Optional[str] = None + + +class ExecuteMessage(BaseModel): + type: str + repo: str + path: str + + +class S3Config(BaseModel): + s3_bucket: str + file_path: Optional[str] = None + + +class AutoNameEnum(Enum): + def _generate_next_value_(name, start, count, last_values): + return name + + +class TaskStatus(AutoNameEnum): + NOT_STARTED = auto() + IN_PROGRESS = auto() + COMPLETED = auto() + ON_HOLD = auto() + CANCELLED = auto() + ERROR = auto() + + +class GitDocTaskNodeType(AutoNameEnum): + TREE = auto() + BLOB = auto() + + +class GitIssueTaskNodeType(AutoNameEnum): + REPO = auto() + ISSUE = auto() diff --git a/server/env.py b/server/env.py index 72f79905..3d6dd31a 100644 --- a/server/env.py +++ b/server/env.py @@ -1,6 +1,7 @@ # list all env variables -from petercat_utils import get_env_variable +from utils.env import get_env_variable + WEB_URL = get_env_variable("WEB_URL") ENVIRONMENT = get_env_variable("PETERCAT_ENV", "development") -API_URL = get_env_variable("API_URL") +API_URL = 
get_env_variable("API_URL") diff --git a/server/event_handler/discussion.py b/server/event_handler/discussion.py index de7cf418..ad460d0b 100644 --- a/server/event_handler/discussion.py +++ b/server/event_handler/discussion.py @@ -4,7 +4,7 @@ from github import GithubException from agent.bot.get_bot import get_bot_by_id from core.dao.repositoryConfigDAO import RepositoryConfigDAO -from petercat_utils.data_class import ChatData, Message, TextContentBlock +from core.type_class.data_class import ChatData, Message, TextContentBlock from agent.prompts.issue_helper import ( generate_issue_comment_prompt, generate_issue_prompt, diff --git a/server/event_handler/issue.py b/server/event_handler/issue.py index 8bc1baee..1685b5c5 100644 --- a/server/event_handler/issue.py +++ b/server/event_handler/issue.py @@ -9,7 +9,7 @@ generate_issue_prompt, ) from core.dao.repositoryConfigDAO import RepositoryConfigDAO -from petercat_utils.data_class import ChatData, Message, TextContentBlock +from core.type_class.data_class import ChatData, Message, TextContentBlock from agent.qa_chat import agent_chat from utils.fuzzy_match import contains_keyword_fuzzy diff --git a/server/event_handler/pull_request.py b/server/event_handler/pull_request.py index d98fc508..60265ab4 100644 --- a/server/event_handler/pull_request.py +++ b/server/event_handler/pull_request.py @@ -20,7 +20,7 @@ ) from agent.qa_chat import agent_chat from core.dao.repositoryConfigDAO import RepositoryConfigDAO -from petercat_utils.data_class import ChatData, Message, TextContentBlock +from core.type_class.data_class import ChatData, Message, TextContentBlock def file_match(filename: str, patterns: List[str]): diff --git a/server/github_app/handlers.py b/server/github_app/handlers.py index 0581221f..15a18309 100644 --- a/server/github_app/handlers.py +++ b/server/github_app/handlers.py @@ -1,7 +1,7 @@ from typing import Union from event_handler.intsall import InstallationEventHandler, InstallationEditEventHandler -from 
petercat_utils import get_env_variable +from utils.env import get_env_variable from github import Auth from event_handler.pull_request import ( diff --git a/server/github_app/router.py b/server/github_app/router.py index a05fb1d7..cc66fde5 100644 --- a/server/github_app/router.py +++ b/server/github_app/router.py @@ -23,7 +23,7 @@ from github_app.utils import ( get_private_key, ) -from petercat_utils import get_env_variable +from utils.env import get_env_variable REGION_NAME = get_env_variable("AWS_REGION") X_GITHUB_SECRET_NAME = get_env_variable("X_GITHUB_SECRET_NAME") diff --git a/server/github_app/utils.py b/server/github_app/utils.py index abc51867..e386b994 100644 --- a/server/github_app/utils.py +++ b/server/github_app/utils.py @@ -6,7 +6,7 @@ from cryptography.hazmat.primitives import serialization from cryptography.hazmat.backends import default_backend -from petercat_utils.utils.env import get_env_variable +from utils.env import get_env_variable from utils.private_key import get_private_key APP_ID = get_env_variable("X_GITHUB_APP_ID") diff --git a/server/main.py b/server/main.py index d7d8a35b..5820251a 100644 --- a/server/main.py +++ b/server/main.py @@ -16,9 +16,8 @@ from env import ENVIRONMENT, API_URL, WEB_URL from github_app import router as github_app_router from i18n.translations import I18nConfig, I18nMiddleware -from petercat_utils import get_env_variable +from utils.env import get_env_variable from rag import router as rag_router -from task import router as task_router from user import router as user_router from insight import router as insight_router @@ -64,7 +63,6 @@ app.include_router(bot_router.router) app.include_router(auth_router.router) app.include_router(chat_router.router) -app.include_router(task_router.router) app.include_router(github_app_router.router) app.include_router(aws_router.router) app.include_router(user_router.router) diff --git a/server/rag/router.py b/server/rag/router.py index d15c2c2d..f282f1bd 100644 --- 
a/server/rag/router.py +++ b/server/rag/router.py @@ -1,118 +1,126 @@ import json -from typing import Optional - -from fastapi import APIRouter, Depends -from petercat_utils.db.client.supabase import get_client - -from petercat_utils.data_class import ( - RAGGitDocConfig, - RAGGitIssueConfig, - TaskType, -) -from petercat_utils.rag_helper import ( - retrieval, - task, - issue_retrieval, - git_doc_task, - git_issue_task, +from typing import Annotated, List + +from fastapi import APIRouter, Depends, HTTPException, status +from openai import BaseModel +from auth.get_user_info import get_user +from core.models.user import User +from utils.env import get_env_variable +from whiskerrag_client import APIClient +from whiskerrag_types.model import ( + PageParams, + Knowledge, + Task, + Chunk, + KnowledgeCreate, + KnowledgeTypeEnum, + KnowledgeSourceEnum, + GithubRepoSourceConfig, + KnowledgeSplitConfig, ) - from auth.rate_limit import verify_rate_limit - router = APIRouter( - prefix="/api", + prefix="/api/rag", tags=["rag"], responses={404: {"description": "Not found"}}, ) -@router.post("/rag/add_knowledge_by_doc", dependencies=[Depends(verify_rate_limit)]) -def add_knowledge_by_doc(config: RAGGitDocConfig): - try: - result = retrieval.add_knowledge_by_doc(config) - if result: - return json.dumps( - { - "success": True, - "message": "Knowledge added successfully!", - } - ) - else: - return json.dumps({"success": False, "message": "Knowledge not added!"}) - except Exception as e: - return json.dumps({"success": False, "message": str(e)}) +class ReloadRepoRequest(BaseModel): + repo_name: str -@router.post("/rag/add_knowledge_by_issue", dependencies=[Depends(verify_rate_limit)]) -def add_knowledge_by_issue(config: RAGGitIssueConfig): - try: - result = issue_retrieval.add_knowledge_by_issue(config) - if result: - return json.dumps( - { - "success": True, - "message": "Issue added successfully!", - } - ) - else: - return json.dumps({"success": False, "message": "Issue not 
added!"}) - except Exception as e: - return json.dumps({"success": False, "message": str(e)}) +class RestartTaskRequest(BaseModel): + task_id_list: List[str] -@router.post("/rag/search_knowledge", dependencies=[Depends(verify_rate_limit)]) -def search_knowledge(query: str, repo_name: str, filter: dict = {}): - data = retrieval.search_knowledge(query, repo_name, filter) - return data - - -@router.post("/rag/add_git_doc_task", dependencies=[Depends(verify_rate_limit)]) -def add_git_doc_task(config: RAGGitDocConfig): +@router.post("/knowledge/repo/reload", dependencies=[Depends(verify_rate_limit)]) +async def reload_repo( + request: ReloadRepoRequest, + user: Annotated[User | None, Depends(get_user)] = None, +): + if user is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, detail="Github Login needed" + ) try: - data = git_doc_task.add_rag_git_doc_task(config) - return data + api_client = APIClient( + base_url=get_env_variable("WHISKER_API_URL"), + token=get_env_variable("WHISKER_API_KEY"), + ) + res = await api_client.knowledge.add_knowledge( + [ + KnowledgeCreate( + source_type=KnowledgeSourceEnum.GITHUB_REPO, + knowledge_type=KnowledgeTypeEnum.FOLDER, + space_id=request.repo_name, + knowledge_name=request.repo_name, + source_config=GithubRepoSourceConfig( + repo_name=request.repo_name, auth_token=user.access_token + ), + split_config=KnowledgeSplitConfig( + chunk_size=500, + chunk_overlap=100, + ), + ) + ] + ) + return res except Exception as e: - return json.dumps({"success": False, "message": str(e)}) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=str(e), + ) -@router.post("/rag/add_git_issue_task", dependencies=[Depends(verify_rate_limit)]) -def add_git_issue_task(config: RAGGitIssueConfig): +@router.post("/knowledge/list", dependencies=[Depends(verify_rate_limit)]) +async def get_knowledge_list(params: PageParams[Knowledge]): try: - data = git_issue_task.add_rag_git_issue_task(config) - return data + 
api_client = APIClient( + base_url=get_env_variable("WHISKER_API_URL"), + token=get_env_variable("WHISKER_API_KEY"), + ) + res = await api_client.knowledge.get_knowledge_list(**params.model_dump()) + return res except Exception as e: return json.dumps({"success": False, "message": str(e)}) -@router.post("/rag/trigger_task", dependencies=[Depends(verify_rate_limit)]) -def trigger_task(task_type: TaskType, task_id: Optional[str] = None): +@router.post("/chunk/list", dependencies=[Depends(verify_rate_limit)]) +async def get_chunk_list(params: PageParams[Chunk]): try: - task.trigger_task(task_type, task_id) + api_client = APIClient( + base_url=get_env_variable("WHISKER_API_URL"), + token=get_env_variable("WHISKER_API_KEY"), + ) + res = await api_client.chunk.get_chunk_list(**params.model_dump()) + return res except Exception as e: return json.dumps({"success": False, "message": str(e)}) -@router.get("/rag/chunk/list", dependencies=[Depends(verify_rate_limit)]) -def get_chunk_list(repo_name: str = None, page_size: int = 10, page_number: int = 1): +@router.post("/task/list", dependencies=[Depends(verify_rate_limit)]) +async def get_rag_task(params: PageParams[Task]): try: - return retrieval.get_chunk_list(repo_name, page_size, page_number) + api_client = APIClient( + base_url=get_env_variable("WHISKER_API_URL"), + token=get_env_variable("WHISKER_API_KEY"), + ) + res = await api_client.task.get_task_list(**params.model_dump()) + return res except Exception as e: return json.dumps({"success": False, "message": str(e)}) -@router.get("/rag/task/latest", dependencies=[Depends(verify_rate_limit)]) -def get_rag_task(repo_name: str): - # TODO: Think about hot to get correct when reload knowledge task was triggered +@router.post("/task/restart", dependencies=[Depends(verify_rate_limit)]) +async def restart_rag_task(params: RestartTaskRequest): try: - supabase = get_client() - response = ( - supabase.table("rag_tasks") - .select("id,status,node_type,path,from_task_id,created_at", 
count="exact") - .eq("repo_name", repo_name) - .order("created_at", desc=True) - .execute() + api_client = APIClient( + base_url=get_env_variable("WHISKER_API_URL"), + token=get_env_variable("WHISKER_API_KEY"), ) - return response + res = await api_client.task.restart_task(params.task_id_list) + return res except Exception as e: return json.dumps({"success": False, "message": str(e)}) diff --git a/server/requirements-dev.txt b/server/requirements-dev.txt new file mode 100644 index 00000000..382ac39a --- /dev/null +++ b/server/requirements-dev.txt @@ -0,0 +1,4 @@ +pytest-cov +pytest +pytest-asyncio +twine diff --git a/server/requirements.txt b/server/requirements.txt index f268e450..fd69ebe3 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -1,4 +1,5 @@ fastapi==0.100.1 +starlette==0.27.0 uvicorn[standard]==0.23.2 python-dotenv==1.0.0 openai @@ -10,11 +11,9 @@ langchain_google_genai PyGithub==2.3.0 GitPython>=3.1.43 python-multipart -httpx[socks] supabase authlib==0.14.3 boto3>=1.34.84 -pytest-cov PyJWT pydantic>=2.7.0 unstructured>=0.15.9 @@ -24,8 +23,7 @@ jose>=1.0.0 itsdangerous==2.2.0 fastapi_auth0==0.5.0 requests -pytest -httpx +httpx==0.27.2 urllib3>=2.2.2 -petercat_utils>=0.1.42 toolz +whiskerrag>=0.0.15 \ No newline at end of file diff --git a/server/setup_python.sh b/server/setup_python.sh index a5ed6127..4777038b 100755 --- a/server/setup_python.sh +++ b/server/setup_python.sh @@ -1,4 +1,5 @@ -python3 -m venv venv +python3.12 -m venv venv source venv/bin/activate python3 -m pip install --upgrade pip pip3 install --no-cache-dir -r requirements.txt +pip3 install --no-cache-dir -r requirements-dev.txt diff --git a/server/task/router.py b/server/task/router.py deleted file mode 100644 index 6493c2fb..00000000 --- a/server/task/router.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Optional -from fastapi import APIRouter - -from petercat_utils import get_client -from petercat_utils.rag_helper.task import TaskStatus - -TABLE_NAME = 
"rag_tasks" - -router = APIRouter( - prefix="/api", - tags=["task"], - responses={404: {"description": "Not found"}}, -) - -@router.get("/tasks") -def get_tasks(page_size = '10', page_number = '1', status: Optional[TaskStatus] = None): - start = (int(page_number) - 1) * int(page_size) - end = int(page_number) * int(page_size) - supabase = get_client() - - sql = supabase.table(TABLE_NAME)\ - .select('*', count='exact')\ - .range(start=start, end=end) \ - - response = sql.execute() if status is None else sql.eq("status", status).execute() - - return { - "success": True, - "data": response.data, - "pagination": { - "count": response.count, - "page_size": page_size, - "page_number": page_number, - } - } - -@router.get("/tasks/{task_id}") -def get_task(task_id: str): - supabase = get_client() - - response = supabase.table(TABLE_NAME) \ - .select('*') \ - .eq("id", task_id) \ - .execute() - - return response.data[0] if (len(response.data) > 0) else None diff --git a/server/tests/agent/tools/test_knowledge.py b/server/tests/agent/tools/test_knowledge.py new file mode 100644 index 00000000..6d0042fe --- /dev/null +++ b/server/tests/agent/tools/test_knowledge.py @@ -0,0 +1,78 @@ +import json +from httpx import ReadTimeout +import pytest +from unittest.mock import patch, AsyncMock +from server.agent.tools.knowledge import factory + + +@pytest.mark.asyncio +async def test_search_knowledge_success(): + bot_id = "test_bot_id" + query = "What are the new features of Ant Design?" 
+ expected_response = json.dumps(["Feature 1", "Feature 2"], ensure_ascii=False) + + search_knowledge = factory(bot_id) + + with patch( + "server.agent.tools.knowledge.get_bot_by_id" + ) as mock_get_bot_by_id, patch( + "server.agent.tools.knowledge.APIClient" + ) as mock_APIClient: + + mock_get_bot_by_id.return_value.repo_name = "test_repo" + mock_api_client_instance = mock_APIClient.return_value + mock_api_client_instance.retrieval.retrieve_space_content = AsyncMock( + return_value=[ + type("Chunk", (object,), {"context": "Feature 1"}), + type("Chunk", (object,), {"context": "Feature 2"}), + ] + ) + + response = await search_knowledge.arun(query) + assert response == expected_response + + +@pytest.mark.asyncio +async def test_search_knowledge_timeout(): + bot_id = "test_bot_id" + query = "What are the new features of Ant Design?" + + search_knowledge = factory(bot_id) + + with patch( + "server.agent.tools.knowledge.get_bot_by_id" + ) as mock_get_bot_by_id, patch( + "server.agent.tools.knowledge.APIClient" + ) as mock_APIClient: + + mock_get_bot_by_id.return_value.repo_name = "test_repo" + mock_api_client_instance = mock_APIClient.return_value + mock_api_client_instance.retrieval.retrieve_space_content = AsyncMock( + side_effect=ReadTimeout + ) + + response = await search_knowledge.arun(query) + assert response is None + + +@pytest.mark.asyncio +async def test_search_knowledge_exception(): + bot_id = "test_bot_id" + query = "What are the new features of Ant Design?" 
+ + search_knowledge = factory(bot_id) + + with patch( + "server.agent.tools.knowledge.get_bot_by_id" + ) as mock_get_bot_by_id, patch( + "server.agent.tools.knowledge.APIClient" + ) as mock_APIClient: + + mock_get_bot_by_id.return_value.repo_name = "test_repo" + mock_api_client_instance = mock_APIClient.return_value + mock_api_client_instance.retrieval.retrieve_space_content = AsyncMock( + side_effect=Exception("Test Exception") + ) + + response = await search_knowledge.arun(query) + assert response is None diff --git a/server/tests/mock_session.py b/server/tests/mock_session.py index bf87b57f..8b6a653f 100644 --- a/server/tests/mock_session.py +++ b/server/tests/mock_session.py @@ -2,21 +2,35 @@ from base64 import b64encode from itsdangerous import TimestampSigner from core.models.user import User -from petercat_utils import get_env_variable +from utils.env import get_env_variable session_secret_key = get_env_variable("FASTAPI_SECRET_KEY") + def create_session_cookie(data) -> str: signer = TimestampSigner(str(session_secret_key)) return signer.sign( - b64encode(json.dumps(data).encode('utf-8')), - ).decode('utf-8') + b64encode(json.dumps(data).encode("utf-8")), + ).decode("utf-8") + + +mock_user = User( + id="1", + sub="1", + sid="1", + avatar="1", + picture="1", + nickname="1", + access_token="1", + anonymous=False, + agreement_accepted=False, +) -mock_user = User(id="1", sub="1", sid="1", avatar="1", picture="1", nickname="1", access_token="1", anonymous=False, agreement_accepted=False) def get_mock_user(): return mock_user + def mock_session(): - return {'session': create_session_cookie({"user": dict(mock_user) }) } + return {"session": create_session_cookie({"user": dict(mock_user)})} diff --git a/server/tests/test_main.py b/server/tests/test_main.py index f4ce6371..2c0ff26b 100644 --- a/server/tests/test_main.py +++ b/server/tests/test_main.py @@ -1,7 +1,7 @@ from fastapi.testclient import TestClient -from env import ENVIRONMENT, WEB_URL, API_URL -from main 
import app +from server.env import ENVIRONMENT, WEB_URL, API_URL +from server.main import app client = TestClient(app) diff --git a/petercat_utils/utils/env.py b/server/utils/env.py similarity index 100% rename from petercat_utils/utils/env.py rename to server/utils/env.py diff --git a/server/utils/private_key/__init__.py b/server/utils/private_key/__init__.py index e2adf9c2..82172b40 100644 --- a/server/utils/private_key/__init__.py +++ b/server/utils/private_key/__init__.py @@ -1,4 +1,4 @@ -from petercat_utils import get_env_variable +from utils.env import get_env_variable from utils.private_key.local import LocalPrivateKeyProvider from utils.private_key.s3 import S3PrivateKeyProvider diff --git a/server/utils/private_key/s3.py b/server/utils/private_key/s3.py index 1180f7e2..cc533c67 100644 --- a/server/utils/private_key/s3.py +++ b/server/utils/private_key/s3.py @@ -1,6 +1,6 @@ import boto3 from botocore.exceptions import ClientError -from petercat_utils import get_env_variable +from utils.env import get_env_variable from utils.private_key.base import BasePrivateKeyProvider REGION_NAME = get_env_variable("AWS_REGION") diff --git a/petercat_utils/db/client/supabase.py b/server/utils/supabase.py similarity index 86% rename from petercat_utils/db/client/supabase.py rename to server/utils/supabase.py index cc300c9b..f8ff378e 100644 --- a/petercat_utils/db/client/supabase.py +++ b/server/utils/supabase.py @@ -1,9 +1,11 @@ from supabase.client import Client, create_client -from ...utils.env import get_env_variable + +from utils.env import get_env_variable supabase_url = get_env_variable("SUPABASE_URL") supabase_key = get_env_variable("SUPABASE_SERVICE_KEY") + def get_client(): supabase: Client = create_client(supabase_url, supabase_key) return supabase diff --git a/subscriber/handler.py b/subscriber/handler.py deleted file mode 100644 index b4132d25..00000000 --- a/subscriber/handler.py +++ /dev/null @@ -1,40 +0,0 @@ -import json - -from petercat_utils import task as 
task_helper -from petercat_utils.data_class import TaskType - -MAX_RETRY_COUNT = 5 - - -def lambda_handler(event, context): - if event: - batch_item_failures = [] - sqs_batch_response = {} - - for record in event["Records"]: - body = record["body"] - print(f"receive message here: {body}") - - message_dict = json.loads(body) - task_id = message_dict["task_id"] - task_type = message_dict["task_type"] - retry_count = message_dict["retry_count"] - task = task_helper.get_task(TaskType(task_type), task_id) - try: - if task is None: - return task - task.handle() - # process message - print( - f"message content: message={message_dict}, task_id={task_id}, task={task}, retry_count={retry_count}" - ) - except Exception as e: - if retry_count < MAX_RETRY_COUNT: - retry_count += 1 - task_helper.trigger_task(task_type, task_id, retry_count) - else: - print(f"message handle error: ${e}") - batch_item_failures.append({"itemIdentifier": record["messageId"]}) - - sqs_batch_response["batchItemFailures"] = batch_item_failures - return sqs_batch_response diff --git a/subscriber/requirements.txt b/subscriber/requirements.txt deleted file mode 100644 index 3ff51c19..00000000 --- a/subscriber/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -petercat_utils>=0.1.40 diff --git a/template.yml b/template.yml index cd02f4df..23d75310 100644 --- a/template.yml +++ b/template.yml @@ -76,14 +76,6 @@ Parameters: Type: String Description: Tavily API Key Default: 1 - SQSQueueName: - Type: String - Description: Name of the SQS queue - Default: 1 - SQSQueueUrl: - Type: String - Description: URL of the SQS queue - Default: 1 Auth0Domain: Type: String Description: Auth0 Domain @@ -120,6 +112,14 @@ Parameters: Type: String Description: Name of the temporary S3 bucket Default: 1 + WhiskerApiUrl: + Type: String + Description: API URL of ths Whisker RAG service + Default: 1 + WhiskerApiKey: + Type: String + Description: API KEY of the Whisker RAG service + Default: 1 Resources: FastAPIFunction: @@ -152,10 
+152,11 @@ Resources: SUPABASE_URL: !Ref SupabaseUrl GITHUB_TOKEN: !Ref GitHubToken TAVILY_API_KEY: !Ref TavilyAPIKey - SQS_QUEUE_URL: !Ref SQSQueueUrl AUTH0_DOMAIN: !Ref Auth0Domain AUTH0_CLIENT_ID: !Ref Auth0ClientId AUTH0_CLIENT_SECRET: !Ref Auth0ClientSecret + WHISKER_API_URL: !Ref WhiskerApiUrl + WHISKER_API_KEY: !Ref WhiskerApiKey FunctionUrlConfig: AuthType: NONE InvokeMode: RESPONSE_STREAM @@ -180,56 +181,6 @@ Resources: Dockerfile: ../docker/Dockerfile.aws.lambda DockerTag: v1 - SQSSubscriptionFunction: - Type: AWS::Serverless::Function - Properties: - PackageType: Image - MemorySize: 512 - FunctionUrlConfig: - AuthType: NONE - Environment: - Variables: - X_GITHUB_APP_ID: !Ref GitHubAppID - X_GITHUB_APPS_CLIENT_ID: !Ref GithubAppsClientId - X_GITHUB_APPS_CLIENT_SECRET: !Ref GithubAppsClientSecret - API_IDENTIFIER: !Ref APIIdentifier - FASTAPI_SECRET_KEY: !Ref FastAPISecretKey - OPENAI_API_KEY: !Ref OpenAIAPIKey - GEMINI_API_KEY: !Ref GeminiAPIKey - DEEPSEEK_API_KEY: !Ref DeepSeekAPIKey - SUPABASE_SERVICE_KEY: !Ref SupabaseServiceKey - SUPABASE_URL: !Ref SupabaseUrl - GITHUB_TOKEN: !Ref GitHubToken - TAVILY_API_KEY: !Ref TavilyAPIKey - SQS_QUEUE_URL: !Ref SQSQueueUrl - Policies: - - Statement: - - Sid: BedrockInvokePolicy - Effect: Allow - Action: - - bedrock:InvokeModelWithResponseStream - Resource: '*' - - Sid: SQSInvokePolicy - Effect: Allow - Action: - - sqs:* - Resource: '*' - - Sid: AllObjectActions - Effect: Allow - Action: - - s3:PutObject - - s3:GetObject - - s3:DeleteObject - Resource: - - !Sub 'arn:aws:s3:::${S3TempBucketName}/*' - - SQSPollerPolicy: - QueueName: - !Ref SQSQueueName - Tracing: Active - Metadata: - Dockerfile: ../docker/Dockerfile.subscriber - DockerContext: subscriber - DockerTag: v1 Outputs: FastAPIFunctionUrl: @@ -237,11 +188,4 @@ Outputs: Value: !GetAtt FastAPIFunctionUrl.FunctionUrl FastAPIFunction: Description: "FastAPI Lambda Function ARN" - Value: !GetAtt FastAPIFunction.Arn - - SQSSubscriptionFunctionUrl: - Description: 
"Function URL for SQS Subscriptio function" - Value: !GetAtt SQSSubscriptionFunctionUrl.FunctionUrl - SQSSubscriptionFunction: - Description: "SQS Subscription Function Lambda Function ARN" - Value: !GetAtt SQSSubscriptionFunction.Arn \ No newline at end of file + Value: !GetAtt FastAPIFunction.Arn \ No newline at end of file