Commit 922aa96

Merge branch 'pre/beta' into search_links_node
(2 parents: 8ab6032 + b481fd7)


42 files changed, +618 -133 lines changed

.github/workflows/release.yml

Lines changed: 79 additions & 0 deletions (new file)

```yaml
name: Release
on:
  push:
    branches:
      - main
      - pre/*

jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
      - name: Install git
        run: |
          sudo apt update
          sudo apt install -y git
      - name: Install Python Env and Poetry
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
      - run: pip install poetry
      - name: Install Node Env
        uses: actions/setup-node@v4
        with:
          node-version: 20
      - name: Checkout
        uses: actions/checkout@v4.1.1
        with:
          fetch-depth: 0
          persist-credentials: false
      - name: Build app
        run: |
          poetry install
          poetry build
        id: build_cache
        if: success()
      - name: Cache build
        uses: actions/cache@v2
        with:
          path: ./dist
          key: ${{ runner.os }}-build-${{ hashFiles('dist/**') }}
        if: steps.build_cache.outputs.id != ''

  release:
    name: Release
    runs-on: ubuntu-latest
    needs: build
    environment: development
    if: |
      github.event_name == 'push' && github.ref == 'refs/heads/main' ||
      github.event_name == 'push' && github.ref == 'refs/heads/pre/beta' ||
      github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged && github.event.pull_request.base.ref == 'main' ||
      github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged && github.event.pull_request.base.ref == 'pre/beta'
    permissions:
      contents: write
      issues: write
      pull-requests: write
      id-token: write
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4.1.1
        with:
          fetch-depth: 0
          persist-credentials: false
      - name: Semantic Release
        uses: cycjimmy/semantic-release-action@v4.1.0
        with:
          semantic_version: 23
          extra_plugins: |
            semantic-release-pypi@3
            @semantic-release/git
            @semantic-release/commit-analyzer@12
            @semantic-release/release-notes-generator@13
            @semantic-release/github@10
            @semantic-release/changelog@6
            conventional-changelog-conventionalcommits@7
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
```
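The `if:` gate on the release job above fires only for pushes to `main` or `pre/beta`, or for merged pull requests targeting those branches. A minimal Python sketch of that boolean (a hypothetical helper for illustration, not part of the workflow):

```python
def should_release(event_name, ref="", action="", merged=False, base_ref=""):
    """Mirror the release job's `if:` expression (illustrative only)."""
    push_ok = event_name == "push" and ref in ("refs/heads/main", "refs/heads/pre/beta")
    pr_ok = (
        event_name == "pull_request"
        and action == "closed"
        and merged
        and base_ref in ("main", "pre/beta")
    )
    return push_ok or pr_ok

print(should_release("push", ref="refs/heads/main"))       # True
print(should_release("push", ref="refs/heads/feature/x"))  # False
```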

.gitignore

Lines changed: 1 addition & 0 deletions

```diff
@@ -35,3 +35,4 @@
 
 # lock files
 *.lock
+poetry.lock
```

.releaserc.yml

Lines changed: 56 additions & 0 deletions (new file)

```yaml
plugins:
  - - "@semantic-release/commit-analyzer"
    - preset: conventionalcommits
  - - "@semantic-release/release-notes-generator"
    - writerOpts:
        commitsSort:
          - subject
          - scope
      preset: conventionalcommits
      presetConfig:
        types:
          - type: feat
            section: Features
          - type: fix
            section: Bug Fixes
          - type: chore
            section: chore
          - type: docs
            section: Docs
          - type: style
            hidden: true
          - type: refactor
            section: Refactor
          - type: perf
            section: Perf
          - type: test
            section: Test
          - type: build
            section: Build
          - type: ci
            section: CI
  - "@semantic-release/changelog"
  - "semantic-release-pypi"
  - "@semantic-release/github"
  - - "@semantic-release/git"
    - assets:
        - CHANGELOG.md
        - pyproject.toml
      message: |-
        ci(release): ${nextRelease.version} [skip ci]

        ${nextRelease.notes}
branches:
  # child branches coming from tagged version for bugfix (1.1.x) or new features (1.x)
  # maintenance branch
  - name: "+([0-9])?(.{+([0-9]),x}).x"
    channel: "stable"
  # release a production version when merging towards main
  - name: "main"
    channel: "stable"
  # prerelease branch
  - name: "pre/beta"
    channel: "dev"
    prerelease: "beta"
debug: true
```
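The `presetConfig` above routes conventional-commit types into changelog sections, with `style` hidden. A small illustrative sketch of that mapping; semantic-release's commit-analyzer does this internally, so the helper below is hypothetical:

```python
# Mirror of the presetConfig type -> section table above (illustrative only).
SECTIONS = {
    "feat": "Features", "fix": "Bug Fixes", "chore": "chore", "docs": "Docs",
    "refactor": "Refactor", "perf": "Perf", "test": "Test",
    "build": "Build", "ci": "CI",
}
HIDDEN = {"style"}  # hidden: true -> omitted from the changelog

def changelog_section(subject):
    """Return the changelog section for a conventional-commit subject, or None."""
    # Strip an optional scope, e.g. "ci(release): ..." -> "ci".
    ctype = subject.split(":", 1)[0].split("(", 1)[0].strip()
    if ctype in HIDDEN:
        return None
    return SECTIONS.get(ctype)

print(changelog_section("feat: trigger new beta release"))  # Features
print(changelog_section("ci(release): fix plugin train"))   # CI
```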

CHANGELOG.md

Lines changed: 12 additions & 0 deletions (new file)

```markdown
## [0.3.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.2.8...v0.3.0-beta.1) (2024-04-26)

### Features

* trigger new beta release ([6f028c4](https://github.com/VinciGit00/Scrapegraph-ai/commit/6f028c499342655851044f54de2a8cc1b9b95697))

### CI

* add ci workflow to manage lib release with semantic-release ([92cd040](https://github.com/VinciGit00/Scrapegraph-ai/commit/92cd040dad8ba91a22515f3845f8dbb5f6a6939c))
* remove pull request trigger and fix plugin release train ([876fe66](https://github.com/VinciGit00/Scrapegraph-ai/commit/876fe668d97adef3863446836b10a3c00a2eb82d))
```

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions

```diff
@@ -26,7 +26,7 @@
 
 ## Contributing Guidelines
 
-Please adhere to the following guidelines when contributing to AmazScraper:
+Please adhere to the following guidelines when contributing to ScrapeGraphAI:
 
 - Follow the code style and formatting guidelines specified in the [Code Style](#code-style) section.
 - Make sure your changes are well-documented and include any necessary updates to the project's documentation.
@@ -61,7 +61,7 @@
 
 ## License
 
-AmazScraper is licensed under the **Apache License 2.0**. See the [LICENSE](LICENSE) file for more information.
+ScrapeGraphAI is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for more information.
 By contributing to this project, you agree to license your contributions under the same license.
 
 Can't wait to see your contributions! :smile:
```

README.md

Lines changed: 4 additions & 4 deletions

````diff
@@ -3,6 +3,7 @@
 [![Downloads](https://static.pepy.tech/badge/scrapegraphai)](https://pepy.tech/project/scrapegraphai)
 [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
 [![Pylint](https://github.com/VinciGit00/Scrapegraph-ai/actions/workflows/pylint.yml/badge.svg)](https://github.com/VinciGit00/Scrapegraph-ai/actions/workflows/pylint.yml)
+[![CodeQL](https://github.com/VinciGit00/Scrapegraph-ai/actions/workflows/codeql.yml/badge.svg)](https://github.com/VinciGit00/Scrapegraph-ai/actions/workflows/codeql.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![](https://dcbadge.vercel.app/api/server/gkxQDAjfeX)](https://discord.gg/gkxQDAjfeX)
 
@@ -53,12 +54,11 @@
         "model": "ollama/mistral",
         "temperature": 0,
         "format": "json",  # Ollama needs the format to be specified explicitly
-        "base_url": "http://localhost:11434",  # set Ollama URL arbitrarily
+        "base_url": "http://localhost:11434",  # set Ollama URL
     },
     "embeddings": {
         "model": "ollama/nomic-embed-text",
-        "temperature": 0,
-        "base_url": "http://localhost:11434",  # set Ollama URL arbitrarily
+        "base_url": "http://localhost:11434",  # set Ollama URL
     }
 }
 
@@ -79,7 +79,7 @@
 Note: before using the local model remember to create the docker container!
 ```text
 docker-compose up -d
-docker exec -it ollama ollama run stablelm-zephyr
+docker exec -it ollama ollama pull stablelm-zephyr
 ```
 You can use which models avaiable on Ollama or your own model instead of stablelm-zephyr
 ```python
````
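The README change above drops the unused `temperature` from the embeddings block and keeps `format: json`, which Ollama requires explicitly. As an illustration of what a local-Ollama `graph_config` needs, here is a stdlib-only sketch; the validator and its exact checks are assumptions for this example, not a scrapegraphai API:

```python
def check_ollama_config(cfg):
    """Collect problems with a local-Ollama graph_config dict (illustrative checks)."""
    problems = []
    if cfg.get("llm", {}).get("format") != "json":
        problems.append("llm block should set format: json (Ollama needs it explicitly)")
    for block in ("llm", "embeddings"):
        if "base_url" not in cfg.get(block, {}):
            problems.append("missing base_url in " + block)
    return problems

# Config shaped like the corrected README example.
cfg = {
    "llm": {"model": "ollama/mistral", "temperature": 0, "format": "json",
            "base_url": "http://localhost:11434"},
    "embeddings": {"model": "ollama/nomic-embed-text",
                   "base_url": "http://localhost:11434"},
}
print(check_ollama_config(cfg))  # []
```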
docs/source/index.rst

Lines changed: 1 addition & 1 deletion

```diff
@@ -21,7 +21,7 @@
    :caption: Getting Started
 
    getting_started/installation
-   getting_started/examples
+   getting_started/examples
    modules/modules
 
 Indices and tables
```

The removed and added lines render identically; this is likely a whitespace-only fix.
Lines changed: 1 addition & 1 deletion

```diff
@@ -1 +1 @@
-OPENAI_APIKEY="your openai api key"
+OPENAI_APIKEY="your openai key here"
```

examples/benchmarks/GenerateScraper/Readme.md

Lines changed: 11 additions & 9 deletions

```diff
@@ -9,12 +9,14 @@
 
 The model runned for this benchmark is Mistral on Ollama with nomic-embed-text
 
+In particular, is tested with ScriptCreatorGraph
+
 | Hardware               | Model                                   | Example 1 | Example 2 |
 | ---------------------- | --------------------------------------- | --------- | --------- |
 | Macbook 14' m1 pro     | Mistral on Ollama with nomic-embed-text | 30.54s    | 35.76s    |
-| Macbook m2 max         | Mistral on Ollama with nomic-embed-text |           |           |
-| Macbook 14' m1 pro<br> | Llama3 on Ollama with nomic-embed-text  | 27.82s    | 29.986s   |
-| Macbook m2 max<br>     | Llama3 on Ollama with nomic-embed-text  |           |           |
+| Macbook m2 max         | Mistral on Ollama with nomic-embed-text | 18.46s    | 19.59s    |
+| Macbook 14' m1 pro<br> | Llama3 on Ollama with nomic-embed-text  | 27.82s    | 29.98s    |
+| Macbook m2 max<br>     | Llama3 on Ollama with nomic-embed-text  | 20.83s    | 12.29s    |
 
 
 **Note**: the examples on Docker are not runned on other devices than the Macbook because the performance are to slow (10 times slower than Ollama).
@@ -23,17 +25,17 @@
 **URL**: https://perinim.github.io/projects
 **Task**: List me all the projects with their description.
 
-| Name                | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
-| ------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
-| gpt-3.5-turbo       | 24.215268                | 1892         | 1802          | 90                | 1                   | 0.002883       |
-| gpt-4-turbo-preview | 6.614                    | 1936         | 1802          | 134               | 1                   | 0.02204        |
+| Name                | Execution time | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
+| ------------------- | -------------- | ------------ | ------------- | ----------------- | ------------------- | -------------- |
+| gpt-3.5-turbo       | 4.50s          | 1897         | 1802          | 95                | 1                   | 0.002893       |
+| gpt-4-turbo         | 7.88s          | 1920         | 1802          | 118               | 1                   | 0.02156        |
 
 ### Example 2: Wired
 **URL**: https://www.wired.com
 **Task**: List me all the articles with their description.
 
 | Name                | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
 | ------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
-| gpt-3.5-turbo       |                          |              |               |                   |                     |                |
-| gpt-4-turbo-preview |                          |              |               |                   |                     |                |
+| gpt-3.5-turbo       | Error (text too long)    | -            | -             | -                 | -                   | -              |
+| gpt-4-turbo         | Error (TPM limit reached)| -            | -             | -                 | -                   | -              |
```

examples/benchmarks/GenerateScraper/benchmark_openai_gpt35.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -19,7 +19,7 @@
 # Define the configuration for the graph
 # ************************************************
 
-openai_key = os.getenv("GPT35_KEY")
+openai_key = os.getenv("OPENAI_APIKEY")
 
 graph_config = {
     "llm": {
```
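The benchmark scripts in this commit standardize on a single `OPENAI_APIKEY` environment variable instead of per-model keys like `GPT35_KEY` and `GPT4_KEY`. A small sketch of reading it with an explicit failure when it is unset; the `require_env` helper is hypothetical, not part of the repo:

```python
import os

def require_env(name):
    """Read an environment variable, failing loudly when it is unset."""
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(name + " is not set; export it or add it to your .env file")
    return value

os.environ["OPENAI_APIKEY"] = "sk-example"  # stand-in value for the demo
print(require_env("OPENAI_APIKEY"))         # sk-example
```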

examples/benchmarks/GenerateScraper/benchmark_openai_gpt4.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -19,12 +19,12 @@
 # Define the configuration for the graph
 # ************************************************
 
-openai_key = os.getenv("GPT4_KEY")
+openai_key = os.getenv("OPENAI_APIKEY")
 
 graph_config = {
     "llm": {
         "api_key": openai_key,
-        "model": "gpt-4-turbo-preview",
+        "model": "gpt-4-turbo-2024-04-09",
     },
     "library": "beautifoulsoup"
 }
```
Lines changed: 1 addition & 1 deletion

```diff
@@ -1 +1 @@
-OPENAI_APIKEY="your openai api key"
+OPENAI_APIKEY="your openai key here"
```

examples/benchmarks/SmartScraper/Readme.md

Lines changed: 11 additions & 9 deletions

```diff
@@ -5,35 +5,37 @@
 
 Both are strored locally as txt file in .txt format because in this way we do not have to think about the internet connection
 
+In particular, is tested with SmartScraper
+
 | Hardware           | Moodel                                  | Example 1 | Example 2 |
 | ------------------ | --------------------------------------- | --------- | --------- |
 | Macbook 14' m1 pro | Mistral on Ollama with nomic-embed-text | 11.60s    | 26.61s    |
 | Macbook m2 max     | Mistral on Ollama with nomic-embed-text | 8.05s     | 12.17s    |
-| Macbook 14' m1 pro | Llama3 on Ollama with nomic-embed-text  | 29.871    | 35.32     |
-| Macbook m2 max     | Llama3 on Ollama with nomic-embed-text  |           |           |
+| Macbook 14' m1 pro | Llama3 on Ollama with nomic-embed-text  | 29.871s   | 35.32s    |
+| Macbook m2 max     | Llama3 on Ollama with nomic-embed-text  | 18.36s    | 78.32s    |
 
 
 **Note**: the examples on Docker are not runned on other devices than the Macbook because the performance are to slow (10 times slower than Ollama). Indeed the results are the following:
 
 | Hardware           | Example 1 | Example 2 |
 | ------------------ | --------- | --------- |
-| Macbook 14' m1 pro | 139.89    | Too long  |
+| Macbook 14' m1 pro | 139.89s   | Too long  |
 # Performance on APIs services
 ### Example 1: personal portfolio
 **URL**: https://perinim.github.io/projects
 **Task**: List me all the projects with their description.
 
-| Name                | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
-| ------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
-| gpt-3.5-turbo       | 25.22                    | 445          | 272           | 173               | 1                   | 0.000754       |
-| gpt-4-turbo-preview | 9.53                     | 449          | 272           | 177               | 1                   | 0.00803        |
+| Name                | Execution time | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
+| ------------------- | -------------- | ------------ | ------------- | ----------------- | ------------------- | -------------- |
+| gpt-3.5-turbo       | 5.58s          | 445          | 272           | 173               | 1                   | 0.000754       |
+| gpt-4-turbo         | 9.76s          | 445          | 272           | 173               | 1                   | 0.00791        |
 
 ### Example 2: Wired
 **URL**: https://www.wired.com
 **Task**: List me all the articles with their description.
 
 | Name                | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
 | ------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
-| gpt-3.5-turbo       | 25.89                    | 445          | 272           | 173               | 1                   | 0.000754       |
-| gpt-4-turbo-preview | 64.70                    | 3573         | 2199          | 1374              | 1                   | 0.06321        |
+| gpt-3.5-turbo       | 6.50                     | 2442         | 2199          | 243               | 1                   | 0.003784       |
+| gpt-4-turbo         | 76.07                    | 3521         | 2199          | 1322              | 1                   | 0.06165        |
```

examples/benchmarks/SmartScraper/benchmark_docker.py

Lines changed: 0 additions & 1 deletion

```diff
@@ -2,7 +2,6 @@
 Basic example of scraping pipeline using SmartScraper from text
 """
 
-import os
 from scrapegraphai.graphs import SmartScraperGraph
 from scrapegraphai.utils import prettify_exec_info
```

examples/benchmarks/SmartScraper/benchmark_openai_gpt35.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -19,7 +19,7 @@
 # Define the configuration for the graph
 # ************************************************
 
-openai_key = os.getenv("GPT35_KEY")
+openai_key = os.getenv("OPENAI_APIKEY")
 
 graph_config = {
     "llm": {
```

examples/benchmarks/SmartScraper/benchmark_openai_gpt4.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -20,12 +20,12 @@
 # Define the configuration for the graph
 # ************************************************
 
-openai_key = os.getenv("GPT4_KEY")
+openai_key = os.getenv("OPENAI_APIKEY")
 
 graph_config = {
     "llm": {
         "api_key": openai_key,
-        "model": "gpt-4-turbo-preview",
+        "model": "gpt-4-turbo",
     },
 }
```

examples/gemini/smart_scraper_gemini.py

Lines changed: 8 additions & 0 deletions

```diff
@@ -4,6 +4,7 @@
 
 import os
 from dotenv import load_dotenv
+from scrapegraphai.utils import prettify_exec_info
 from scrapegraphai.graphs import SmartScraperGraph
 load_dotenv()
 
@@ -34,3 +35,10 @@
 
 result = smart_scraper_graph.run()
 print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
```
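The new block above collects per-node execution statistics and prints them via `prettify_exec_info`. As a rough idea of what such pretty-printing involves, here is a stdlib-only sketch; the record field names (`node`, `time`, `tokens`) are illustrative and not scrapegraphai's actual schema:

```python
def prettify(records):
    """Render a list of {node, time, tokens} dicts as an aligned text table."""
    headers = ["node", "time", "tokens"]
    rows = [[str(r[h]) for h in headers] for r in records]
    # Column width = widest cell in each column, including the header.
    widths = [max(len(h), *(len(row[i]) for row in rows)) for i, h in enumerate(headers)]
    lines = [" | ".join(h.ljust(w) for h, w in zip(headers, widths))]
    lines += [" | ".join(c.ljust(w) for c, w in zip(row, widths)) for row in rows]
    return "\n".join(lines)

info = [
    {"node": "fetch", "time": 1.2, "tokens": 0},
    {"node": "generate_answer", "time": 3.4, "tokens": 450},
]
print(prettify(info))
```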
