Skip to content

Commit 61d08a5

Browse files
authored
Merge pull request #394 from inchoate/update-docs-for-schema
2 parents a8251bd + aedda44 commit 61d08a5

20 files changed

+73
-82
lines changed

examples/ernie/smart_scraper_schema_ernie.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,31 @@
22
Basic example of scraping pipeline using SmartScraper with schema
33
"""
44

5-
import os, json
5+
import json
6+
import os
7+
from typing import Dict
8+
69
from dotenv import load_dotenv
10+
from pydantic import BaseModel
11+
712
from scrapegraphai.graphs import SmartScraperGraph
813

14+
915
load_dotenv()
1016

1117
# ************************************************
1218
# Define the output schema for the graph
1319
# ************************************************
1420

15-
schema= """
16-
{
17-
"Projects": [
18-
"Project #":
19-
{
20-
"title": "...",
21-
"description": "...",
22-
},
23-
"Project #":
24-
{
25-
"title": "...",
26-
"description": "...",
27-
}
28-
]
29-
}
30-
"""
21+
22+
class Project(BaseModel):
23+
title: str
24+
description: str
25+
26+
27+
class Projects(BaseModel):
28+
Projects: Dict[str, Project]
29+
3130

3231
# ************************************************
3332
# Define the configuration for the graph
@@ -37,7 +36,7 @@
3736

3837
graph_config = {
3938
"llm": {
40-
"api_key":openai_key,
39+
"api_key": openai_key,
4140
"model": "gpt-3.5-turbo",
4241
},
4342
"verbose": True,
@@ -51,8 +50,8 @@
5150
smart_scraper_graph = SmartScraperGraph(
5251
prompt="List me all the projects with their description",
5352
source="https://perinim.github.io/projects/",
54-
schema=schema,
55-
config=graph_config
53+
schema=Projects,
54+
config=graph_config,
5655
)
5756

5857
result = smart_scraper_graph.run()

examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
import os
66
from dotenv import load_dotenv
7+
from typing import Dict
8+
9+
from pydantic import BaseModel
710
from scrapegraphai.graphs import SmartScraperGraph
811
from scrapegraphai.utils import prettify_exec_info
912
from langchain_community.llms import HuggingFaceEndpoint
@@ -13,22 +16,12 @@
1316
# Define the output schema for the graph
1417
# ************************************************
1518

16-
schema= """
17-
{
18-
"Projects": [
19-
"Project #":
20-
{
21-
"title": "...",
22-
"description": "...",
23-
},
24-
"Project #":
25-
{
26-
"title": "...",
27-
"description": "...",
28-
}
29-
]
30-
}
31-
"""
19+
class Project(BaseModel):
20+
title: str
21+
description: str
22+
23+
class Projects(BaseModel):
24+
Projects: Dict[str, Project]
3225

3326
## required environment variable in .env
3427
#HUGGINGFACEHUB_API_TOKEN
@@ -61,7 +54,7 @@
6154
smart_scraper_graph = SmartScraperGraph(
6255
prompt="List me all the projects with their description",
6356
source="https://perinim.github.io/projects/",
64-
schema=schema,
57+
schema=Projects,
6558
config=graph_config
6659
)
6760
result = smart_scraper_graph.run()

examples/mixed_models/smart_scraper_schema_groq_openai.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@
22
Basic example of scraping pipeline using SmartScraper with schema
33
"""
44

5-
import os, json
5+
import json
6+
import os
7+
from typing import Dict, List
8+
69
from dotenv import load_dotenv
10+
from pydantic import BaseModel
11+
712
from scrapegraphai.graphs import SmartScraperGraph
813
from scrapegraphai.utils import prettify_exec_info
914

@@ -13,22 +18,12 @@
1318
# Define the output schema for the graph
1419
# ************************************************
1520

16-
schema= """
17-
{
18-
"Projects": [
19-
"Project #":
20-
{
21-
"title": "...",
22-
"description": "...",
23-
},
24-
"Project #":
25-
{
26-
"title": "...",
27-
"description": "...",
28-
}
29-
]
30-
}
31-
"""
21+
class Project(BaseModel):
22+
title: str
23+
description: str
24+
25+
class Projects(BaseModel):
26+
Projects: Dict[str, Project]
3227

3328
# ************************************************
3429
# Define the configuration for the graph
@@ -60,7 +55,7 @@
6055
prompt="List me all the projects with their description.",
6156
# also accepts a string with the already downloaded HTML code
6257
source="https://perinim.github.io/projects/",
63-
schema=schema,
58+
schema=Projects,
6459
config=graph_config
6560
)
6661

scrapegraphai/graphs/abstract_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class AbstractGraph(ABC):
3939
prompt (str): The prompt for the graph.
4040
source (str): The source of the graph.
4141
config (dict): Configuration parameters for the graph.
42-
schema (str): The schema for the graph output.
42+
schema (BaseModel): The schema for the graph output.
4343
llm_model: An instance of a language model client, configured for generating answers.
4444
embedder_model: An instance of an embedding model client,
4545
configured for generating embeddings.

scrapegraphai/graphs/csv_scraper_multi_graph.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from copy import copy, deepcopy
66
from typing import List, Optional
77

8+
from pydantic import BaseModel
9+
810
from .base_graph import BaseGraph
911
from .abstract_graph import AbstractGraph
1012
from .csv_scraper_graph import CSVScraperGraph
@@ -32,7 +34,7 @@ class CSVScraperMultiGraph(AbstractGraph):
3234
prompt (str): The user prompt to search the internet.
3335
source (List[str]): The source of the graph.
3436
config (dict): Configuration parameters for the graph.
35-
schema (Optional[str]): The schema for the graph output.
37+
schema (Optional[BaseModel]): The schema for the graph output.
3638
3739
Example:
3840
>>> search_graph = CSVScraperMultiGraph(
@@ -42,7 +44,7 @@ class CSVScraperMultiGraph(AbstractGraph):
4244
>>> result = search_graph.run()
4345
"""
4446

45-
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
47+
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
4648

4749
self.max_results = config.get("max_results", 3)
4850

scrapegraphai/graphs/deep_scraper_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class DeepScraperGraph(AbstractGraph):
3434
prompt (str): The prompt for the graph.
3535
source (str): The source of the graph.
3636
config (dict): Configuration parameters for the graph.
37-
schema (str): The schema for the graph output.
37+
schema (BaseModel): The schema for the graph output.
3838
llm_model: An instance of a language model client, configured for generating answers.
3939
embedder_model: An instance of an embedding model client,
4040
configured for generating embeddings.
@@ -45,7 +45,7 @@ class DeepScraperGraph(AbstractGraph):
4545
prompt (str): The prompt for the graph.
4646
source (str): The source of the graph.
4747
config (dict): Configuration parameters for the graph.
48-
schema (str): The schema for the graph output.
48+
schema (BaseModel): The schema for the graph output.
4949
5050
Example:
5151
>>> deep_scraper = DeepScraperGraph(

scrapegraphai/graphs/json_scraper_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class JSONScraperGraph(AbstractGraph):
2323
prompt (str): The prompt for the graph.
2424
source (str): The source of the graph.
2525
config (dict): Configuration parameters for the graph.
26-
schema (str): The schema for the graph output.
26+
schema (BaseModel): The schema for the graph output.
2727
llm_model: An instance of a language model client, configured for generating answers.
2828
embedder_model: An instance of an embedding model client,
2929
configured for generating embeddings.
@@ -34,7 +34,7 @@ class JSONScraperGraph(AbstractGraph):
3434
prompt (str): The prompt for the graph.
3535
source (str): The source of the graph.
3636
config (dict): Configuration parameters for the graph.
37-
schema (str): The schema for the graph output.
37+
schema (BaseModel): The schema for the graph output.
3838
3939
Example:
4040
>>> json_scraper = JSONScraperGraph(

scrapegraphai/graphs/json_scraper_multi_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class JSONScraperMultiGraph(AbstractGraph):
3333
prompt (str): The user prompt to search the internet.
3434
source (List[str]): The source of the graph.
3535
config (dict): Configuration parameters for the graph.
36-
schema (Optional[str]): The schema for the graph output.
36+
schema (Optional[BaseModel]): The schema for the graph output.
3737
3838
Example:
3939
>>> search_graph = JSONScraperMultiGraph(

scrapegraphai/graphs/omni_scraper_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class OmniScraperGraph(AbstractGraph):
2929
prompt (str): The prompt for the graph.
3030
source (str): The source of the graph.
3131
config (dict): Configuration parameters for the graph.
32-
schema (str): The schema for the graph output.
32+
schema (BaseModel): The schema for the graph output.
3333
llm_model: An instance of a language model client, configured for generating answers.
3434
embedder_model: An instance of an embedding model client,
3535
configured for generating embeddings.
@@ -41,7 +41,7 @@ class OmniScraperGraph(AbstractGraph):
4141
prompt (str): The prompt for the graph.
4242
source (str): The source of the graph.
4343
config (dict): Configuration parameters for the graph.
44-
schema (str): The schema for the graph output.
44+
schema (BaseModel): The schema for the graph output.
4545
4646
Example:
4747
>>> omni_scraper = OmniScraperGraph(

scrapegraphai/graphs/omni_search_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class OmniSearchGraph(AbstractGraph):
3434
Args:
3535
prompt (str): The user prompt to search the internet.
3636
config (dict): Configuration parameters for the graph.
37-
schema (Optional[str]): The schema for the graph output.
37+
schema (Optional[BaseModel]): The schema for the graph output.
3838
3939
Example:
4040
>>> omni_search_graph = OmniSearchGraph(

scrapegraphai/graphs/pdf_scraper_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class PDFScraperGraph(AbstractGraph):
2626
prompt (str): The prompt for the graph.
2727
source (str): The source of the graph.
2828
config (dict): Configuration parameters for the graph.
29-
schema (str): The schema for the graph output.
29+
schema (BaseModel): The schema for the graph output.
3030
llm_model: An instance of a language model client, configured for generating answers.
3131
embedder_model: An instance of an embedding model client,
3232
configured for generating embeddings.
@@ -38,7 +38,7 @@ class PDFScraperGraph(AbstractGraph):
3838
prompt (str): The prompt for the graph.
3939
source (str): The source of the graph.
4040
config (dict): Configuration parameters for the graph.
41-
schema (str): The schema for the graph output.
41+
schema (BaseModel): The schema for the graph output.
4242
4343
Example:
4444
>>> pdf_scraper = PDFScraperGraph(

scrapegraphai/graphs/pdf_scraper_multi_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class PdfScraperMultiGraph(AbstractGraph):
3434
prompt (str): The user prompt to search the internet.
3535
source (List[str]): The source of the graph.
3636
config (dict): Configuration parameters for the graph.
37-
schema (Optional[str]): The schema for the graph output.
37+
schema (Optional[BaseModel]): The schema for the graph output.
3838
3939
Example:
4040
>>> search_graph = PdfScraperMultiGraph(

scrapegraphai/graphs/script_creator_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class ScriptCreatorGraph(AbstractGraph):
2323
prompt (str): The prompt for the graph.
2424
source (str): The source of the graph.
2525
config (dict): Configuration parameters for the graph.
26-
schema (str): The schema for the graph output.
26+
schema (BaseModel): The schema for the graph output.
2727
llm_model: An instance of a language model client, configured for generating answers.
2828
embedder_model: An instance of an embedding model client,
2929
configured for generating embeddings.
@@ -36,7 +36,7 @@ class ScriptCreatorGraph(AbstractGraph):
3636
prompt (str): The prompt for the graph.
3737
source (str): The source of the graph.
3838
config (dict): Configuration parameters for the graph.
39-
schema (str): The schema for the graph output.
39+
schema (BaseModel): The schema for the graph output.
4040
4141
Example:
4242
>>> script_creator = ScriptCreatorGraph(

scrapegraphai/graphs/script_creator_multi_graph.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from copy import copy, deepcopy
66
from typing import List, Optional
77

8+
from pydantic import BaseModel
9+
810
from .base_graph import BaseGraph
911
from .abstract_graph import AbstractGraph
1012
from .script_creator_graph import ScriptCreatorGraph
@@ -30,7 +32,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
3032
prompt (str): The user prompt to search the internet.
3133
source (List[str]): The source of the graph.
3234
config (dict): Configuration parameters for the graph.
33-
schema (Optional[str]): The schema for the graph output.
35+
schema (Optional[BaseModel]): The schema for the graph output.
3436
Example:
3537
>>> script_graph = ScriptCreatorMultiGraph(
3638
... "What is Chioggia famous for?",
@@ -41,7 +43,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
4143
>>> result = script_graph.run()
4244
"""
4345

44-
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
46+
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
4547

4648
self.max_results = config.get("max_results", 3)
4749

scrapegraphai/graphs/search_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class SearchGraph(AbstractGraph):
3333
Args:
3434
prompt (str): The user prompt to search the internet.
3535
config (dict): Configuration parameters for the graph.
36-
schema (Optional[str]): The schema for the graph output.
36+
schema (Optional[BaseModel]): The schema for the graph output.
3737
3838
Example:
3939
>>> search_graph = SearchGraph(

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class SmartScraperGraph(AbstractGraph):
2626
prompt (str): The prompt for the graph.
2727
source (str): The source of the graph.
2828
config (dict): Configuration parameters for the graph.
29-
schema (str): The schema for the graph output.
29+
schema (BaseModel): The schema for the graph output.
3030
llm_model: An instance of a language model client, configured for generating answers.
3131
embedder_model: An instance of an embedding model client,
3232
configured for generating embeddings.
@@ -37,7 +37,7 @@ class SmartScraperGraph(AbstractGraph):
3737
prompt (str): The prompt for the graph.
3838
source (str): The source of the graph.
3939
config (dict): Configuration parameters for the graph.
40-
schema (str): The schema for the graph output.
40+
schema (BaseModel): The schema for the graph output.
4141
4242
Example:
4343
>>> smart_scraper = SmartScraperGraph(

scrapegraphai/graphs/smart_scraper_multi_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class SmartScraperMultiGraph(AbstractGraph):
3333
prompt (str): The user prompt to search the internet.
3434
source (List[str]): The source of the graph.
3535
config (dict): Configuration parameters for the graph.
36-
schema (Optional[str]): The schema for the graph output.
36+
schema (Optional[BaseModel]): The schema for the graph output.
3737
3838
Example:
3939
>>> search_graph = SmartScraperMultiGraph(

0 commit comments

Comments
 (0)