1
1
import asyncio
2
- from typing import List , Dict , Any
3
2
4
3
from scrapegraph_py import AsyncClient
5
4
from scrapegraph_py .logger import sgai_logger
@@ -16,15 +15,15 @@ async def scrape_companies(client: AsyncClient, url: str, batch: str) -> None:
16
15
user_prompt = "Extract all company information from this page, including name, description, and website" ,
17
16
number_of_scrolls = 10 ,
18
17
)
19
-
20
18
# Process the results
21
- companies = response .get ("result" , [])
19
+ companies = response .get ("result" , {}). get ( "companies" , [])
22
20
if not companies :
23
21
print (f"No companies found for batch { batch } " )
24
22
return
25
23
26
24
# Save or process the companies data
27
25
print (f"Found { len (companies )} companies in batch { batch } " )
26
+
28
27
for company in companies :
29
28
print (f"Company: { company .get ('name' , 'N/A' )} " )
30
29
print (f"Description: { company .get ('description' , 'N/A' )} " )
@@ -37,13 +36,13 @@ async def scrape_companies(client: AsyncClient, url: str, batch: str) -> None:
37
36
38
37
async def main ():
39
38
# Initialize async client
40
- client = AsyncClient (api_key = "your-api-key-here " )
39
+ client = AsyncClient (api_key = "sgai-REDACTED " )
41
40
42
41
try :
43
42
# Example YC batch URLs
44
43
batch_urls = {
45
- "W24" : "https://www.ycombinator.com/companies?batch=W24 " ,
46
- "S23" : "https://www.ycombinator.com/companies?batch=S23 "
44
+ "W24" : "https://www.ycombinator.com/companies?batch=Winter%202024 " ,
45
+ "S23" : "https://www.ycombinator.com/companies?batch=Summer%202023 "
47
46
}
48
47
49
48
# Create tasks for each batch
0 commit comments