berlin-startups.py

import requests  # HTTP requests module
import csv  # CSV read/write module
from bs4 import BeautifulSoup  # BeautifulSoup web scraping module
base_url = "https://berlin.startups-list.com/"  # Website / URL we will contact

links = []
names = []
about = []

current_url = base_url
print(current_url)

# Fetch and parse the current URL
r = requests.get(current_url)  # Send an HTTP GET request
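# (Optional) fail fast on HTTP errors before parsing, e.g. r.raise_for_status()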
soup = BeautifulSoup(r.text, "html.parser")  # Parse the HTTP response

# Find the startup card divs
headers = soup.find_all('div', attrs={'class': 'card'})
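# Note: the 'card' class reflects the site's markup at the time of writing;
# if the page layout changes, this selector will need updating.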

# Get names, websites and about text
for div in headers:
    names.append(div.find('h1', attrs={'property': 'name'}).get_text())
    links.append(div.find('a').get('href'))
    about.append(div.find('p').get_text())
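
# Note: div.find(...) returns None when a tag is missing, so a card without
# an <h1 property="name">, <a> or <p> element would raise AttributeError above.
# A defensive variant could skip such cards, e.g.:
#   name_tag = div.find('h1', attrs={'property': 'name'})
#   if name_tag is None:
#       continue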

# Populate CSV with the scraped info
with open('berlin-startups-list.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Name", "About", "Website"])
    for i in range(len(names)):
        writer.writerow([names[i], about[i], links[i]])

print('scrapie readie')
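
# Usage: run `python berlin-startups.py`; the scraped data is written to
# berlin-startups-list.csv in the current working directory.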