-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLooking-for-emails.py
65 lines (49 loc) · 1.65 KB
/
Looking-for-emails.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import csv
# Accumulates every unique e-mail-looking string found across all scanned pages.
mails = []

# Pre-compiled once: anchor text that starts like "word@word.word".
# (Raw string avoids invalid-escape warnings; `\.{1}` was redundant.)
_EMAIL_RE = re.compile(r'\w+@\w+\.\w+')


def findMails(soup):
    """Collect e-mail-looking anchor texts from *soup* into the global ``mails``.

    Every ``<a>`` tag whose text matches ``_EMAIL_RE`` is stripped of
    whitespace/control characters, printed, and appended to ``mails``
    (duplicates are skipped).
    """
    for anchor in soup.find_all('a'):
        if anchor is None:
            continue
        text = anchor.text
        if _EMAIL_RE.match(text):
            # Drop stray whitespace and control characters inside the match.
            for ch in (' ', '\r', '\n', '\t'):
                text = text.replace(ch, '')
            if text not in mails:
                print(text)
                mails.append(text)
# Source spreadsheet: one website URL per row in the 'HTTP' column.
df = pd.read_csv("entertainment2.csv", usecols=['HTTP'])

# Pages whose URL mentions one of these words are likely to list contacts.
# (Compared case-insensitively; the original `or "Contact"` was a bare truthy
# string, which made the filter accept every link.)
KEYWORDS = ('contact', 'career', 'about', 'services')

# Open the output file once, up front: re-opening with mode 'w' inside the
# loop (as before) clobbered the rows written for previous sites.
with open('results.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Website", "Email"])
    # Rows 1..19 of the spreadsheet, as in the original `while n < 20` loop.
    for n in range(1, 20):
        url = df.loc[n, 'HTTP']
        found_before = len(mails)
        try:
            response = requests.get(url)
        except requests.RequestException as exc:
            # One unreachable site should not abort the whole run.
            print(f"SKIPPING {url}: {exc}")
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        links = [a.attrs.get('href') for a in soup.select('a[href]')]
        # De-duplicate and keep only keyword-bearing links.
        candidates = {link for link in links
                      if any(word in link.lower() for word in KEYWORDS)}
        for link in candidates:
            # Only follow same-site links; skip javascript pseudo-links.
            if not link.startswith(url) or link.endswith('javascript:void(0)'):
                continue
            try:
                page = requests.get(link)
            except requests.RequestException:
                continue
            findMails(BeautifulSoup(page.text, 'html.parser'))
        # NOTE: `mails` must stay a list — findMails appends to it.  The old
        # `mails = set(mails)` rebind broke the next iteration with an
        # AttributeError, and `pure_links[x]` was an undefined name.
        new_mails = mails[found_before:]
        if not new_mails:
            print("NO MAILS FOUND")
        for address in new_mails:
            writer.writerow([url, address])