-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathemail-csv-extractor
31 lines (27 loc) · 1.1 KB
/
email-csv-extractor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import glob
import pandas as pd
import re
# Searches the current directory for CSV files and extracts the values of each file's 'to' column; column names are matched case-insensitively.
def import_emails():
    """Collect the raw values of the 'to' column from every CSV in the cwd.

    Every ``./*.csv`` file is read with pandas. Column headers are compared
    after stripping surrounding whitespace and lowercasing, so ``To``,
    ``TO`` and ``" to "`` all count as the 'to' column. If several headers
    in one file normalize to ``to``, the values of each are collected.

    Files that are completely empty, or that have no 'to' column, are
    skipped. Missing cells are dropped.

    Returns:
        list[str]: the non-missing 'to' cell values, file by file in sorted
        path order. Cells are read as text, so purely numeric columns come
        back as strings rather than numbers.
    """
    emails = []
    # glob returns paths in arbitrary order; sort for a reproducible result.
    for name in sorted(glob.glob("./*.csv")):
        try:
            # dtype=str keeps every cell as text so downstream regex matching
            # never receives an int/float from a numeric-looking column.
            df = pd.read_csv(name, dtype=str)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no columns at all, hence no 'to' column.
            continue
        headers = [str(col).strip().lower() for col in df.columns]
        # Select by position: selecting by label would return a DataFrame
        # (not a Series) when two headers normalize to the same name.
        for position, header in enumerate(headers):
            if header == 'to':
                emails.extend(df.iloc[:, position].dropna().tolist())
    return emails
# Searches the extracted 'to' values for email addresses using a regex and removes duplicates.
def find_matches():
    """Return the unique email addresses found in the imported 'to' values.

    Each value returned by ``import_emails()`` is scanned with a regular
    expression and every address in the value is collected, so a cell such
    as ``"a@x.com, b@x.com"`` contributes both addresses. Matching is
    case-insensitive, so ``Alice@Example.com`` is captured whole rather
    than being cut down to its lowercase tail. Values that are not strings
    are skipped.

    Returns:
        list[str]: distinct addresses, in the order they were first seen.
    """
    # Raw string: the pattern contains "\.", which is an invalid escape
    # sequence in a normal string literal. re.ASCII keeps IGNORECASE from
    # folding non-ASCII look-alikes (e.g. the Kelvin sign) onto ASCII letters.
    pattern = re.compile(
        r"([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)",
        re.IGNORECASE | re.ASCII,
    )
    emails = import_emails()
    # A dict is used as an insertion-ordered set so the output order is
    # stable from run to run (a plain set would shuffle it).
    unique = {}
    for email in emails:
        if not isinstance(email, str):
            continue
        for found in pattern.finditer(email):
            unique.setdefault(found.group(0), None)
    return list(unique)
#Outputs the matches to a text file.
def write_output(output_path="email-list.txt", separator=";"):
    """Write the addresses from ``find_matches()`` to a text file on one line.

    Args:
        output_path: Destination file; it is created or overwritten.
            Defaults to ``"email-list.txt"`` in the current working directory.
        separator: String placed between consecutive addresses.
            Defaults to ``";"``.
    """
    matches = find_matches()
    # Explicit encoding so the result does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as filew:
        filew.write(separator.join(matches))
# Script entry point: runs the full pipeline (read CSVs, extract addresses,
# write "email-list.txt"). There is no __main__ guard, so this executes
# whenever the file's top-level code runs.
write_output()