You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: functions.py
+15-16Lines changed: 15 additions & 16 deletions
Original file line number
Diff line number
Diff line change
@@ -1,6 +1,7 @@
1
1
frombs4importBeautifulSoup
2
2
importre
3
3
4
+
# Duplicates logic from content_grabber, but is currently used outside of that function to filter the list of original links so that the original list of links and the updated list of links have matching lengths. It will be folded into content_grabber in the future.
4
5
deflink_filter(links):
5
6
6
7
storage_list=[]
@@ -18,28 +19,37 @@ def link_filter(links):
18
19
defcontent_grabber(html):
19
20
20
21
i=0
21
-
22
+
# create an object from the content of the HTML file
22
23
soup=BeautifulSoup(html, 'html.parser')
24
+
# initialize an empty list to store the link content values
23
25
link_content_list= []
24
26
25
27
forlinkinsoup.findAll('a'):
28
+
# filter out the links that don't need to be updated
# a hopefully soon-to-be-unneeded function that strips the stray quotation mark that, for some reason, comes along with the links extracted from the HTML file
98
96
defquote_stripper(links):
99
97
100
98
i=0
@@ -105,6 +103,7 @@ def quote_stripper(links):
105
103
106
104
returnlinks
107
105
106
+
# a function used to grab the anchor tag content from original links so that it can be appended to the end of the new link
# initialize the list that will hold the links with final UTM parameters attached minus the content parameter
45
46
working_utm_links= []
46
47
47
48
# Check whether the source URL already has a query string. If so, the UTM parameters are appended to that existing query string rather than added as a new query string.
0 commit comments