
Commit f2d7124

ready for 1.0! :D
1 parent 1d9cdc9

3 files changed, +21 -20 lines

.github/workflows/build.yml

Lines changed: 3 additions & 3 deletions
@@ -47,10 +47,10 @@ jobs:
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
       with:
-        tag_name: v1.0.0-rc2
-        release_name: Release v1.0.0-rc2
+        tag_name: v1.0.0
+        release_name: Release v1.0.0
         body: |
-          Cleaned up some unused code that I forgot about. Sigh.
+          1.0! Onto feature updates!
         draft: false
         prerelease: false
     - name: Upload Release Asset

functions.py

Lines changed: 15 additions & 16 deletions
@@ -1,6 +1,7 @@
 from bs4 import BeautifulSoup
 import re
 
+# duplicates content of content_grabber, but is currently used outside of that function to filter the list of original links so that the lengths of the original list of links and the updated list of links matches. Will be implemented into content_grabber in the future.
 def link_filter(links):
 
     storage_list =[]
@@ -18,28 +19,37 @@ def link_filter(links):
 def content_grabber(html):
 
     i = 0
-
+    # create an object from the content of the HTML file
     soup = BeautifulSoup(html, 'html.parser')
+    # initialize an empty list to store the link content values
     link_content_list = []
 
     for link in soup.findAll('a'):
+        # filter out the links that don't need to be updated
         if 'indiana.edu' in link['href'] or 'iu.edu' in link['href']:
             if "mailto:" not in link['href'] and ".png" not in link['href'] and "tel:" not in link['href'] and ".jpg" not in link['href'] and 'https://one.iu.edu' not in link['href'] and "machform" not in link['href']:
+                # case if link is not a button
+                # this code chops and screws the content and appends the anchor tag to the end of the content if it exists
                 if len(link.contents) == 1:
                     if '#' in link['href']:
                         url = link['href'].split('#')[0]
                         anchor = '#' + link['href'].split('#')[1]
                         dirty_content = str(link.contents[0]).replace(" ", "-").lower().strip("\'")
+                        # this regex removes non-alphanum characters from the content
                         char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
                         hypen_content = char_regex.sub('', dirty_content)
+                        # cleanup step to remove double hyphens when an illegal character is removed
                         clean_content = hypen_content.replace('--', '-') + anchor
                         link_content_list.append(clean_content)
+                    # does the same as the above for links with no anchor tags
                     else:
                         dirty_content = str(link.contents[0]).strip().replace(" ", "-").lower().strip().strip("\n").strip("\'")
                         char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
                         hypen_content = char_regex.sub('', dirty_content)
                         clean_content = hypen_content.replace('--', '-')
                         link_content_list.append(clean_content)
+                # case if link IS a button
+                # rinse and repeat
                 else:
                     if '#' in link['href']:
                         url = link['href'].split('#')[0]
@@ -55,26 +65,12 @@ def content_grabber(html):
                         hypen_content = char_regex.sub('', dirty_content)
                         clean_content = hypen_content.replace('--', '-')
                         link_content_list.append(clean_content)
-                    # if '#' in link['href']:
-                    # url = link['href'].split('#')[0]
-                    # anchor = '#' + link['href'].split('#')[1]
-                    # dirty_content = str(link.contents[2]).strip().replace(" ", "-").lower().strip().strip("\n").strip("\'") + "-button"
-                    # char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
-                    # hypen_content = char_regex.sub('', dirty_content)
-                    # clean_content = hypen_content.replace('--', '-') + anchor
-                    # link_content_list.append(clean_content)
-                    # else:
-                    # print(link.contents)
-                    # dirty_content = str(link.contents[0]).strip().replace(" ", "-").lower().strip().strip("\n").strip("\'") + "-button"
-                    # char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
-                    # hypen_content = char_regex.sub('', dirty_content)
-                    # clean_content = hypen_content.replace('--', '-')
-                    # link_content_list.append(clean_content)
         else:
             pass
 
     return link_content_list
 
+# function that takes a list of created UTM links and appends the content values to them to complete the process
 def utm_content_appender(utm_link_list, content_list):
 
     i = 0
@@ -85,6 +81,7 @@ def utm_content_appender(utm_link_list, content_list):
 
     return utm_link_list
 
+# function that goes through the HTML file and replaces the original links with their UTM-laden counterparts
 def HTML_link_replacer(html, original_link_list, new_link_list):
 
     i = 0
@@ -95,6 +92,7 @@ def HTML_link_replacer(html, original_link_list, new_link_list):
 
     return html
 
+# a hopefully soon-to-be-unneeded function that removes the random quotation mark that is coming along with the links out of the HTML file for some reason
 def quote_stripper(links):
 
     i = 0
@@ -105,6 +103,7 @@ def quote_stripper(links):
 
     return links
 
+# a function used to grab the anchor tag content from original links so that it can be appended to the end of the new link
 def anchor_ripper(links):
 
     final_anchor_links = []
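
The cleaning steps that the new comments in content_grabber describe are spread across several nested branches, so here is a minimal standalone sketch of the same pipeline: hyphenate spaces, lowercase, strip non-alphanumeric characters, collapse the double hyphens that removal can leave behind, and re-attach the #anchor when the href has one. The helper name make_content_slug and the sample strings are illustrative assumptions, not part of functions.py.

import re

def make_content_slug(link_text, href):
    # hyphenate spaces and lowercase, roughly as content_grabber does to link.contents[0]
    dirty_content = str(link_text).strip().replace(" ", "-").lower().strip("'")
    # remove anything that is not a letter, digit, or hyphen
    char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
    hyphen_content = char_regex.sub('', dirty_content)
    # cleanup step: deleting an illegal character can leave '--' behind
    clean_content = hyphen_content.replace('--', '-')
    # if the original href carried an anchor, append it to the cleaned content
    if '#' in href:
        clean_content += '#' + href.split('#')[1]
    return clean_content

print(make_content_slug("Apply Now!", "https://iu.edu/apply#deadlines"))  # apply-now#deadlines
print(make_content_slug("Cost & Aid", "https://iu.edu/cost"))             # cost-aid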

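The comment added above link_filter notes that the original and updated link lists have to stay the same length; the sketch below shows why, pairing each original link with its UTM version by position when swapping them into the HTML, along the lines of what the new HTML_link_replacer comment describes. The function name and sample markup are assumptions for illustration only.

def replace_links(html, original_links, new_links):
    # walk the two lists in lockstep; mismatched lengths would mis-pair links
    for original, new in zip(original_links, new_links):
        html = html.replace(original, new)
    return html

page = '<a href="https://iu.edu/apply">Apply</a>'
print(replace_links(page,
                    ['https://iu.edu/apply'],
                    ['https://iu.edu/apply?utm_source=newsletter&utm_medium=email']))
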
link_editor.py

Lines changed: 3 additions & 1 deletion
@@ -42,6 +42,7 @@ def main():
 
     final_utm_content_list = content_grabber(working_html)
 
+    # initialize the list that will hold the links with final UTM parameters attached minus the content parameter
     working_utm_links = []
 
     # checks to see if there is an existing query string in the source URL. if so, the UTM parameters are added onto that existing query string rather than added as a new query string
@@ -63,7 +64,7 @@ def main():
     with open(save_path, 'w') as save_file:
         save_file.write(old_html_head + final_body_html + old_html_foot)
 
-    #write the UTM parameters and links to a CSV file
+    # write the UTM parameters and links to a CSV file
     if os.path.isfile("./2024-2025_EMC_HTML_UTM_links.csv") == True:
         with open('2024-2025_EMC_HTML_UTM_links.csv', 'a', newline='') as file:
             writer = csv.writer(file)
@@ -76,6 +77,7 @@ def main():
         for i in range(len(final_replace_links)):
             writer.writerow([utm_unit, utm_campaign, final_replace_links[i], utm_source, 'email', final_utm_content_list[i], final_utm_links[i]])
 
+    # reset lists so that the program can be run again without restarting
     working_utm_links = []
     working_replace_links = []
     final_replace_links = []
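
The two comments in this file's hunks describe the rest of the flow: UTM parameters are joined onto an existing query string with '&' rather than starting a new one with '?', and the finished rows are appended to the CSV if it already exists. Below is a rough sketch of both behaviors, with hypothetical function names, a hypothetical header row, and sample values; only the column order mirrors the writerow call above.

import csv
import os

def add_utm(url, utm_params):
    # reuse an existing query string if the source URL already has one
    separator = '&' if '?' in url else '?'
    return url + separator + utm_params

def write_rows(rows, path='2024-2025_EMC_HTML_UTM_links.csv'):
    new_file = not os.path.isfile(path)
    with open(path, 'a', newline='') as file:
        writer = csv.writer(file)
        if new_file:
            # hypothetical header row; the real script may label these differently
            writer.writerow(['unit', 'campaign', 'original link', 'source',
                             'medium', 'content', 'utm link'])
        writer.writerows(rows)

original = 'https://iu.edu/apply?term=fall'
utm_link = add_utm(original, 'utm_source=newsletter&utm_medium=email')
write_rows([['EMC', 'fall-2024', original, 'newsletter', 'email', 'apply-now', utm_link]])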
