ready for 1.0! :D

ferretbreeder · ferretbreeder · commit f2d712489ae5 · 2025-01-16T13:59:12.000-05:00
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -47,10 +47,10 @@ jobs:
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
       with:
-        tag_name: v1.0.0-rc2
-        release_name: Release v1.0.0-rc2
+        tag_name: v1.0.0
+        release_name: Release v1.0.0
         body: |
-          Cleaned up some unused code that I forgot about. Sigh.
+          1.0! Onto feature updates!
         draft: false
         prerelease: false
     - name: Upload Release Asset
diff --git a/functions.py b/functions.py
@@ -1,6 +1,7 @@
 from bs4 import BeautifulSoup
 import re
 
+# Duplicates logic from content_grabber, but is currently called outside of that function to filter the original list of links so that its length matches the length of the updated list of links. Will be folded into content_grabber in the future.
 def link_filter(links):
 
     storage_list =[]
@@ -18,28 +19,37 @@ def link_filter(links):
 def content_grabber(html):
 
     i = 0
-
+    # create an object from the content of the HTML file
     soup = BeautifulSoup(html, 'html.parser')
+    # initialize an empty list to store the link content values
     link_content_list = []
 
     for link in soup.findAll('a'):
+        # filter out the links that don't need to be updated
         if 'indiana.edu' in link['href'] or 'iu.edu' in link['href']:
             if "mailto:" not in link['href'] and ".png" not in link['href'] and "tel:" not in link['href'] and ".jpg" not in link['href'] and 'https://one.iu.edu' not in link['href'] and "machform" not in link['href']:
+                # case if link is not a button
+                # this code normalizes the link content (spaces become hyphens, text is lowercased, illegal characters are stripped) and appends the URL's anchor to the end of the content if one exists
                 if len(link.contents) == 1:
                     if '#' in link['href']:
                         url = link['href'].split('#')[0]
                         anchor = '#' + link['href'].split('#')[1]
                         dirty_content = str(link.contents[0]).replace(" ", "-").lower().strip("\'")
+                        # this regex removes every character that is not a letter, digit, or hyphen (along with any whitespace around it) from the content
                         char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
                         hypen_content = char_regex.sub('', dirty_content)
+                        # cleanup step to remove double hyphens when an illegal character is removed
                         clean_content = hypen_content.replace('--', '-') + anchor
                         link_content_list.append(clean_content)
+                    # does the same as the above for links with no anchor tags
                     else:
                         dirty_content = str(link.contents[0]).strip().replace(" ", "-").lower().strip().strip("\n").strip("\'")
                         char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
                         hypen_content = char_regex.sub('', dirty_content)
                         clean_content = hypen_content.replace('--', '-')
                         link_content_list.append(clean_content)
+                # case if link IS a button
+                # applies the same cleanup steps as the non-button case
                 else:
                     if '#' in link['href']:
                         url = link['href'].split('#')[0]
@@ -55,26 +65,12 @@ def content_grabber(html):
                         hypen_content = char_regex.sub('', dirty_content)
                         clean_content = hypen_content.replace('--', '-')
                         link_content_list.append(clean_content)
-                # if '#' in link['href']:
-                #     url = link['href'].split('#')[0]
-                #     anchor = '#' + link['href'].split('#')[1]
-                #     dirty_content = str(link.contents[2]).strip().replace(" ", "-").lower().strip().strip("\n").strip("\'") + "-button"
-                #     char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
-                #     hypen_content = char_regex.sub('', dirty_content)
-                #     clean_content = hypen_content.replace('--', '-') + anchor
-                #     link_content_list.append(clean_content)
-                # else:
-                #     print(link.contents)
-                #     dirty_content = str(link.contents[0]).strip().replace(" ", "-").lower().strip().strip("\n").strip("\'") + "-button"
-                #     char_regex = re.compile(r'\s*[^a-zA-Z0-9\-]\s*')
-                #     hypen_content = char_regex.sub('', dirty_content)
-                #     clean_content = hypen_content.replace('--', '-')
-                #     link_content_list.append(clean_content)
         else:
             pass
 
     return link_content_list
 
+# function that takes a list of created UTM links and appends the content values to them to complete the process
 def utm_content_appender(utm_link_list, content_list):
 
     i = 0
@@ -85,6 +81,7 @@ def utm_content_appender(utm_link_list, content_list):
 
     return utm_link_list
 
+# function that goes through the HTML file and replaces the original links with their UTM-laden counterparts
 def HTML_link_replacer(html, original_link_list, new_link_list):
 
     i = 0
@@ -95,6 +92,7 @@ def HTML_link_replacer(html, original_link_list, new_link_list):
 
     return html
 
+# a hopefully soon-to-be-unneeded function that removes the stray quotation mark that, for some reason, comes along with the links pulled out of the HTML file
 def quote_stripper(links):
 
     i = 0
@@ -105,6 +103,7 @@ def quote_stripper(links):
 
     return links
 
+# a function used to grab the anchor tag content from original links so that it can be appended to the end of the new link
 def anchor_ripper(links):
 
     final_anchor_links = []
diff --git a/link_editor.py b/link_editor.py
@@ -42,6 +42,7 @@ def main():
 
         final_utm_content_list = content_grabber(working_html)
 
+        # initialize the list that will hold the links with final UTM parameters attached minus the content parameter
         working_utm_links = []
 
         # checks to see if there is an existing query string in the source URL. if so, the UTM parameters are added onto that existing query string rather than added as a new query string
@@ -63,7 +64,7 @@ def main():
         with open(save_path, 'w') as save_file:
             save_file.write(old_html_head + final_body_html + old_html_foot)
 
-    #write the UTM parameters and links to a CSV file
+    # write the UTM parameters and links to a CSV file
     if os.path.isfile("./2024-2025_EMC_HTML_UTM_links.csv") == True:
         with open('2024-2025_EMC_HTML_UTM_links.csv', 'a', newline='') as file:
             writer = csv.writer(file)
@@ -76,6 +77,7 @@ def main():
             for i in range(len(final_replace_links)):
                 writer.writerow([utm_unit, utm_campaign, final_replace_links[i], utm_source, 'email', final_utm_content_list[i], final_utm_links[i]])
     
+    # reset lists so that the program can be run again without restarting
     working_utm_links = []
     working_replace_links = []
     final_replace_links = []