Skip to content

Commit

Permalink
Add progress bar
Browse files Browse the repository at this point in the history
  • Loading branch information
bradystroud committed Feb 13, 2025
1 parent d0ee76b commit c0a5979
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 25 deletions.
63 changes: 38 additions & 25 deletions scripts/generateSeoDescriptions/generate-descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import sys
import re
from ollama import generate
from tqdm import tqdm # Progress bar


def generate_seo_description(file_content):
script_dir = os.path.dirname(os.path.realpath(__file__))
Expand All @@ -12,11 +14,12 @@ def generate_seo_description(file_content):
combined_prompt = f"{prompt}\n\n{file_content}"
response = generate(model="deepseek-r1:14b", prompt=combined_prompt)

seo_description = response['response']
seo_description = response["response"]

# Remove <think>...</think> tags from reasoning models
cleaned = re.sub(r"<think>.*?</think>", "", seo_description, flags=re.DOTALL)
return cleaned
return cleaned.strip()


def check_seo_description(seo_description):
issues = []
Expand All @@ -29,58 +32,68 @@ def check_seo_description(seo_description):
issues.append("Contains odd characters *, _, or :")
return issues


def _insert_after_first_delimiter(content, field_line):
    """Return *content* with *field_line* inserted after the first ``---`` line.

    The text is split and re-joined on ``"\n"`` so that everything else,
    including a trailing newline, round-trips unchanged.

    Args:
        content: Full text of the Markdown file.
        field_line: The line to insert, without a line terminator.

    Returns:
        The updated text, or ``None`` when no line consists solely of
        ``---`` (ignoring surrounding whitespace).
    """
    lines = content.split("\n")
    for index, line in enumerate(lines):
        if line.strip() == "---":
            lines.insert(index + 1, field_line)
            return "\n".join(lines)
    return None


def main():
    """Add a ``seoDescription:`` line to every Markdown file under a directory.

    Reads the search directory from ``sys.argv[1]``, walks it for ``.md`` and
    ``.mdx`` files, and for each file that has no line starting with
    ``seoDescription:`` generates a description, validates it, and inserts it
    directly after the first ``---`` line (presumably the opening front-matter
    delimiter). Descriptions that fail validation, and files with no ``---``
    line, are appended to ``seo_issues.log`` next to this script instead of
    being written.

    Exits with status 1 when the directory argument is missing and with
    status 0 when no Markdown files are found.
    """
    if len(sys.argv) < 2:
        print("Usage: python script.py <search_dir> 🛠️")
        sys.exit(1)

    search_dir = sys.argv[1]
    script_dir = os.path.dirname(os.path.realpath(__file__))
    log_file = os.path.join(script_dir, "seo_issues.log")

    # Gather markdown files (.md and .mdx)
    markdown_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(search_dir)
        for file in files
        if file.endswith((".md", ".mdx"))
    ]

    if not markdown_files:
        print("No Markdown files found. 📭")
        sys.exit(0)

    # Iterating through tqdm advances the bar once per file on every path
    # (skip, write, log), so no branch can forget to update it.
    for md_file in tqdm(markdown_files, desc="Processing", unit="file"):
        with open(md_file, "r", encoding="utf-8") as f:
            content = f.read()

        # Skip if seoDescription already exists
        if re.search(r"^seoDescription:", content, re.MULTILINE):
            continue

        seo_desc = generate_seo_description(content)
        issues = check_seo_description(seo_desc)

        if not issues:
            # Flatten to a single line so it fits on one YAML line.
            seo_desc_single = " ".join(seo_desc.split())
            new_content = _insert_after_first_delimiter(
                content, f"seoDescription: {seo_desc_single}"
            )
            if new_content is not None:
                with open(md_file, "w", encoding="utf-8") as f:
                    f.write(new_content)
                continue
            # Nothing was inserted: leave the file untouched and report it
            # rather than rewriting it without a description.
            issues = ["No front matter delimiter (---) found"]

        with open(log_file, "a", encoding="utf-8") as log:
            log.write(f"Issues found in {md_file}:\n")
            log.write(seo_desc + "\n")
            log.write("Issues: " + ", ".join(issues) + "\n\n")

    print("✅ SEO description updates complete!")


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions scripts/generateSeoDescriptions/prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Keep it Concise: Aim for 150-160 characters.
Use Keywords: Include primary keywords naturally.
Reflect User Intent: Match the description with what users are searching for.

❌ Don't add a full stop at the end
❌ DO NOT INCLUDE any context like this in the response: "Here is the generated SEO description:"
ONLY output the generated description, NOTHING else

Expand Down

0 comments on commit c0a5979

Please sign in to comment.