# pdf.py
import os

import PyPDF2
import streamlit as st
from groq import Groq

# Read the Groq API key from the environment instead of hardcoding a secret
# in source control.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")


def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF file.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() can return None for pages without a text layer
            # (e.g. scanned images), so fall back to an empty string.
            text += page.extract_text() or ""
    return text
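
# Minimal usage sketch (illustrative; assumes a local "resume.pdf" exists):
#
#     text = extract_text_from_pdf("resume.pdf")
#     print(text[:200])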


def analyze_resume_with_llama(text):
    """
    Analyze the extracted resume text using the Groq API.
    """
    client = Groq(api_key=GROQ_API_KEY)
    prompt = f"""
    Analyze the following resume text and identify key domains related to the skills, projects, internships, and interests mentioned.
    Limit the identified domains to the 6 most important ones. Provide only the domain names, without any numbering, descriptions, or explanations.

    Resume Text: {text}
    """
    completion = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=512,
        top_p=1,
        stream=True,
        stop=None,
    )
    # Accumulate the streamed response chunk by chunk.
    analysis_result = ""
    for chunk in completion:
        analysis_result += chunk.choices[0].delta.content or ""
    return analysis_result
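
# Illustrative sketch (not executed) of the expected model output, one domain
# per line, which extract_and_limit_domains() below parses:
#
#     Machine Learning
#     Web Development
#     Cloud Computing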


def extract_domains_from_pdf(pdf_path):
    """
    Analyze a PDF resume and extract the most important domains.
    """
    if not os.path.exists(pdf_path):
        st.error("The specified PDF file does not exist.")
        return []
    resume_text = extract_text_from_pdf(pdf_path)
    analysis = analyze_resume_with_llama(resume_text)
    # The prompt asks the model for 6 domains; max_domains=15 is only a loose
    # upper bound in case the model over-generates.
    domains = extract_and_limit_domains(analysis, max_domains=15)
    return domains


def extract_and_limit_domains(analysis_result, max_domains=15):
    """
    Extract domain names from the analysis result, keeping at most max_domains.
    """
    domains = []
    for line in analysis_result.split("\n"):
        domain = line.strip()
        # Short lines (five words or fewer) are treated as domain names;
        # longer lines are assumed to be explanatory prose and are skipped.
        if domain and len(domain.split()) <= 5:
            domains.append(domain)
    return domains[:max_domains]
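
# Quick example (illustrative):
#
#     extract_and_limit_domains(
#         "Machine Learning\nHere are the key domains from the resume text\nData Science"
#     )
#     # -> ['Machine Learning', 'Data Science']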


def load_job_listings_from_file(file_path="job_listings.txt"):
    """
    Load job listings from a text file.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()
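
# The listings file format is an assumption; any plain-text dump works, e.g.
# one block per job:
#
#     Job Title: Data Analyst
#     Company: ExampleCorp
#     Description: SQL, Python, dashboards, stakeholder reporting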


def analyze_resume_with_job_listings(resume_text, job_listings_text):
    """
    Analyze the resume against the job listings using the Groq API.
    """
    client = Groq(api_key=GROQ_API_KEY)
    prompt = f"""
    You are an AI job analysis assistant. Below is a resume and a list of job listings.
    Your tasks are:
    1. For each job listing, analyze the skills required based on the job description.
    2. Compare the required skills with the resume.
    3. For each job listing, identify:
       - Strengths: The skills the user possesses that match the job requirements.
       - Areas of Improvement: The skills the user lacks based on the job requirements.
    4. Categorize each job into:
       - Must Apply
       - Good Fit
       - Neutral
       - Doesn't Align
    5. Provide the output in a clean, structured format, clearly listing:
       - Domain
       - Job Title
       - Company
       - Category
       - Strengths
       - Areas of Improvement
       - Reason for Categorization
    6. Make sure all the job listings are covered.

    Resume:
    {resume_text}

    Job Listings:
    {job_listings_text}
    """
    completion = client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=2048,
        top_p=1,
        stream=True,
        stop=None,
    )
    analysis_result = ""
    for chunk in completion:
        analysis_result += chunk.choices[0].delta.content or ""

    # Save the result to a text file and return it for immediate display.
    output_file = "job_analysis_results.txt"
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(analysis_result)
    return analysis_result
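
# Usage sketch (illustrative; "resume.pdf" and the default listings file are
# assumptions):
#
#     resume_text = extract_text_from_pdf("resume.pdf")
#     listings = load_job_listings_from_file()
#     report = analyze_resume_with_job_listings(resume_text, listings)
#     display_jobs_by_category(report)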


def display_jobs_by_category(content):
    """
    Displays job analysis results in a structured format using Streamlit.

    Args:
        content (str): The content of the job analysis results.
    """
    try:
        for line in content.split('\n'):
            line = line.strip()
            if not line:
                continue
            if line.startswith("**") and line.endswith("**"):
                # A **bold** line marks a new category heading.
                st.header(line.replace("**", "").strip())
            elif line[:1].isdigit() and line[1:2] == ".":
                # A numbered line ("1. ...") marks a job title; slicing with
                # [:1]/[1:2] avoids IndexError on very short lines.
                parts = line.split('. ', 1)
                if len(parts) == 2:
                    st.subheader(parts[1].strip())
            elif line.startswith('* '):
                parts = line.replace('* ', '').split(': ', 1)
                if len(parts) == 2:
                    key, value = parts
                    st.markdown(f"**{key}:** {value}")
            elif line.startswith('---'):
                st.markdown("---")
    except Exception as e:
        st.error(f"Error parsing job listings: {str(e)}")
        st.text("Content that caused error:")
        st.code(content)
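

# End-to-end sketch (illustrative only; both file names are assumptions).
# Run with `streamlit run pdf.py` so the st.* calls render in the browser.
if __name__ == "__main__":
    RESUME_PATH = "resume.pdf"          # assumed local file
    LISTINGS_PATH = "job_listings.txt"  # assumed local file

    domains = extract_domains_from_pdf(RESUME_PATH)
    st.write("Detected domains:", domains)

    if domains and os.path.exists(LISTINGS_PATH):
        resume_text = extract_text_from_pdf(RESUME_PATH)
        listings = load_job_listings_from_file(LISTINGS_PATH)
        report = analyze_resume_with_job_listings(resume_text, listings)
        display_jobs_by_category(report)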