-
Notifications
You must be signed in to change notification settings - Fork 1
Pr check #93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pr check #93
Changes from all commits
843ec6c
f2bf10e
1acb959
cd493ab
8720aa8
0609777
ed588ce
fae91a4
08e3017
7c24956
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
from github import Github | ||
from unidiff import PatchSet | ||
from typing import List, Dict | ||
|
||
def create_check_run(repo, sha):
    """Open a new GitHub check run for the given commit.

    The run starts in the 'queued' state with a placeholder output so the
    PR page shows immediate feedback while the AI analysis runs.

    Args:
        repo: PyGithub Repository object.
        sha: head commit SHA the check run should be attached to.

    Returns:
        The CheckRun object created by PyGithub.
    """
    placeholder_output = {
        "title": "Analyzing Changes",
        "summary": "🔍 Scanning code changes with AI...",
        "text": "This may take 20-30 seconds",
    }
    return repo.create_check_run(
        name="AI Code Review",
        head_sha=sha,
        status="queued",  # Initial status should be 'queued'
        output=placeholder_output,
    )
|
||
def update_check_run(check_run, results):
    """Publish analysis results to an existing check run.

    Transitions the run to 'in_progress', converts each analysis result
    into a GitHub annotation, then completes the run with a conclusion
    derived from the findings.

    Args:
        check_run: PyGithub CheckRun object returned by create_check_run.
        results: list of dicts with keys 'fileName', 'start_line',
            'end_line', 'severity', 'comment', 'suggestion', and
            optionally 'suggestedCode'.
    """
    # First update to in_progress so the PR page reflects activity.
    check_run.edit(
        status="in_progress",
        output={
            "title": "Processing...",
            "summary": "Analyzing code patterns"
        }
    )

    # Build one annotation per analysis result.
    annotations = []
    for result in results:
        annotations.append({
            "path": result['fileName'],
            "start_line": result['start_line'],  # REQUIRED by Checks API
            "end_line": result['end_line'],      # REQUIRED by Checks API
            "annotation_level": map_severity(result['severity']),
            "message": result['comment'],
            # NOTE(review): 'suggestedCode' is interpolated verbatim —
            # confirm upstream that it never carries sensitive data.
            "raw_details": f"Suggestion: {result['suggestion']}\n\n{result.get('suggestedCode', '')}"
        })

    # Bug fix: conclusion was hard-coded to "success" (the dynamic form was
    # commented out), so PRs with findings still showed a green check.
    # Derive it from whether any annotations were produced.
    check_run.edit(
        status="completed",
        conclusion="success" if len(annotations) == 0 else "action_required",
        output={
            "title": f"Found {len(annotations)} items",
            "summary": "AI Code Review Results",
            "annotations": annotations[:50]  # GitHub limits to 50 annotations per update
        }
    )
|
||
def map_severity(level: str) -> str:
    """Translate a reviewer severity level to a GitHub annotation level.

    Recognizes 'error', 'warning', and 'info' (case-insensitive); any
    other value falls back to 'notice'.
    """
    level_to_annotation = {
        "error": "failure",
        "warning": "warning",
        "info": "notice",
    }
    return level_to_annotation.get(level.lower(), "notice")
|
||
|
||
def parse_diff_file_line_numbers(diff_content: str) -> List[Dict]:
    """
    Parse a unified diff string and return a structured list of changes using
    actual file line numbers.

    Returns a list of dicts, each representing a file change:
        {
            "file_name": str,
            "changes": [
                {
                    "type": "added" | "removed" | "context",
                    "line_number": int,  # target_line_no for added/context,
                                         # source_line_no for removed lines.
                    "content": str
                },
                ...
            ]
        }
    """
    parsed_files = []

    for patched_file in PatchSet(diff_content):
        changes = []
        for hunk in patched_file:
            for line in hunk:
                # Pick the change type and the matching line-number source.
                if line.is_added:
                    change_type, line_num = "added", line.target_line_no
                elif line.is_removed:
                    change_type, line_num = "removed", line.source_line_no
                else:
                    change_type, line_num = "context", line.target_line_no

                # Lines without a resolvable number are skipped.
                if line_num is None:
                    continue

                changes.append({
                    "type": change_type,
                    "line_number": line_num,
                    "content": line.value.rstrip("\n"),
                })

        parsed_files.append({
            "file_name": patched_file.path,
            "changes": changes,
        })

    return parsed_files
Comment on lines
+65
to
+111
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: If 'diff_content' is overly large, initializing 'PatchSet' might be resource-intensive and cause memory issues. Severity: performance Suggestion: Consider paginating the 'diff_content' or processing it in parts to avoid potential memory overflows. |
||
|
||
|
||
def build_review_prompt_with_file_line_numbers(parsed_files: List[Dict]) -> str:
    """
    Render the parsed diff as prompt text keyed by actual file line numbers.

    Each file section lists its changed lines as
    ``[Line N] <sign> <content>`` where the sign is '+' for added lines,
    '-' for removed lines, and a space for context lines.
    """
    sign_for_type = {"added": "+", "removed": "-"}
    rendered = []

    for file_data in parsed_files:
        rendered.append(f"File: {file_data['file_name']}\n")
        rendered.append("Changed lines:")

        for change in file_data["changes"]:
            sign = sign_for_type.get(change["type"], " ")
            rendered.append(
                f"[Line {change['line_number']}] {sign} {change['content']}"
            )
        rendered.append("\n")

    return "\n".join(rendered)
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import os | ||
from openai import OpenAI | ||
from typing import List, Dict, Optional | ||
from pydantic import BaseModel, Field | ||
|
||
|
||
# Read the OpenAI credential from the environment. Fail fast with a clear
# message if it is missing, instead of letting the first API call die with
# an opaque authentication error deep inside the client library.
api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable not set.")

client = OpenAI(api_key=api_key)
|
||
class ReviewModel(BaseModel):
    # Structured-output schema passed to the OpenAI client as
    # `response_format`: the model must return a list of review "steps",
    # one per issue found in the diff.
    class Step(BaseModel):
        # A single review finding tied to a file and a prefixed line range.
        fileName: str = Field(description="The name of the file that has an issue")
        start_line_with_prefix: str = Field(description="The starting line number in the file (REQUIRED). \
If the start_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        end_line_with_prefix: str = Field(description="The ending line number in the file (REQUIRED). \
If the end_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        language: str = Field(description="The language of the code segment")
        codeSegmentToFix: str = Field(description="The code segment that needs to be fixed from code diff in diff style('+' for added, '-' for removed, or nothing for normal code)")
        comment: str = Field(description="The comment on the code segment")
        suggestion: str = Field(description="The suggestion to fix the code segment")
        # Optional: only present when the model proposes concrete replacement code.
        suggestedCode: Optional[str] = Field(None, description="The updated code segment for the fix")
        severity: str = Field(description="The severity of the issue. Can be 'error', 'warning', or 'info'")

    # All findings for the analyzed diff.
    steps: list[Step]
|
||
def analyze_code_changes(structured_diff_text: str) -> List[Dict]: | ||
""" | ||
Analyze code changes using OpenAI's GPT model | ||
Returns a list of review comments | ||
""" | ||
|
||
# Prepare the prompt for the LLM | ||
prompt = f""" | ||
Analyze the following code changes and provide detailed review comments. | ||
Focus on: | ||
- Code quality and best practices | ||
- Potential security vulnerabilities | ||
- Performance implications | ||
|
||
Important: | ||
- Provide insights in the comment section for each code segment. Provide improvements in suggestions when necessary. | ||
- Always output the codeSegmentToFix in the diff format (e.g., '+ added code', '- removed code', 'or nothing for normal code'). | ||
- If there is a new line in the codeSegmentToFix (when there are multiple lines), you MUST indicate it with the new line symbol. | ||
- Ensure that you provide all the necessary code lines in the codeSegmentToFix field. | ||
- If there are multiple comments for the same code segment, provide the comments separated by commas. | ||
|
||
CRITICAL REQUIREMENTS: | ||
- Precisely mention the position where the comment should be placed. | ||
- The codeSegmentToFix should exactly start from the start_line_with_prefix and end at the end_line_with_prefix. | ||
- Use the file-based line numbers provided in the structured diff below. | ||
- You MUST provide exact start_line_with_prefix and end_line_with_prefix numbers for each comment. | ||
- Never omit line numbers or the system will fail. | ||
|
||
Examples for start_line_with_prefix when the start_line is from new file: "+5, +2, +51, +61" | ||
Examples for start_line_with_prefix when the start_line is from old file: "-8, -1, -56, -20" | ||
|
||
Examples for end_line_with_prefix when the start_line is from new file: "+10, +2, +77, +65" | ||
Examples for end_line_with_prefix when the start_line is from old file: "-1, -5, -22, -44" | ||
|
||
Diff content: | ||
{structured_diff_text} | ||
""" | ||
|
||
# Get analysis from OpenAI | ||
completion = client.beta.chat.completions.parse( | ||
model="gpt-4o-2024-08-06", | ||
Check notice on line 67 in llm_utils.py
|
||
messages=[ | ||
{"role": "system", "content": "You are an experienced code reviewer."}, | ||
{"role": "user", "content": prompt} | ||
], | ||
response_format=ReviewModel, | ||
) | ||
Check warning on line 73 in llm_utils.py
|
||
|
||
# Parse and format the response | ||
response_pydantic= completion.choices[0].message.parsed | ||
|
||
review_steps = [] | ||
|
||
for step in response_pydantic.steps: | ||
|
||
value1 = step.start_line_with_prefix | ||
start_line = int(value1.replace("+", "").strip()) # Remove '+' and strip spaces | ||
Check failure on line 83 in llm_utils.py
|
||
|
||
value2 = step.end_line_with_prefix | ||
end_line = int(value2.replace("+", "").strip()) | ||
Check failure on line 86 in llm_utils.py
|
||
|
||
step_dict = { | ||
"fileName": step.fileName, | ||
"start_line": start_line, | ||
"start_line_with_prefix": step.start_line_with_prefix, | ||
"end_line": end_line, | ||
"end_line_with_prefix": step.end_line_with_prefix, | ||
"language": step.language, | ||
"codeSegmentToFix": step.codeSegmentToFix, | ||
"comment": step.comment, | ||
"suggestion": step.suggestion, | ||
"suggestedCode": step.suggestedCode, | ||
"severity": step.severity | ||
} | ||
review_steps.append(step_dict) | ||
|
||
# for review in review_steps: | ||
# print(review) | ||
# print("\n\n") | ||
|
||
return review_steps |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,10 @@ | |
from fastapi import FastAPI, Request, HTTPException, Header | ||
from dotenv import load_dotenv | ||
from github import Github, GithubIntegration | ||
import requests | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: Missing request timeout could lead to unresponsive waiting periods in network requests, especially when calling external APIs. Severity: warning Suggestion: Set a global timeout parameter for requests to avoid indefinite hangs and improve fault tolerance. |
||
|
||
from llm_utils import analyze_code_changes | ||
from github_utils import create_check_run, update_check_run, parse_diff_file_line_numbers, build_review_prompt_with_file_line_numbers | ||
|
||
app = FastAPI() | ||
load_dotenv() | ||
|
@@ -37,7 +41,6 @@ | |
payload = await request.body() | ||
verify_signature(payload, x_hub_signature) | ||
payload_dict = json.loads(payload) | ||
#print("Payload:", payload_dict) | ||
|
||
if "repository" in payload_dict: | ||
owner = payload_dict["repository"]["owner"]["login"] | ||
|
@@ -47,8 +50,85 @@ | |
# Check if it's a pull_request event with action 'opened' | ||
if payload_dict.get("pull_request") and payload_dict.get("action") == "opened": | ||
pr_number = payload_dict["pull_request"]["number"] | ||
issue = repo.get_issue(number=pr_number) | ||
issue.create_comment( | ||
"Thanks for opening a new PR! Please follow our contributing guidelines to make your PR easier to review." | ||
) | ||
head_sha = payload_dict['pull_request']['head']['sha'] | ||
|
||
check_run = None # Initialize outside try block | ||
|
||
try: | ||
# Create initial check run | ||
check_run = create_check_run(repo, head_sha) | ||
|
||
#newly added to get pull request diff | ||
pull_request = repo.get_pull(pr_number) | ||
diff_url = pull_request.diff_url | ||
response = requests.get(diff_url) | ||
Check failure on line 64 in main.py
|
||
|
||
# Parse the diff to extract actual file line numbers. | ||
parsed_files = parse_diff_file_line_numbers(response.text) | ||
|
||
# Build a structured diff text for the prompt. | ||
structured_diff_text = build_review_prompt_with_file_line_numbers(parsed_files) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: Potential security risk if 'parsed_files' contains unverified data before being processed or logged. Severity: warning Suggestion: Ensure that 'parsed_files' data is sanitized and safe prior to further manipulation or logging. |
||
|
||
print("Before llm call...") | ||
|
||
issue = repo.get_issue(number=pr_number) | ||
issue.create_comment( | ||
"Hi, I am a code reviewer bot. I will analyze the PR and provide detailed review comments." | ||
) | ||
|
||
# Analyze code changes (your existing function) | ||
review_list = analyze_code_changes(structured_diff_text) | ||
Check failure on line 80 in main.py
|
||
|
||
# Update check run with results | ||
update_check_run( | ||
check_run=check_run, | ||
results=review_list | ||
) | ||
|
||
# Post each review item as a comment on the PR | ||
for review in review_list: | ||
|
||
print(review) | ||
|
||
prog_lang = review.get('language', '') # Default to an empty string if 'language' is missing | ||
comment_body = ( | ||
f"**File:** `{review['fileName']}`\n\n" | ||
f"Comments on lines `{review['start_line_with_prefix']}` to `{review['end_line_with_prefix']}`\n" | ||
f"```diff\n{review['codeSegmentToFix']}\n```\n" | ||
f"**Issue:** {review['comment']}\n\n" | ||
f"**Severity:** {review['severity']}\n\n" | ||
f"**Suggestion:** {review['suggestion']}\n" | ||
) | ||
|
||
# If suggestedCode exists, add it to the comment | ||
if review.get("suggestedCode"): | ||
comment_body += f"```{prog_lang}\n{review['suggestedCode']}\n```" | ||
|
||
try: | ||
res = issue.create_comment(comment_body) | ||
if not res: | ||
print("Failed to create comment on the issue.") | ||
Check warning on line 110 in main.py
|
||
except Exception as e: | ||
print(f"Error when commenting on issue: {e}") | ||
|
||
except Exception as e: | ||
# Only update check run if it was successfully created | ||
if check_run is not None: | ||
check_run.edit( | ||
status="completed", | ||
conclusion="failure", | ||
output={ | ||
"title": "Analysis Failed", | ||
"summary": f"Error: {str(e)}" | ||
} | ||
) | ||
else: | ||
# Fallback error handling | ||
print(f"Critical failure before check run creation: {str(e)}") | ||
|
||
raise | ||
|
||
print("After llm call...") | ||
|
||
|
||
return {} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
unidiff | ||
PyGithub | ||
openai | ||
pydantic |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Issue: The review claims the field 'text' in the 'output' dictionary is not part of GitHub API's check run output format, but 'text' is in fact a documented optional property of the Checks API output object (alongside the required 'title' and 'summary').
Severity: info
Suggestion: No change required; the 'text' field may be kept in the check run output.