diff --git a/authenticate_github.py b/authenticate_github.py
new file mode 100644
index 0000000..cf07a78
--- /dev/null
+++ b/authenticate_github.py
@@ -0,0 +1,40 @@
+import os
+import hmac
+import hashlib
+from fastapi import HTTPException
+from github import Github, GithubIntegration
+
+
+APP_ID = os.environ.get("APP_ID")
+if not APP_ID:
+    raise ValueError("APP_ID not set")
+
+WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
+if not WEBHOOK_SECRET:
+    raise ValueError("WEBHOOK_SECRET not set")
+
+PRIVATE_KEY_PATH = os.environ.get("PRIVATE_KEY_PATH")
+if not PRIVATE_KEY_PATH:
+    raise ValueError("PRIVATE_KEY_PATH not set")
+
+try:
+    with open(PRIVATE_KEY_PATH) as fin:
+        private_key = fin.read()
+except FileNotFoundError:
+    raise FileNotFoundError("Private key file not found. Ensure PRIVATE_KEY_PATH is correctly set.")
+
+github_integration = GithubIntegration(APP_ID, private_key)
+
+def generate_hash_signature(secret: bytes, payload: bytes, digest_method=hashlib.sha1):
+    return hmac.new(secret, payload, digest_method).hexdigest()
+
+def verify_signature(payload: bytes, x_hub_signature: str):
+    secret = WEBHOOK_SECRET.encode("utf-8")
+    expected_signature = f"sha1={generate_hash_signature(secret, payload)}"
+    if not x_hub_signature or not hmac.compare_digest(expected_signature, x_hub_signature):
+        raise HTTPException(status_code=401, detail="Invalid webhook signature")
+
+def connect_repo(owner: str, repo_name: str):
+    installation_id = github_integration.get_installation(owner, repo_name).id
+    access_token = github_integration.get_access_token(installation_id).token
+    return Github(login_or_token=access_token).get_repo(f"{owner}/{repo_name}")
\ No newline at end of file
diff --git a/github_utils.py b/github_utils.py
new file mode 100644
index 0000000..a33d730
--- /dev/null
+++ b/github_utils.py
@@ -0,0 +1,138 @@
+from github import Github
+from unidiff import PatchSet
+from typing import List, Dict
+
+def create_check_run(repo, sha):
+    """Create a check run using the modern PyGithub API"""
+ return repo.create_check_run( + name="AI Code Review", + head_sha=sha, + status="queued", # Initial status should be 'queued' + output={ + "title": "Analyzing Changes", + "summary": "🔍 Scanning code changes with AI...", + "text": "This may take 20-30 seconds" + } + ) + +def update_check_run(check_run, results): + """Update check run with proper status transitions""" + # First update to in_progress + check_run.edit( + status="in_progress", + output={ + "title": "Processing...", + "summary": "Analyzing code patterns" + } + ) + + # Then update with final results + annotations = [] + for result in results: + # Extract line numbers from your analysis results + annotation = { + "path": result['fileName'], + "start_line": result['start_line'], # REQUIRED + "end_line": result['end_line'], # REQUIRED + "annotation_level": map_severity(result['severity']), + "message": result['comment'], + "raw_details": f"Suggestion: {result['suggestion']}\n\n{result.get('suggestedCode', '')}" + } + + annotations.append(annotation) + + check_run.edit( + status="completed", + # conclusion="success" if len(annotations) == 0 else "action_required", + conclusion="success", + + output={ + "title": f"Found {len(annotations)} items", + "summary": "AI Code Review Results", + "annotations": annotations[:50] # GitHub limits to 50 annotations per update + } + ) + +def map_severity(level: str) -> str: + """Map custom severity levels to GitHub annotation levels""" + return { + "error": "failure", + "warning": "warning", + "info": "notice" + }.get(level.lower(), "notice") + + +def parse_diff_file_line_numbers(diff_content: str) -> List[Dict]: + """ + Parse a unified diff string and return a structured list of changes using + actual file line numbers. + + Returns a list of dicts, each representing a file change: + { + "file_name": str, + "changes": [ + { + "type": "added" | "removed" | "context", + "line_number": int, # For added or context lines, this is target_line_no. 
+ # For removed lines, use source_line_no. + "content": str + }, + ... + ] + } + """ + patch = PatchSet(diff_content) + parsed_files = [] + + for patched_file in patch: + file_info = { + "file_name": patched_file.path, + "changes": [] + } + for hunk in patched_file: + for line in hunk: + # Decide which line number to use based on change type. + if line.is_added or not line.is_removed: + line_num = line.target_line_no + else: + line_num = line.source_line_no + + if line_num is None: + continue # Skip lines without a valid number + + # Append each changed line along with its file-based line number. + file_info["changes"].append({ + "type": "added" if line.is_added else "removed" if line.is_removed else "context", + "line_number": line_num, + "content": line.value.rstrip("\n") + }) + parsed_files.append(file_info) + + return parsed_files + + +def build_review_prompt_with_file_line_numbers(parsed_files: List[Dict]) -> str: + """ + Create a prompt that includes the diff using actual file line numbers. 
+ """ + prompt_lines = [] + + for file_data in parsed_files: + file_name = file_data["file_name"] + prompt_lines.append(f"File: {file_name}\n") + prompt_lines.append("Changed lines:") + + for change in file_data["changes"]: + # Mark added lines with +, removed with -, context with a space + sign = ( + "+" if change["type"] == "added" else + "-" if change["type"] == "removed" else + " " + ) + prompt_lines.append( + f"[Line {change['line_number']}] {sign} {change['content']}" + ) + prompt_lines.append("\n") + + return "\n".join(prompt_lines) + diff --git a/llm_utils.py b/llm_utils.py new file mode 100644 index 0000000..4288294 --- /dev/null +++ b/llm_utils.py @@ -0,0 +1,109 @@ +import os +from openai import OpenAI +from typing import List, Dict, Optional +from pydantic import BaseModel, Field + + +api_key = os.environ.get('OPENAI_API_KEY') +if not api_key: + raise ValueError("OPENAI_API_KEY is not set") + +client = OpenAI(api_key=api_key) + +class ReviewModel(BaseModel): + class Step(BaseModel): + fileName: str = Field(description="The name of the file that has an issue") + start_line_with_prefix: str = Field(description="The starting line number in the file (REQUIRED). \ + If the start_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix") + end_line_with_prefix: str = Field(description="The ending line number in the file (REQUIRED). 
\ + If the end_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix") + language: str = Field(description="The language of the code segment") + codeSegmentToFix: str = Field(description="The code segment that needs to be fixed from code diff in diff style('+' for added, '-' for removed, or nothing for normal code)") + comment: str = Field(description="The comment on the code segment") + suggestion: str = Field(description="The suggestion to fix the code segment") + suggestedCode: Optional[str] = Field(None, description="The updated code segment for the fix") + severity: str = Field(description="The severity of the issue. Can be 'error', 'warning', or 'info'") + + steps: list[Step] + +def analyze_code_changes(structured_diff_text: str) -> List[Dict]: + """ + Analyze code changes using OpenAI's GPT model + Returns a list of review comments + """ + + # Prepare the prompt for the LLM + prompt = f""" + Analyze the following code changes and provide detailed review comments. + Focus on: + - Code quality and best practices + - Potential security vulnerabilities + - Performance implications + + Important: + - Provide insights in the comment section for each code segment. Provide improvements in suggestions when necessary. + - Always output the codeSegmentToFix in the diff format (e.g., '+ added code', '- removed code', 'or nothing for normal code'). + - If there is a new line in the codeSegmentToFix (when there are multiple lines), you MUST indicate it with the new line symbol. + - Ensure that you provide all the necessary code lines in the codeSegmentToFix field. + - If there are multiple comments for the same code segment, provide the comments separated by commas. + + CRITICAL REQUIREMENTS: + - Precisely mention the position where the comment should be placed. + - The codeSegmentToFix should exactly start from the start_line_with_prefix and end at the end_line_with_prefix. 
+    - Use the file-based line numbers provided in the structured diff below.
+    - You MUST provide exact start_line_with_prefix and end_line_with_prefix numbers for each comment.
+    - Never omit line numbers or the system will fail.
+
+    Examples for start_line_with_prefix when the start_line is from new file: "+5, +2, +51, +61"
+    Examples for start_line_with_prefix when the start_line is from old file: "-8, -1, -56, -20"
+
+    Examples for end_line_with_prefix when the end_line is from new file: "+10, +2, +77, +65"
+    Examples for end_line_with_prefix when the end_line is from old file: "-1, -5, -22, -44"
+
+    Diff content:
+    {structured_diff_text}
+    """
+
+    print("Before API CALL...")
+
+    # Get analysis from OpenAI
+    completion = client.beta.chat.completions.parse(
+        model="gpt-4o-2024-08-06",
+        messages=[
+            {"role": "system", "content": "You are an experienced code reviewer."},
+            {"role": "user", "content": prompt}
+        ],
+        response_format=ReviewModel,
+    )
+
+    print("After API CALL...")
+
+    # Parse and format the response
+    response_pydantic= completion.choices[0].message.parsed
+
+    review_steps = []
+
+    for step in response_pydantic.steps:
+
+        value1 = step.start_line_with_prefix
+        start_line = int(value1.strip().lstrip("+-"))  # Strip '+'/'-' prefix; GitHub line numbers must be positive
+
+        value2 = step.end_line_with_prefix
+        end_line = int(value2.strip().lstrip("+-"))
+
+        step_dict = {
+            "fileName": step.fileName,
+            "start_line": start_line,
+            "start_line_with_prefix": step.start_line_with_prefix,
+            "end_line": end_line,
+            "end_line_with_prefix": step.end_line_with_prefix,
+            "language": step.language,
+            "codeSegmentToFix": step.codeSegmentToFix,
+            "comment": step.comment,
+            "suggestion": step.suggestion,
+            "suggestedCode": step.suggestedCode,
+            "severity": step.severity
+        }
+        review_steps.append(step_dict)
+
+    return review_steps
diff --git a/main.py b/main.py
index 933cf77..d2219b7 100644
--- a/main.py
+++ b/main.py
@@ -1,43 +1,20 @@
-import os
-import hmac
-import hashlib
 import json
-from fastapi import FastAPI, Request, HTTPException, Header +from fastapi import FastAPI, Request, Header from dotenv import load_dotenv -from github import Github, GithubIntegration +import requests + +from llm_utils import analyze_code_changes +from github_utils import create_check_run, update_check_run, parse_diff_file_line_numbers, build_review_prompt_with_file_line_numbers +from authenticate_github import verify_signature, connect_repo app = FastAPI() load_dotenv() -APP_ID = os.environ.get("APP_ID") -WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET") -PRIVATE_KEY_PATH = os.environ.get("PRIVATE_KEY_PATH") - -with open(PRIVATE_KEY_PATH) as fin: - private_key = fin.read() - -github_integration = GithubIntegration(APP_ID, private_key) - -def generate_hash_signature(secret: bytes, payload: bytes, digest_method=hashlib.sha1): - return hmac.new(secret, payload, digest_method).hexdigest() - -def verify_signature(payload: bytes, x_hub_signature: str): - secret = WEBHOOK_SECRET.encode("utf-8") - expected_signature = f"sha1={generate_hash_signature(secret, payload)}" - if not hmac.compare_digest(expected_signature, x_hub_signature): - raise HTTPException(status_code=401, detail="Invalid webhook signature") - -def connect_repo(owner: str, repo_name: str): - installation_id = github_integration.get_installation(owner, repo_name).id - access_token = github_integration.get_access_token(installation_id).token - return Github(login_or_token=access_token).get_repo(f"{owner}/{repo_name}") - @app.post("/webhook") async def webhook(request: Request, x_hub_signature: str = Header(None)): payload = await request.body() verify_signature(payload, x_hub_signature) payload_dict = json.loads(payload) - #print("Payload:", payload_dict) if "repository" in payload_dict: owner = payload_dict["repository"]["owner"]["login"] @@ -47,8 +24,124 @@ async def webhook(request: Request, x_hub_signature: str = Header(None)): # Check if it's a pull_request event with action 'opened' if 
payload_dict.get("pull_request") and payload_dict.get("action") == "opened": pr_number = payload_dict["pull_request"]["number"] - issue = repo.get_issue(number=pr_number) - issue.create_comment( - "Thanks for opening a new PR! Please follow our contributing guidelines to make your PR easier to review." - ) + head_sha = payload_dict['pull_request']['head']['sha'] + print(head_sha) + + check_run = None # Initialize outside try block + + try: + # Create initial check run + check_run = create_check_run(repo, head_sha) + + #newly added to get pull request diff + pull_request = repo.get_pull(pr_number) + diff_url = pull_request.diff_url + response = requests.get(diff_url) + + # Parse the diff to extract actual file line numbers. + parsed_files = parse_diff_file_line_numbers(response.text) + + # Build a structured diff text for the prompt. + structured_diff_text = build_review_prompt_with_file_line_numbers(parsed_files) + print(structured_diff_text) + + print("Before llm call...") + + issue = repo.get_issue(number=pr_number) + issue.create_comment( + "Hi, I am a code reviewer bot. I will analyze the PR and provide detailed review comments." 
+ ) + + # Analyze code changes (your existing function) + review_list = analyze_code_changes(structured_diff_text) + + print("After llm call ...") + + # Update check run with results + update_check_run( + check_run=check_run, + results=review_list + ) + + # Post each review item as a comment on the PR + for review in review_list: + print("\n") + print(review) + + + prog_lang = review.get('language', '') # Default to an empty string if 'language' is missing + comment_body = ( + f"**Issue:** {review['comment']}\n\n" + f"**Severity:** {review['severity']}\n\n" + f"**Suggestion:** {review['suggestion']}\n" + ) + + # If suggestedCode exists, add it to the comment + if review.get("suggestedCode"): + comment_body += f"```{prog_lang}\n{review['suggestedCode']}\n```" + + #Check whether the start_line and end_line are from new file or old file + if(review['start_line_with_prefix'][0]=='-'): + var_startSide = "LEFT" + else: + var_startSide = "RIGHT" + + if(review['end_line_with_prefix'][0]=='-'): + var_side = "LEFT" + else: + var_side = "RIGHT" + + if(review['start_line'] != review['end_line']): + try: + pull_request.create_review_comment( + body=comment_body, + commit=repo.get_commit(head_sha), + path=review['fileName'], + start_line=review['start_line'], #line number of the starting line of the code block + line=review['end_line'], #line number of the ending line of the code block + start_side=var_startSide, #side of the starting line of the code block + side=var_side, # side of the ending line of the code block + ) + except Exception as e: + print(f"Failed to post comments: {str(e)}") + if hasattr(e, 'data'): + print("Error details:", json.dumps(e.data, indent=2)) + else: + print("No valid comments to post") + + else: + try: + pull_request.create_review_comment( + body=comment_body, + commit=repo.get_commit(head_sha), + path=review['fileName'], + line=review['end_line'], + side=var_side, + ) + except Exception as e: + print(f"Failed to post comments: {str(e)}") + if 
hasattr(e, 'data'):
+                        print("Error details:", json.dumps(e.data, indent=2))
+                else:
+                    print("No valid comments to post")
+
+
+        except Exception as e:
+            # Only update check run if it was successfully created
+            if check_run is not None:
+                check_run.edit(
+                    status="completed",
+                    conclusion="failure",
+                    output={
+                        "title": "Analysis Failed",
+                        "summary": f"Error: {str(e)}"
+                    }
+                )
+            else:
+                # Fallback error handling
+                print(f"Critical failure before check run creation: {str(e)}")
+
+            raise
+
+    return {}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f4d8fb0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+unidiff==0.7.5
+PyGithub==2.6.1
+openai==1.66.3
+pydantic==2.10.6
+requests==2.32.3
+python-dotenv==1.0.1
+fastapi==0.115.11
+uvicorn==0.34.0
+githubkit
\ No newline at end of file