Pr check #93

Closed · wants to merge 10 commits
138 changes: 138 additions & 0 deletions github_utils.py
@@ -0,0 +1,138 @@
from github import Github
from unidiff import PatchSet
from typing import List, Dict

def create_check_run(repo, sha):
    """Create a check run using the modern PyGithub API."""
    return repo.create_check_run(
        name="AI Code Review",
        head_sha=sha,
        status="queued",  # Initial status should be 'queued'
        output={
            "title": "Analyzing Changes",
            "summary": "🔍 Scanning code changes with AI...",
            "text": "This may take 20-30 seconds"
        }
Comment on lines +11 to +15


Issue: The 'text' field in the 'output' dictionary is not defined in the GitHub API's check run output format.

Severity: error

Suggestion: Remove the 'text' field from the 'output' dictionary to comply with GitHub API standards.

output={
    "title": "Analyzing Changes",
    "summary": "🔍 Scanning code changes with AI..."
}

    )

def update_check_run(check_run, results):
    """Update check run with proper status transitions."""
    # First update to in_progress
    check_run.edit(
        status="in_progress",
        output={
            "title": "Processing...",
            "summary": "Analyzing code patterns"
        }
    )

    # Then update with final results
    annotations = []
    for result in results:
        # Extract line numbers from your analysis results
        annotation = {
            "path": result['fileName'],
            "start_line": result['start_line'],  # REQUIRED
            "end_line": result['end_line'],  # REQUIRED
            "annotation_level": map_severity(result['severity']),
            "message": result['comment'],
            "raw_details": f"Suggestion: {result['suggestion']}\n\n{result.get('suggestedCode', '')}"
        }


Issue: The formatted 'raw_details' string may expose sensitive information through 'suggestedCode'.

Severity: warning

Suggestion: Ensure that 'suggestedCode' contains no sensitive data, or sanitize it appropriately before embedding it in 'raw_details'.
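A minimal sketch of such a sanitization step (the redact_secrets helper and its patterns are illustrative assumptions, not part of this PR):

import re

SECRET_PATTERNS = [
    re.compile(r"(?i)(api[_-]?key|token|secret|password)\s*[:=]\s*\S+"),
]

def redact_secrets(snippet: str, max_chars: int = 1000) -> str:
    """Illustrative helper: mask obvious credential assignments and cap snippet length."""
    for pattern in SECRET_PATTERNS:
        snippet = pattern.sub(lambda m: m.group(1) + "=<redacted>", snippet)
    return snippet[:max_chars]

# Hypothetical usage when building the annotation:
# "raw_details": f"Suggestion: {result['suggestion']}\n\n{redact_secrets(result.get('suggestedCode', ''))}"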


        annotations.append(annotation)

    check_run.edit(
        status="completed",
        # conclusion="success" if len(annotations) == 0 else "action_required",
        conclusion="success",
Comment on lines +46 to +47


Issue: The 'conclusion' logic should dynamically determine success or failure based on annotations.

Severity: info

Suggestion: Uncomment and fix the original conditional logic to set the 'conclusion' field correctly.

conclusion="success" if len(annotations) == 0 else "action_required"


        output={
            "title": f"Found {len(annotations)} items",
            "summary": "AI Code Review Results",
            "annotations": annotations[:50]  # GitHub limits to 50 annotations per update
        }
    )

def map_severity(level: str) -> str:
    """Map custom severity levels to GitHub annotation levels."""
    return {
        "error": "failure",
        "warning": "warning",
        "info": "notice"
    }.get(level.lower(), "notice")


def parse_diff_file_line_numbers(diff_content: str) -> List[Dict]:
    """
    Parse a unified diff string and return a structured list of changes using
    actual file line numbers.

    Returns a list of dicts, each representing a file change:
    {
        "file_name": str,
        "changes": [
            {
                "type": "added" | "removed" | "context",
                "line_number": int,  # For added or context lines, this is target_line_no.
                                     # For removed lines, use source_line_no.
                "content": str
            },
            ...
        ]
    }
    """
    patch = PatchSet(diff_content)
    parsed_files = []

    for patched_file in patch:
        file_info = {
            "file_name": patched_file.path,
            "changes": []
        }
        for hunk in patched_file:
            for line in hunk:
                # Decide which line number to use based on change type.
                if line.is_added or not line.is_removed:
                    line_num = line.target_line_no
                else:
                    line_num = line.source_line_no

                if line_num is None:
                    continue  # Skip lines without a valid number

                # Append each changed line along with its file-based line number.
                file_info["changes"].append({
                    "type": "added" if line.is_added else "removed" if line.is_removed else "context",
                    "line_number": line_num,
                    "content": line.value.rstrip("\n")
                })
        parsed_files.append(file_info)

    return parsed_files
Comment on lines +65 to +111


Issue: If 'diff_content' is overly large, initializing 'PatchSet' might be resource-intensive and cause memory issues.

Severity: performance

Suggestion: Consider paginating the 'diff_content' or processing it in parts to avoid potential memory overflows.
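One way to keep memory bounded is to split the diff on per-file boundaries and parse each chunk separately; a sketch under the assumption that the diff follows the standard "diff --git" layout (the iter_file_patches helper is illustrative, not part of this PR):

import re
from unidiff import PatchSet

def iter_file_patches(diff_content: str):
    """Illustrative generator: parse one file's diff at a time instead of one large PatchSet."""
    chunks = re.split(r"(?m)^(?=diff --git )", diff_content)
    for chunk in chunks:
        if not chunk.strip():
            continue
        # Memory use is now bounded by the largest single-file diff rather than the whole PR.
        for patched_file in PatchSet(chunk):
            yield patched_file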



def build_review_prompt_with_file_line_numbers(parsed_files: List[Dict]) -> str:
    """
    Create a prompt that includes the diff using actual file line numbers.
    """
    prompt_lines = []

    for file_data in parsed_files:
        file_name = file_data["file_name"]
        prompt_lines.append(f"File: {file_name}\n")
        prompt_lines.append("Changed lines:")

        for change in file_data["changes"]:
            # Mark added lines with +, removed with -, context with a space
            sign = (
                "+" if change["type"] == "added" else
                "-" if change["type"] == "removed" else
                " "
            )
            prompt_lines.append(
                f"[Line {change['line_number']}] {sign} {change['content']}"
            )
        prompt_lines.append("\n")

    return "\n".join(prompt_lines)

107 changes: 107 additions & 0 deletions llm_utils.py
@@ -0,0 +1,107 @@
import os
from openai import OpenAI
from typing import List, Dict, Optional
from pydantic import BaseModel, Field


api_key = os.environ.get('OPENAI_API_KEY')

Check failure on line 7 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L7

The OpenAI API key is retrieved from the environment variables, but there is no fallback mechanism if the environment variable is not set. This could lead to the application failing to start or function correctly. Consider adding a check to ensure the environment variable is set and handle the case where it is not (e.g., by raising an exception or using a default key for testing purposes only).
Raw output
Suggestion: Add a check to ensure the API key is set. If not, raise an exception or provide a default key for testing purposes.

api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set.")
    # Alternatively, set a default key for testing
    # api_key = 'default_test_key'


Issue: The 'api_key' retrieved from the environment could be None if the variable is not set, causing failures in API calls.

Severity: error

Suggestion: Add a validation check to ensure 'api_key' is not None, and raise or handle an exception if it is.

api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable not set.")


client = OpenAI(api_key=api_key)

class ReviewModel(BaseModel):
    class Step(BaseModel):
        fileName: str = Field(description="The name of the file that has an issue")
        start_line_with_prefix: str = Field(description="The starting line number in the file (REQUIRED). \
            If the start_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        end_line_with_prefix: str = Field(description="The ending line number in the file (REQUIRED). \
            If the end_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        language: str = Field(description="The language of the code segment")
        codeSegmentToFix: str = Field(description="The code segment that needs to be fixed from the code diff, in diff style ('+' for added, '-' for removed, or nothing for normal code)")
        comment: str = Field(description="The comment on the code segment")
        suggestion: str = Field(description="The suggestion to fix the code segment")
        suggestedCode: Optional[str] = Field(None, description="The updated code segment for the fix")
        severity: str = Field(description="The severity of the issue. Can be 'error', 'warning', or 'info'")

    steps: list[Step]

def analyze_code_changes(structured_diff_text: str) -> List[Dict]:
    """
    Analyze code changes using OpenAI's GPT model.
    Returns a list of review comments.
    """

    # Prepare the prompt for the LLM
    prompt = f"""
    Analyze the following code changes and provide detailed review comments.
    Focus on:
    - Code quality and best practices
    - Potential security vulnerabilities
    - Performance implications

    Important:
    - Provide insights in the comment section for each code segment. Provide improvements in suggestions when necessary.
    - Always output the codeSegmentToFix in the diff format (e.g., '+ added code', '- removed code', or nothing for normal code).
    - If there is a new line in the codeSegmentToFix (when there are multiple lines), you MUST indicate it with the newline symbol.
    - Ensure that you provide all the necessary code lines in the codeSegmentToFix field.
    - If there are multiple comments for the same code segment, provide the comments separated by commas.

    CRITICAL REQUIREMENTS:
    - Precisely mention the position where the comment should be placed.
    - The codeSegmentToFix should start exactly at the start_line_with_prefix and end at the end_line_with_prefix.
    - Use the file-based line numbers provided in the structured diff below.
    - You MUST provide exact start_line_with_prefix and end_line_with_prefix numbers for each comment.
    - Never omit line numbers or the system will fail.

    Examples for start_line_with_prefix when the start_line is from the new file: "+5, +2, +51, +61"
    Examples for start_line_with_prefix when the start_line is from the old file: "-8, -1, -56, -20"

    Examples for end_line_with_prefix when the end_line is from the new file: "+10, +2, +77, +65"
    Examples for end_line_with_prefix when the end_line is from the old file: "-1, -5, -22, -44"

    Diff content:
    {structured_diff_text}
    """

    # Get analysis from OpenAI
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",

Check notice on line 67 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L67

The model version is hardcoded. This should be configurable via an environment variable or a configuration file to allow for easier updates and experimentation with different models without modifying the code.
Raw output
Suggestion: Use an environment variable or configuration file to set the model version, allowing for easier updates and experimentation.

model_version = os.environ.get('OPENAI_MODEL_VERSION', 'gpt-4o')
completion = client.beta.chat.completions.parse(
    model=model_version,
    messages=[
        {"role": "system", "content": "You are an experienced code reviewer."},
        {"role": "user", "content": prompt}
    ],
    response_format=ReviewModel,
)
        messages=[
            {"role": "system", "content": "You are an experienced code reviewer."},
            {"role": "user", "content": prompt}
        ],
        response_format=ReviewModel,
    )

Check warning on line 73 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L66-L73

The OpenAI model version is hardcoded as "gpt-4o-2024-08-06". This could lead to issues if the model is deprecated or unavailable. It is recommended to use a more generic model name or allow the model version to be configurable via an environment variable.
Raw output
Suggestion: Make the model version configurable via an environment variable or configuration file to allow for easier updates and flexibility.

model_version = os.environ.get('OPENAI_MODEL_VERSION', 'gpt-4o')
completion = client.beta.chat.completions.parse(
    model=model_version,
    messages=[
        {"role": "system", "content": "You are an experienced code reviewer."},
        {"role": "user", "content": prompt}
    ],
    response_format=ReviewModel,
)

Check notice on line 73 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L72-L73

The response_format is hardcoded. This might limit the flexibility of the application if other response formats are needed in the future. Consider making this configurable.
Raw output
Suggestion: Make the response format configurable to allow for flexibility in handling different response types in the future.

response_format = os.environ.get('OPENAI_RESPONSE_FORMAT', ReviewModel)
completion = client.beta.chat.completions.parse(
    model=model_version,
    messages=[
        {"role": "system", "content": "You are an experienced code reviewer."},
        {"role": "user", "content": prompt}
    ],
    response_format=response_format,
)

    # Parse and format the response
    response_pydantic = completion.choices[0].message.parsed

    review_steps = []

    for step in response_pydantic.steps:

        value1 = step.start_line_with_prefix
        start_line = int(value1.replace("+", "").strip())  # Remove '+' and strip spaces

Check failure on line 83 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L83

The code attempts to convert the start line to an integer, but it does not handle the case where the value is not a valid integer. This could lead to a ValueError if the string contains non-numeric characters after removing the '+'. Add a try-except block to handle potential ValueErrors and log the error.
Raw output
Suggestion: Add a try-except block to handle potential ValueErrors when converting the start line to an integer. Log the error if it occurs.

try:
    start_line = int(value1.replace("+", "").strip())
except ValueError as e:
    print(f"Invalid start line value: {value1}. Error: {e}")
    # Handle the error appropriately

        value2 = step.end_line_with_prefix
        end_line = int(value2.replace("+", "").strip())

Check failure on line 86 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L86

The code attempts to convert the end line to an integer, but it does not handle the case where the value is not a valid integer. This could lead to a ValueError if the string contains non-numeric characters after removing the '+'. Add a try-except block to handle potential ValueErrors and log the error.
Raw output
Suggestion: Add a try-except block to handle potential ValueErrors when converting the end line to an integer. Log the error if it occurs.

try:
    end_line = int(value2.replace("+", "").strip())
except ValueError as e:
    print(f"Invalid end line value: {value2}. Error: {e}")
    # Handle the error appropriately

        step_dict = {
            "fileName": step.fileName,
            "start_line": start_line,
            "start_line_with_prefix": step.start_line_with_prefix,
            "end_line": end_line,
            "end_line_with_prefix": step.end_line_with_prefix,
            "language": step.language,
            "codeSegmentToFix": step.codeSegmentToFix,
            "comment": step.comment,
            "suggestion": step.suggestion,
            "suggestedCode": step.suggestedCode,
            "severity": step.severity
        }
        review_steps.append(step_dict)

    # for review in review_steps:
    #     print(review)
    #     print("\n\n")

    return review_steps
90 changes: 85 additions & 5 deletions main.py
@@ -5,6 +5,10 @@
from fastapi import FastAPI, Request, HTTPException, Header
from dotenv import load_dotenv
from github import Github, GithubIntegration
import requests


Issue: Without a request timeout, network calls can hang indefinitely, especially when calling external APIs.

Severity: warning

Suggestion: Set a global timeout parameter for requests to avoid indefinite hangs and improve fault tolerance.
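A small sketch of what this could look like for the requests.get(diff_url) call later in this file (the 10-second value and constant name are illustrative assumptions, not part of this PR):

# Sketch only: requests has no session-wide default timeout, so a module-level
# constant applied at each call site is a simple way to enforce one.
REQUEST_TIMEOUT_SECONDS = 10

response = requests.get(diff_url, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()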


from llm_utils import analyze_code_changes
from github_utils import create_check_run, update_check_run, parse_diff_file_line_numbers, build_review_prompt_with_file_line_numbers

app = FastAPI()
load_dotenv()
@@ -37,7 +41,6 @@
    payload = await request.body()
    verify_signature(payload, x_hub_signature)
    payload_dict = json.loads(payload)
    #print("Payload:", payload_dict)

    if "repository" in payload_dict:
        owner = payload_dict["repository"]["owner"]["login"]
@@ -47,8 +50,85 @@
    # Check if it's a pull_request event with action 'opened'
    if payload_dict.get("pull_request") and payload_dict.get("action") == "opened":
        pr_number = payload_dict["pull_request"]["number"]
        issue = repo.get_issue(number=pr_number)
        issue.create_comment(
            "Thanks for opening a new PR! Please follow our contributing guidelines to make your PR easier to review."
        )
        head_sha = payload_dict['pull_request']['head']['sha']

        check_run = None  # Initialize outside try block

        try:
            # Create initial check run
            check_run = create_check_run(repo, head_sha)

            # newly added to get pull request diff
            pull_request = repo.get_pull(pr_number)
            diff_url = pull_request.diff_url
            response = requests.get(diff_url)

Check failure on line 64 in main.py

pr-respond-test / AI Code Review

main.py#L64

Missing error handling for the HTTP request. If the request fails (e.g., due to network issues or a 404 error), the program will proceed without valid data, potentially causing further errors. Add a try-except block to handle potential exceptions and log the error.
Raw output
Suggestion: Wrap the HTTP request in a try-except block to handle potential exceptions. Log the error if the request fails, and consider retrying the request or handling the failure gracefully.

try:
    response = requests.get(diff_url)
    response.raise_for_status()  # Raises an HTTPError for bad responses
except requests.exceptions.RequestException as e:
    print(f"HTTP request failed: {e}")
    # Handle the error, e.g., retry or exit

            # Parse the diff to extract actual file line numbers.
            parsed_files = parse_diff_file_line_numbers(response.text)

            # Build a structured diff text for the prompt.
            structured_diff_text = build_review_prompt_with_file_line_numbers(parsed_files)


Issue: Potential security risk if 'parsed_files' contains unverified data that is then processed or logged.

Severity: warning

Suggestion: Ensure that 'parsed_files' data is sanitized and safe prior to further manipulation or logging.
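A minimal sketch of such a sanitization pass over the parsed diff (the limits and the sanitize_parsed_files helper are illustrative assumptions, not part of this PR):

MAX_LINE_LENGTH = 500        # illustrative cap per changed line
MAX_CHANGES_PER_FILE = 400   # illustrative cap per file

def sanitize_parsed_files(parsed_files):
    """Illustrative helper: cap line length and change count so untrusted diff
    content cannot blow up the prompt or the logs."""
    safe_files = []
    for file_data in parsed_files:
        changes = [
            {**change, "content": change["content"][:MAX_LINE_LENGTH]}
            for change in file_data["changes"][:MAX_CHANGES_PER_FILE]
        ]
        safe_files.append({"file_name": file_data["file_name"], "changes": changes})
    return safe_files

# Hypothetical usage before building the prompt:
# structured_diff_text = build_review_prompt_with_file_line_numbers(sanitize_parsed_files(parsed_files))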


print("Before llm call...")

issue = repo.get_issue(number=pr_number)
issue.create_comment(
"Hi, I am a code reviewer bot. I will analyze the PR and provide detailed review comments."
)

# Analyze code changes (your existing function)
review_list = analyze_code_changes(structured_diff_text)

Check failure on line 80 in main.py

pr-respond-test / AI Code Review

main.py#L80

The call to `analyze_code_changes` is not wrapped in a try-except block. This could lead to unhandled exceptions if the function fails, causing the program to crash. Add a try-except block to handle potential exceptions and log the error.
Raw output
Suggestion: Wrap the call to `analyze_code_changes` in a try-except block to catch and log any exceptions that may occur during its execution.

try:
    review_list = analyze_code_changes(structured_diff_text)
except Exception as e:
    print(f"Error analyzing code changes: {e}")
    # Handle the error appropriately

            # Update check run with results
            update_check_run(
                check_run=check_run,
                results=review_list
            )

            # Post each review item as a comment on the PR
            for review in review_list:

                print(review)

                prog_lang = review.get('language', '')  # Default to an empty string if 'language' is missing
                comment_body = (
                    f"**File:** `{review['fileName']}`\n\n"
                    f"Comments on lines `{review['start_line_with_prefix']}` to `{review['end_line_with_prefix']}`\n"
                    f"```diff\n{review['codeSegmentToFix']}\n```\n"
                    f"**Issue:** {review['comment']}\n\n"
                    f"**Severity:** {review['severity']}\n\n"
                    f"**Suggestion:** {review['suggestion']}\n"
                )

                # If suggestedCode exists, add it to the comment
                if review.get("suggestedCode"):
                    comment_body += f"```{prog_lang}\n{review['suggestedCode']}\n```"

                try:
                    res = issue.create_comment(comment_body)
                    if not res:
                        print("Failed to create comment on the issue.")

Check warning on line 110 in main.py

pr-respond-test / AI Code Review

main.py#L109-L110

The code checks if `res` is truthy, but if the API call fails and returns None, it only prints a generic message without any context or severity level. Add logging with an error level and include details about the failure.
Raw output
Suggestion: Enhance the error handling by logging the failure with more context and using an appropriate logging level.

if not res:
    print("Failed to create comment on the issue.")
    # Consider using logging instead of print for better error tracking
    # logging.error("Failed to create comment on the issue. Response was None.")
                except Exception as e:
                    print(f"Error when commenting on issue: {e}")

        except Exception as e:
            # Only update check run if it was successfully created
            if check_run is not None:
                check_run.edit(
                    status="completed",
                    conclusion="failure",
                    output={
                        "title": "Analysis Failed",
                        "summary": f"Error: {str(e)}"
                    }
                )
            else:
                # Fallback error handling
                print(f"Critical failure before check run creation: {str(e)}")

            raise

print("After llm call...")


return {}
4 changes: 4 additions & 0 deletions requirements.txt
@@ -0,0 +1,4 @@
unidiff
PyGithub
openai
pydantic