Pr advanced display #124

Open · wants to merge 16 commits into master
40 changes: 40 additions & 0 deletions authenticate_github.py
@@ -0,0 +1,40 @@
import os
import hmac
import hashlib
from fastapi import HTTPException
from github import Github, GithubIntegration


APP_ID = os.environ.get("APP_ID")
if not APP_ID:
    raise ValueError("APP_ID not set")

WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
if not WEBHOOK_SECRET:
    raise ValueError("WEBHOOK_SECRET not set")

Check warning on line 14 in authenticate_github.py

pr-respond-test / AI Code Review

authenticate_github.py#L12-L14

Issue: Missing logging for critical exceptions. Add logging to capture when environment variables are missing.

Severity: warning

Suggestion: Add logging to capture when critical environment variables are missing.

if not WEBHOOK_SECRET:
    print("Critical: WEBHOOK_SECRET environment variable is not set.")
    raise ValueError("WEBHOOK_SECRET not set")


PRIVATE_KEY_PATH = os.environ.get("PRIVATE_KEY_PATH")
if not PRIVATE_KEY_PATH:
    raise ValueError("PRIVATE_KEY_PATH not set")

try:
    with open(PRIVATE_KEY_PATH) as fin:
        private_key = fin.read()
except FileNotFoundError:
    raise FileNotFoundError("Private key file not found. Ensure PRIVATE_KEY_PATH is correctly set.")

github_integration = GithubIntegration(APP_ID, private_key)

def generate_hash_signature(secret: bytes, payload: bytes, digest_method=hashlib.sha1):
    return hmac.new(secret, payload, digest_method).hexdigest()

def verify_signature(payload: bytes, x_hub_signature: str):
    secret = WEBHOOK_SECRET.encode("utf-8")
    expected_signature = f"sha1={generate_hash_signature(secret, payload)}"
    if not hmac.compare_digest(expected_signature, x_hub_signature):
        raise HTTPException(status_code=401, detail="Invalid webhook signature")

Check notice on line 35 in authenticate_github.py

pr-respond-test / AI Code Review

authenticate_github.py#L28-L35

Issue: Unused functions - These functions are defined but not used within the module, potentially leading to dead code.

Severity: info

Suggestion: Remove unused functions or ensure they are called appropriately within the module.
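
For context, the reviewer's second option could look like this minimal sketch, assuming a FastAPI app that receives GitHub webhooks (the route path and wiring are illustrative, not part of this PR):

# Hypothetical caller, not part of this diff: wires verify_signature into a
# FastAPI webhook route so the helpers above are actually exercised.
from fastapi import FastAPI, Header, Request

from authenticate_github import verify_signature

app = FastAPI()

@app.post("/webhook")
async def webhook(request: Request, x_hub_signature: str = Header(None)):
    payload = await request.body()
    # Raises HTTPException(status_code=401) when the HMAC SHA-1 signature does not match.
    verify_signature(payload, x_hub_signature)
    return {"status": "ok"}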


def connect_repo(owner: str, repo_name: str):
    installation_id = github_integration.get_installation(owner, repo_name).id
    access_token = github_integration.get_access_token(installation_id).token
    return Github(login_or_token=access_token).get_repo(f"{owner}/{repo_name}")
138 changes: 138 additions & 0 deletions github_utils.py
@@ -0,0 +1,138 @@
from github import Github
from unidiff import PatchSet
from typing import List, Dict

def create_check_run(repo, sha):
    """Create a check run using the modern PyGithub API"""
    return repo.create_check_run(
        name="AI Code Review",
        head_sha=sha,
        status="queued",  # Initial status should be 'queued'
        output={
            "title": "Analyzing Changes",
            "summary": "🔍 Scanning code changes with AI...",
            "text": "This may take 20-30 seconds"
        }
    )

def update_check_run(check_run, results):
    """Update check run with proper status transitions"""
    # First update to in_progress
    check_run.edit(
        status="in_progress",
        output={
            "title": "Processing...",
            "summary": "Analyzing code patterns"
        }
    )

    # Then update with final results
    annotations = []
    for result in results:
        # Extract line numbers from your analysis results
        annotation = {
            "path": result['fileName'],
            "start_line": result['start_line'],  # REQUIRED
            "end_line": result['end_line'],      # REQUIRED
            "annotation_level": map_severity(result['severity']),
            "message": result['comment'],
            "raw_details": f"Suggestion: {result['suggestion']}\n\n{result.get('suggestedCode', '')}"
        }

        annotations.append(annotation)

    check_run.edit(
        status="completed",
        # conclusion="success" if len(annotations) == 0 else "action_required",
        conclusion="success",
        output={
            "title": f"Found {len(annotations)} items",
            "summary": "AI Code Review Results",
            "annotations": annotations[:50]  # GitHub limits to 50 annotations per update
        }
    )

def map_severity(level: str) -> str:
    """Map custom severity levels to GitHub annotation levels"""
    return {
        "error": "failure",
        "warning": "warning",
        "info": "notice"
    }.get(level.lower(), "notice")
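
As a usage note, here is a minimal sketch of the intended check-run lifecycle, assuming the connect_repo helper from authenticate_github.py and placeholder values throughout:

# Illustrative only: drives the queued -> in_progress -> completed transitions above.
from authenticate_github import connect_repo

repo = connect_repo("some-owner", "some-repo")  # placeholder owner/repo
check_run = create_check_run(repo, "0123abc")   # placeholder head commit SHA
results = [{
    "fileName": "app.py",
    "start_line": 3,
    "end_line": 5,
    "severity": "warning",
    "comment": "Example issue",
    "suggestion": "Example fix",
    "suggestedCode": "pass",
}]
update_check_run(check_run, results)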


def parse_diff_file_line_numbers(diff_content: str) -> List[Dict]:
    """
    Parse a unified diff string and return a structured list of changes using
    actual file line numbers.

    Returns a list of dicts, each representing a file change:
    {
        "file_name": str,
        "changes": [
            {
                "type": "added" | "removed" | "context",
                "line_number": int,  # For added or context lines, this is target_line_no.
                                     # For removed lines, use source_line_no.
                "content": str
            },
            ...
        ]
    }
    """
    patch = PatchSet(diff_content)
    parsed_files = []

    for patched_file in patch:
        file_info = {
            "file_name": patched_file.path,
            "changes": []
        }
        for hunk in patched_file:
            for line in hunk:
                # Decide which line number to use based on change type.
                if line.is_added or not line.is_removed:
                    line_num = line.target_line_no
                else:
                    line_num = line.source_line_no

                if line_num is None:
                    continue  # Skip lines without a valid number

                # Append each changed line along with its file-based line number.
                file_info["changes"].append({
                    "type": "added" if line.is_added else "removed" if line.is_removed else "context",
                    "line_number": line_num,
                    "content": line.value.rstrip("\n")
                })
        parsed_files.append(file_info)

    return parsed_files

Check failure on line 112 in github_utils.py

pr-respond-test / AI Code Review

github_utils.py#L65-L112

Issue: Missing error handling for diff parsing. Add try/catch block around the parsing logic and log any exceptions.

Severity: error

Suggestion: Wrap the parsing logic in a try/except block to handle exceptions and log them.

def parse_diff_file_line_numbers(diff_content: str) -> List[Dict]:
    """
    Parse a unified diff string and return a structured list of changes using
    actual file line numbers.
    """
    try:
        patch = PatchSet(diff_content)
        parsed_files = []
        
        for patched_file in patch:
            file_info = {
                "file_name": patched_file.path,
                "changes": []
            }
            for hunk in patched_file:
                for line in hunk:
                    # Decide which line number to use based on change type.
                    if line.is_added or not line.is_removed:
                        line_num = line.target_line_no
                    else:
                        line_num = line.source_line_no
                    
                    if line_num is None:
                        continue  # Skip lines without a valid number
                    
                    # Append each changed line along with its file-based line number.
                    file_info["changes"].append({
                        "type": "added" if line.is_added else "removed" if line.is_removed else "context",
                        "line_number": line_num,
                        "content": line.value.rstrip("\n")
                    })
            parsed_files.append(file_info)
        
        return parsed_files
    except Exception as e:
        print(f"Error parsing diff content: {str(e)}")
        raise

def build_review_prompt_with_file_line_numbers(parsed_files: List[Dict]) -> str:
    """
    Create a prompt that includes the diff using actual file line numbers.
    """
    prompt_lines = []

    for file_data in parsed_files:
        file_name = file_data["file_name"]
        prompt_lines.append(f"File: {file_name}\n")
        prompt_lines.append("Changed lines:")

        for change in file_data["changes"]:
            # Mark added lines with +, removed with -, context with a space
            sign = (
                "+" if change["type"] == "added" else
                "-" if change["type"] == "removed" else
                " "
            )
            prompt_lines.append(
                f"[Line {change['line_number']}] {sign} {change['content']}"
            )
        prompt_lines.append("\n")

    return "\n".join(prompt_lines)

109 changes: 109 additions & 0 deletions llm_utils.py
@@ -0,0 +1,109 @@
import os
from openai import OpenAI
from typing import List, Dict, Optional
from pydantic import BaseModel, Field


api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise ValueError("OPENAI_API_KEY is not set")

client = OpenAI(api_key=api_key)

class ReviewModel(BaseModel):
    class Step(BaseModel):
        fileName: str = Field(description="The name of the file that has an issue")
        start_line_with_prefix: str = Field(description="The starting line number in the file (REQUIRED). \
If the start_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        end_line_with_prefix: str = Field(description="The ending line number in the file (REQUIRED). \
If the end_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        language: str = Field(description="The language of the code segment")
        codeSegmentToFix: str = Field(description="The code segment that needs to be fixed from code diff in diff style ('+' for added, '-' for removed, or nothing for normal code)")
        comment: str = Field(description="The comment on the code segment")
        suggestion: str = Field(description="The suggestion to fix the code segment")
        suggestedCode: Optional[str] = Field(None, description="The updated code segment for the fix")
        severity: str = Field(description="The severity of the issue. Can be 'error', 'warning', or 'info'")

    steps: list[Step]
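
For illustration, one Step instance the model might return under this schema (all values made up):

# Hypothetical parsed output matching ReviewModel.Step; not produced by this PR.
example_step = ReviewModel.Step(
    fileName="app.py",
    start_line_with_prefix="+12",  # '+' means the line number refers to the new file
    end_line_with_prefix="+14",
    language="python",
    codeSegmentToFix="+ result = eval(user_input)",
    comment="Use of eval on user input is unsafe.",
    suggestion="Parse the input explicitly instead of using eval.",
    suggestedCode="result = int(user_input)",
    severity="error",
)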

def analyze_code_changes(structured_diff_text: str) -> List[Dict]:
    """
    Analyze code changes using OpenAI's GPT model.
    Returns a list of review comments.
    """

    # Prepare the prompt for the LLM
    prompt = f"""
    Analyze the following code changes and provide detailed review comments.
    Focus on:
    - Code quality and best practices
    - Potential security vulnerabilities
    - Performance implications

    Important:
    - Provide insights in the comment section for each code segment. Provide improvements in suggestions when necessary.
    - Always output the codeSegmentToFix in the diff format (e.g., '+ added code', '- removed code', or nothing for normal code).
    - If there is a new line in the codeSegmentToFix (when there are multiple lines), you MUST indicate it with the new line symbol.
    - Ensure that you provide all the necessary code lines in the codeSegmentToFix field.
    - If there are multiple comments for the same code segment, provide the comments separated by commas.

    CRITICAL REQUIREMENTS:
    - Precisely mention the position where the comment should be placed.
    - The codeSegmentToFix should exactly start from the start_line_with_prefix and end at the end_line_with_prefix.
    - Use the file-based line numbers provided in the structured diff below.
    - You MUST provide exact start_line_with_prefix and end_line_with_prefix numbers for each comment.
    - Never omit line numbers or the system will fail.

    Examples for start_line_with_prefix when the start_line is from new file: "+5, +2, +51, +61"
    Examples for start_line_with_prefix when the start_line is from old file: "-8, -1, -56, -20"

    Examples for end_line_with_prefix when the end_line is from new file: "+10, +2, +77, +65"
    Examples for end_line_with_prefix when the end_line is from old file: "-1, -5, -22, -44"

    Diff content:
    {structured_diff_text}
    """

Check notice on line 65 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L35-L65

Issue: Complex prompt string - The prompt string is very long and complex, making it hard to maintain and modify. Consider breaking it into smaller parts or using a different approach to build the prompt.

Severity: info

Suggestion: Break the prompt string into smaller parts or use a different approach to build the prompt for better maintainability.

prompt_intro = """
    Analyze the following code changes and provide detailed review comments.
    Focus on:
    - Code quality and best practices
    - Potential security vulnerabilities
    - Performance implications

    Important:
    - Provide insights in the comment section for each code segment. Provide improvements in suggestions when necessary.
    - Always output the codeSegmentToFix in the diff format (e.g., '+ added code', '- removed code', or nothing for normal code).
    - If there is a new line in the codeSegmentToFix (when there are multiple lines), you MUST indicate it with the new line symbol.
    - Ensure that you provide all the necessary code lines in the codeSegmentToFix field.
    - If there are multiple comments for the same code segment, provide the comments separated by commas.
    """

prompt_critical = """
    CRITICAL REQUIREMENTS:
    - Precisely mention the position where the comment should be placed.
    - The codeSegmentToFix should exactly start from the start_line_with_prefix and end at the end_line_with_prefix.
    - Use the file-based line numbers provided in the structured diff below.
    - You MUST provide exact start_line_with_prefix and end_line_with_prefix numbers for each comment.
    - Never omit line numbers or the system will fail.
    """

prompt_examples = f"""
    Examples for start_line_with_prefix when the start_line is from new file: "+5, +2, +51, +61"
    Examples for start_line_with_prefix when the start_line is from old file: "-8, -1, -56, -20"

    Examples for end_line_with_prefix when the end_line is from new file: "+10, +2, +77, +65"
    Examples for end_line_with_prefix when the end_line is from old file: "-1, -5, -22, -44"

    Diff content:
    {structured_diff_text}
    """

prompt = f"{prompt_intro}{prompt_critical}{prompt_examples}"


print("Before API CALL...")

# Get analysis from OpenAI
completion = client.beta.chat.completions.parse(

Check failure on line 70 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L70

Issue: The OpenAI model name is incorrect. The correct model name should be 'gpt-4o' or another valid OpenAI model name. The current name 'gpt-4o-2024-08-06' is not a valid model name provided by OpenAI.

Severity: error

Suggestion: Update the model name to a valid OpenAI model name such as 'gpt-4o'.

model="gpt-4o"

model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "You are an experienced code reviewer."},
{"role": "user", "content": prompt}
],
response_format=ReviewModel,
)

Check failure on line 77 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L70-L77

Issue: Missing error handling for API call. Add try/catch block around the API call and log any exceptions.

Severity: error

Suggestion: Wrap the API call in a try/except block to handle potential exceptions and log them appropriately.

try:
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": "You are an experienced code reviewer."},
            {"role": "user", "content": prompt}
        ],
        response_format=ReviewModel,
    )
except Exception as e:
    print(f"Error during API call: {str(e)}")
    raise


print("After API CALL...")

# Parse and format the response
response_pydantic= completion.choices[0].message.parsed

review_steps = []

for step in response_pydantic.steps:

value1 = step.start_line_with_prefix
start_line = int(value1.replace("+", "").strip()) # Remove '+' and strip spaces

value2 = step.end_line_with_prefix
end_line = int(value2.replace("+", "").strip())

step_dict = {
"fileName": step.fileName,
"start_line": start_line,
"start_line_with_prefix": step.start_line_with_prefix,
"end_line": end_line,
"end_line_with_prefix": step.end_line_with_prefix,
"language": step.language,
"codeSegmentToFix": step.codeSegmentToFix,
"comment": step.comment,
"suggestion": step.suggestion,
"suggestedCode": step.suggestedCode,
"severity": step.severity
}
review_steps.append(step_dict)

return review_steps
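
Taken together, a rough sketch of how the pieces in this PR could be driven end to end (owner, repo, and diff-fetching details are placeholders; this orchestration is not part of the diff):

# Hypothetical glue code combining the three modules in this PR.
import requests

from authenticate_github import connect_repo
from github_utils import (build_review_prompt_with_file_line_numbers,
                          create_check_run, parse_diff_file_line_numbers,
                          update_check_run)
from llm_utils import analyze_code_changes

repo = connect_repo("some-owner", "some-repo")  # placeholder names
pr = repo.get_pull(124)
check_run = create_check_run(repo, pr.head.sha)

diff_text = requests.get(pr.diff_url).text      # fetch the raw unified diff
parsed = parse_diff_file_line_numbers(diff_text)
prompt_text = build_review_prompt_with_file_line_numbers(parsed)

results = analyze_code_changes(prompt_text)
update_check_run(check_run, results)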