Pr advanced display #124

Open · wants to merge 16 commits into master
40 changes: 40 additions & 0 deletions authenticate_github.py
@@ -0,0 +1,40 @@
import os
import hmac
import hashlib
from fastapi import HTTPException
from github import Github, GithubIntegration


APP_ID = os.environ.get("APP_ID")
if not APP_ID:
    raise ValueError("APP_ID not set")

WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
if not WEBHOOK_SECRET:
    raise ValueError("WEBHOOK_SECRET not set")

Check warning on line 14 in authenticate_github.py

pr-respond-test / AI Code Review

authenticate_github.py#L12-L14

Issue: Missing logging for critical exceptions. Add logging to capture when environment variables are missing.

Severity: warning

Suggestion: Add logging to capture when critical environment variables are missing.

if not WEBHOOK_SECRET:
    print("Critical: WEBHOOK_SECRET environment variable is not set.")
    raise ValueError("WEBHOOK_SECRET not set")


PRIVATE_KEY_PATH = os.environ.get("PRIVATE_KEY_PATH")
if not PRIVATE_KEY_PATH:
    raise ValueError("PRIVATE_KEY_PATH not set")

try:
    with open(PRIVATE_KEY_PATH) as fin:
        private_key = fin.read()
except FileNotFoundError:
    raise FileNotFoundError("Private key file not found. Ensure PRIVATE_KEY_PATH is correctly set.")

github_integration = GithubIntegration(APP_ID, private_key)

def generate_hash_signature(secret: bytes, payload: bytes, digest_method=hashlib.sha1):
    return hmac.new(secret, payload, digest_method).hexdigest()

def verify_signature(payload: bytes, x_hub_signature: str):
    secret = WEBHOOK_SECRET.encode("utf-8")
    expected_signature = f"sha1={generate_hash_signature(secret, payload)}"
    if not hmac.compare_digest(expected_signature, x_hub_signature):
        raise HTTPException(status_code=401, detail="Invalid webhook signature")

Check notice on line 35 in authenticate_github.py

pr-respond-test / AI Code Review

authenticate_github.py#L28-L35

Issue: Unused functions - These functions are defined but not used within the module, potentially leading to dead code.

Severity: info

Suggestion: Remove unused functions or ensure they are called appropriately within the module.
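
For context, the reviewer's second option could look like this minimal sketch, assuming a FastAPI app that receives GitHub webhooks (the route path and wiring are illustrative, not part of this PR):

# Hypothetical caller, not part of this diff: wires verify_signature into a
# FastAPI webhook route so the helpers above are actually exercised.
from fastapi import FastAPI, Header, Request

from authenticate_github import verify_signature

app = FastAPI()

@app.post("/webhook")
async def webhook(request: Request, x_hub_signature: str = Header(None)):
    payload = await request.body()
    # Raises HTTPException(status_code=401) when the HMAC SHA-1 signature does not match.
    verify_signature(payload, x_hub_signature)
    return {"status": "ok"}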


def connect_repo(owner: str, repo_name: str):
    installation_id = github_integration.get_installation(owner, repo_name).id
    access_token = github_integration.get_access_token(installation_id).token
    return Github(login_or_token=access_token).get_repo(f"{owner}/{repo_name}")
138 changes: 138 additions & 0 deletions github_utils.py
@@ -0,0 +1,138 @@
from github import Github
from unidiff import PatchSet
from typing import List, Dict

def create_check_run(repo, sha):
    """Create a check run using the modern PyGithub API"""
    return repo.create_check_run(
        name="AI Code Review",
        head_sha=sha,
        status="queued",  # Initial status should be 'queued'
        output={
            "title": "Analyzing Changes",
            "summary": "🔍 Scanning code changes with AI...",
            "text": "This may take 20-30 seconds"
        }
    )

def update_check_run(check_run, results):
    """Update check run with proper status transitions"""
    # First update to in_progress
    check_run.edit(
        status="in_progress",
        output={
            "title": "Processing...",
            "summary": "Analyzing code patterns"
        }
    )

    # Then update with final results
    annotations = []
    for result in results:
        # Extract line numbers from your analysis results
        annotation = {
            "path": result['fileName'],
            "start_line": result['start_line'],  # REQUIRED
            "end_line": result['end_line'],      # REQUIRED
            "annotation_level": map_severity(result['severity']),
            "message": result['comment'],
            "raw_details": f"Suggestion: {result['suggestion']}\n\n{result.get('suggestedCode', '')}"
        }

        annotations.append(annotation)

    check_run.edit(
        status="completed",
        # conclusion="success" if len(annotations) == 0 else "action_required",
        conclusion="success",
        output={
            "title": f"Found {len(annotations)} items",
            "summary": "AI Code Review Results",
            "annotations": annotations[:50]  # GitHub limits to 50 annotations per update
        }
    )

def map_severity(level: str) -> str:
    """Map custom severity levels to GitHub annotation levels"""
    return {
        "error": "failure",
        "warning": "warning",
        "info": "notice"
    }.get(level.lower(), "notice")
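
As a usage note, here is a minimal sketch of the intended check-run lifecycle, assuming the connect_repo helper from authenticate_github.py and placeholder values throughout:

# Illustrative only: drives the queued -> in_progress -> completed transitions above.
from authenticate_github import connect_repo

repo = connect_repo("some-owner", "some-repo")  # placeholder owner/repo
check_run = create_check_run(repo, "0123abc")   # placeholder head commit SHA
results = [{
    "fileName": "app.py",
    "start_line": 3,
    "end_line": 5,
    "severity": "warning",
    "comment": "Example issue",
    "suggestion": "Example fix",
    "suggestedCode": "pass",
}]
update_check_run(check_run, results)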


def parse_diff_file_line_numbers(diff_content: str) -> List[Dict]:
    """
    Parse a unified diff string and return a structured list of changes using
    actual file line numbers.

    Returns a list of dicts, each representing a file change:
    {
        "file_name": str,
        "changes": [
            {
                "type": "added" | "removed" | "context",
                "line_number": int,  # For added or context lines, this is target_line_no.
                                     # For removed lines, use source_line_no.
                "content": str
            },
            ...
        ]
    }
    """
    patch = PatchSet(diff_content)
    parsed_files = []

    for patched_file in patch:
        file_info = {
            "file_name": patched_file.path,
            "changes": []
        }
        for hunk in patched_file:
            for line in hunk:
                # Decide which line number to use based on change type.
                if line.is_added or not line.is_removed:
                    line_num = line.target_line_no
                else:
                    line_num = line.source_line_no

                if line_num is None:
                    continue  # Skip lines without a valid number

                # Append each changed line along with its file-based line number.
                file_info["changes"].append({
                    "type": "added" if line.is_added else "removed" if line.is_removed else "context",
                    "line_number": line_num,
                    "content": line.value.rstrip("\n")
                })
        parsed_files.append(file_info)

    return parsed_files

Check failure on line 112 in github_utils.py

pr-respond-test / AI Code Review

github_utils.py#L65-L112

Issue: Missing error handling for diff parsing. Add try/catch block around the parsing logic and log any exceptions.

Severity: error

Suggestion: Wrap the parsing logic in a try/except block to handle exceptions and log them.

def parse_diff_file_line_numbers(diff_content: str) -> List[Dict]:
    """
    Parse a unified diff string and return a structured list of changes using
    actual file line numbers.
    """
    try:
        patch = PatchSet(diff_content)
        parsed_files = []
        
        for patched_file in patch:
            file_info = {
                "file_name": patched_file.path,
                "changes": []
            }
            for hunk in patched_file:
                for line in hunk:
                    # Decide which line number to use based on change type.
                    if line.is_added or not line.is_removed:
                        line_num = line.target_line_no
                    else:
                        line_num = line.source_line_no
                    
                    if line_num is None:
                        continue  # Skip lines without a valid number
                    
                    # Append each changed line along with its file-based line number.
                    file_info["changes"].append({
                        "type": "added" if line.is_added else "removed" if line.is_removed else "context",
                        "line_number": line_num,
                        "content": line.value.rstrip("\n")
                    })
            parsed_files.append(file_info)
        
        return parsed_files
    except Exception as e:
        print(f"Error parsing diff content: {str(e)}")
        raise

def build_review_prompt_with_file_line_numbers(parsed_files: List[Dict]) -> str:
    """
    Create a prompt that includes the diff using actual file line numbers.
    """
    prompt_lines = []

    for file_data in parsed_files:
        file_name = file_data["file_name"]
        prompt_lines.append(f"File: {file_name}\n")
        prompt_lines.append("Changed lines:")

        for change in file_data["changes"]:
            # Mark added lines with +, removed with -, context with a space
            sign = (
                "+" if change["type"] == "added" else
                "-" if change["type"] == "removed" else
                " "
            )
            prompt_lines.append(
                f"[Line {change['line_number']}] {sign} {change['content']}"
            )
        prompt_lines.append("\n")

    return "\n".join(prompt_lines)

109 changes: 109 additions & 0 deletions llm_utils.py
@@ -0,0 +1,109 @@
import os
from openai import OpenAI
from typing import List, Dict, Optional
from pydantic import BaseModel, Field


api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise ValueError("OPENAI_API_KEY is not set")

client = OpenAI(api_key=api_key)

class ReviewModel(BaseModel):
    class Step(BaseModel):
        fileName: str = Field(description="The name of the file that has an issue")
        start_line_with_prefix: str = Field(description="The starting line number in the file (REQUIRED). \
If the start_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        end_line_with_prefix: str = Field(description="The ending line number in the file (REQUIRED). \
If the end_line is from the new file, indicate it with a '+' prefix, or if it is from the old file, indicate it with a '-' prefix")
        language: str = Field(description="The language of the code segment")
        codeSegmentToFix: str = Field(description="The code segment that needs to be fixed from code diff in diff style ('+' for added, '-' for removed, or nothing for normal code)")
        comment: str = Field(description="The comment on the code segment")
        suggestion: str = Field(description="The suggestion to fix the code segment")
        suggestedCode: Optional[str] = Field(None, description="The updated code segment for the fix")
        severity: str = Field(description="The severity of the issue. Can be 'error', 'warning', or 'info'")

    steps: list[Step]
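
For illustration, one Step instance the model might return under this schema (all values made up):

# Hypothetical parsed output matching ReviewModel.Step; not produced by this PR.
example_step = ReviewModel.Step(
    fileName="app.py",
    start_line_with_prefix="+12",  # '+' means the line number refers to the new file
    end_line_with_prefix="+14",
    language="python",
    codeSegmentToFix="+ result = eval(user_input)",
    comment="Use of eval on user input is unsafe.",
    suggestion="Parse the input explicitly instead of using eval.",
    suggestedCode="result = int(user_input)",
    severity="error",
)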

def analyze_code_changes(structured_diff_text: str) -> List[Dict]:
    """
    Analyze code changes using OpenAI's GPT model.
    Returns a list of review comments.
    """

    # Prepare the prompt for the LLM
    prompt = f"""
    Analyze the following code changes and provide detailed review comments.
    Focus on:
    - Code quality and best practices
    - Potential security vulnerabilities
    - Performance implications

    Important:
    - Provide insights in the comment section for each code segment. Provide improvements in suggestions when necessary.
    - Always output the codeSegmentToFix in the diff format (e.g., '+ added code', '- removed code', or nothing for normal code).
    - If there is a new line in the codeSegmentToFix (when there are multiple lines), you MUST indicate it with the new line symbol.
    - Ensure that you provide all the necessary code lines in the codeSegmentToFix field.
    - If there are multiple comments for the same code segment, provide the comments separated by commas.

    CRITICAL REQUIREMENTS:
    - Precisely mention the position where the comment should be placed.
    - The codeSegmentToFix should exactly start from the start_line_with_prefix and end at the end_line_with_prefix.
    - Use the file-based line numbers provided in the structured diff below.
    - You MUST provide exact start_line_with_prefix and end_line_with_prefix numbers for each comment.
    - Never omit line numbers or the system will fail.

    Examples for start_line_with_prefix when the start_line is from new file: "+5, +2, +51, +61"
    Examples for start_line_with_prefix when the start_line is from old file: "-8, -1, -56, -20"

    Examples for end_line_with_prefix when the end_line is from new file: "+10, +2, +77, +65"
    Examples for end_line_with_prefix when the end_line is from old file: "-1, -5, -22, -44"

    Diff content:
    {structured_diff_text}
    """

Check notice on line 65 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L35-L65

Issue: Complex prompt string - The prompt string is very long and complex, making it hard to maintain and modify. Consider breaking it into smaller parts or using a different approach to build the prompt.

Severity: info

Suggestion: Break the prompt string into smaller parts or use a different approach to build the prompt for better maintainability.

prompt_intro = """
    Analyze the following code changes and provide detailed review comments.
    Focus on:
    - Code quality and best practices
    - Potential security vulnerabilities
    - Performance implications

    Important:
    - Provide insights in the comment section for each code segment. Provide improvements in suggestions when necessary.
    - Always output the codeSegmentToFix in the diff format (e.g., '+ added code', '- removed code', or nothing for normal code).
    - If there is a new line in the codeSegmentToFix (when there are multiple lines), you MUST indicate it with the new line symbol.
    - Ensure that you provide all the necessary code lines in the codeSegmentToFix field.
    - If there are multiple comments for the same code segment, provide the comments separated by commas.
    """

prompt_critical = """
    CRITICAL REQUIREMENTS:
    - Precisely mention the position where the comment should be placed.
    - The codeSegmentToFix should exactly start from the start_line_with_prefix and end at the end_line_with_prefix.
    - Use the file-based line numbers provided in the structured diff below.
    - You MUST provide exact start_line_with_prefix and end_line_with_prefix numbers for each comment.
    - Never omit line numbers or the system will fail.
    """

prompt_examples = f"""
    Examples for start_line_with_prefix when the start_line is from new file: "+5, +2, +51, +61"
    Examples for start_line_with_prefix when the start_line is from old file: "-8, -1, -56, -20"

    Examples for end_line_with_prefix when the end_line is from new file: "+10, +2, +77, +65"
    Examples for end_line_with_prefix when the end_line is from old file: "-1, -5, -22, -44"

    Diff content:
    {structured_diff_text}
    """

prompt = f"{prompt_intro}{prompt_critical}{prompt_examples}"


print("Before API CALL...")

# Get analysis from OpenAI
completion = client.beta.chat.completions.parse(

Check failure on line 70 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L70

Issue: The OpenAI model name is incorrect. The correct model name should be 'gpt-4o' or another valid OpenAI model name. The current name 'gpt-4o-2024-08-06' is not a valid model name provided by OpenAI.

Severity: error

Suggestion: Update the model name to a valid OpenAI model name such as 'gpt-4o'.

model="gpt-4o"

model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "You are an experienced code reviewer."},
{"role": "user", "content": prompt}
],
response_format=ReviewModel,
)

Check failure on line 77 in llm_utils.py

pr-respond-test / AI Code Review

llm_utils.py#L70-L77

Issue: Missing error handling for API call. Add try/catch block around the API call and log any exceptions.

Severity: error

Suggestion: Wrap the API call in a try/except block to handle potential exceptions and log them appropriately.

try:
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": "You are an experienced code reviewer."},
            {"role": "user", "content": prompt}
        ],
        response_format=ReviewModel,
    )
except Exception as e:
    print(f"Error during API call: {str(e)}")
    raise


print("After API CALL...")

# Parse and format the response
response_pydantic= completion.choices[0].message.parsed

review_steps = []

for step in response_pydantic.steps:

value1 = step.start_line_with_prefix
start_line = int(value1.replace("+", "").strip()) # Remove '+' and strip spaces

value2 = step.end_line_with_prefix
end_line = int(value2.replace("+", "").strip())

step_dict = {
"fileName": step.fileName,
"start_line": start_line,
"start_line_with_prefix": step.start_line_with_prefix,
"end_line": end_line,
"end_line_with_prefix": step.end_line_with_prefix,
"language": step.language,
"codeSegmentToFix": step.codeSegmentToFix,
"comment": step.comment,
"suggestion": step.suggestion,
"suggestedCode": step.suggestedCode,
"severity": step.severity
}
review_steps.append(step_dict)

return review_steps
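
Taken together, a rough sketch of how the pieces in this PR could be driven end to end (owner, repo, and diff-fetching details are placeholders; this orchestration is not part of the diff):

# Hypothetical glue code combining the three modules in this PR.
import requests

from authenticate_github import connect_repo
from github_utils import (build_review_prompt_with_file_line_numbers,
                          create_check_run, parse_diff_file_line_numbers,
                          update_check_run)
from llm_utils import analyze_code_changes

repo = connect_repo("some-owner", "some-repo")  # placeholder names
pr = repo.get_pull(124)
check_run = create_check_run(repo, pr.head.sha)

diff_text = requests.get(pr.diff_url).text      # fetch the raw unified diff
parsed = parse_diff_file_line_numbers(diff_text)
prompt_text = build_review_prompt_with_file_line_numbers(parsed)

results = analyze_code_changes(prompt_text)
update_check_run(check_run, results)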