From 69bd1f0c6618c82e8795865d5b0bfab92c2c0689 Mon Sep 17 00:00:00 2001
From: Fabian von Feilitzsch <fabian@fabianism.us>
Date: Tue, 3 Dec 2024 11:48:26 -0500
Subject: [PATCH] :bug: Stricter equality for analyzer issues (#507)

* :bug: Take incident variables into account when determining if an analyzer issue is a duplicate
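* Various logging/debugger changes for ease of use/comprehension: the debug driver now takes named CLI flags instead of positional arguments, Makefile target names are made consistent, and queue-state/maven-cache log messages are simplified or expanded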

Signed-off-by: Fabian von Feilitzsch <fabian@fabianism.us>
---
 Makefile                                      | 21 +++----
 kai/analyzer.py                               |  2 +-
 kai/reactive_codeplanner/main.py              | 63 ++++++++++++-------
 .../task_runner/analyzer_lsp/api.py           |  1 +
 .../task_runner/compiler/maven_validator.py   |  7 ++-
 kai/rpc_server/server.py                      |  8 +--
 6 files changed, 59 insertions(+), 43 deletions(-)

diff --git a/Makefile b/Makefile
index 09cc6249..7fcb8526 100644
--- a/Makefile
+++ b/Makefile
@@ -53,22 +53,21 @@ get-analyzer-deps:
 	docker rm bundle
 
 # This will get the rulesets and set them to be used by run_demo.py
-get_rulesets:
+get-rulesets:
 	(cd example/analysis && rm -rf rulesets && git clone https://github.com/konveyor/rulesets); rm -rf example/analysis/rulesets/preview
 
 run_demo:
 	cd example && python run_demo.py
 
 run_debug_driver:
-	PYTHONPATH=$(KAI_PYTHON_PATH) python kai/reactive_codeplanner/main.py \
-						 example/config.toml \
-						 example/coolstore \
-						 example/analysis/rulesets/default/generated \
-						 example/analysis/kai-analyzer-rpc \
-						 example/analysis/jdtls/bin/jdtls \
-						 example/analysis/bundle.jar \
-						 "(konveyor.io/target=quarkus || konveyor.io/target=jakarta-ee || konveyor.io/target=jakarta-ee8 || konveyor.io/target=jakarta-ee9 || konveyor.io/target=cloud-readiness)" \
-						 ""
+	python kai/reactive_codeplanner/main.py \
+		--kai-config example/config.toml \
+		--source-directory example/coolstore \
+		--rules-directory example/analysis/rulesets/default/generated \
+		--analyzer-lsp-server-binary example/analysis/kai-analyzer-rpc \
+		--analyzer-lsp-path example/analysis/jdtls/bin/jdtls \
+		--analyzer-lsp-java-bundle example/analysis/bundle.jar \
+		--label-selector "(konveyor.io/target=quarkus || konveyor.io/target=jakarta-ee || konveyor.io/target=jakarta-ee8 || konveyor.io/target=jakarta-ee9 || konveyor.io/target=cloud-readiness)"
 
 # This will run all the things that you need to do, to configure the demo.
-config_demo: set-binaries-demo get_analyzer_deps get_rulesets
+config_demo: set-binaries-demo get-analyzer-deps get-rulesets
diff --git a/kai/analyzer.py b/kai/analyzer.py
index 1190bb24..63380991 100644
--- a/kai/analyzer.py
+++ b/kai/analyzer.py
@@ -25,7 +25,7 @@ def __init__(
         rules_directory: Path,
         analyzer_lsp_path: Path,
         analyzer_java_bundle_path: Path,
-        dep_open_source_labels_path: Path,
+        dep_open_source_labels_path: Optional[Path],
     ) -> None:
         """This will start and analyzer-lsp jsonrpc server"""
         # trunk-ignore-begin(bandit/B603)
diff --git a/kai/reactive_codeplanner/main.py b/kai/reactive_codeplanner/main.py
index aefd4ae9..76fdcc3e 100755
--- a/kai/reactive_codeplanner/main.py
+++ b/kai/reactive_codeplanner/main.py
@@ -35,66 +35,81 @@
 
 
 def main() -> None:
-
     parser = argparse.ArgumentParser(
         description="Run the CodePlan loop against a project"
     )
+
     parser.add_argument(
-        "kai_config",
+        "--kai-config",
+        "-k",
         help="The path to the kai config file",
         type=Path,
+        required=True,
     )
 
     parser.add_argument(
-        "source_directory",
+        "--source-directory",
+        "-s",
         help="The root directory of the project to be fixed",
         type=Path,
+        required=True,
     )
 
     parser.add_argument(
-        "rules_directory",
+        "--rules-directory",
+        "-r",
         help="The root directory of the rules to use during analysis",
         type=Path,
+        required=True,
     )
 
     parser.add_argument(
-        "analyzer_lsp_server_binary",
+        "--analyzer-lsp-server-binary",
+        "-b",
         help="The binary for running analyzer-lsp RPC server",
         type=Path,
+        required=True,
     )
 
     parser.add_argument(
-        "analyzer_lsp_path",
+        "--analyzer-lsp-path",
+        "-a",
         help="The binary for analyzer-lsp",
         type=Path,
+        required=True,
     )
+
     parser.add_argument(
-        "analyzer_lsp_java_bundle",
+        "--analyzer-lsp-java-bundle",
+        "-j",
         help="The path to the analyzer java bundle",
         type=Path,
+        required=True,
     )
 
     parser.add_argument(
-        "label_selector",
-        default="",
+        "--label-selector",
+        "-l",
         help="The label selector for rules",
-        type=Path,
+        type=str,
+        default="",
     )
 
     parser.add_argument(
-        "incident_selector",
-        default="",
+        "--incident-selector",
+        "-i",
         help="The incident selector for violations",
-        type=Path,
+        type=str,
+        default="",
     )
 
     parser.add_argument(
-        "dep_open_source_labels_path",
-        default="",
-        help="Path to the opensource labels for depenencies file",
+        "--dep-open-source-labels-path",
+        "-d",
+        help="Path to the open source labels for dependencies file",
         type=Path,
+        default=None,
     )
-
     args = parser.parse_args()
 
     config = RpcClientConfig(
@@ -121,7 +136,11 @@ def main() -> None:
         rules_directory=Path(args.rules_directory),
         analyzer_lsp_path=Path(args.analyzer_lsp_path),
         analyzer_java_bundle_path=Path(args.analyzer_lsp_java_bundle),
-        dep_open_source_labels_path=Path(args.dep_open_source_labels_path),
+        dep_open_source_labels_path=(
+            Path(args.dep_open_source_labels_path)
+            if args.dep_open_source_labels_path
+            else None
+        ),
     )
 
     task_manager = TaskManager(
@@ -175,15 +194,11 @@ def main() -> None:
         logger.info("QUEUE_STATE: END")
         logger.info("QUEUE_STATE: SUCCESSFUL_TASKS: START")
         for task in task_manager.processed_tasks:
-            logger.info(
-                f"QUEUE_STATE: SUCCESSFUL_TASKS: {task}(priority={task.priority}, depth={task.depth}, retries={task.retry_count})"
-            )
+            logger.info(f"QUEUE_STATE: SUCCESSFUL_TASKS: {task}")
         logger.info("QUEUE_STATE: SUCCESSFUL_TASKS: END")
         logger.info("QUEUE_STATE: IGNORED_TASKS: START")
         for task in task_manager.ignored_tasks:
-            logger.info(
-                f"QUEUE_STATE: IGNORED_TASKS: {task}(priority={task.priority}, depth={task.depth}, retries={task.retry_count})"
-            )
+            logger.info(f"QUEUE_STATE: IGNORED_TASKS: {task}")
         logger.info("QUEUE_STATE: IGNORED_TASKS: END")
     task_manager.stop()
     logger.info("Codeplan execution completed.")
diff --git a/kai/reactive_codeplanner/task_runner/analyzer_lsp/api.py b/kai/reactive_codeplanner/task_runner/analyzer_lsp/api.py
index 62af7128..23604c49 100644
--- a/kai/reactive_codeplanner/task_runner/analyzer_lsp/api.py
+++ b/kai/reactive_codeplanner/task_runner/analyzer_lsp/api.py
@@ -39,6 +39,7 @@ def fuzzy_equals(self, error2: Task, offset: int = 1) -> bool:
             self.ruleset.name == error2.ruleset.name
             and self.incident.message == error2.incident.message
             and self.file == error2.file
+            and self.incident.variables == error2.incident.variables
         ):
             logger.info("should match on line numbers %s -- %s", self.line, error2.line)
             return True
diff --git a/kai/reactive_codeplanner/task_runner/compiler/maven_validator.py b/kai/reactive_codeplanner/task_runner/compiler/maven_validator.py
index 65214422..b124b0a3 100755
--- a/kai/reactive_codeplanner/task_runner/compiler/maven_validator.py
+++ b/kai/reactive_codeplanner/task_runner/compiler/maven_validator.py
@@ -39,6 +39,9 @@ def run(self, scoped_paths: Optional[list[Path]] = None) -> ValidationResult:
         )
         # Build/dependency/other errors prevent the compilation errors from being reported
         # But we still want to return them so they aren't mistakenly marked as solved.
+        logger.debug(
+            f"determining whether maven cache should be set: build_errors: {bool(build_errors)}, dependency_errors: {bool(dependency_errors)}, catchall_errors: {bool(catchall_errors)}, compilation_errors: {bool(compilation_errors)}, cond: {(build_errors or dependency_errors or catchall_errors) and not compilation_errors}"
+        )
         if (
             build_errors or dependency_errors or catchall_errors
         ) and not compilation_errors:
@@ -48,7 +51,9 @@ def run(self, scoped_paths: Optional[list[Path]] = None) -> ValidationResult:
             )
         else:
             self.last_compilation_errors = compilation_errors
-            logger.debug("Setting the maven cache")
+            logger.debug(
+                f"Setting the maven cache with {len(self.last_compilation_errors)} compilation errors"
+            )
         errors = build_errors + dependency_errors + compilation_errors + catchall_errors
         return ValidationResult(passed=not errors, errors=errors)
 
diff --git a/kai/rpc_server/server.py b/kai/rpc_server/server.py
index 4aded5be..5233d118 100644
--- a/kai/rpc_server/server.py
+++ b/kai/rpc_server/server.py
@@ -530,15 +530,11 @@ class OverallResult(TypedDict):
             app.log.debug("QUEUE_STATE: END")
             app.log.debug("QUEUE_STATE: SUCCESSFUL_TASKS: START")
             for task in app.task_manager.processed_tasks:
-                app.log.debug(
-                    f"QUEUE_STATE: SUCCESSFUL_TASKS: {task}(priority={task.priority}, depth={task.depth}, retries={task.retry_count})"
-                )
+                app.log.debug(f"QUEUE_STATE: SUCCESSFUL_TASKS: {task}")
             app.log.debug("QUEUE_STATE: SUCCESSFUL_TASKS: END")
             app.log.debug("QUEUE_STATE: IGNORED_TASKS: START")
             for task in app.task_manager.ignored_tasks:
-                app.log.debug(
-                    f"QUEUE_STATE: IGNORED_TASKS: {task}(priority={task.priority}, depth={task.depth}, retries={task.retry_count})"
-                )
+                app.log.debug(f"QUEUE_STATE: IGNORED_TASKS: {task}")
             app.log.debug("QUEUE_STATE: IGNORED_TASKS: END")
 
         diff = app.rcm.snapshot.diff(app.rcm.first_snapshot)