-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: check verification result for contradictions
Sometimes LLMs incorrectly fail a verification and provide an explanation that contradicts itself. For example, given the verification "square A is positioned to the left of square B", the LLM sometimes fails it but provides a weird explanation: "The visual representation shows square A to the right of square B, indicating that square B is not positioned to the left of square A." This commit compensates for such behavior by double-checking the expected result and the verification explanation for contradictions.
- Loading branch information
Showing
6 changed files
with
52 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from .actor_agent import ActorAgent | ||
from .contradiction_checker_agent import ContradictionCheckerAgent | ||
from .loading_detector_agent import LoadingDetectorAgent | ||
from .verifier_agent import VerifierAgent |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import logging | ||
from pathlib import Path | ||
|
||
from langchain_core.language_models import BaseChatModel | ||
from pydantic import BaseModel, Field | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Response(BaseModel):
    # Structured-output schema handed to the LLM by ContradictionCheckerAgent:
    # a single boolean verdict. Documented with `#` comments on purpose so the
    # model's generated schema text stays exactly as before.
    result: bool = Field(
        description="True if contradiction is detected, False otherwise.",
    )
|
||
|
||
class ContradictionCheckerAgent:
    """Ask an LLM whether a statement and a verification explanation contradict.

    The prompt template is read from ``contradiction_checker_prompts/user.md``
    next to this module and filled in with the two texts to compare. The LLM is
    asked for a structured ``Response`` whose ``result`` field is the verdict.
    """

    # Loaded once at class-definition time. ``read_text`` closes the file and
    # avoids leaking a file handle as a class attribute; the explicit encoding
    # keeps the prompt identical regardless of the platform's default locale.
    USER_MESSAGE = (
        Path(__file__).parent / "contradiction_checker_prompts/user.md"
    ).read_text(encoding="utf-8")

    def __init__(self, llm: BaseChatModel):
        """Bind ``llm`` to the ``Response`` schema.

        Args:
            llm: Chat model used to perform the contradiction check.
        """
        # include_raw=True returns the raw model message alongside the parsed
        # Response, which is what lets invoke() log token usage.
        self.chain = llm.with_structured_output(Response, include_raw=True)

    def invoke(self, statement: str, verification_explanation: str) -> bool:
        """Check the two texts for a contradiction.

        Args:
            statement: The statement that was being verified.
            verification_explanation: The explanation produced for the
                verification outcome.

        Returns:
            True if the LLM reports a contradiction, False otherwise.

        Raises:
            ValueError: If the LLM output could not be parsed into a
                ``Response``.
        """
        logger.info("Starting contradiction checking:")

        message = self.chain.invoke(
            [
                (
                    "human",
                    self.USER_MESSAGE.format(
                        statement=statement,
                        verification_explanation=verification_explanation,
                    ),
                ),
            ]
        )

        result = message["parsed"]
        if result is None:
            # With include_raw=True a parsing failure is reported in the
            # returned dict instead of being raised; surface it explicitly
            # rather than failing later with an opaque AttributeError.
            raise ValueError(
                "Contradiction checker returned an unparsable response: "
                f"{message.get('parsing_error')}"
            )

        logger.info(f" <- Result: {result.result}")
        logger.info(f' <- Usage: {message["raw"].usage_metadata}')

        return result.result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Do the following two statements contradict each other? | ||
|
||
1. {statement} | ||
2. {verification_explanation} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
def test_drag_and_drop(al, driver): | ||
driver.get("https://the-internet.herokuapp.com/drag_and_drop") | ||
al.check("square A is positioned to the left from square B", vision=True) | ||
al.check("square A is positioned to the left of square B", vision=True) | ||
al.do("move square A to square B") | ||
al.check("square B is positioned to the left from square A", vision=True) | ||
al.check("square B is positioned to the left of square A", vision=True) |