Skip to content

Commit 28ecb5d

Browse files
committed
screen cap tool in readme
1 parent 98f4938 commit 28ecb5d

File tree

1 file changed

+18
-25
lines changed

1 file changed

+18
-25
lines changed

README.md

Lines changed: 18 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1337,40 +1337,33 @@ Tools are defined as YAMLs with `.tool` extension within the npc_team/tools dire
13371337
Here is an example of a tool file:
13381338
```yaml
13391339
tool_name: "screen_capture_analysis_tool"
1340+
description: Captures the whole screen and sends the image for analysis
13401341
inputs:
13411342
- "prompt"
1342-
preprocess:
1343+
steps:
13431344
- engine: "python"
13441345
code: |
13451346
# Capture the screen
13461347
import pyautogui
13471348
import datetime
13481349
import os
13491350
from PIL import Image
1350-
from npcsh.image import analyze_image_base
1351-
1352-
# Generate filename
1353-
filename = f"screenshot_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
1354-
screenshot = pyautogui.screenshot()
1355-
screenshot.save(filename)
1356-
print(f"Screenshot saved as {filename}")
1357-
1358-
# Load image
1359-
image = Image.open(filename)
1360-
1361-
# Full file path
1362-
file_path = os.path.abspath('./'+filename)
1363-
# Analyze the image
1364-
1365-
llm_output = analyze_image_base(inputs['prompt']+ '\n\n attached is a screenshot of my screen currently.', file_path, filename, npc=npc)
1366-
prompt:
1367-
engine: "natural"
1368-
code: ""
1369-
postprocess:
1370-
- engine: "natural"
1371-
code: |
1372-
Screenshot captured and saved as {{ filename }}.
1373-
Analysis Result: {{ llm_output }}
1351+
import time
1352+
from npcsh.image import analyze_image_base, capture_screenshot
1353+
1354+
out = capture_screenshot(npc = npc, full = True)
1355+
1356+
llm_response = analyze_image_base( '{{prompt}}' + "\n\nAttached is a screenshot of my screen currently. Please use this to evaluate the situation. If the user asked for you to explain what's on their screen or something similar, they are referring to the details contained within the attached image. You do not need to actually view their screen. You do not need to mention that you cannot view or interpret images directly. You only need to answer the user's request based on the attached screenshot!",
1357+
out['file_path'],
1358+
out['filename'],
1359+
npc=npc,
1360+
**out['model_kwargs'])
1361+
# To this:
1362+
if isinstance(llm_response, dict):
1363+
llm_response = llm_response.get('response', 'No response from image analysis')
1364+
else:
1365+
llm_response = 'No response from image analysis'
1366+
13741367
```
13751368
13761369

0 commit comments

Comments
 (0)