Skip to content

Commit 2ccf3d8

Browse files
authored
fix: strict mimetype validation (#70)
* use binaryornot check and add test * bump to 0.2.2 * test 2.0 multiplier
1 parent 3470179 commit 2ccf3d8

File tree

5 files changed

+85
-38
lines changed

5 files changed

+85
-38
lines changed

openhands_aci/editor/editor.py

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
import mimetypes
21
import os
32
import re
43
import shutil
54
import tempfile
65
from pathlib import Path
76
from typing import Literal, get_args
87

8+
from binaryornot.check import is_binary
9+
910
from openhands_aci.linter import DefaultLinter
1011
from openhands_aci.utils.shell import run_shell_cmd
1112

@@ -461,26 +462,11 @@ def validate_file(self, path: Path) -> None:
461462
reason=f'File is too large ({file_size / 1024 / 1024:.1f}MB). Maximum allowed size is {int(max_size / 1024 / 1024)}MB.',
462463
)
463464

464-
# Check if file is binary
465-
mime_type, _ = mimetypes.guess_type(str(path))
466-
if mime_type is None:
467-
# If mime_type is None, try to detect if it's binary by reading first chunk
468-
try:
469-
chunk = open(path, 'rb').read(1024)
470-
if b'\0' in chunk: # Common way to detect binary files
471-
raise FileValidationError(
472-
path=str(path),
473-
reason='File appears to be binary. Only text files can be edited.',
474-
)
475-
except Exception as e:
476-
raise FileValidationError(
477-
path=str(path), reason=f'Error checking file type: {str(e)}'
478-
)
479-
elif not mime_type.startswith('text/'):
480-
# Known non-text mime type
465+
# Check file type
466+
if is_binary(str(path)):
481467
raise FileValidationError(
482468
path=str(path),
483-
reason=f'File type {mime_type} is not supported. Only text files can be edited.',
469+
reason='File appears to be binary. Only text files can be edited.',
484470
)
485471

486472
def read_file(

poetry.lock

Lines changed: 29 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "openhands-aci"
3-
version = "0.2.1"
3+
version = "0.2.2"
44
description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
55
authors = ["OpenHands"]
66
license = "MIT"
@@ -22,6 +22,7 @@ grep-ast = "0.3.3"
2222
diskcache = "^5.6.3"
2323
flake8 = "*"
2424
whatthepatch = "^1.0.6"
25+
binaryornot = "^0.4.4"
2526

2627

2728
[tool.poetry.group.dev.dependencies]
Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,59 @@
11
"""Tests for file validation in file editor."""
22

3+
import os
4+
from pathlib import Path
5+
36
from openhands_aci.editor import file_editor
47

58
from .conftest import parse_result
69

710

811
def test_file_validation(temp_file):
912
"""Test file validation for various file types."""
13+
# Ensure temp_file has .sql suffix
14+
temp_file_sql = Path(temp_file).with_suffix('.sql')
15+
os.rename(temp_file, temp_file_sql)
16+
1017
# Test binary file
11-
with open(temp_file, 'wb') as f:
18+
with open(temp_file_sql, 'wb') as f:
1219
f.write(b'Some text\x00with binary\x00content')
1320

1421
result = file_editor(
1522
command='view',
16-
path=temp_file,
23+
path=str(temp_file_sql),
1724
enable_linting=False,
1825
)
1926
result_json = parse_result(result)
2027
assert 'binary' in result_json['formatted_output_and_error'].lower()
2128

2229
# Test large file
2330
large_size = 11 * 1024 * 1024 # 11MB
24-
with open(temp_file, 'w') as f:
31+
with open(temp_file_sql, 'w') as f:
2532
f.write('x' * large_size)
2633

2734
result = file_editor(
2835
command='view',
29-
path=temp_file,
36+
path=str(temp_file_sql),
3037
enable_linting=False,
3138
)
3239
result_json = parse_result(result)
3340
assert 'too large' in result_json['formatted_output_and_error']
3441
assert '10MB' in result_json['formatted_output_and_error']
42+
43+
# Test SQL file
44+
sql_content = """
45+
SELECT *
46+
FROM users
47+
WHERE id = 1;
48+
"""
49+
with open(temp_file_sql, 'w') as f:
50+
f.write(sql_content)
51+
52+
result = file_editor(
53+
command='view',
54+
path=str(temp_file_sql),
55+
enable_linting=False,
56+
)
57+
result_json = parse_result(result)
58+
assert 'SELECT *' in result_json['formatted_output_and_error']
59+
assert 'binary' not in result_json['formatted_output_and_error'].lower()

tests/integration/editor/test_peak_memory.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ def get_memory_info():
1515
"""Get current and peak memory usage in bytes."""
1616
process = psutil.Process(os.getpid())
1717
rss = process.memory_info().rss
18-
peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024 # Convert KB to bytes
18+
peak_rss = (
19+
resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024
20+
) # Convert KB to bytes
1921
return {
2022
'rss': rss,
2123
'peak_rss': peak_rss,
@@ -28,17 +30,17 @@ def create_test_file(path: Path, size_mb: float = 5.0):
2830
line_size = 100 # bytes per line approximately
2931
num_lines = int((size_mb * 1024 * 1024) // line_size)
3032

31-
print(f"\nCreating test file with {num_lines} lines...")
33+
print(f'\nCreating test file with {num_lines} lines...')
3234
with open(path, 'w') as f:
3335
for i in range(num_lines):
3436
f.write(f'Line {i}: ' + 'x' * (line_size - 10) + '\n')
3537

3638
actual_size = os.path.getsize(path)
37-
print(f"File created, size: {actual_size / 1024 / 1024:.2f} MB")
39+
print(f'File created, size: {actual_size / 1024 / 1024:.2f} MB')
3840
return actual_size
3941

4042

41-
def set_memory_limit(file_size: int, multiplier: float = 1.5):
43+
def set_memory_limit(file_size: int, multiplier: float = 2.0):
4244
"""Set memory limit to multiplier * file_size."""
4345
# Add base memory for pytest and other processes (100MB)
4446
base_memory = 100 * 1024 * 1024 # 100MB
@@ -50,11 +52,13 @@ def set_memory_limit(file_size: int, multiplier: float = 1.5):
5052
current_usage = psutil.Process().memory_info().rss
5153
if memory_limit > current_usage:
5254
resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard))
53-
print(f"Memory limit set to {memory_limit / 1024 / 1024:.2f} MB")
55+
print(f'Memory limit set to {memory_limit / 1024 / 1024:.2f} MB')
5456
else:
55-
print(f"Warning: Current memory usage ({current_usage / 1024 / 1024:.2f} MB) higher than limit ({memory_limit / 1024 / 1024:.2f} MB)")
57+
print(
58+
f'Warning: Current memory usage ({current_usage / 1024 / 1024:.2f} MB) higher than limit ({memory_limit / 1024 / 1024:.2f} MB)'
59+
)
5660
except Exception as e:
57-
print(f"Warning: Could not set memory limit: {str(e)}")
61+
print(f'Warning: Could not set memory limit: {str(e)}')
5862
return memory_limit
5963

6064

@@ -82,6 +86,7 @@ def test_str_replace_peak_memory():
8286

8387
# Force Python to release file handles and clear buffers
8488
import gc
89+
8590
gc.collect()
8691

8792
# Get initial memory usage
@@ -93,7 +98,7 @@ def test_str_replace_peak_memory():
9398

9499
# Perform str_replace operation
95100
try:
96-
result = file_editor(
101+
_ = file_editor(
97102
command='str_replace',
98103
path=path,
99104
old_str='Line 5000', # Replace a line in the middle
@@ -118,6 +123,7 @@ def test_insert_peak_memory():
118123

119124
# Force Python to release file handles and clear buffers
120125
import gc
126+
121127
gc.collect()
122128

123129
# Get initial memory usage
@@ -129,7 +135,7 @@ def test_insert_peak_memory():
129135

130136
# Perform insert operation
131137
try:
132-
result = file_editor(
138+
_ = file_editor(
133139
command='insert',
134140
path=path,
135141
insert_line=5000, # Insert in the middle
@@ -154,6 +160,7 @@ def test_view_peak_memory():
154160

155161
# Force Python to release file handles and clear buffers
156162
import gc
163+
157164
gc.collect()
158165

159166
# Get initial memory usage
@@ -165,7 +172,7 @@ def test_view_peak_memory():
165172

166173
# Test viewing specific lines
167174
try:
168-
result = file_editor(
175+
_ = file_editor(
169176
command='view',
170177
path=path,
171178
view_range=[5000, 5100], # View 100 lines from middle
@@ -189,6 +196,7 @@ def test_view_full_file_peak_memory():
189196

190197
# Force Python to release file handles and clear buffers
191198
import gc
199+
192200
gc.collect()
193201

194202
# Get initial memory usage
@@ -200,7 +208,7 @@ def test_view_full_file_peak_memory():
200208

201209
# Test viewing entire file
202210
try:
203-
result = file_editor(
211+
_ = file_editor(
204212
command='view',
205213
path=path,
206214
enable_linting=False,
@@ -212,4 +220,4 @@ def test_view_full_file_peak_memory():
212220
pytest.fail('Memory limit exceeded - peak memory usage too high')
213221
raise
214222

215-
check_memory_usage(initial['max'], file_size, 'view_full')
223+
check_memory_usage(initial['max'], file_size, 'view_full')

0 commit comments

Comments
 (0)