Skip to content

Commit 628fd26

Browse files
committed
fix can't pickle bug
source: openai/human-eval#30
1 parent 312c5e5 commit 628fd26

File tree

2 files changed

+62
-49
lines changed

2 files changed

+62
-49
lines changed

human_eval/execution.py

Lines changed: 58 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,68 +9,77 @@
99
import signal
1010
import tempfile
1111

12+
def unsafe_execute(problem: Dict, completion: str, result, timeout: float):
    """
    Runs a single completion against the problem's test suite and records
    the outcome in ``result``.

    This is a module-level function (rather than a closure) so that it can be
    handed to ``multiprocessing.Process`` as a target together with explicit
    ``args``; everything it needs is received through its parameters.

    :param problem: mapping that provides the "prompt", "test" and
        "entry_point" strings used to assemble the check program.
    :param completion: candidate code appended directly after the prompt.
    :param result: list-like object (e.g. a ``multiprocessing.Manager().list()``)
        to which a status string is appended: "passed", "timed out" or
        "failed: <error>".
    :param timeout: limit handed to ``time_limit`` for running the program.
    """

    with create_tempdir():

        # These system calls are needed when cleaning up tempdir.
        import os
        import shutil
        rmtree = shutil.rmtree
        rmdir = os.rmdir
        chdir = os.chdir

        try:
            # Disable functionalities that can make destructive changes to the test.
            reliability_guard()

            # Construct the check program and run it.
            check_program = (
                problem["prompt"] + completion + "\n" +
                problem["test"] + "\n" +
                f"check({problem['entry_point']})"
            )

            try:
                exec_globals = {}
                with swallow_io():
                    with time_limit(timeout):
                        # WARNING
                        # This program exists to execute untrusted model-generated code. Although
                        # it is highly unlikely that model-generated code will do something overtly
                        # malicious in response to this test suite, model-generated code may act
                        # destructively due to a lack of model capability or alignment.
                        # Users are strongly encouraged to sandbox this evaluation suite so that it
                        # does not perform destructive actions on their host or network. For more
                        # information on how OpenAI sandboxes its code, see the accompanying paper.
                        # The following line runs the untrusted program; make sure you have read
                        # this disclaimer and taken appropriate precautions before using it.
                        exec(check_program, exec_globals)
                result.append("passed")
            except TimeoutException:
                result.append("timed out")
            except BaseException as e:
                # BaseException is deliberate: the executed program may raise
                # SystemExit / KeyboardInterrupt, which must count as a failure.
                result.append(f"failed: {e}")
        finally:
            # Needed for cleaning up. Restored in a ``finally`` so the tempdir
            # teardown still has working handles even if reporting the result
            # (or anything else above) raises.
            shutil.rmtree = rmtree
            os.rmdir = rmdir
            os.chdir = chdir
58+
1259

1360
def check_correctness(problem: Dict, completion: str, timeout: float,
1461
completion_id: Optional[int] = None) -> Dict:
1562
"""
1663
Evaluates the functional correctness of a completion by running the test
17-
suite provided in the problem.
64+
suite provided in the problem.
1865
1966
:param completion_id: an optional completion ID so we can match
2067
the results later even if execution finishes asynchronously.
2168
"""
2269

23-
def unsafe_execute():
24-
25-
with create_tempdir():
26-
27-
# These system calls are needed when cleaning up tempdir.
28-
import os
29-
import shutil
30-
rmtree = shutil.rmtree
31-
rmdir = os.rmdir
32-
chdir = os.chdir
33-
34-
# Disable functionalities that can make destructive changes to the test.
35-
reliability_guard()
36-
37-
# Construct the check program and run it.
38-
check_program = (
39-
problem["prompt"] + completion + "\n" +
40-
problem["test"] + "\n" +
41-
f"check({problem['entry_point']})"
42-
)
43-
44-
try:
45-
exec_globals = {}
46-
with swallow_io():
47-
with time_limit(timeout):
48-
# WARNING
49-
# This program exists to execute untrusted model-generated code. Although
50-
# it is highly unlikely that model-generated code will do something overtly
51-
# malicious in response to this test suite, model-generated code may act
52-
# destructively due to a lack of model capability or alignment.
53-
# Users are strongly encouraged to sandbox this evaluation suite so that it
54-
# does not perform destructive actions on their host or network. For more
55-
# information on how OpenAI sandboxes its code, see the accompanying paper.
56-
# Once you have read this disclaimer and taken appropriate precautions,
57-
# uncomment the following line and proceed at your own risk:
58-
# exec(check_program, exec_globals)
59-
result.append("passed")
60-
except TimeoutException:
61-
result.append("timed out")
62-
except BaseException as e:
63-
result.append(f"failed: {e}")
64-
65-
# Needed for cleaning up.
66-
shutil.rmtree = rmtree
67-
os.rmdir = rmdir
68-
os.chdir = chdir
69-
7070
manager = multiprocessing.Manager()
7171
result = manager.list()
7272

73-
p = multiprocessing.Process(target=unsafe_execute)
73+
# p = multiprocessing.Process(target=unsafe_execute)
74+
p = multiprocessing.Process(
75+
target=unsafe_execute,
76+
args=(
77+
problem,
78+
completion,
79+
result,
80+
timeout
81+
),
82+
)
7483
p.start()
7584
p.join(timeout=timeout + 1)
7685
if p.is_alive():
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"task_id": "./human-eval-bia/human_eval_bia_0.ipynb", "completion": "\n labeled_image, num_labels = skimage.measure.label(binary_image, background=0, return_num=True)\n return num_labels\n", "result": "failed: module 'signal' has no attribute 'setitimer'", "passed": false}
2+
{"task_id": "./human-eval-bia/human_eval_bia_1.ipynb", "completion": "\nfrom skimage import filters\nimport numpy as np\n\ndef apply_otsu_threshold_and_count_postiive_pixels(image):\n threshold_value = filters.threshold_otsu(image)\n binary_image = image > threshold_value\n positive_pixels_count = np.sum(binary_image)\n return binary_image, positive_pixels_count\n", "result": "failed: module 'signal' has no attribute 'setitimer'", "passed": false}
3+
{"task_id": "./human-eval-bia/human_eval_bia_0.ipynb", "completion": "\n # Convert the image to uint8\n binary_image = binary_image.astype(np.uint8)\n\n # Perform connected component labeling\n labeled_image, num_labels = skimage.measure.label(binary_image, return_num=True)\n\n # Return the count of labeled objects\n return num_labels\n", "result": "failed: module 'signal' has no attribute 'setitimer'", "passed": false}
4+
{"task_id": "./human-eval-bia/human_eval_bia_1.ipynb", "completion": "\nimport skimage.filters\n\ndef apply_otsu_threshold_and_count_postiive_pixels(image):\n \"\"\"\n Takes an image, applies Otus's threshold method to it to create a binary image and \n counts the positive pixels.\n \"\"\"\n # Apply Otsu's thresholding to create a binary image\n threshold = skimage.filters.threshold_otsu(image)\n binary_image = image > threshold\n \n # Count positive pixels\n positive_pixels_count = np.sum(binary_image)\n\n return positive_pixels_count\n", "result": "failed: module 'signal' has no attribute 'setitimer'", "passed": false}

0 commit comments

Comments
 (0)