From 4d32b6890f52163ae0ed110a4b530dae401f60ee Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 29 Dec 2024 05:35:23 -0500 Subject: [PATCH 01/10] Bump RKT compiler time limit to 20 seconds --- dmoj/executors/RKT.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dmoj/executors/RKT.py b/dmoj/executors/RKT.py index 095b323cb..b09f59895 100644 --- a/dmoj/executors/RKT.py +++ b/dmoj/executors/RKT.py @@ -8,6 +8,7 @@ class Executor(CompiledExecutor): ext = 'rkt' fs = [RecursiveDir('/etc/racket'), ExactFile('/etc/passwd'), ExactDir('/')] compiler_read_fs = [RecursiveDir('~/.local/share/racket')] + compiler_time_limit = 20 command = 'racket' From 28cb229ec60352c1c27f8beba8300f892cf2ef8b Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 28 Feb 2021 18:44:38 -0500 Subject: [PATCH 02/10] Add streaming line-ending normalization utility --- dmoj/tests/test_normalize.py | 56 ++++++++++++++++++++++++++++++++++++ dmoj/utils/normalize.py | 20 +++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 dmoj/tests/test_normalize.py create mode 100644 dmoj/utils/normalize.py diff --git a/dmoj/tests/test_normalize.py b/dmoj/tests/test_normalize.py new file mode 100644 index 000000000..b7fa2d868 --- /dev/null +++ b/dmoj/tests/test_normalize.py @@ -0,0 +1,56 @@ +import unittest +from io import BytesIO + +from dmoj.utils.normalize import normalized_file_copy + +TEST_CASE = b'a\r\n\r\r\nb\r\r\nc\nd\n' +TEST_CASE_NO_NEWLINE = b'a\r\n\r\r\nb\r\r\nc\nd' +TEST_CASE_TRAILING_R = b'a\r\n\r\r\nb\r\r\nc\nd\r' +RESULT = b'a\n\n\nb\n\nc\nd\n' + + +class TestNormalizedCopy(unittest.TestCase): + def test_simple(self): + with BytesIO(TEST_CASE) as src, BytesIO() as dst: + normalized_file_copy(src, dst) + self.assertEqual(dst.getvalue(), RESULT) + + def test_newline_add(self): + with BytesIO(TEST_CASE_NO_NEWLINE) as src, BytesIO() as dst: + normalized_file_copy(src, dst) + self.assertEqual(dst.getvalue(), RESULT) + + def test_break_after_r(self): + with BytesIO(TEST_CASE) as src, BytesIO() as dst: + normalized_file_copy(src, dst, block_size=TEST_CASE.rindex(b'\r\n')) + self.assertEqual(dst.getvalue(), RESULT) + + def test_break_after_r_newline_add(self): + with BytesIO(TEST_CASE_NO_NEWLINE) as src, BytesIO() as dst: + normalized_file_copy(src, dst, block_size=TEST_CASE_NO_NEWLINE.rindex(b'\r\n')) + self.assertEqual(dst.getvalue(), RESULT) + + def test_break_between_r_n(self): + with BytesIO(TEST_CASE) as src, BytesIO() as dst: + normalized_file_copy(src, dst, block_size=TEST_CASE.rindex(b'\r\n') + 1) + self.assertEqual(dst.getvalue(), RESULT) + + def test_break_between_r_n_newline_add(self): + with BytesIO(TEST_CASE_NO_NEWLINE) as src, BytesIO() as dst: + normalized_file_copy(src, dst, block_size=TEST_CASE_NO_NEWLINE.rindex(b'\r\n') + 1) + self.assertEqual(dst.getvalue(), RESULT) + + def test_break_before_trailing_newline(self): + with BytesIO(TEST_CASE) as src, BytesIO() as dst: + normalized_file_copy(src, dst, block_size=len(TEST_CASE) - 1) + self.assertEqual(dst.getvalue(), RESULT) + + def test_trailing_r(self): + with BytesIO(TEST_CASE_TRAILING_R) as src, BytesIO() as dst: + normalized_file_copy(src, dst) + self.assertEqual(dst.getvalue(), RESULT) + + def test_break_before_trailing_r(self): + with BytesIO(TEST_CASE_TRAILING_R) as src, BytesIO() as dst: + normalized_file_copy(src, dst, block_size=len(TEST_CASE_TRAILING_R)) + self.assertEqual(dst.getvalue(), RESULT) diff --git a/dmoj/utils/normalize.py b/dmoj/utils/normalize.py new file mode 100644 index 000000000..e03d4914b --- /dev/null +++ b/dmoj/utils/normalize.py @@ -0,0 +1,20 @@ +from io import TextIOWrapper + + +def normalized_file_copy(src, dst, block_size=16384): + src_wrap = TextIOWrapper(src, encoding='iso-8859-1', newline=None) + dst_wrap = TextIOWrapper(dst, encoding='iso-8859-1', newline='') + + add_newline = False + while True: + buf = src_wrap.read(block_size) + if not buf: + break + dst_wrap.write(buf) + add_newline = not buf.endswith('\n') + + if add_newline: + dst_wrap.write('\n') + + src_wrap.detach() + dst_wrap.detach() From 705894b24a18e4a909fe15076eb0b8b39e96452d Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 28 Feb 2021 18:45:25 -0500 Subject: [PATCH 03/10] Offer input data as memfd in standard judge --- dmoj/cptbox/utils.py | 35 ++++++++++++++++++-- dmoj/graders/bridged.py | 4 +-- dmoj/graders/interactive.py | 5 ++- dmoj/graders/standard.py | 14 ++++---- dmoj/problem.py | 55 ++++++++++++++++++++++++++------ testsuite/shortest1/shortest1.py | 2 +- 6 files changed, 92 insertions(+), 23 deletions(-) diff --git a/dmoj/cptbox/utils.py b/dmoj/cptbox/utils.py index 476387b10..9e5f0c30b 100644 --- a/dmoj/cptbox/utils.py +++ b/dmoj/cptbox/utils.py @@ -1,11 +1,42 @@ +import errno import io +import mmap +import os +from typing import Optional from dmoj.cptbox._cptbox import memory_fd_create, memory_fd_seal class MemoryIO(io.FileIO): - def __init__(self) -> None: + def __init__(self, prefill: Optional[bytes] = None, seal=False) -> None: super().__init__(memory_fd_create(), 'r+') + if prefill: + self.write(prefill) + if seal: + self.seal() def seal(self) -> None: - memory_fd_seal(self.fileno()) + fd = self.fileno() + try: + memory_fd_seal(fd) + except OSError as e: + if e.errno == errno.ENOSYS: + # FreeBSD + self.seek(0, os.SEEK_SET) + return + raise + + new_fd = os.open(f'/proc/self/fd/{fd}', os.O_RDONLY) + try: + os.dup2(new_fd, fd) + finally: + os.close(new_fd) + + def to_bytes(self) -> bytes: + try: + with mmap.mmap(self.fileno(), 0, access=mmap.ACCESS_READ) as f: + return bytes(f) + except ValueError as e: + if e.args[0] == 'cannot mmap an empty file': + return b'' + raise diff --git a/dmoj/graders/bridged.py b/dmoj/graders/bridged.py index 75659c9a8..f3a4bca66 100644 --- a/dmoj/graders/bridged.py +++ b/dmoj/graders/bridged.py @@ -57,7 +57,7 @@ def check_result(self, case: TestCase, result: Result) -> CheckerOutput: return (not result.result_flag) and parsed_result - def _launch_process(self, case: TestCase) -> None: + def _launch_process(self, case: TestCase, input_file=None) -> None: self._interactor_stdin_pipe, submission_stdout_pipe = os.pipe() submission_stdin_pipe, self._interactor_stdout_pipe = os.pipe() self._current_proc = self.binary.launch( @@ -72,7 +72,7 @@ def _launch_process(self, case: TestCase) -> None: os.close(submission_stdin_pipe) os.close(submission_stdout_pipe) - def _interact_with_process(self, case: TestCase, result: Result, input: bytes) -> bytes: + def _interact_with_process(self, case: TestCase, result: Result) -> bytes: assert self._current_proc is not None assert self._current_proc.stderr is not None diff --git a/dmoj/graders/interactive.py b/dmoj/graders/interactive.py index d52b43468..59e33827a 100644 --- a/dmoj/graders/interactive.py +++ b/dmoj/graders/interactive.py @@ -105,7 +105,10 @@ def close(self) -> None: class InteractiveGrader(StandardGrader): check: CheckerOutput - def _interact_with_process(self, case: TestCase, result: Result, input: bytes) -> bytes: + def _launch_process(self, case, input_file=None): + super()._launch_process(case, input_file=None) + + def _interact_with_process(self, case: TestCase, result: Result) -> bytes: assert self._current_proc is not None assert self._current_proc.stderr is not None diff --git a/dmoj/graders/standard.py b/dmoj/graders/standard.py index 33f62df01..3729f3400 100644 --- a/dmoj/graders/standard.py +++ b/dmoj/graders/standard.py @@ -17,11 +17,11 @@ class StandardGrader(BaseGrader): def grade(self, case: TestCase) -> Result: result = Result(case) - input = case.input_data() # cache generator data + input_file = case.input_data_fd() - self._launch_process(case) + self._launch_process(case, input_file) - error = self._interact_with_process(case, result, input) + error = self._interact_with_process(case, result) process = self._current_proc @@ -80,23 +80,23 @@ def check_result(self, case: TestCase, result: Result) -> CheckerOutput: return check - def _launch_process(self, case: TestCase) -> None: + def _launch_process(self, case: TestCase, input_file=None) -> None: self._current_proc = self.binary.launch( time=self.problem.time_limit, memory=self.problem.memory_limit, symlinks=case.config.symlinks, - stdin=subprocess.PIPE, + stdin=input_file or subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, wall_time=case.config.wall_time_factor * self.problem.time_limit, ) - def _interact_with_process(self, case: TestCase, result: Result, input: bytes) -> bytes: + def _interact_with_process(self, case: TestCase, result: Result) -> bytes: process = self._current_proc assert process is not None try: result.proc_output, error = process.communicate( - input, outlimit=case.config.output_limit_length, errlimit=1048576 + None, outlimit=case.config.output_limit_length, errlimit=1048576 ) except OutputLimitExceeded: error = b'' diff --git a/dmoj/problem.py b/dmoj/problem.py index 46aa87092..e84e931b5 100644 --- a/dmoj/problem.py +++ b/dmoj/problem.py @@ -1,6 +1,7 @@ import itertools import os import re +import shutil import subprocess import zipfile from collections import defaultdict @@ -8,7 +9,6 @@ from typing import ( DefaultDict, Dict, - IO, Iterable, Iterator, List, @@ -30,9 +30,11 @@ from dmoj import checkers from dmoj.checkers import Checker from dmoj.config import ConfigNode, InvalidInitException +from dmoj.cptbox.utils import MemoryIO from dmoj.judgeenv import env, get_problem_root from dmoj.utils.helper_files import compile_with_auxiliary_files, parse_helper_file_error from dmoj.utils.module import load_module_from_file +from dmoj.utils.normalize import normalized_file_copy if TYPE_CHECKING: from dmoj.graders.base import BaseGrader @@ -264,18 +266,29 @@ def __init__(self, problem_root_dir: str, **kwargs): self.problem_root_dir = problem_root_dir self.archive = None - def __missing__(self, key: str) -> bytes: - f: IO[bytes] + def open(self, key: str): try: - with open(os.path.join(self.problem_root_dir, key), 'rb') as f: - return f.read() + return open(os.path.join(self.problem_root_dir, key), 'rb') except IOError: if self.archive: zipinfo = self.archive.getinfo(key) - with self.archive.open(zipinfo) as f: - return f.read() + return self.archive.open(zipinfo) raise KeyError('file "%s" could not be found in "%s"' % (key, self.problem_root_dir)) + def as_fd(self, key: str, normalize: bool = False) -> MemoryIO: + memory = MemoryIO() + with self.open(key) as f: + if normalize: + normalized_file_copy(f, memory) + else: + shutil.copyfileobj(f, memory) + memory.seal() + return memory + + def __missing__(self, key: str) -> bytes: + with self.open(key) as f: + return f.read() + def __del__(self): if self.archive: self.archive.close() @@ -331,6 +344,7 @@ class TestCase(BaseTestCase): batch: int output_prefix_length: int has_binary_data: bool + _input_data_fd: Optional[MemoryIO] _generated: Optional[Tuple[bytes, bytes]] def __init__(self, count: int, batch_no: int, config: ConfigNode, problem: Problem): @@ -342,6 +356,7 @@ def __init__(self, count: int, batch_no: int, config: ConfigNode, problem: Probl self.output_prefix_length = config.output_prefix_length self.has_binary_data = config.binary_data self._generated = None + self._input_data_fd = None def _normalize(self, data: bytes) -> bytes: # Perhaps the correct answer may be 'no output', in which case it'll be @@ -434,6 +449,16 @@ def _run_generator(self, gen: Union[str, ConfigNode], args: Optional[Iterable[st parse_helper_file_error(proc, executor, 'generator', stderr, time_limit, memory_limit) def input_data(self) -> bytes: + return self.input_data_fd().to_bytes() + + def input_data_fd(self) -> MemoryIO: + if self._input_data_fd: + return self._input_data_fd + + result = self._input_data_fd = self._make_input_data_fd() + return result + + def _make_input_data_fd(self) -> MemoryIO: gen = self.config.generator # don't try running the generator if we specify an output file explicitly, @@ -442,10 +467,18 @@ def input_data(self) -> bytes: if self._generated is None: self._run_generator(gen, args=self.config.generator_args) assert self._generated is not None + # FIXME: generate into the MemoryIO. if self._generated[0]: - return self._generated[0] + memory = MemoryIO() + memory.write(self._generated[0]) + memory.seal() + return memory + # in file is optional - return self._normalize(self.problem.problem_data[self.config['in']]) if self.config['in'] else b'' + if self.config['in']: + return self.problem.problem_data.as_fd(self.config['in'], normalize=not self.has_binary_data) + else: + return MemoryIO(seal=True) def output_data(self) -> bytes: if self.config.out: @@ -482,13 +515,15 @@ def checker(self) -> partial: def free_data(self) -> None: self._generated = None + if self._input_data_fd: + self._input_data_fd.close() def __str__(self) -> str: return f'TestCase(in={self.config["in"]},out={self.config["out"]},points={self.config["points"]})' # FIXME(tbrindus): this is a hack working around the fact we can't pickle these fields, but we do need parts of # TestCase itself on the other end of the IPC. - _pickle_blacklist = ('_generated', 'config', 'problem') + _pickle_blacklist = ('_generated', 'config', 'problem', '_input_data_fd') def __getstate__(self) -> dict: k = {k: v for k, v in self.__dict__.items() if k not in self._pickle_blacklist} diff --git a/testsuite/shortest1/shortest1.py b/testsuite/shortest1/shortest1.py index a1ef8260d..e94a1914f 100644 --- a/testsuite/shortest1/shortest1.py +++ b/testsuite/shortest1/shortest1.py @@ -8,7 +8,7 @@ def check_result(self, case, result): result.result_flag &= ~Result.TLE & ~Result.RTE & ~Result.IR return CheckerResult(passed, min((9. / len(self.source)) ** 5 * case.points, case.points) if passed else 0) - def _interact_with_process(self, case, result, input): + def _interact_with_process(self, case, result): process = self._current_proc for handle in [process.stdin, process.stdout, process.stderr]: if handle: From 1bd00d70aac196a039e0c5d8dcc94ccb5a919e4a Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 30 Jan 2022 00:32:27 -0500 Subject: [PATCH 04/10] Allow passing memfd to bridged interactors --- dmoj/cptbox/isolate.py | 4 ++-- dmoj/cptbox/utils.py | 3 +++ dmoj/executors/base_executor.py | 9 ++++++--- dmoj/executors/shell_executor.py | 4 ++-- dmoj/graders/bridged.py | 8 ++++++-- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/dmoj/cptbox/isolate.py b/dmoj/cptbox/isolate.py index 10506682f..1bcca842d 100644 --- a/dmoj/cptbox/isolate.py +++ b/dmoj/cptbox/isolate.py @@ -362,7 +362,7 @@ def _access_check(self, debugger: Debugger, file: str, fs_jail: FilesystemPolicy real = os.path.realpath(file) try: - same = normalized == real or os.path.samefile(projected, real) + same = normalized == real or real.startswith('/memfd:') or os.path.samefile(projected, real) except OSError: raise DeniedSyscall(ACCESS_ENOENT, f'Cannot stat, file: {file}, projected: {projected}, real: {real}') @@ -385,7 +385,7 @@ def _access_check(self, debugger: Debugger, file: str, fs_jail: FilesystemPolicy else: real = os.path.join('/proc/self', relpath) - if not fs_jail.check(real): + if not real.startswith('/memfd:') and not fs_jail.check(real): raise DeniedSyscall(ACCESS_EACCES, f'Denying {file}, real path {real}') def handle_kill(self, debugger: Debugger) -> None: diff --git a/dmoj/cptbox/utils.py b/dmoj/cptbox/utils.py index 9e5f0c30b..323271639 100644 --- a/dmoj/cptbox/utils.py +++ b/dmoj/cptbox/utils.py @@ -32,6 +32,9 @@ def seal(self) -> None: finally: os.close(new_fd) + def to_path(self) -> str: + return f'/proc/{os.getpid()}/fd/{self.fileno()}' + def to_bytes(self) -> bytes: try: with mmap.mmap(self.fileno(), 0, access=mmap.ACCESS_READ) as f: diff --git a/dmoj/executors/base_executor.py b/dmoj/executors/base_executor.py index e40fa4b90..25816e804 100644 --- a/dmoj/executors/base_executor.py +++ b/dmoj/executors/base_executor.py @@ -230,8 +230,11 @@ def _add_syscalls(self, sec: IsolateTracer, handlers: List[Union[str, Tuple[str, sec[getattr(syscalls, f'sys_{name}')] = handler return sec - def get_security(self, launch_kwargs=None) -> IsolateTracer: - sec = IsolateTracer(read_fs=self.get_fs(), write_fs=self.get_write_fs()) + def get_security(self, launch_kwargs=None, extra_fs=None) -> IsolateTracer: + read_fs = self.get_fs() + if extra_fs: + read_fs += extra_fs + sec = IsolateTracer(read_fs=read_fs, write_fs=self.get_write_fs()) return self._add_syscalls(sec, self.get_allowed_syscalls()) def get_fs(self) -> List[FilesystemAccessRule]: @@ -299,7 +302,7 @@ def launch(self, *args, **kwargs) -> TracedPopen: return TracedPopen( [utf8bytes(a) for a in self.get_cmdline(**kwargs) + list(args)], executable=utf8bytes(executable), - security=self.get_security(launch_kwargs=kwargs), + security=self.get_security(launch_kwargs=kwargs, extra_fs=kwargs.get('extra_fs')), address_grace=self.get_address_grace(), data_grace=self.data_grace, personality=self.personality, diff --git a/dmoj/executors/shell_executor.py b/dmoj/executors/shell_executor.py index f7ec8eb89..d4e5f4e57 100644 --- a/dmoj/executors/shell_executor.py +++ b/dmoj/executors/shell_executor.py @@ -22,10 +22,10 @@ def get_fs(self): def get_allowed_syscalls(self): return super().get_allowed_syscalls() + ['fork', 'waitpid', 'wait4'] - def get_security(self, launch_kwargs=None): + def get_security(self, launch_kwargs=None, extra_fs=None): from dmoj.cptbox.syscalls import sys_execve, sys_access, sys_eaccess - sec = super().get_security(launch_kwargs) + sec = super().get_security(launch_kwargs=launch_kwargs, extra_fs=extra_fs) allowed = set(self.get_allowed_exec()) def handle_execve(debugger) -> None: diff --git a/dmoj/graders/bridged.py b/dmoj/graders/bridged.py index f3a4bca66..e64f2b683 100644 --- a/dmoj/graders/bridged.py +++ b/dmoj/graders/bridged.py @@ -6,6 +6,7 @@ from dmoj.checkers import CheckerOutput from dmoj.config import ConfigNode from dmoj.contrib import contrib_modules +from dmoj.cptbox.filesystem_policies import ExactFile from dmoj.error import CompileError, InternalError from dmoj.executors.base_executor import BaseExecutor from dmoj.graders.standard import StandardGrader @@ -85,14 +86,16 @@ def _interact_with_process(self, case: TestCase, result: Result) -> bytes: or contrib_modules[self.contrib_type].ContribModule.get_interactor_args_format_string() ) - with mktemp(input) as input_file, mktemp(judge_output) as answer_file: + with mktemp(judge_output) as answer_file: + input_path = case.input_data_fd().to_path() + # TODO(@kirito): testlib.h expects a file they can write to, # but we currently don't have a sane way to allow this. # Thus we pass /dev/null for now so testlib interactors will still # work, albeit with diminished capabilities interactor_args = shlex.split( args_format_string.format( - input_file=shlex.quote(input_file.name), + input_file=shlex.quote(input_path), output_file=shlex.quote(os.devnull), answer_file=shlex.quote(answer_file.name), ) @@ -104,6 +107,7 @@ def _interact_with_process(self, case: TestCase, result: Result) -> bytes: stdin=self._interactor_stdin_pipe, stdout=self._interactor_stdout_pipe, stderr=subprocess.PIPE, + extra_fs=[ExactFile(input_path)], ) os.close(self._interactor_stdin_pipe) From 0a925fd72a7ac5b31f21cf3c96b0d0542063995b Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 30 Jan 2022 01:19:44 -0500 Subject: [PATCH 05/10] Use LazyBytes to avoid converting memfd to bytes unless needed --- dmoj/cptbox/_cptbox.pyi | 3 ++ dmoj/cptbox/_cptbox.pyx | 9 ++++ dmoj/cptbox/lazy_bytes.py | 88 +++++++++++++++++++++++++++++++++++++++ dmoj/graders/standard.py | 3 +- 4 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 dmoj/cptbox/lazy_bytes.py diff --git a/dmoj/cptbox/_cptbox.pyi b/dmoj/cptbox/_cptbox.pyi index 7a52be6bf..d7124f890 100644 --- a/dmoj/cptbox/_cptbox.pyi +++ b/dmoj/cptbox/_cptbox.pyi @@ -102,3 +102,6 @@ bsd_get_proc_fdno: Callable[[int, int], str] memory_fd_create: Callable[[], int] memory_fd_seal: Callable[[int], None] + +class BufferProxy: + def _get_real_buffer(self): ... diff --git a/dmoj/cptbox/_cptbox.pyx b/dmoj/cptbox/_cptbox.pyx index 3f296ea2c..03c9937d6 100644 --- a/dmoj/cptbox/_cptbox.pyx +++ b/dmoj/cptbox/_cptbox.pyx @@ -1,5 +1,6 @@ # cython: language_level=3 from cpython.exc cimport PyErr_NoMemory, PyErr_SetFromErrno +from cpython.buffer cimport PyObject_GetBuffer from cpython.bytes cimport PyBytes_AsString, PyBytes_FromStringAndSize from libc.stdio cimport FILE, fopen, fclose, fgets, sprintf from libc.stdlib cimport malloc, free, strtoul @@ -600,3 +601,11 @@ cdef class Process: if not self._exited: return None return self._exitcode + + +cdef class BufferProxy: + def _get_real_buffer(self): + raise NotImplementedError + + def __getbuffer__(self, Py_buffer *buffer, int flags): + PyObject_GetBuffer(self._get_real_buffer(), buffer, flags) diff --git a/dmoj/cptbox/lazy_bytes.py b/dmoj/cptbox/lazy_bytes.py new file mode 100644 index 000000000..b6b3cd8f7 --- /dev/null +++ b/dmoj/cptbox/lazy_bytes.py @@ -0,0 +1,88 @@ +# Based off https://github.com/django/django/blob/main/django/utils/functional.py, licensed under 3-clause BSD. +from functools import total_ordering + +from dmoj.cptbox._cptbox import BufferProxy + +_SENTINEL = object() + + +@total_ordering +class LazyBytes(BufferProxy): + """ + Encapsulate a function call and act as a proxy for methods that are + called on the result of that function. The function is not evaluated + until one of the methods on the result is called. + """ + + def __init__(self, func): + self.__func = func + self.__value = _SENTINEL + + def __get_value(self): + if self.__value is _SENTINEL: + self.__value = self.__func() + return self.__value + + @classmethod + def _create_promise(cls, method_name): + # Builds a wrapper around some magic method + def wrapper(self, *args, **kw): + # Automatically triggers the evaluation of a lazy value and + # applies the given magic method of the result type. + res = self.__get_value() + return getattr(res, method_name)(*args, **kw) + + return wrapper + + def __cast(self): + return bytes(self.__get_value()) + + def _get_real_buffer(self): + return self.__cast() + + def __bytes__(self): + return self.__cast() + + def __repr__(self): + return repr(self.__cast()) + + def __str__(self): + return str(self.__cast()) + + def __eq__(self, other): + if isinstance(other, LazyBytes): + other = other.__cast() + return self.__cast() == other + + def __lt__(self, other): + if isinstance(other, LazyBytes): + other = other.__cast() + return self.__cast() < other + + def __hash__(self): + return hash(self.__cast()) + + def __mod__(self, rhs): + return self.__cast() % rhs + + def __add__(self, other): + return self.__cast() + other + + def __radd__(self, other): + return other + self.__cast() + + def __deepcopy__(self, memo): + # Instances of this class are effectively immutable. It's just a + # collection of functions. So we don't need to do anything + # complicated for copying. + memo[id(self)] = self + return self + + +for type_ in bytes.mro(): + for method_name in type_.__dict__: + # All __promise__ return the same wrapper method, they + # look up the correct implementation when called. + if hasattr(LazyBytes, method_name): + continue + setattr(LazyBytes, method_name, LazyBytes._create_promise(method_name)) diff --git a/dmoj/graders/standard.py b/dmoj/graders/standard.py index 3729f3400..013418b15 100644 --- a/dmoj/graders/standard.py +++ b/dmoj/graders/standard.py @@ -3,6 +3,7 @@ from dmoj.checkers import CheckerOutput from dmoj.cptbox import TracedPopen +from dmoj.cptbox.lazy_bytes import LazyBytes from dmoj.error import OutputLimitExceeded from dmoj.executors import executors from dmoj.executors.base_executor import BaseExecutor @@ -60,7 +61,7 @@ def check_result(self, case: TestCase, result: Result) -> CheckerOutput: result.proc_output, case.output_data(), submission_source=self.source, - judge_input=case.input_data(), + judge_input=LazyBytes(case.input_data), point_value=case.points, case_position=case.position, batch=case.batch, From 4bbe86ac7e8acc42613aa68344d3d8d3a057cd7a Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 30 Jan 2022 01:34:11 -0500 Subject: [PATCH 06/10] Fix memfd_create emulation bug on FreeBSD --- dmoj/cptbox/helper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dmoj/cptbox/helper.cpp b/dmoj/cptbox/helper.cpp index ef0613ade..cc3ac89f9 100644 --- a/dmoj/cptbox/helper.cpp +++ b/dmoj/cptbox/helper.cpp @@ -332,7 +332,7 @@ int memory_fd_create(void) { #ifdef __FreeBSD__ char filename[] = "/tmp/cptbox-memoryfd-XXXXXXXX"; int fd = mkstemp(filename); - if (fd > 0) + if (fd >= 0) unlink(filename); return fd; #else From 20ed2d1c56d474982e06a08dc4c68fba9bf9fbdf Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 30 Jan 2022 15:18:06 -0500 Subject: [PATCH 07/10] Allow passing memfd to bridged checkers --- dmoj/checkers/bridged.py | 16 ++++++++++++---- dmoj/graders/standard.py | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/dmoj/checkers/bridged.py b/dmoj/checkers/bridged.py index 81c0a53cd..ba5cb00eb 100644 --- a/dmoj/checkers/bridged.py +++ b/dmoj/checkers/bridged.py @@ -3,6 +3,7 @@ import subprocess from dmoj.contrib import contrib_modules +from dmoj.cptbox.filesystem_policies import ExactFile from dmoj.error import InternalError from dmoj.judgeenv import env, get_problem_root from dmoj.result import CheckerResult @@ -25,10 +26,10 @@ def get_executor(problem_id, files, flags, lang, compiler_time_limit): def check( process_output, judge_output, - judge_input, problem_id, files, lang, + case, time_limit=env['generator_time_limit'], memory_limit=env['generator_memory_limit'], compiler_time_limit=env['generator_compiler_limit'], @@ -46,16 +47,23 @@ def check( args_format_string = args_format_string or contrib_modules[type].ContribModule.get_checker_args_format_string() - with mktemp(judge_input) as input_file, mktemp(process_output) as output_file, mktemp(judge_output) as answer_file: + with mktemp(process_output) as output_file, mktemp(judge_output) as answer_file: + input_path = case.input_data_fd().to_path() + checker_args = shlex.split( args_format_string.format( - input_file=shlex.quote(input_file.name), + input_file=shlex.quote(input_path), output_file=shlex.quote(output_file.name), answer_file=shlex.quote(answer_file.name), ) ) process = executor.launch( - *checker_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, memory=memory_limit, time=time_limit + *checker_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + memory=memory_limit, + time=time_limit, + extra_fs=[ExactFile(input_path)], ) proc_output, error = process.communicate() diff --git a/dmoj/graders/standard.py b/dmoj/graders/standard.py index 013418b15..200247a66 100644 --- a/dmoj/graders/standard.py +++ b/dmoj/graders/standard.py @@ -69,6 +69,7 @@ def check_result(self, case: TestCase, result: Result) -> CheckerOutput: binary_data=case.has_binary_data, execution_time=result.execution_time, problem_id=self.problem.id, + case=case, result=result, ) except UnicodeDecodeError: From b0ca2e4d93fac44057dc7c907f8d6ef5dd5dd6b9 Mon Sep 17 00:00:00 2001 From: Quantum Date: Sun, 30 Jan 2022 15:55:04 -0500 Subject: [PATCH 08/10] Implement graceful memfd fallback for FreeBSD --- dmoj/cptbox/_cptbox.pyi | 4 +- dmoj/cptbox/_cptbox.pyx | 12 ++--- dmoj/cptbox/helper.cpp | 11 ++-- dmoj/cptbox/helper.h | 4 +- dmoj/cptbox/utils.py | 113 ++++++++++++++++++++++++++++++++-------- dmoj/problem.py | 10 ++-- 6 files changed, 111 insertions(+), 43 deletions(-) diff --git a/dmoj/cptbox/_cptbox.pyi b/dmoj/cptbox/_cptbox.pyi index d7124f890..362aad3dd 100644 --- a/dmoj/cptbox/_cptbox.pyi +++ b/dmoj/cptbox/_cptbox.pyi @@ -100,8 +100,8 @@ AT_FDCWD: int bsd_get_proc_cwd: Callable[[int], str] bsd_get_proc_fdno: Callable[[int, int], str] -memory_fd_create: Callable[[], int] -memory_fd_seal: Callable[[int], None] +memfd_create: Callable[[], int] +memfd_seal: Callable[[int], None] class BufferProxy: def _get_real_buffer(self): ... diff --git a/dmoj/cptbox/_cptbox.pyx b/dmoj/cptbox/_cptbox.pyx index 03c9937d6..4ee3a80b3 100644 --- a/dmoj/cptbox/_cptbox.pyx +++ b/dmoj/cptbox/_cptbox.pyx @@ -134,8 +134,8 @@ cdef extern from 'helper.h' nogil: PTBOX_SPAWN_FAIL_EXECVE PTBOX_SPAWN_FAIL_SETAFFINITY - int _memory_fd_create "memory_fd_create"() - int _memory_fd_seal "memory_fd_seal"(int fd) + int cptbox_memfd_create() + int cptbox_memfd_seal(int fd) cdef extern from 'fcntl.h' nogil: @@ -215,14 +215,14 @@ def bsd_get_proc_fdno(pid_t pid, int fd): free(buf) return res -def memory_fd_create(): - cdef int fd = _memory_fd_create() +def memfd_create(): + cdef int fd = cptbox_memfd_create() if fd < 0: PyErr_SetFromErrno(OSError) return fd -def memory_fd_seal(int fd): - cdef int result = _memory_fd_seal(fd) +def memfd_seal(int fd): + cdef int result = cptbox_memfd_seal(fd) if result == -1: PyErr_SetFromErrno(OSError) diff --git a/dmoj/cptbox/helper.cpp b/dmoj/cptbox/helper.cpp index cc3ac89f9..fd2bf45c4 100644 --- a/dmoj/cptbox/helper.cpp +++ b/dmoj/cptbox/helper.cpp @@ -328,19 +328,16 @@ char *bsd_get_proc_fdno(pid_t pid, int fdno) { return bsd_get_proc_fd(pid, 0, fdno); } -int memory_fd_create(void) { +int cptbox_memfd_create(void) { #ifdef __FreeBSD__ - char filename[] = "/tmp/cptbox-memoryfd-XXXXXXXX"; - int fd = mkstemp(filename); - if (fd >= 0) - unlink(filename); - return fd; + errno = ENOSYS; + return -1; #else return memfd_create("cptbox memory_fd", MFD_ALLOW_SEALING); #endif } -int memory_fd_seal(int fd) { +int cptbox_memfd_seal(int fd) { #ifdef __FreeBSD__ errno = ENOSYS; return -1; diff --git a/dmoj/cptbox/helper.h b/dmoj/cptbox/helper.h index a22e06ec6..ec96f9392 100644 --- a/dmoj/cptbox/helper.h +++ b/dmoj/cptbox/helper.h @@ -35,7 +35,7 @@ int cptbox_child_run(const struct child_config *config); char *bsd_get_proc_cwd(pid_t pid); char *bsd_get_proc_fdno(pid_t pid, int fdno); -int memory_fd_create(void); -int memory_fd_seal(int fd); +int cptbox_memfd_create(void); +int cptbox_memfd_seal(int fd); #endif diff --git a/dmoj/cptbox/utils.py b/dmoj/cptbox/utils.py index 323271639..133a0ae60 100644 --- a/dmoj/cptbox/utils.py +++ b/dmoj/cptbox/utils.py @@ -1,39 +1,42 @@ -import errno import io import mmap import os +from abc import ABCMeta, abstractmethod +from tempfile import NamedTemporaryFile, TemporaryFile from typing import Optional -from dmoj.cptbox._cptbox import memory_fd_create, memory_fd_seal +from dmoj.cptbox._cptbox import memfd_create, memfd_seal -class MemoryIO(io.FileIO): - def __init__(self, prefill: Optional[bytes] = None, seal=False) -> None: - super().__init__(memory_fd_create(), 'r+') +def _make_fd_readonly(fd): + new_fd = os.open(f'/proc/self/fd/{fd}', os.O_RDONLY) + try: + os.dup2(new_fd, fd) + finally: + os.close(new_fd) + + +class MmapableIO(io.FileIO, metaclass=ABCMeta): + def __init__(self, fd, *, prefill: Optional[bytes] = None, seal=False) -> None: + super().__init__(fd, 'r+') + if prefill: self.write(prefill) if seal: self.seal() - def seal(self) -> None: - fd = self.fileno() - try: - memory_fd_seal(fd) - except OSError as e: - if e.errno == errno.ENOSYS: - # FreeBSD - self.seek(0, os.SEEK_SET) - return - raise + @classmethod + @abstractmethod + def usable_with_name(cls) -> bool: + ... - new_fd = os.open(f'/proc/self/fd/{fd}', os.O_RDONLY) - try: - os.dup2(new_fd, fd) - finally: - os.close(new_fd) + @abstractmethod + def seal(self) -> None: + ... + @abstractmethod def to_path(self) -> str: - return f'/proc/{os.getpid()}/fd/{self.fileno()}' + ... def to_bytes(self) -> bytes: try: @@ -43,3 +46,71 @@ def to_bytes(self) -> bytes: if e.args[0] == 'cannot mmap an empty file': return b'' raise + + +class NamedFileIO(MmapableIO): + _name: str + + def __init__(self, *, prefill: Optional[bytes] = None, seal=False) -> None: + with NamedTemporaryFile(delete=False) as f: + self._name = f.name + super().__init__(os.dup(f.fileno()), prefill=prefill, seal=seal) + + def seal(self) -> None: + self.seek(0, os.SEEK_SET) + + def close(self) -> None: + super().close() + os.unlink(self._name) + + def to_path(self) -> str: + return self._name + + @classmethod + def usable_with_name(cls): + return True + + +class UnnamedFileIO(MmapableIO): + def __init__(self, *, prefill: Optional[bytes] = None, seal=False) -> None: + with TemporaryFile() as f: + super().__init__(os.dup(f.fileno()), prefill=prefill, seal=seal) + + def seal(self) -> None: + self.seek(0, os.SEEK_SET) + _make_fd_readonly(self.fileno()) + + def to_path(self) -> str: + return f'/proc/{os.getpid()}/fd/{self.fileno()}' + + @classmethod + def usable_with_name(cls): + with cls() as f: + return os.path.exists(f.to_path()) + + +class MemfdIO(MmapableIO): + def __init__(self, *, prefill: Optional[bytes] = None, seal=False) -> None: + super().__init__(memfd_create(), prefill=prefill, seal=seal) + + def seal(self) -> None: + fd = self.fileno() + memfd_seal(fd) + _make_fd_readonly(fd) + + def to_path(self) -> str: + return f'/proc/{os.getpid()}/fd/{self.fileno()}' + + @classmethod + def usable_with_name(cls): + try: + with cls() as f: + return os.path.exists(f.to_path()) + except OSError: + return False + + +# Try to use memfd if possible, otherwise fallback to unlinked temporary files +# (UnnamedFileIO). On FreeBSD and some other systems, /proc/[pid]/fd doesn't +# exist, so to_path() will not work. We fall back to NamedFileIO in that case. +MemoryIO = next((i for i in (MemfdIO, UnnamedFileIO, NamedFileIO) if i.usable_with_name())) diff --git a/dmoj/problem.py b/dmoj/problem.py index e84e931b5..407e136da 100644 --- a/dmoj/problem.py +++ b/dmoj/problem.py @@ -30,7 +30,7 @@ from dmoj import checkers from dmoj.checkers import Checker from dmoj.config import ConfigNode, InvalidInitException -from dmoj.cptbox.utils import MemoryIO +from dmoj.cptbox.utils import MemoryIO, MmapableIO from dmoj.judgeenv import env, get_problem_root from dmoj.utils.helper_files import compile_with_auxiliary_files, parse_helper_file_error from dmoj.utils.module import load_module_from_file @@ -275,7 +275,7 @@ def open(self, key: str): return self.archive.open(zipinfo) raise KeyError('file "%s" could not be found in "%s"' % (key, self.problem_root_dir)) - def as_fd(self, key: str, normalize: bool = False) -> MemoryIO: + def as_fd(self, key: str, normalize: bool = False) -> MmapableIO: memory = MemoryIO() with self.open(key) as f: if normalize: @@ -344,7 +344,7 @@ class TestCase(BaseTestCase): batch: int output_prefix_length: int has_binary_data: bool - _input_data_fd: Optional[MemoryIO] + _input_data_fd: Optional[MmapableIO] _generated: Optional[Tuple[bytes, bytes]] def __init__(self, count: int, batch_no: int, config: ConfigNode, problem: Problem): @@ -451,14 +451,14 @@ def _run_generator(self, gen: Union[str, ConfigNode], args: Optional[Iterable[st def input_data(self) -> bytes: return self.input_data_fd().to_bytes() - def input_data_fd(self) -> MemoryIO: + def input_data_fd(self) -> MmapableIO: if self._input_data_fd: return self._input_data_fd result = self._input_data_fd = self._make_input_data_fd() return result - def _make_input_data_fd(self) -> MemoryIO: + def _make_input_data_fd(self) -> MmapableIO: gen = self.config.generator # don't try running the generator if we specify an output file explicitly, From 6ad987d06a92b5b791cca9710636dc0661859ac3 Mon Sep 17 00:00:00 2001 From: Quantum Date: Mon, 30 Dec 2024 00:03:48 -0500 Subject: [PATCH 09/10] Implement generator writing to memfd --- dmoj/problem.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dmoj/problem.py b/dmoj/problem.py index 407e136da..0ecca2934 100644 --- a/dmoj/problem.py +++ b/dmoj/problem.py @@ -345,7 +345,7 @@ class TestCase(BaseTestCase): output_prefix_length: int has_binary_data: bool _input_data_fd: Optional[MmapableIO] - _generated: Optional[Tuple[bytes, bytes]] + _generated: Optional[Tuple[MmapableIO, bytes]] def __init__(self, count: int, batch_no: int, config: ConfigNode, problem: Problem): self.position = count @@ -425,6 +425,10 @@ def _run_generator(self, gen: Union[str, ConfigNode], args: Optional[Iterable[st assert args is not None args = map(str, args) + input_io = MemoryIO() + # Enable generators to write any size files. + executor.fsize = -1 + # setting large buffers is really important, because otherwise stderr is unbuffered # and the generator begins calling into cptbox Python code really frequently proc = executor.launch( @@ -432,7 +436,7 @@ def _run_generator(self, gen: Union[str, ConfigNode], args: Optional[Iterable[st time=time_limit, memory=memory_limit, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, + stdout=input_io, stderr=subprocess.PIPE, stderr_buffer_size=65536, stdout_buffer_size=65536, @@ -443,8 +447,9 @@ def _run_generator(self, gen: Union[str, ConfigNode], args: Optional[Iterable[st except KeyError: input = None - stdout, stderr = proc.unsafe_communicate(input) - self._generated = self._normalize(stdout), self._normalize(stderr) + _, stderr = proc.unsafe_communicate(input) + input_io.seal() + self._generated = input_io, self._normalize(stderr) parse_helper_file_error(proc, executor, 'generator', stderr, time_limit, memory_limit) @@ -467,12 +472,8 @@ def _make_input_data_fd(self) -> MmapableIO: if self._generated is None: self._run_generator(gen, args=self.config.generator_args) assert self._generated is not None - # FIXME: generate into the MemoryIO. if self._generated[0]: - memory = MemoryIO() - memory.write(self._generated[0]) - memory.seal() - return memory + return self._generated[0] # in file is optional if self.config['in']: From 315ede589ba5ed02f807ba0c8f073ee4c1288b9a Mon Sep 17 00:00:00 2001 From: Quantum Date: Mon, 30 Dec 2024 00:08:18 -0500 Subject: [PATCH 10/10] Rename MemoryIO *_fd to *_io --- dmoj/checkers/bridged.py | 2 +- dmoj/graders/bridged.py | 2 +- dmoj/graders/standard.py | 2 +- dmoj/problem.py | 22 +++++++++++----------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dmoj/checkers/bridged.py b/dmoj/checkers/bridged.py index ba5cb00eb..7da8e169f 100644 --- a/dmoj/checkers/bridged.py +++ b/dmoj/checkers/bridged.py @@ -48,7 +48,7 @@ def check( args_format_string = args_format_string or contrib_modules[type].ContribModule.get_checker_args_format_string() with mktemp(process_output) as output_file, mktemp(judge_output) as answer_file: - input_path = case.input_data_fd().to_path() + input_path = case.input_data_io().to_path() checker_args = shlex.split( args_format_string.format( diff --git a/dmoj/graders/bridged.py b/dmoj/graders/bridged.py index e64f2b683..78341ce51 100644 --- a/dmoj/graders/bridged.py +++ b/dmoj/graders/bridged.py @@ -87,7 +87,7 @@ def _interact_with_process(self, case: TestCase, result: Result) -> bytes: ) with mktemp(judge_output) as answer_file: - input_path = case.input_data_fd().to_path() + input_path = case.input_data_io().to_path() # TODO(@kirito): testlib.h expects a file they can write to, # but we currently don't have a sane way to allow this. diff --git a/dmoj/graders/standard.py b/dmoj/graders/standard.py index 200247a66..d636ee544 100644 --- a/dmoj/graders/standard.py +++ b/dmoj/graders/standard.py @@ -18,7 +18,7 @@ class StandardGrader(BaseGrader): def grade(self, case: TestCase) -> Result: result = Result(case) - input_file = case.input_data_fd() + input_file = case.input_data_io() self._launch_process(case, input_file) diff --git a/dmoj/problem.py b/dmoj/problem.py index 0ecca2934..fdc8c257a 100644 --- a/dmoj/problem.py +++ b/dmoj/problem.py @@ -344,7 +344,7 @@ class TestCase(BaseTestCase): batch: int output_prefix_length: int has_binary_data: bool - _input_data_fd: Optional[MmapableIO] + _input_data_io: Optional[MmapableIO] _generated: Optional[Tuple[MmapableIO, bytes]] def __init__(self, count: int, batch_no: int, config: ConfigNode, problem: Problem): @@ -356,7 +356,7 @@ def __init__(self, count: int, batch_no: int, config: ConfigNode, problem: Probl self.output_prefix_length = config.output_prefix_length self.has_binary_data = config.binary_data self._generated = None - self._input_data_fd = None + self._input_data_io = None def _normalize(self, data: bytes) -> bytes: # Perhaps the correct answer may be 'no output', in which case it'll be @@ -454,16 +454,16 @@ def _run_generator(self, gen: Union[str, ConfigNode], args: Optional[Iterable[st parse_helper_file_error(proc, executor, 'generator', stderr, time_limit, memory_limit) def input_data(self) -> bytes: - return self.input_data_fd().to_bytes() + return self.input_data_io().to_bytes() - def input_data_fd(self) -> MmapableIO: - if self._input_data_fd: - return self._input_data_fd + def input_data_io(self) -> MmapableIO: + if self._input_data_io: + return self._input_data_io - result = self._input_data_fd = self._make_input_data_fd() + result = self._input_data_io = self._make_input_data_io() return result - def _make_input_data_fd(self) -> MmapableIO: + def _make_input_data_io(self) -> MmapableIO: gen = self.config.generator # don't try running the generator if we specify an output file explicitly, @@ -516,15 +516,15 @@ def checker(self) -> partial: def free_data(self) -> None: self._generated = None - if self._input_data_fd: - self._input_data_fd.close() + if self._input_data_io: + self._input_data_io.close() def __str__(self) -> str: return f'TestCase(in={self.config["in"]},out={self.config["out"]},points={self.config["points"]})' # FIXME(tbrindus): this is a hack working around the fact we can't pickle these fields, but we do need parts of # TestCase itself on the other end of the IPC. - _pickle_blacklist = ('_generated', 'config', 'problem', '_input_data_fd') + _pickle_blacklist = ('_generated', 'config', 'problem', '_input_data_io') def __getstate__(self) -> dict: k = {k: v for k, v in self.__dict__.items() if k not in self._pickle_blacklist}