From 428200321d7c4dc5bdbb961c9fafcc4d038b2971 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 16 Mar 2024 16:31:44 +0100 Subject: [PATCH] Suppress "Compile called before Add" in re2.Filter When compiling an empty set, ``FilteredRE2::Compile`` logs a warning to stderr which can not be suppressed (google/re2#485). Replace `re2.Filter` by a null object if the corresponding matchers list is empty: not only do we need to skip `Filter.Compile` to suppress the warning message, we need to skip `Filter.Match` or the program will segfault (google/re2#484). Using a null object seems safer and more reliable than adding conditionals, even if it requires more code and reindenting half the file. Doing this also seems safer than my first instinct of trying to use low-level fd redirection: fd redirection suffers from race conditions[^thread] and could suffer from other cross-platform compatibility issues (e.g. does every python-supported OS have stderr on fd 2 and correctly supports dup, dup2, and close?) [^thread]: AFAIK CPython does not provide a python-level GIL-pin feature (even less so with the GILectomy plans), so we have no way to prevent context-switching and any message sent to stderr by sibling threads would be lost --- pyproject.toml | 3 ++- src/ua_parser/re2.py | 50 ++++++++++++++++++++++++++++---------------- tests/test_re2.py | 13 ++++++++++++ 3 files changed, 47 insertions(+), 19 deletions(-) create mode 100644 tests/test_re2.py diff --git a/pyproject.toml b/pyproject.toml index 47452ec..84ce896 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,8 @@ ignore = [ ] [tool.ruff.lint.isort] -classes = ["LRU", "OS"] +classes = ["OS"] +known-first-party = ["ua_parser"] combine-as-imports = true [tool.mypy] diff --git a/src/ua_parser/re2.py b/src/ua_parser/re2.py index c8cdd0b..268701d 100644 --- a/src/ua_parser/re2.py +++ b/src/ua_parser/re2.py @@ -16,6 +16,11 @@ ) +class DummyFilter: + def Match(self, _: str) -> None: + pass + + class Resolver: ua: re2.Filter user_agent_matchers: List[Matcher[UserAgent]] @@ -30,26 +35,35 @@ def __init__( ) -> None: self.user_agent_matchers, self.os_matchers, self.device_matchers = matchers - self.ua = re2.Filter() - for u in self.user_agent_matchers: - self.ua.Add(u.pattern) - self.ua.Compile() + if self.user_agent_matchers: + self.ua = re2.Filter() + for u in self.user_agent_matchers: + self.ua.Add(u.pattern) + self.ua.Compile() + else: + self.ua = DummyFilter() - self.os = re2.Filter() - for o in self.os_matchers: - self.os.Add(o.pattern) - self.os.Compile() + if self.os_matchers: + self.os = re2.Filter() + for o in self.os_matchers: + self.os.Add(o.pattern) + self.os.Compile() + else: + self.os = DummyFilter() - self.devices = re2.Filter() - for d in self.device_matchers: - # Prepend the i global flag if IGNORECASE is set. Assumes - # no pattern uses global flags, but since they're not - # supported in JS that seems safe. - if d.flags & re.IGNORECASE: - self.devices.Add("(?i)" + d.pattern) - else: - self.devices.Add(d.pattern) - self.devices.Compile() + if self.device_matchers: + self.devices = re2.Filter() + for d in self.device_matchers: + # Prepend the i global flag if IGNORECASE is set. Assumes + # no pattern uses global flags, but since they're not + # supported in JS that seems safe. + if d.flags & re.IGNORECASE: + self.devices.Add("(?i)" + d.pattern) + else: + self.devices.Add(d.pattern) + self.devices.Compile() + else: + self.devices = DummyFilter() def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult: user_agent = os = device = None diff --git a/tests/test_re2.py b/tests/test_re2.py new file mode 100644 index 0000000..88f6dfe --- /dev/null +++ b/tests/test_re2.py @@ -0,0 +1,13 @@ +import pytest # type: ignore + +from ua_parser import Domain, PartialParseResult + +re2 = pytest.importorskip("ua_parser.re2") + + +def test_empty(capfd: pytest.CaptureFixture[str]) -> None: + r = re2.Resolver(([], [], [])) + assert r("", Domain.ALL) == PartialParseResult(Domain.ALL, None, None, None, "") + out, err = capfd.readouterr() + assert out == "" + assert err == ""