From b005cf1ede74d4135a63d33db2081ef7d61a8aba Mon Sep 17 00:00:00 2001 From: Lukas Melninkas Date: Thu, 25 Apr 2024 21:22:26 +0300 Subject: [PATCH] Detect country before postal code --- pyap/source_US/data.py | 4 ++-- tests/test_parser_us.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py index b818b0d..812bff1 100644 --- a/pyap/source_US/data.py +++ b/pyap/source_US/data.py @@ -1154,7 +1154,7 @@ def make_region1(idx: Optional[str] = None): country = r""" (?: - [Uu]\.?[Ss]\.?[Aa]\.?| + [Uu]\.?[Ss]\.?(?:[Aa]\.?)?| [Uu][Nn][Ii][Tt][Ee][Dd]\ [Ss][Tt][Aa][Tt][Ee][Ss](?:\ [Oo][Ff]\ [Aa][Mm][Ee][Rr][Ii][Cc][Aa])? ) """ @@ -1171,7 +1171,7 @@ def _indexed_region1(idx: Optional[str] = None): _postal_code = f"""(?:{part_div}|\-)? {postal_code}""" return rf""" - (?:{_indexed_region1("a")}?{_postal_code}{_indexed_region1("b")}? + (?:{_indexed_region1("a")}?(?:{part_div}{country})?{_postal_code}{_indexed_region1("b")}? |{_indexed_region1("c")}(?![-,.\sA-Za-z]{{0,10}}{postal_code_re})) """ diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py index 5ff8554..4fc26bd 100644 --- a/tests/test_parser_us.py +++ b/tests/test_parser_us.py @@ -475,8 +475,10 @@ def test_full_street_positive(input, expected): "input,expected", [ # positive assertions + ("2755 CARPENTER RD SUITE 1W\nANN ARBOR, MI, US, 48108", True), ("P.O. BOX 10323 PH (205) 595-3511\nBIRMINGHAM, AL 35202", True), ("25 HARBOR PARK DRIVE\nPORT WASHINGTON\nNY 11050", True), + ("222 W. Las Colinas Blvd\nSuite 900N\nIrving, Texas, USA 75039-5421", True), ("1100 VIRGINIA DR\nFORT WASHINGTON, PA, 19034", True), ("3602 HIGHPOINT\nSAN ANTONIO TX78217", True), ("8025 BLACK HORSE\nSTE 300\nPLEASANTVILLE NJ 08232", True),