From 2918212ef41a5a378a506b763b0283b58402db1e Mon Sep 17 00:00:00 2001 From: Lukas Melninkas Date: Thu, 25 Apr 2024 21:10:07 +0300 Subject: [PATCH 1/2] Detect postal code after a line break --- pyap/source_US/data.py | 2 +- tests/test_parser_us.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py index ad2040f..b818b0d 100644 --- a/pyap/source_US/data.py +++ b/pyap/source_US/data.py @@ -1172,7 +1172,7 @@ def _indexed_region1(idx: Optional[str] = None): _postal_code = f"""(?:{part_div}|\-)? {postal_code}""" return rf""" (?:{_indexed_region1("a")}?{_postal_code}{_indexed_region1("b")}? - |{_indexed_region1("c")}(?![-,.\ A-Za-z]{{0,10}}{postal_code_re})) + |{_indexed_region1("c")}(?![-,.\sA-Za-z]{{0,10}}{postal_code_re})) """ diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py index 58bb663..5ff8554 100644 --- a/tests/test_parser_us.py +++ b/tests/test_parser_us.py @@ -476,6 +476,7 @@ def test_full_street_positive(input, expected): [ # positive assertions ("P.O. BOX 10323 PH (205) 595-3511\nBIRMINGHAM, AL 35202", True), + ("25 HARBOR PARK DRIVE\nPORT WASHINGTON\nNY 11050", True), ("1100 VIRGINIA DR\nFORT WASHINGTON, PA, 19034", True), ("3602 HIGHPOINT\nSAN ANTONIO TX78217", True), ("8025 BLACK HORSE\nSTE 300\nPLEASANTVILLE NJ 08232", True), From b005cf1ede74d4135a63d33db2081ef7d61a8aba Mon Sep 17 00:00:00 2001 From: Lukas Melninkas Date: Thu, 25 Apr 2024 21:22:26 +0300 Subject: [PATCH 2/2] Detect country before postal code --- pyap/source_US/data.py | 4 ++-- tests/test_parser_us.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py index b818b0d..812bff1 100644 --- a/pyap/source_US/data.py +++ b/pyap/source_US/data.py @@ -1154,7 +1154,7 @@ def make_region1(idx: Optional[str] = None): country = r""" (?: - [Uu]\.?[Ss]\.?[Aa]\.?| + [Uu]\.?[Ss]\.?(?:[Aa]\.?)?| [Uu][Nn][Ii][Tt][Ee][Dd]\ [Ss][Tt][Aa][Tt][Ee][Ss](?:\ [Oo][Ff]\ [Aa][Mm][Ee][Rr][Ii][Cc][Aa])? ) """ @@ -1171,7 +1171,7 @@ def _indexed_region1(idx: Optional[str] = None): _postal_code = f"""(?:{part_div}|\-)? {postal_code}""" return rf""" - (?:{_indexed_region1("a")}?{_postal_code}{_indexed_region1("b")}? + (?:{_indexed_region1("a")}?(?:{part_div}{country})?{_postal_code}{_indexed_region1("b")}? |{_indexed_region1("c")}(?![-,.\sA-Za-z]{{0,10}}{postal_code_re})) """ diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py index 5ff8554..4fc26bd 100644 --- a/tests/test_parser_us.py +++ b/tests/test_parser_us.py @@ -475,8 +475,10 @@ def test_full_street_positive(input, expected): "input,expected", [ # positive assertions + ("2755 CARPENTER RD SUITE 1W\nANN ARBOR, MI, US, 48108", True), ("P.O. BOX 10323 PH (205) 595-3511\nBIRMINGHAM, AL 35202", True), ("25 HARBOR PARK DRIVE\nPORT WASHINGTON\nNY 11050", True), + ("222 W. Las Colinas Blvd\nSuite 900N\nIrving, Texas, USA 75039-5421", True), ("1100 VIRGINIA DR\nFORT WASHINGTON, PA, 19034", True), ("3602 HIGHPOINT\nSAN ANTONIO TX78217", True), ("8025 BLACK HORSE\nSTE 300\nPLEASANTVILLE NJ 08232", True),