From 16ba3b4a0d3c1ac2e1b7321560fd4c4b605f34b0 Mon Sep 17 00:00:00 2001
From: Lukas Melninkas
Date: Tue, 23 Apr 2024 15:35:14 +0300
Subject: [PATCH] Support inverse occupancy, post-dir in type-first street name

---
 pyap/source_US/data.py  | 12 +++++++++---
 tests/test_parser_us.py |  2 ++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py
index 84f0128..2c19b35 100644
--- a/pyap/source_US/data.py
+++ b/pyap/source_US/data.py
@@ -769,7 +769,7 @@ def str_list_to_upper_lower_regex(str_list: List[str]) -> str:
 street_type_leading_list = ["Camino", "El\ Camino"]
 
 
-def street_type_list_to_regex(street_type_list):
+def street_type_list_to_regex(street_type_list: list[str]) -> str:
     """Converts a list of street types into a regex"""
     street_types = str_list_to_upper_lower_regex(street_type_list)
 
@@ -803,7 +803,10 @@ def street_type_list_to_regex(street_type_list):
     (?:
         (?:{street_name_a}{space_div}{street_type_a})
         |
-        (?:{street_type_b}{space_div}{street_name_b})
+        (?:
+            (?:{post_direction_re}{space_div})?
+            {street_type_b}{space_div}{street_name_b}
+        )
     )
 """.format(
     space_div=space_div,
@@ -811,6 +814,7 @@ def street_type_list_to_regex(street_type_list):
     street_type_a=street_type_extended,
     street_type_b=rf"(?P{street_types_leading_re})",
     street_name_b=rf"(?P{street_name_one_word_re})",
+    post_direction_re=post_direction_re,
 )
 
 floor_indic = r"""
@@ -893,6 +897,8 @@ def street_type_list_to_regex(street_type_list):
             (?:
                 [A-Za-z\#\&\-\d]{1,7}(?:\s?[SWNE])?
             )?
+            |
+            \d{2,4}\ [Ss][Tt][Ee](?:\ \*)?
         )
         |
         (?:
@@ -952,7 +958,7 @@ def street_type_list_to_regex(street_type_list):
         |
         (?:
             {post_direction_re}\ 
-            \d{{,3}}[A-Z][A-Za-z\-]{{,31}}
+            \d{{,3}}[A-Za-z\-]{{1,31}}
         )
     )
     (?:{space_div}{post_direction})?
diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py
index f171a3f..5ceb240 100644
--- a/tests/test_parser_us.py
+++ b/tests/test_parser_us.py
@@ -387,6 +387,7 @@ def test_po_box_positive(input, expected):
     "input,expected",
     [
         # positive assertions
+        ("800 W EL CAMINO REAL\n350 STE *", True),
         ("899 HEATHROW PARK LN", True),
         ("1806 Dominion Way Ste B", True),
         ("696 BEAL PKWY", True),
@@ -479,6 +480,7 @@ def test_full_street_positive(input, expected):
         ("696 BEAL PKWY NW\nFT WALTON BCH FL 32547", True),
         ("2633 Camino Ramon Ste. 400 San Ramon, CA 94583-2176", True),
         ("2951 El Camino Real Palo Alto, CA 94306", True),
+        ("800 W EL CAMINO REAL\n350 STE *\nMOUNTAIN VIEW, CA 94040", True),
         ("3821 ED DR, RALEIGH, NC 27612", True),
         ("213 WEST 35TH STREET SUITE, 400, NEW YORK, NY", True),
         ("326 33RD AVE., EAST, SEATTLE, WA 98112", True),