Rename the `single_street_name` capture to `typeless_street_name` and
recognise numbered state roads (e.g. "W. STATE ROAD 434").

* pyap/address.py: the `Address.single_street_name` field becomes
  `typeless_street_name`.
* pyap/source_US/data.py: add `numbered_road_re` ("State Road" in any
  letter case followed by 1-4 digits); rename the `single_street_name`
  pattern to `numbered_or_typeless_street_name`, let it begin with an
  optional `{post_direction_re}{space_div}` prefix and accept
  `numbered_road_re` as one more alternative; in the street pattern that
  consumes it, this alternative is now listed before `typed_street_name`.
* tests: follow the rename and add State Road cases.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts agree with the hunk
headers, but leading whitespace inside hunks is a best-effort
reconstruction. The capture-group names on the two `(?P<...>` lines were
restored from the renamed Address field / test key; the name inside
`(?P{po_box})` could not be recovered and is left as found. Verify against
the repository before applying.

diff --git a/pyap/address.py b/pyap/address.py
index e0bbb16..965a65e 100644
--- a/pyap/address.py
+++ b/pyap/address.py
@@ -33,7 +33,7 @@ class Address:
     street_type: Union[str, None] = None
     building_id: Union[str, None] = None
     postal_code: Union[str, None] = None
-    single_street_name: Union[str, None] = None
+    typeless_street_name: Union[str, None] = None
     street_name: Union[str, None] = None
     street_number: Union[str, None] = None
     po_box: Union[str, None] = None
diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py
index 49e4aaa..17a1f20 100644
--- a/pyap/source_US/data.py
+++ b/pyap/source_US/data.py
@@ -182,24 +182,33 @@ def str_list_to_upper_lower_regex(str_list: List[str]) -> str:
     r"Black\ Hou?rse",
 ]
 
+numbered_road_re = r"""[Ss][Tt][Aa][Tt][Ee]\ [Rr][Oo][Aa][Dd]\ \d{1,4}(?!\d)"""
 
 # Used to handle edge cases where streets don't have a street type:
 # eg. `55 HIGHPOINT`, `600 HIGHWAY 32`
-single_street_name = r"""
-    (?P<single_street_name>
-        {single_street_name_regex}
-        |
-        [Aa][Tt]\ {interstate_street_type}
-        |
-        {highway_re}
-        |
-        {numbered_avenue_re}
+numbered_or_typeless_street_name = r"""
+    (?P<typeless_street_name>
+        (?:{post_direction_re}{space_div})?
+        (?:
+            {single_street_name_regex}
+            |
+            [Aa][Tt]\ {interstate_street_type}
+            |
+            {highway_re}
+            |
+            {numbered_avenue_re}
+            |
+            {numbered_road_re}
+        )
     )
 """.format(
+    post_direction_re=post_direction_re,
+    space_div=space_div,
     single_street_name_regex=str_list_to_upper_lower_regex(single_street_name_list),
     interstate_street_type=interstate_street_type,
     highway_re=highway_re,
     numbered_avenue_re=numbered_avenue_re,
+    numbered_road_re=numbered_road_re,
 )
 
 post_direction = r"""
@@ -964,9 +973,9 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str:
                     (?:(?P{po_box}){part_div})?
                     {street_number}{space_div}?
                     (?:
-                        (?:{typed_street_name}(?![A-Za-z\d\.]))
+                        (?:{numbered_or_typeless_street_name})
                         |
-                        (?:{single_street_name})
+                        (?:{typed_street_name}(?![A-Za-z\d\.]))
                         |
                         (?:
                             {post_direction_re}\ 
@@ -988,7 +997,7 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str:
     part_div=part_div,
     street_number=street_number,
     typed_street_name=typed_street_name,
-    single_street_name=single_street_name,
+    numbered_or_typeless_street_name=numbered_or_typeless_street_name,
     post_direction=post_direction,
     post_direction_re=post_direction_re,
     floor=floor,
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 6761e68..25fbd7b 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -163,7 +163,7 @@ def test_combine_results():
             "STAFFING LLC, 242 N AVENUE 25 SUITE 300, LOS ANGELES, CA 900031, Period ",
             {
                 "street_number": "242",
-                "single_street_name": "N AVENUE 25",
+                "typeless_street_name": "N AVENUE 25",
                 "occupancy": "SUITE 300",
                 "city": "LOS ANGELES",
                 "region1": "CA",
diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py
index d457fea..fa8beb4 100644
--- a/tests/test_parser_us.py
+++ b/tests/test_parser_us.py
@@ -172,12 +172,14 @@ def test_street_name(input, expected):
         # positive assertions
         ("Highway 32", True),
         ("Parkway", True),
+        ("STATE ROAD 123", True),
+        ("W. STATE ROAD 123", True),
         # negative assertions
     ],
 )
-def test_single_street_name(input, expected):
+def test_numbered_or_typeless_street_name(input, expected):
     """tests positive string match for a street name"""
-    execute_matching_test(input, expected, data_us.single_street_name)
+    execute_matching_test(input, expected, data_us.numbered_or_typeless_street_name)
 
 
 @pytest.mark.parametrize(
@@ -406,6 +408,7 @@ def test_po_box_positive(input, expected):
     "input,expected",
     [
         # positive assertions
+        ("2101 W. STATE ROAD 434\nSUITE 315", True),
         ("14001 E. ILIFF AVE 5-7TH FLOOR", True),
         ("1111 WILSON BVD STE 2222", True),
         ("800 W EL CAMINO REAL\n350 STE *", True),
@@ -498,6 +501,7 @@ def test_full_street_positive(input, expected):
     "input,expected",
     [
         # positive assertions
+        ("2101 W. STATE ROAD 434\nSUITE 315\nLONGWOOD, FL 32779", True),
         ("2222 WASHINGTON PK SUITE 401 BRIDGEVILLE, PA 11111", True),
         ("1234 Avenue N, Rosenberg, Texas 77777", True),
         ("One Baylor Plaza MS: BCM204\nHouston TX 77030-3411", True),