From 311523325367d9e80b8068681df510de0cbb4606 Mon Sep 17 00:00:00 2001 From: Lukas Melninkas Date: Mon, 8 Jul 2024 14:16:09 +0300 Subject: [PATCH] Support US mail stop --- pyap/address.py | 1 + pyap/source_US/data.py | 9 +++++++++ tests/test_parser.py | 12 ++++++++++++ tests/test_parser_us.py | 19 +++++++++++++++++++ 4 files changed, 41 insertions(+) diff --git a/pyap/address.py b/pyap/address.py index 59bc350..e0bbb16 100644 --- a/pyap/address.py +++ b/pyap/address.py @@ -29,6 +29,7 @@ class Address: country: Union[str, None] = None route_id: Union[str, None] = None occupancy: Union[str, None] = None + mail_stop: Union[str, None] = None street_type: Union[str, None] = None building_id: Union[str, None] = None postal_code: Union[str, None] = None diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py index 598279a..bf68726 100644 --- a/pyap/source_US/data.py +++ b/pyap/source_US/data.py @@ -917,6 +917,13 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str: ) """ +mail_stop = r""" + (?P<mail_stop> + # attention: do not mix up with postal code + MSC?:?\s[A-Z]{,4}\s?\d{3,4}\b + ) + """ + po_box = r""" (?: [Pp]\.?\ ?[Oo]\.?\ ? @@ -968,6 +975,7 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str: (?:{part_div}{floor})? (?:{part_div}{building})? (?:{part_div}{occupancy})? + (?:{part_div}{mail_stop})? (?:{part_div}(?P{po_box}))? 
) | @@ -984,6 +992,7 @@ def street_type_list_to_regex(street_type_list: list[str]) -> str: floor=floor, building=building, occupancy=occupancy, + mail_stop=mail_stop, po_box=po_box, ) diff --git a/tests/test_parser.py b/tests/test_parser.py index 104a6f0..78c809a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -195,6 +195,18 @@ def test_combine_results(): "postal_code": "67530", }, ), + ( + "One Baylor Plaza MS: BCM204\nHouston TX 77030-3411", + { + "street_number": "One", + "street_type": "Plaza", + "street_name": "Baylor", + "mail_stop": "MS: BCM204", + "city": "Houston", + "region1": "TX", + "postal_code": "77030-3411", + }, + ), ], ) def test_parse_address(input: str, expected): diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py index 89f10b3..817c345 100644 --- a/tests/test_parser_us.py +++ b/tests/test_parser_us.py @@ -353,6 +353,23 @@ def test_occupancy(input, expected): execute_matching_test(input, expected, data_us.occupancy) +@pytest.mark.parametrize( + "input, expected", + [ + # positive assertions + ("MS CORP 003", True), + ("MS: BCM204", True), + ("MSC 1234", True), + ("MS 1234", True), + # negative assertions + ("MS 12345", False), + ("MS CORP", False), + ], +) +def test_mail_stop(input, expected): + execute_matching_test(input, expected, data_us.mail_stop) + + @pytest.mark.parametrize( "input,expected", [ @@ -476,6 +493,8 @@ def test_full_street_positive(input, expected): "input,expected", [ # positive assertions + ("One Baylor Plaza MS: BCM204\nHouston TX 77030-3411", True), + ("ONE ADP DRIVE\nMS CORP 003\nAUGUSTA, GA 30909", True), ("2755 CARPENTER RD SUITE 1W\nANN ARBOR, MI, US, 48108", True), ("1111 3rd Street Promenade, Santa Monica, CA 90000", True), ("P.O. BOX 10323 PH (205) 595-3511\nBIRMINGHAM, AL 35202", True),