From a5cceee31eba5ab131bd6d9099afa63940d8c1f3 Mon Sep 17 00:00:00 2001
From: Lukas Melninkas
Date: Wed, 17 Apr 2024 14:22:34 +0300
Subject: [PATCH] Support one-letter street names

---
 pyap/source_US/data.py  | 4 ++--
 tests/test_parser_us.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyap/source_US/data.py b/pyap/source_US/data.py
index 0b9aab2..454f790 100644
--- a/pyap/source_US/data.py
+++ b/pyap/source_US/data.py
@@ -117,10 +117,10 @@ def str_list_to_upper_lower_regex(str_list: List[str]) -> str:
 # 'Jean Baptiste Point du Sable Lake Shore Drive' and
 # 'Northeast Kentucky Industrial Parkway'
 # https://atkinsbookshelf.wordpress.com/tag/longest-street-name-in-us/
-# On the other hand, there are streets like "Ed Drive".
+# On the other hand, there are streets like "Ed Drive" and "M Street".
 street_name_multi_word_re = r"""
     (?:
-        [a-zA-Z0-9\ \.\-\'\’]{3,41}|[a-zA-Z]{2}(?=\ [a-zA-Z])
+        \b[a-zA-Z0-9\ \.\-\'\’]{3,41}|\b[A-Z][A-Za-z]?(?=\ [A-Z])
     )
 """
 
diff --git a/tests/test_parser_us.py b/tests/test_parser_us.py
index 2751239..1e5273a 100644
--- a/tests/test_parser_us.py
+++ b/tests/test_parser_us.py
@@ -565,6 +565,7 @@ def test_full_street_positive(input, expected):
         ("PO Box # A3656\nChicago, IL 60690", True),
         ("2006 Broadway Ave Suite 2A, PO Drawer J, Great Bend, KS 67530", True),
         ("135 Pinelawn Road STE 130 S, Melville, NY 11747", True),
+        ("1800 M STREET NW SUITE 375 N, WASHINGTON, DC 20036", True),
         # negative assertions
         ("ONE HEALING CENTER LLC, 16444", False),
         ("85 STEEL REGULAR SHAFT - NE", False),