diff --git a/Pipfile b/Pipfile index 1251f9b..529959b 100644 --- a/Pipfile +++ b/Pipfile @@ -14,6 +14,7 @@ faker = "*" streamlit-player = "*" requests = "*" pandas = "*" +random-address = "*" [dev-packages] mock-generators = {editable = true, path = "."} diff --git a/Pipfile.lock b/Pipfile.lock index 93cf2d3..2d1108e 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "3ff999e6051e3b1bb0b7187e0b3be6f8ffc877541da36e4f2f06204720b5db8b" + "sha256": "e68b6c227d7d07ab29f7a02e2848e8cfdb543683331e4fb9def89c446ade3d11" }, "pipfile-spec": 6, "requires": { @@ -37,7 +37,7 @@ "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da", "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==4.12.2" }, "blinker": { @@ -61,7 +61,7 @@ "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7", "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==2023.5.7" }, "charset-normalizer": { @@ -219,7 +219,7 @@ "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3", "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==0.11.0" }, "decorator": { @@ -235,7 +235,7 @@ "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4", "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==0.4" }, "faker": { @@ -517,7 +517,7 @@ "sha256:553e2db454e2be4567caebef5176c98a40a7e24f7ea9c2fe8a1f05c1d9ea4005", "sha256:b58bb539dcb52e0b040ab2fed32f1f3146cbb2746dc3812940d9dd359c378bb6" ], - "markers": "python_version >= 
'3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==0.0.7" }, "markupsafe": { @@ -678,7 +678,7 @@ "sha256:ecde0f8adef7dfdec993fd54b0f78183051b6580f606111a6d789cd14c61ea0c", "sha256:f21c442fdd2805e91799fbe044a7b999b8571bb0ab0f7850d0cb9641a687092b" ], - "markers": "python_version >= '3.8'", + "markers": "python_version >= '3.11'", "version": "==1.24.3" }, "packaging": { @@ -797,7 +797,7 @@ "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==1.0.0" }, "protobuf": { @@ -888,7 +888,7 @@ "sha256:993f1a3599ca3f4fcd7160c7545ad06310c9e12f70174ae7ae8d4e25f6c5d3fa", "sha256:d260dda9ae781e1eab6ea15bacb84015849833ba5555f141d2d9b7b7473b307d" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==1.0.1" }, "pyparsing": { @@ -1013,9 +1013,17 @@ "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==6.0" }, + "random-address": { + "hashes": [ + "sha256:52785b8fc661fd5cf5cf7ff0413d7e3f0ea04f502f263df46468b3370cc15f5b", + "sha256:93c16439139417d29812a0e64d48f871bd31ab5529ae74cb9e4d6d3bdfc7c7c5" + ], + "index": "pypi", + "version": "==1.1.1" + }, "rdflib": { "hashes": [ "sha256:36b4e74a32aa1e4fa7b8719876fb192f19ecd45ff932ea5ebbd2e417a0247e63", @@ -1053,7 +1061,7 @@ "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94", "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==5.0.0" }, "soupsieve": { @@ -1108,7 +1116,7 @@ 
"sha256:22a50eb43407bab3d0ed2d4b58e89819da477cd0592ef87edbd373c286712e3a", "sha256:b3c9520c1b48f2eef3c702b5a967f64c9a8ff2ea8e74ebb26c0e9195965bb923" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==0.1.2" }, "streamlit-extras": { @@ -1124,7 +1132,7 @@ "sha256:3b6625fefb5c2bc759a3b2407c53c97b23cf69924ec31bd2fa7d5313b7691068", "sha256:93d3d910d9be3e60f07c21b28d2ed0487737e071e48ce1faf858e1f68e05b624" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==0.0.2" }, "streamlit-image-coordinates": { @@ -1156,7 +1164,7 @@ "sha256:0081212d80d178bda337acf2432425e2016d757f57834b18645d4c5b928d4c0f", "sha256:991b103cd3448b0f6507f8051777b996a17b4630956d5b6fa13344175b20e572" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==1.0.2" }, "streamlit-vertical-slider": { @@ -1164,7 +1172,7 @@ "sha256:6eaee79a397341eee6ec7862b77d27d548d2bdd126812fd811f831bd4d561f48", "sha256:ab727cd5c1799c1d9a19c6201ff2a9bcda08222c849c5670ad7a0d994c9fdcdc" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==1.0.2" }, "tenacity": { @@ -1172,7 +1180,7 @@ "sha256:2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0", "sha256:43af037822bd0029025877f3b2d97cc4d7bb0c2991000a3d59d71517c5c969e0" ], - "markers": "python_version >= '3.6'", + "markers": "python_full_version >= '3.6.0'", "version": "==8.2.2" }, "toml": { diff --git a/mock_generators/generators/4b0db60a.py b/mock_generators/generators/4b0db60a.py deleted file mode 100644 index 9abffc3..0000000 --- a/mock_generators/generators/4b0db60a.py +++ /dev/null @@ -1,11 +0,0 @@ -import random -import logging -# Do not change function name or arguments -def generate( - args: list[any] - ) -> tuple[dict, list[dict]]: - # TODO: This doesn't support actual args, just a list of values - to_node_values = args - result = 
random.choice(to_node_values) - logging.info(f'4b0db60a: result: {result}, values_remaining: {len(to_node_values)}') - return (result, to_node_values) \ No newline at end of file diff --git a/mock_generators/generators/address_usa.py b/mock_generators/generators/address_usa.py new file mode 100644 index 0000000..4048354 --- /dev/null +++ b/mock_generators/generators/address_usa.py @@ -0,0 +1,10 @@ +from random_address import real_random_address_by_state +import random + +def generate(args: list[any]): + # Generate a dictionary with valid random address information + states = [ + "AL", "AR", "CA", "CO", "CT", "DC", "FL", "GA", "HI", "KY", "MA", "MD", "TN", "TX", "OK", "VT" + ] + state_code = random.choice(states) + return real_random_address_by_state(state_code) \ No newline at end of file diff --git a/mock_generators/generators/92eeddbb.py b/mock_generators/generators/city.py similarity index 100% rename from mock_generators/generators/92eeddbb.py rename to mock_generators/generators/city.py diff --git a/mock_generators/generators/470ff56f.py b/mock_generators/generators/country.py similarity index 100% rename from mock_generators/generators/470ff56f.py rename to mock_generators/generators/country.py diff --git a/mock_generators/generators/73853311.py b/mock_generators/generators/exhaustive_random.py similarity index 75% rename from mock_generators/generators/73853311.py rename to mock_generators/generators/exhaustive_random.py index ead04f3..bc20b4b 100644 --- a/mock_generators/generators/73853311.py +++ b/mock_generators/generators/exhaustive_random.py @@ -1,5 +1,4 @@ from random import shuffle -import logging # Do not change function name or arguments def generate( @@ -10,5 +9,5 @@ def generate( node_values = args shuffle(node_values) choice = node_values.pop(0) - # logging.info(f'73853311: choice: {choice}, values remaining: {len(node_values)}') + return (choice, node_values) \ No newline at end of file diff --git a/mock_generators/generators/postcode.py
b/mock_generators/generators/postcode.py new file mode 100644 index 0000000..abb2d28 --- /dev/null +++ b/mock_generators/generators/postcode.py @@ -0,0 +1,5 @@ +from faker import Faker +fake = Faker() + +def generate(args: list[any]): + return fake.postcode() \ No newline at end of file diff --git a/mock_generators/generators/pure_random.py b/mock_generators/generators/pure_random.py new file mode 100644 index 0000000..68f6ed7 --- /dev/null +++ b/mock_generators/generators/pure_random.py @@ -0,0 +1,9 @@ +import random +# Do not change function name or arguments +def generate( + args: list[any] + ) -> tuple[dict, list[dict]]: + + targets = args[:] + result = random.choice(targets) + return (result, targets) \ No newline at end of file diff --git a/mock_generators/generators/state.py b/mock_generators/generators/state.py new file mode 100644 index 0000000..bd5ae73 --- /dev/null +++ b/mock_generators/generators/state.py @@ -0,0 +1,9 @@ +import random + +def generate(args: list[any]): + states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", + "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", + "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", + "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", + "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"] + return random.choice(states) \ No newline at end of file diff --git a/mock_generators/generators/street.py b/mock_generators/generators/street.py new file mode 100644 index 0000000..e25babc --- /dev/null +++ b/mock_generators/generators/street.py @@ -0,0 +1,5 @@ +from faker import Faker +fake = Faker() + +def generate(args: list[any]): + return fake.street_address() \ No newline at end of file diff --git a/mock_generators/logic/generate_mapping.py b/mock_generators/logic/generate_mapping.py index a363a1d..b7f86d9 100644 --- a/mock_generators/logic/generate_mapping.py +++ b/mock_generators/logic/generate_mapping.py @@ -6,10 +6,60 @@ from models.relationship_mapping 
import RelationshipMapping from models.property_mapping import PropertyMapping from models.generator import Generator -from logic.generate_values import generator_for_raw_property +from logic.generate_values import generator_for_raw_property, actual_generator_for_raw_property, assignment_generator_for import logging import uuid + +def generate_addresses_to( + raw_properties: dict[str, str], + generators: dict[str, Generator]): + + # Insert all values - they will be read as literals during the node generation process + raw_properties['street'] = f'{{"street":[]}}' + raw_properties['city'] = f'{{"city":[]}}' + raw_properties['state'] = f'{{"state":[]}}' + raw_properties['zip'] = f'{{"postcode":[]}}' + raw_properties['country'] = f'{{"country":[]}}' + return raw_properties + +def xgenerate_addresses_to( + raw_properties: dict[str, str], + generators: dict[str, Generator]): + + generator, args = actual_generator_for_raw_property('{"address_usa": []}', generators) + value = generator.generate(args) + # Insert all values - they will be read as literals during the node generation process + try: + address1 = value.get('address1', None) + if address1 is not None: + raw_properties['address1'] = f'{{"string":["{address1}"]}}' + address2 = value.get('address2', None) + if address2 is not None: + raw_properties['address2'] = f'{{"string":["{address2}"]}}' + city = value.get('city', None) + if city is not None: + raw_properties['city'] = f'{{"string":["{city}"]}}' + state = value.get('state', None) + if state is not None: + raw_properties['state'] = f'{{"string":["{state}"]}}' + postalCode = value.get('postalCode', None) + if postalCode is not None: + raw_properties['postalCode'] = f'{{"string":["{postalCode}"]}}' + lat = value.get('coordinates', None).get('lat', None) + if lat is not None: + raw_properties['latitude'] = f'{{"string":["{lat}"]}}' + lng = value.get('coordinates', None).get('lng', None) + if lng is not None: + raw_properties['longitude'] = 
f'{{"string":["{lng}"]}}' + # Add country + raw_properties['country'] = f'{{"string":["USA"]}}' + except Exception as e: + logging.error(f'Problem extracting data from address object: {value}: ERROR: {e}') + + return raw_properties + + def propertymappings_for_raw_properties( raw_properties: dict[str, str], generators: dict[str, Generator] @@ -29,16 +79,57 @@ def propertymappings_for_raw_properties( if generators is None or len(generators) == 0: raise Exception(f'generate_mapping.py: propertymappings_for_raw_properties: No generators assignment received.') + # Special handling for addresses + raw_keys = raw_properties.keys() + # Assign uuid if not key property assignment was made + if "{key}" not in raw_keys and "KEY" not in raw_keys: + raw_properties["KEY"] = "_uid" + raw_properties["_uid"] = f'{{"uuid":[]}}' + if "ADDRESS" in raw_keys: + raw_properties.pop("ADDRESS") + raw_properties = generate_addresses_to(raw_properties, generators) + # Going to an address generator to create following properties: + # address_line_1, address_line_2, city, state, zip, latitude, longitude + # generator, args = actual_generator_for_raw_property('{"address_usa": []}', generators) + # value = generator.generate(args) + # # Insert all values - they will be read as literals during the node generation process + # try: + # address1 = value.get('address1', None) + # if address1 is not None: + # raw_properties['address1'] = f'{{"string":["{address1}"]}}' + # address2 = value.get('address2', None) + # if address2 is not None: + # raw_properties['address2'] = f'{{"string":["{address2}"]}}' + # city = value.get('city', None) + # if city is not None: + # raw_properties['city'] = f'{{"string":["{city}"]}}' + # state = value.get('state', None) + # if state is not None: + # raw_properties['state'] = f'{{"string":["{state}"]}}' + # postalCode = value.get('postalCode', None) + # if postalCode is not None: + # raw_properties['postalCode'] = f'{{"string":["{postalCode}"]}}' + # lat = 
value.get('coordinates', None).get('lat', None) + # if lat is not None: + # raw_properties['latitude'] = f'{{"string":["{lat}"]}}' + # lng = value.get('coordinates', None).get('lng', None) + # if lng is not None: + # raw_properties['longitude'] = f'{{"string":["{lng}"]}}' + # # Add country + # raw_properties['country'] = f'{{"string":["USA"]}}' + # except Exception as e: + # logging.error(f'Problem extracting data from address object: {value}: ERROR: {e}') + for key, value in raw_properties.items(): # Skip any keys with { } (brackets) as these are special cases for defining count/assignment/filter generators if key.startswith('{') and key.endswith('}'): continue - # TODO: Skip special COUNT and KEY literals + # Skip special COUNT and KEY literals if key == "COUNT" or key == "KEY": continue - + try: generator, args = generator_for_raw_property(value, generators) if generator is None: @@ -207,10 +298,12 @@ def relationshipmappings_from( count_generator_config = '{"int_range": [1,3]}' logging.info(f"Relationship properties is missing COUNT or '{{count}}' key from properties: {properties}: Using default int_range generator") - assignment_generator_config = properties.get("{assignment}", None) - # If missing, use ExhaustiveRandom + assignment_generator_config = properties.get("ASSIGNMENT", None) if assignment_generator_config is None: - assignment_generator_config = "{\"exhaustive_random\":[]}" + assignment_generator_config = properties.get("{assignment}", None) + # If missing, use ExhaustiveRandom + if assignment_generator_config is None: + assignment_generator_config = "{\"exhaustive_random\":[]}" # Get proper generators for count generator try: @@ -227,9 +320,9 @@ def relationshipmappings_from( continue try: - assignment_generator, assignment_args = generator_for_raw_property(assignment_generator_config, generators) + assignment_generator, assignment_args = assignment_generator_for(assignment_generator_config, generators) except Exception as e: - 
logging.warning(f"generate_mappings: relationshipmappings_from: could not create assignment generator for relationship: {relationship_dict}: {e}") + logging.warning(f"generate_mappings: relationshipmappings_from: could not get assignment generator for relationship: {relationship_dict}: {e}") continue from_node = nodes.get(from_id, None) diff --git a/mock_generators/logic/generate_values.py b/mock_generators/logic/generate_values.py index c5dfc52..67d951b 100644 --- a/mock_generators/logic/generate_values.py +++ b/mock_generators/logic/generate_values.py @@ -1,5 +1,5 @@ -from models.generator import Generator +from models.generator import Generator, GeneratorType import logging import json @@ -197,6 +197,17 @@ def literal_generator_from_value( actual_string = json.dumps(result) return actual_generator_for_raw_property(actual_string, generators) +def assignment_generator_for( + config: str, + generators: dict[str, Generator] +) -> tuple[Generator, list[any]]: + + gen, args = actual_generator_for_raw_property(config, generators) + if gen.type != GeneratorType.ASSIGNMENT: + logging.error(f'Generator {gen.name} is not an assignment generator.') + return (None, None) + return gen, args + def generator_for_raw_property( property_value: str, generators: dict[str, Generator] diff --git a/mock_generators/models/relationship_mapping.py b/mock_generators/models/relationship_mapping.py index 9aec53e..6783e81 100644 --- a/mock_generators/models/relationship_mapping.py +++ b/mock_generators/models/relationship_mapping.py @@ -124,11 +124,13 @@ def generate_values( # TODO: Run filter generator here to determine which source nodes to process - # Make a copy of the generated list - values = deepcopy(self.to_node.generated_values) + # Make a copy of source and target dicts + sources = self.from_node.generated_values[:] + targets = self.to_node.generated_values[:] # Iterate through every generated source node - for value_dict in self.from_node.generated_values: + # for value_dict in 
self.from_node.generated_values: + for value_dict in sources: # dict of property names and generated values # Decide on how many of these relationships to generate @@ -158,17 +160,18 @@ def generate_values( for i in range(count): # Select a random target node - if values is None or len(values) == 0: - # TODO: This appears to break the randomization - logging.info(f'relationship_mapping.py: values exhausted at index {i} before count of {count} reached. Values: {len(values)}') - continue + if targets is None or len(targets) == 0: + # targets exhausted, reset + targets = self.to_node.generated_values[:] # Extract results. Values will be passed back through the next iteration in case the generator returns a modified list - # TODO: values does not change after this call - to_node_value_dict, new_values = self.assignment_generator.generate(values) + # print(f'Attempting to run assignment generator: {self.assignment_generator}. Targets: {len(targets)}') - values = new_values + to_node_value_dict, new_targets = self.assignment_generator.generate(targets) + + # print(f'Assignment generator returned target node: {to_node_value_dict}') + targets = new_targets # Types of randomization generators to consider: # - Pure Random diff --git a/mock_generators/named_generators.json b/mock_generators/named_generators.json index 0fbcc4e..cc02fdc 100644 --- a/mock_generators/named_generators.json +++ b/mock_generators/named_generators.json @@ -2,6 +2,39 @@ "README":{ "content": "This is the default list of all generators used by the app. If you add new generators they will be added to this file. 
The default_generators.json file contains a copy of this from the repo maintainer(s)" }, + "address_usa": { + "args": [], + "code_url": "mock_generators/generators/address_usa.py", + "description": "Random Address using the random-address package.", + "name": "Address USA", + "tags": [ + "address", + "location" + ], + "type": "String" + }, + "street": { + "args": [], + "code_url": "mock_generators/generators/street.py", + "description": "Random Street Address using the Faker library.", + "name": "Street", + "tags": [ + "address", + "street" + ], + "type": "String" + }, + "postcode": { + "args": [], + "code_url": "mock_generators/generators/postcode.py", + "description": "Random Postal Code using the Faker library.", + "name": "Postcode", + "tags": [ + "address", + "postcode" + ], + "type": "String" + }, "uri": { "args": [], "code_url": "mock_generators/generators/05711cac.py", @@ -115,7 +148,7 @@ }, "country": { "args": [], - "code_url": "mock_generators/generators/470ff56f.py", + "code_url": "mock_generators/generators/country.py", "description": "Country name generator using the Faker library.", "name": "Country", "tags": [ @@ -126,7 +159,7 @@ }, "pure_random": { "args": [], - "code_url": "mock_generators/generators/4b0db60a.py", + "code_url": "mock_generators/generators/pure_random.py", "description": "Randomly assigns to a target node. Duplicates and orphan nodes possible.", "name": "Pure Random", "tags": [ @@ -224,7 +257,7 @@ }, "exhaustive_random": { "args": [], - "code_url": "mock_generators/generators/73853311.py", + "code_url": "mock_generators/generators/exhaustive_random.py", "description": "Assigns each source node to a random target node, until target node records are exhausted.
No duplicates, no orphan to nodes.", "name": "Exhaustive Random", "tags": [ @@ -267,7 +300,7 @@ }, "city": { "args": [], - "code_url": "mock_generators/generators/92eeddbb.py", + "code_url": "mock_generators/generators/city.py", "description": "City name generator using the Faker library.", "name": "City", "tags": [ @@ -276,6 +309,19 @@ ], "type": "String" }, + "state": { + "args": [], + "code_url": "mock_generators/generators/state.py", + "description": "Random US state code", + "name": "State", + "tags": [ + "state", + "state code", + "US", + "name" + ], + "type": "String" + }, "date": { "args": [ { diff --git a/tests/test_generators.py b/tests/test_generators.py index a4d0f93..5a58bfe 100644 --- a/tests/test_generators.py +++ b/tests/test_generators.py @@ -5,6 +5,23 @@ from datetime import datetime test_generators = load_generators("mock_generators/named_generators.json") +class TestAddressGenerator: + def test_address_generator_elements(self): + try: + test_string = '{"address_usa": []}' + generator, args = actual_generator_for_raw_property(test_string, test_generators) + value = generator.generate(args) + + assert value['address1'], f'address object missing address_line_1: {value}' + assert value['city'], f'address object missing city: {value}' + assert value['state'], f'address object missing state: {value}' + assert value['postalCode'], f'address object missing postalCode: {value}' + assert value['coordinates']['lat'], f'address object missing coordinates.lat: {value}' + assert value['coordinates']['lng'], f'address object missing coordinates.lng: {value}' + + except Exception as e: + assert False, f'Exception: {e}' + class TestDateGenerator: def test_date_generator(self): try: