Skip to content

Dev #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 22, 2024
Merged

Dev #33

Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,33 +1,41 @@
# MOCK GRAPH DATA GENERATOR

Applet using [Streamlit](https://streamlit.io) to conveniently design and generate interwoven mock data. A running cloud instance of this can be found [here](https://dev.neo4j.com/mock-graph-data-generator)

## Install Poetry

This applet uses [Poetry](https://python-poetry.org) for dependency management.

## Dependencies

This applet uses several packages that will auto-install if you use either the poetry or pipenv commands below. Notable is the use of 2 small packages:

1. [graph-data-generator](https://pypi.org/project/graph-data-generator/) for generating the actual mock data from a .json configuration
2. [neo4j-uploader](https://pypi.org/project/neo4j-uploader/) for uploading generated .json output to a [Neo4j](https://neo4j.com/developer/) graph database instance

## Local Running

```
poetry update
poetry run streamlit run graph_data_generator_streamlit/app.py
```

## Testing with local packages

`poetry add --editable /path/to/package`

## Running in Google Cloud

- Set up a [Google Cloud account](https://cloud.google.com)
- Create a [Google Cloud Project](https://developers.google.com/workspace/guides/create-project)
- [Enable billing](https://cloud.google.com/billing/docs/how-to/modify-project) for that project
- Temporarily move any .streamlit/secrets.toml file to the root directory (same level as the Dockerfile)
- Install the [gcloud CLI](https://cloud.google.com/sdk/docs/install)
- Run the following commands from the terminal of your local dev machine:

```
gcloud builds submit --tag gcr.io/<google_cloud_project_id>/mock-graph-generator
gcloud run deploy --image gcr.io/<google_cloud_project_id>/mock-graph-generator --platform managed --allow-unauthenticated

When completed, you can move the secrets.toml file back to .streamlit/ - or maintain a separate external secrets.toml file just for Google Cloud
```
```
12 changes: 6 additions & 6 deletions graph_data_generator_streamlit/app.py
Original file line number Diff line number Diff line change
@@ -7,13 +7,13 @@
from ui.samples_ui import samples_list
import logging


# Heavy import support
import sys
from pathlib import Path
from streamlit.config import on_config_parsed
from streamlit.web import cli


# noinspection PyUnresolvedReferences
def heavy_imports() -> None:
"""For an explanation, please refer to this thread -
@@ -22,7 +22,7 @@ def heavy_imports() -> None:
from streamlit_agraph import agraph, Node, Edge, Config


def main()-> None:
def main() -> None:

# Heavy import support
on_config_parsed(heavy_imports)
@@ -36,9 +36,9 @@ def main()-> None:
)

# SETUP
st.set_page_config(layout="wide",initial_sidebar_state='collapsed')
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
logging.getLogger().setLevel(logging.DEBUG)
logging.info(f'App Started')
logging.info(f"App Started")

# Uncomment to start graph_data_generator logging
# start_logging()
@@ -79,7 +79,6 @@ def main()-> None:
if "SAMPLE_IMAGES" not in st.session_state:
st.session_state["SAMPLE_IMAGES"] = []


# Header
instructions_ui()

@@ -117,5 +116,6 @@ def main()-> None:
with tab2:
samples_list()


if __name__ == "__main__":
main()
main()
50 changes: 3 additions & 47 deletions graph_data_generator_streamlit/ui/config_ui.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,9 @@
import streamlit as st

def config_ui():

# LOAD OPENAI KEY
# open_ai_key = st.secrets.get("OPENAI_API_KEY", None)
# if open_ai_key is None or open_ai_key == "":
# open_ai_key = st.session_state.get("OPENAI_API_KEY", None)
# else:
# st.session_state["OPENAI_API_KEY"] = open_ai_key

# new_open_ai_key = st.text_input(f'OpenAI KEY', type="password", value=open_ai_key)
# if new_open_ai_key != open_ai_key:
# st.session_state["OPENAI_API_KEY"] = new_open_ai_key

# # LOAD NEO4J URI
# neo4j_uri = st.secrets.get("NEO4J_URI", None)
# if neo4j_uri is None or neo4j_uri == "":
# neo4j_uri = st.session_state.get("NEO4J_URI", None)
# else:
# st.session_state["NEO4J_URI"] = neo4j_uri

# new_neo4j_uri = st.text_input(f'Neo4j URI', value = neo4j_uri)
# if new_neo4j_uri != neo4j_uri:
# st.session_state["NEO4J_URI"] = new_neo4j_uri

# # NEO4J USER
# neo4j_user = st.secrets.get("NEO4J_USER", None)
# if neo4j_user is None or neo4j_user == "":
# neo4j_user = st.session_state.get("NEO4J_USER", None)
# else:
# st.session_state["NEO4J_USER"] = neo4j_user


# new_neo4j_user = st.text_input(f'Neo4j USER', value = neo4j_user, placeholder = "neo4j")
# if new_neo4j_uri != neo4j_user:
# st.session_state["NEO4J_USER"] = new_neo4j_user

# # NEO4J PASSWORD
# neo4j_pass = st.secrets.get("NEO4J_PASSWORD", None)
# if neo4j_pass is None or neo4j_pass == "":
# neo4j_pass = st.session_state.get("NEO4J_PASSWORD", None)
# else:
# st.session_state["NEO4J_PASSWORD"] = neo4j_pass

# new_neo4j_pass = st.text_input(f'Neo4j PASSWORD', type = "password", value = neo4j_pass)
# if new_neo4j_pass != neo4j_pass:
# st.session_state["NEO4J_PASSWORD"] = new_neo4j_pass
def config_ui():

# Display current app version
# TODO: Pull this directly from the pyproject.toml
version = "0.7.1"
st.write(f"Version {version}")
version = "0.7.10"
st.write(f"Version {version}")
176 changes: 132 additions & 44 deletions graph_data_generator_streamlit/ui/export_ui.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@


import streamlit as st
import graph_data_generator as gdg
from neo4j_uploader import upload, start_logging
from neo4j_uploader import (
Neo4jConfig,
upload,
start_logging,
UploadResult,
convert_legacy_node_records,
convert_legacy_relationship_records,
batch_upload_generator,
)
from typing import Callable

import json
import logging

# Limit default Neo4j verbosity level
logging.getLogger("neo4j.io").setLevel(logging.INFO)
logging.getLogger("neo4j.pool").setLevel(logging.INFO)
logging.getLogger("neo4j_uploader").setLevel(logging.INFO)


def export_ui():

@@ -13,12 +28,12 @@ def export_ui():
return

# TODO: Add a generate data button here?

# Generate data
mapping = gdg.generate_mapping(txt)
data = gdg.generate_dictionaries(mapping)

with st.expander('Generated Data'):
with st.expander("Generated Data"):
pretty = json.dumps(data, indent=4, default=str)
st.code(pretty)

@@ -33,66 +48,139 @@ def export_ui():
for _, relationships_list in all_relationships.items():
relationships_count += len(relationships_list)

st.write(f'{nodes_count} Nodes and {relationships_count} Relationships generated')
st.write(f"{nodes_count} Nodes and {relationships_count} Relationships generated")

st.markdown("**③ EXPORT**")

c1, c2 = st.columns([1,1])
c1, c2 = st.columns([1, 1])
with c1:
with st.expander('Download .zip file'):
with st.expander("Download .zip file"):

st.markdown(
"That can be uploaded into [Neo4j's Data Importer](https://neo4j.com/docs/data-importer/current/)"
)

st.markdown("That can be uploaded into [Neo4j's Data Importer](https://neo4j.com/docs/data-importer/current/)")

# Create .zip file for data-importer
filename = st.text_input("Name of file", value="mock_data", help="Name of file to be used for the.zip file. Ignored if pushing directly to a Neo4j database instance.")
filename = st.text_input(
"Name of file",
value="mock_data",
help="Name of file to be used for the.zip file. Ignored if pushing directly to a Neo4j database instance.",
)

def on_download():
st.session_state["DOWNLOADING"] == True

try:
zip = gdg.package(mapping)
if zip is None:
st.warning('Unexpected problem generating file. Try an alternate JSON input')
st.warning(
"Unexpected problem generating file. Try an alternate JSON input"
)
else:
st.download_button(
label = "Download .zip file",
data = zip,
file_name = f"{filename}.zip",
mime = "text/plain",
on_click = on_download
label="Download .zip file",
data=zip,
file_name=f"{filename}.zip",
mime="text/plain",
on_click=on_download,
)
except Exception as e:
st.error(e)

with c2:
with st.expander("Upload to Neo4j"):

uri = st.text_input(f'Neo4j URI', value = st.session_state["NEO4J_URI"], placeholder="neo4j+s//92bd05dc.databases.neo4j.io", help="URI for your Aura Neo4j instance")

user = st.text_input(f'Neo4j USER', value = st.session_state["NEO4J_USER"], placeholder = "neo4j")

password = st.text_input(f'Neo4j PASSWORD', type = "password", value = st.session_state["NEO4J_PASSWORD"])
uri = st.text_input(
f"Neo4j URI",
value=st.session_state["NEO4J_URI"],
placeholder="neo4j+s//92bd05dc.databases.neo4j.io",
help="URI for your Aura Neo4j instance",
)

user = st.text_input(
f"Neo4j USER", value=st.session_state["NEO4J_USER"], placeholder="neo4j"
)

password = st.text_input(
f"Neo4j PASSWORD",
type="password",
value=st.session_state["NEO4J_PASSWORD"],
)

should_overwrite = st.toggle(
"Reset DB",
value=True,
help="All data in target database be deleted before upload if enabled. Default Enabled. Note: Large databases may take a long time to reset.",
)

if st.button(
"Upload to Neo4j", help="Upload generated data to a Neo4j instance"
):
# Upload credentials check
if uri is None or user is None or password is None:
st.error(
"Please specify the Neo4j instance credentials in the Configuration tab"
)
return

should_overwrite = st.toggle("Reset DB?", value=True)
# Execute upload
else:

# Optionally upload generated data to Neo4j
# TODO: Clicking on this button will force the generator to rerun
# Using st.spinner to show progress
# with st.spinner("Uploading..."):
# result = upload(
# neo4j_creds=(uri, user, password),
# data=data,
# should_overwrite=should_overwrite,
# )
# if result is None:
# st.error(f"Unexpected result: {result}")
# else:
# if result.was_successful == False:
# st.error(f"Upload Errors encountered\n{result}")
# else:
# st.info(f"Upload completed\n{result}")

# Using st.progress to show progress
progress_indicator = st.progress(0.0)
remaining_seconds_placeholder = st.empty()
expected_end_time_placeholder = st.empty()
final_result = None

nodes = convert_legacy_node_records(data.get("nodes"), True, "_uid")
rels = convert_legacy_relationship_records(
data.get("relationships"), True, "_uid"
)
converted_data = {"nodes": nodes, "relationships": rels}

if st.button("Upload to Neo4j", help="Upload generated data to a Neo4j instance"):
if uri is None or user is None or password is None:
st.error("Please specify the Neo4j instance credentials in the Configuration tab")
return
else:
# Enable uploader logging
start_logging()

try:
result = upload(neo4j_creds=(uri, user, password), data=data, should_overwrite=should_overwrite)
print(f'Upload result: {result}')
if result.was_successful == False:
st.error(f'Upload failed. Error encountered: {result.error_message}')
else:
st.info(f"Upload completed in {result.seconds_to_complete} seconds, {result.nodes_created} nodes created, {result.relationships_created} relationships created, {result.properties_set} properties set.")
except Exception as e:
st.error(f"Upload failed. Please check your credentials and try again. Error encountered: {e}")

config = Neo4jConfig(
neo4j_uri=uri,
neo4j_user=user,
neo4j_password=password,
overwrite=should_overwrite,
)
for result in batch_upload_generator(
config=config,
data=converted_data,
):
if result is None:
print(f"Unexpected result: {result}")
continue
completion = result.float_completed()
projected_sec = result.projected_seconds_to_complete()
progress_text = f"Upload {round(completion * 100)}% complete"

# Update progress indicator + supporting text
progress_indicator.progress(completion, progress_text)
remaining_seconds_placeholder.text(
f"Remaining: {projected_sec} seconds"
)
expected_end_time_placeholder.text(
f"Expected completion:{result.projected_completion_time()}"
)
final_result = result

if final_result.was_successful == False:
st.error(f"Upload Errors encountered\n{result}")
else:
st.info(f"Upload completed\n{result}")
Loading