Skip to content

Commit 5206c08

Browse files
authoredJul 22, 2024
Dev (#8)
* batch_upload_generator() added as iterator option. Can be used for progress status tracking * batch_upload() updated to use new batch_upload_generator * clear_db() updated to iteratively cycle larger datasets * start_logging() and stop_logging() deprecated. Logging updated to use standard logger by name - 'neo4j_uploader' * upload() marked for deprecation
1 parent bf4aa55 commit 5206c08

12 files changed

+639
-473
lines changed
 

‎README.md

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
# Neo4j-uploader
2+
23
For uploading specially formatted dictionary data to a Neo4j database instance.
34

45
## Installation
6+
57
`pip install neo4j-uploader`
68

79
## Usage
10+
811
```
912
from neo4j_uploader import batch_upload
1013
@@ -44,7 +47,7 @@ data = {
4447
],
4548
"relationships": [
4649
{
47-
"type":"loves",
50+
"type":"LOVES",
4851
"from_node": {
4952
"record_key":"_from_uid",
5053
"node_key":"uid",
@@ -93,5 +96,16 @@ data = {
9396
upload(credentials, data, node_key="name")
9497
```
9598

99+
## Progress Tracking
100+
101+
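The `batch_upload_generator` function is a generator: iterate over it to receive intermediate results while the upload runs, which makes it suitable for progress tracking. Example usage: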
102+
103+
```
104+
for result in batch_upload_generator(config, data):
105+
print(f"Upload progress: {result.nodes_created} nodes created")
106+
107+
```
108+
96109
## Documentation
110+
97111
[Documentation](https://jalakoo.github.io/neo4j-uploader/neo4j_uploader.html) for the current version.

‎neo4j_uploader/__init__.py

+184-110
Large diffs are not rendered by default.

‎neo4j_uploader/_logger.py

+12-77
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,14 @@
11
import logging
22

3-
class ModuleLogger(object):
4-
'''
5-
@private
6-
Used to pass logging information to the calling module.
7-
'''
8-
9-
is_enabled : bool = False
10-
_logger = None
11-
12-
def __new__(cls):
13-
if not hasattr(cls, 'instance'):
14-
cls.instance = super(ModuleLogger, cls).__new__(cls)
15-
return cls.instance
16-
17-
def logger(self):
18-
if self.is_enabled is False:
19-
return EmptyLogger()
20-
21-
if self._logger is None:
22-
self._logger = logging.getLogger(__name__)
23-
# For custom formatting - This will cause duplicate logs for each message
24-
# FORMAT = "[%(asctime)s: %(filename)s: %(lineno)s - %(funcName)20s()] %(message)s"
25-
# formatter = logging.Formatter(FORMAT)
26-
# mh = logging.StreamHandler()
27-
# mh.setFormatter(formatter)
28-
# self._logger.addHandler(mh)
29-
self._logger.addHandler(logging.NullHandler())
30-
31-
return self._logger
32-
33-
def notset(self, arg: str):
34-
if self.is_enabled:
35-
return self.logger().notset(arg)
36-
pass
37-
38-
def debug(self, arg: str):
39-
if self.is_enabled:
40-
return self.logger().debug(arg)
41-
pass
42-
43-
def info(self, arg: str):
44-
if self.is_enabled:
45-
return self.logger().info(arg)
46-
pass
47-
48-
def warning(self, arg: str):
49-
if self.is_enabled:
50-
return self.logger().warning(arg)
51-
pass
52-
53-
def error(self, arg: str):
54-
if self.is_enabled:
55-
return self.logger().error(arg)
56-
pass
57-
58-
def critical(self, arg: str):
59-
if self.is_enabled:
60-
return self.logger().critical(arg)
61-
pass
62-
63-
class EmptyLogger:
64-
'''
65-
@private
66-
'''
67-
68-
def notset(self, arg:str):
69-
pass
70-
def debug(self, arg: str):
71-
pass
72-
def info(self, arg: str):
73-
pass
74-
def warning(self, arg: str):
75-
pass
76-
def error(self, arg: str):
77-
pass
78-
def critical(self, arg: str):
79-
pass
3+
module_name = "neo4j_uploader"
4+
logger = logging.getLogger(module_name)
5+
if not len(logger.handlers):
6+
stream_handler = logging.StreamHandler()
7+
formatter = logging.Formatter(
8+
f"%(levelname)s:{module_name}:%(asctime)s:%(message)s"
9+
)
10+
stream_handler.setFormatter(formatter)
11+
logger.addHandler(stream_handler)
12+
13+
# To remove/clear the module stream handler
14+
# logger.removeHandler(stream_handler)

‎neo4j_uploader/_n4j.py

+60-20
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,59 @@
1-
import asyncio
21
from neo4j import GraphDatabase
3-
from neo4j_uploader._logger import ModuleLogger
2+
from neo4j_uploader._logger import logger
3+
from typing import Tuple
44

5-
def validate_credentials(creds: (str, str, str)):
5+
6+
def validate_credentials(creds: Tuple[str, str, str]):
67
host, user, password = creds
78
with GraphDatabase.driver(host, auth=(user, password)) as driver:
89
driver.verify_connectivity()
910

10-
def upload_query(creds: (str, str, str), query, params={}, database: str = "neo4j"):
11+
12+
def upload_query(
13+
creds: Tuple[str, str, str],
14+
query,
15+
params={},
16+
database: str = "neo4j",
17+
):
1118
host, user, password = creds
1219
with GraphDatabase.driver(host, auth=(user, password)) as driver:
13-
_, summary, _ = driver.execute_query(query, params, database=database)
20+
_, summary, _ = driver.execute_query(query, params, database=database)
1421
return summary
1522

16-
def execute_query(creds: (str, str, str), query, params={}, database: str = "neo4j"):
23+
24+
def execute_query(
25+
creds: Tuple[str, str, str],
26+
query,
27+
params={},
28+
database: str = "neo4j",
29+
):
1730
host, user, password = creds
18-
ModuleLogger().debug(f'Using host: {host}, user: {user} to execute query: {query}')
31+
logger.debug(f"Using host: {host}, user: {user} to execute query: {query}")
1932
# Returns a tuple of records, summary, keys
2033
with GraphDatabase.driver(host, auth=(user, password)) as driver:
2134
return driver.execute_query(query, params, database=database)
2235

23-
def drop_constraints(creds: (str, str, str), database: str = "neo4j"):
24-
query = 'SHOW CONSTRAINTS'
36+
37+
def run_query(
38+
uri: str,
39+
username: str,
40+
password: str,
41+
query: str,
42+
params: dict = {},
43+
database: str = "neo4j",
44+
):
45+
with GraphDatabase.driver(uri, auth=(username, password)) as driver:
46+
return driver.execute_query(query, params, database=database)
47+
48+
49+
def drop_constraints(
50+
creds: Tuple[str, str, str],
51+
database: str = "neo4j",
52+
):
53+
query = "SHOW CONSTRAINTS"
2554
result = execute_query(creds, query, database=database)
2655

27-
ModuleLogger().info(f"Drop constraints results: {result}")
56+
logger.info(f"Drop constraints results: {result}")
2857

2958
# Have to make a drop constraint request for each individually!
3059
for record in result.records:
@@ -33,31 +62,42 @@ def drop_constraints(creds: (str, str, str), database: str = "neo4j"):
3362
drop_query = f"DROP CONSTRAINT {constraint_name}"
3463
drop_result = execute_query(creds, drop_query, database=database)
3564

36-
ModuleLogger().info(f"Drop constraint {constraint_name} results: {drop_result}")
65+
logger.info(f"Drop constraint {constraint_name} results: {drop_result}")
3766

3867
# This should now show empty
3968
result = execute_query(creds, query, database=database)
4069

4170
return result
4271

43-
def reset(creds : (str, str, str), database: str = "neo4j"):
4472

73+
def reset(
74+
creds: Tuple[str, str, str],
75+
database: str = "neo4j",
76+
):
4577
drop_constraints(creds, database)
4678

47-
# Clears nodes and relationships - but labels remain and can only be cleared via GUI
48-
query = """MATCH (n) DETACH DELETE n"""
49-
records, summary, keys = execute_query(creds, query, database=database)
79+
deleted_nodes_count = -1
80+
while deleted_nodes_count != 0:
81+
query = """
82+
MATCH (n)
83+
OPTIONAL MATCH (n)-[r]-()
84+
WITH n, r LIMIT 50000
85+
DELETE n, r
86+
RETURN count(n) as deletedNodesCount
87+
"""
88+
records, summary, keys = execute_query(creds, query, database=database)
89+
deleted_nodes_count = records[0]["deletedNodesCount"]
5090

51-
ModuleLogger().info(f"Reset results: {summary}")
5291
return summary
5392

93+
5494
def create_new_node_constraints(
55-
creds: (str, str, str),
56-
node_key: str,
57-
database: str = "neo4j"
95+
creds: Tuple[str, str, str],
96+
node_key: str,
97+
database: str = "neo4j",
5898
):
5999
query = f"""CREATE CONSTRAINT node_key IF NOT EXISTS FOR (u:`{node_key}`)\nREQUIRE u.`node_key` IS UNIQUE"""
60100
result = execute_query(creds, query, database=database)
61101

62-
ModuleLogger().info(f"Create new constraints results: {result}")
102+
logger.info(f"Create new constraints results: {result}")
63103
return result

0 commit comments

Comments
 (0)