Metadata ordering (#132)
* fixing items

* version increment

* change ordering and provide metadata creation

* adding more logging information

* automatically upload ndjson to info sibling folder
BryonLewis authored Apr 19, 2024
1 parent d429715 commit 21e2418
Showing 2 changed files with 117 additions and 5 deletions.
111 changes: 111 additions & 0 deletions scripts/generateMetadata.py
@@ -0,0 +1,111 @@
import click
import girder_client
import json

apiURL = "127.0.0.1" # url of the server
port = 8010 # set to your local port being used
rootFolderId = '662155dc50595d98ae1821d6' # root folderId to recursively look at
limit = 5 # only want to process X videos
# Below is a global variable which shouldn't be edited
totalFolders = 0 # use to maintain a total count of items added

def login():
    gc = girder_client.GirderClient(apiURL, port=port, apiRoot='girder/api/v1', scheme='http')
    gc.authenticate(interactive=True)
    return gc


def remove_before_folder(path, folder_name):
    index = path.find(folder_name)
    if index != -1:
        return path[index:]
    else:
        return None


def load_ndjson(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            data.append(json.loads(line))
    return data


def create_folder(gc: girder_client.GirderClient, parentId: str, name: str):
    result = gc.createFolder(parentId=parentId, name=name)
    return str(result["_id"])


def get_or_create_folder(gc: girder_client.GirderClient, path: str, root: str, existing):
    modified_path = remove_before_folder(path, root)
    if modified_path is None:
        modified_path = remove_before_folder(path, 'rawdata')
    base_folder_id = existing[root]
    if modified_path not in existing.keys():
        splits = modified_path.split('/')
        base_item = ""
        for index, item in enumerate(splits):
            if index == len(splits) - 1:
                # now we upload the video
                new_id = create_folder(gc, base_folder_id, item)
                gc.uploadFileToFolder(new_id, './SampleVideo.mp4', filename=item)
                postprocess(gc, new_id)
                break
            if base_item == "":
                modified_item = f'{root}/{item}'
            else:
                modified_item = f'{base_item}/{item}'
            if modified_item in existing.keys():
                base_folder_id = existing[modified_item]
            else:
                new_id = create_folder(gc, base_folder_id, item)
                existing[modified_item] = new_id
                base_folder_id = new_id
            base_item = modified_item


def generate_structure(data):
    for item in data:
        path = item['Key']


def postprocess(gc: girder_client.GirderClient, folderId: str):
    global totalFolders
    if totalFolders > limit:  # after the limit just stop.
        return
    folderData = gc.getFolder(folderId)
    gc.addMetadataToFolder(folderId, {'fps': 20, 'annotate': True, 'type': 'video', 'originalFPS': 20})
    meta = folderData.get('meta', {})
    # We need to mark this folder for post processing
    # gc.post(f'dive_rpc/postprocess/{folderId}', data={'skipTranscoding': True})
    print(f'Running Post Process on Folder: {folderData["name"]}')
    return


@click.command(
    name="MetadataCreation",
    help="Creates a metadata folder structure from an ndJSON file",
)
@click.argument('ndfile')
def run_script(ndfile):
    gc = login()
    folderData = gc.getFolder(rootFolderId)
    existing = {}
    existing[folderData['name']] = rootFolderId
    data = load_ndjson(ndfile)

    total = len(data)
    count = 0
    for item in data:
        replaced = item["Key"].replace('\\', '')
        count += 1
        print(f'Completed item: {count} of {total}')
        get_or_create_folder(gc, replaced, folderData['name'], existing)
    # now we place the ndjson file in an info folder
    info_folder = gc.createFolder(rootFolderId, name="info")
    gc.uploadFileToFolder(info_folder["_id"], ndfile)


if __name__ == "__main__":
    run_script()
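
For reference, a minimal standalone sketch of the per-record path handling the script performs, assuming a hypothetical ndjson line whose Key field uses backslash-escaped slashes (the bucket, folder, and file names below are made up, and remove_before_folder is re-implemented here only so the sketch runs on its own):

import json

# Hypothetical ndjson record; only the "Key" field matters to the script.
line = r'{"Key": "bucket\\/archive\\/RootFolder\\/site01\\/cam02\\/clip.mp4"}'
item = json.loads(line)

# Drop the literal backslashes, mirroring run_script's replace('\\', '').
replaced = item["Key"].replace('\\', '')

def remove_before_folder(path, folder_name):
    # Keep only the part of the path starting at folder_name.
    index = path.find(folder_name)
    return path[index:] if index != -1 else None

modified_path = remove_before_folder(replaced, 'RootFolder')
print(modified_path)            # RootFolder/site01/cam02/clip.mp4
splits = modified_path.split('/')
print(splits[:-1], splits[-1])  # intermediate folders to create, then the video name

Invoked as a CLI, the script would be run with something like python scripts/generateMetadata.py listing.ndjson (the ndjson filename is hypothetical), after editing apiURL, port, and rootFolderId at the top of the file.
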
11 changes: 6 additions & 5 deletions server/dive_server/views_metadata.py
@@ -76,7 +76,7 @@ def load_metadata_json(search_folder, type='ndjson'):
         Folder().childItems(
             search_folder,
             filters={"lowerName": {"$regex": regex}},
-            sort=[("updated", pymongo.ASCENDING)],
+            sort=[("updated", pymongo.DESCENDING)],
         )
     )
     if len(json_items) > 0:
@@ -96,7 +96,7 @@ def load_metadata_json(search_folder, type='ndjson'):
     if not isinstance(json_data, list):
         print("JSON metadata isn't an array")
         return False
-    return json_data
+    return json_data, file['name']


 class DIVEMetadata(Resource):
@@ -193,8 +193,9 @@ def process_metadata(
         data = None
         errorLog = []
         added = 0
+        dataFileName = ''
         if fileType in ['json', 'ndjson']:
-            data = load_metadata_json(search_folder, fileType)
+            data, dataFileName = load_metadata_json(search_folder, fileType)
             if not data:
                 return False
         else:
@@ -232,7 +233,6 @@ def process_metadata(
                 }
             )
         resource_path = ""
-        print(modified_key_paths)
         for datasetFolder in results:
             resource_path = path_util.getResourcePath(
                 'folder', datasetFolder, user=user
Expand Down Expand Up @@ -274,7 +274,7 @@ def process_metadata(
for key in item.keys():
if key not in metadataKeys.keys() and item[key] is not None:
datatype = python_to_javascript_type(type(item[key]))
metadataKeys[key] = {"type": datatype, "set": set(), "count": 1}
metadataKeys[key] = {"type": datatype, "set": set(), "count": 0}
if item[key] is None:
continue # we skip null values for processing
if metadataKeys[key]['type'] == 'string':
Expand Down Expand Up @@ -317,6 +317,7 @@ def process_metadata(
Folder().save(folder)

return {
"dataFileName": dataFileName,
"results": f"added {added} folders",
"errors": errorLog,
"metadataKeys": metadataKeys,
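
For context, a minimal sketch of the changed return shape in views_metadata.py, using a stubbed-out helper (the file record and row below are made up, not from the repository): load_metadata_json now returns both the parsed rows and the name of the item it read, and process_metadata surfaces that name as dataFileName in its summary.

def load_metadata_json_stub():
    # Stand-in for load_metadata_json: pretend one ndjson item was found.
    file = {'name': 'listing.ndjson'}                      # hypothetical item record
    json_data = [{'Key': 'RootFolder/site01/cam02/clip.mp4'}]
    return json_data, file['name']                         # same tuple shape as the diff

data, dataFileName = load_metadata_json_stub()             # callers now unpack two values
summary = {
    "dataFileName": dataFileName,
    "results": f"added {len(data)} folders",
}
print(summary)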
