Skip to content

Commit

Permalink
task/WC-171 - Digital Rocks Migration Scripts (#1061)
Browse files Browse the repository at this point in the history
* migration utils for drp

* add exception handling

* add settings for drp mysql db

* update poetry lock file

* update drp sql utils

* fixes for pub date and string bug

* bug fix for sample path and added field sanitization

* fix for analysis data and pub doi errors
  • Loading branch information
shayanaijaz authored Feb 24, 2025
1 parent 056b184 commit 6a9e8e9
Show file tree
Hide file tree
Showing 9 changed files with 852 additions and 109 deletions.
233 changes: 125 additions & 108 deletions server/poetry.lock

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions server/portal/apps/_custom/drp/metadata_mappings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Mapping of metadata fields from old DRP database to new DRP metadata model."""

# Legacy porous-media type -> new-model value.
# Keys include both spelled-out labels and four-letter codes.
SAMPLE_POROUS_MEDIA_TYPE_MAPPINGS = {
    # beads
    'Beads': 'beads',
    'BEAD': 'beads',
    # sandstone
    'Sandstone': 'sandstone',
    'SAND': 'sandstone',
    # values that only appear as four-letter codes
    'CARB': 'carbonate',
    'SOIL': 'soil',
    'FIBR': 'fibrous_media',
    'GRAN': 'granite',
    'COAL': 'coal',
    # other
    'Other': 'other',
    'OTHE': 'other',
}

# Legacy sample source -> new-model value.
# Each value is reachable from a full label and from a one-letter code.
SAMPLE_SOURCE_MAPPINGS = {
    'Artificial': 'artificial', 'A': 'artificial',
    'Natural': 'natural', 'N': 'natural',
}

# Legacy integer flag -> 'yes' / 'no' string for the "is segmented" field.
# NOTE(review): 'no' is keyed by 2 (not 0) here -- confirm against the legacy
# database encoding.
ORIGIN_DATA_IS_SEGMENTED_MAPPING = {
    1: 'yes',
    2: 'no',
}

# Legacy voxel unit -> new-model unit name.
# 'micrometer' is accepted both spelled out and as the abbreviation 'um'.
ORIGIN_DATA_VOXEL_UNIT_MAPPING = {
    'micrometer': 'micrometer',
    'um': 'micrometer',
    'mm': 'millimeter',
    'nm': 'nanometer',
    'other': 'other',
}

# Legacy analysis-data type label -> new-model snake_case value.
ANALYSIS_DATA_TYPE_MAPPING = {
    'Simulation': 'simulation',
    'GeometricAnalysis': 'geometric_analysis',
    'Other': 'other',
}

# Legacy image-type label -> new-model identifier
# (lower-cased, with '-' and ' ' replaced by '_').
FILE_IMAGE_TYPE_MAPPING = {
    '8-bit': '8_bit',
    '64-bit Real': '64_bit_real',
    '16-bit Unsigned': '16_bit_unsigned',
    '32-bit Real': '32_bit_real',
    '32-bit Signed': '32_bit_signed',
    '24-bit RGB': '24_bit_rgb',
    '32-bit Unsigned': '32_bit_unsigned',
}

# Legacy byte-order label (hyphenated) -> new-model value (underscored).
FILE_BYTE_ORDER_MAPPING = {
    'little-endian': 'little_endian',
    'big-endian': 'big_endian',
}

# Legacy integer flag -> boolean for the "use binary correction" field.
FILE_USE_BINARY_CORRECTION_MAPPING = {
    1: True,
    0: False,
}
2 changes: 2 additions & 0 deletions server/portal/apps/_custom/drp/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class FileObj(DrpMetadataModel):
system: str
name: str
path: str
legacy_path: Optional[str] = None
type: Literal["file", "dir"]
length: Optional[int] = None
last_modified: Optional[str] = None
Expand Down Expand Up @@ -97,6 +98,7 @@ class DrpProjectRelatedPublications(DrpMetadataModel):

publication_title: str
publication_author: str
publication_doi: str
publication_date_of_publication: str
publication_publisher: str
publication_description: Optional[str] = None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from django.core.management.base import BaseCommand
from django.conf import settings
import networkx as nx
from portal.apps.projects.migration_utils.sql_db_utils import get_project_by_id, query_projects, query_published_projects
from portal.apps.publications.models import Publication
from portal.libs.agave.utils import service_account
from portal.apps.projects.workspace_operations.project_publish_operations import _add_values_to_tree

class Command(BaseCommand):
    """Management command that copies legacy DRP files into migrated projects.

    For every selected project the command reads the project's metadata graph,
    pairs each file object's ``legacyPath`` with its new ``path`` and submits
    one transfer task (via the service-account client) that copies the files
    from the legacy storage root into the project's system.

    Exactly one of ``--project`` / ``--publication`` must be given; it selects
    both the default project query and the destination system prefix.
    """

    help = "Migrate DRP project files to the new location."

    # URI prefix of the legacy file store; ``legacyPath`` values are appended
    # to it to form each transfer source.
    LEGACY_SOURCE_ROOT = 'tapis://cloud.data/corral-repl/utexas/pge-nsf/media'

    def add_arguments(self, parser):
        """Register the command-line options understood by this command."""
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help="Run the command without saving changes."
        )

        parser.add_argument(
            '--project-id',
            type=str,
            help="The ID of the project to migrate."
        )

        parser.add_argument(
            '--publication',
            action='store_true',
            help="Moves files to a published project location"
        )

        parser.add_argument(
            '--project',
            action='store_true',
            help="Moves files to a regular project location"
        )

    def handle(self, *args, **options):
        """Validate the options, then start one transfer task per project.

        A failure while processing one project is reported on stderr and does
        not stop the remaining projects from being processed.
        """
        self.dry_run = options['dry_run']

        if not options['project'] and not options['publication']:
            self.stderr.write("Please specify either --project or --publication")
            return

        if options['project'] and options['publication']:
            self.stderr.write("Please specify either --project or --publication, not both")
            return

        self.publication = options['publication']
        self.project = options['project']

        if options['project_id']:
            projects = get_project_by_id(options['project_id'])
        elif self.publication:
            projects = query_published_projects()
        else:
            projects = query_projects()

        client = service_account()

        for project in projects:
            # Best-effort per project: report the error and move on.
            try:
                self._migrate_project(client, project)
            except Exception as e:
                self.stderr.write(f"Error processing project {project['id']}: {e}")

    def _migrate_project(self, client, project):
        """Build and (unless dry-run) submit the transfer task for one project."""
        pub_id = f"{settings.PORTAL_PROJECTS_ID_PREFIX}-{project['id']}"
        if self.publication:
            project_prefix = settings.PORTAL_PROJECTS_PUBLISHED_SYSTEM_PREFIX
        else:
            project_prefix = settings.PORTAL_PROJECTS_SYSTEM_PREFIX
        project_id = f'{project_prefix}.{pub_id}'

        # Published projects carry their graph on the Publication record;
        # regular projects have it rebuilt from the project system.
        if self.publication:
            pub = Publication.objects.get(project_id=pub_id)
            project_graph = pub.tree
        else:
            project_graph = nx.node_link_data(_add_values_to_tree(project_id))

        file_mapping = self._collect_file_mapping(project_graph)
        transfer_elements = self._build_transfer_elements(project_id, file_mapping)

        if self.dry_run:
            self.stdout.write(
                f"Dry run complete for project {project['id']} with {len(file_mapping)} files to transfer. No changes made."
            )
            return

        # Do not submit a transfer task that has nothing to transfer.
        if not transfer_elements:
            self.stdout.write(f"No files to transfer for project {project['id']}; skipping.")
            return

        transfer = client.files.createTransferTask(elements=transfer_elements)
        self.stdout.write(f"Transfer started for {len(file_mapping)} files: {transfer}")

    @staticmethod
    def _collect_file_mapping(project_graph):
        """Return ``{legacyPath: path}`` for every file object in the graph.

        File objects missing either key are ignored; a node whose ``value`` or
        ``fileObjs`` entry is absent or ``None`` contributes nothing.
        """
        file_mapping = {}
        for node in project_graph.get('nodes', []):
            file_objs = (node.get("value") or {}).get("fileObjs") or []
            for file_obj in file_objs:
                legacy_path = file_obj.get("legacyPath")
                path = file_obj.get("path")
                if legacy_path and path:
                    file_mapping[legacy_path] = path
        return file_mapping

    def _build_transfer_elements(self, project_id, file_mapping):
        """Turn a legacy->new path mapping into transfer-task elements."""
        return [
            {
                'sourceURI': f'{self.LEGACY_SOURCE_ROOT}/{legacy_path.strip("/")}',
                'destinationURI': f'tapis://{project_id}/{new_path.strip("/")}',
            }
            for legacy_path, new_path in file_mapping.items()
        ]
Loading

0 comments on commit 6a9e8e9

Please sign in to comment.