Skip to content

Commit

Permalink
Task/WC-120: Datacite operations and pipeline integration (#1037)
Browse files Browse the repository at this point in the history
* Rebase datacite operations on latest DRP branch

* integrate datacite utils in pipeline

---------

Co-authored-by: Shayan Khan <skhan@tacc.utexas.edu>
  • Loading branch information
jarosenb and shayanaijaz authored Feb 18, 2025
1 parent cd9f844 commit bf07c6a
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import datetime
from typing import Optional
import json
import requests
import networkx as nx
from django.conf import settings


def get_datacite_json(pub_graph: nx.DiGraph):
    """
    Generate datacite payload for a publishable entity. `pub_graph` is the output of
    either `get_publication_subtree` or `get_publication_full_tree`.

    The metadata is read from the `value` attribute of the `NODE_ROOT` node.

    Returns a dict of DataCite attributes with the keys `contributors`,
    `creators`, `titles`, `publisher`, `publicationYear`, `url` and `prefix`.

    Raises `KeyError` if the graph has no `NODE_ROOT` node, or if the root
    metadata lacks `title` or `projectId`.
    """
    base_meta = pub_graph.nodes["NODE_ROOT"]["value"]
    # Treat a missing or null author list as "no authors".
    authors = base_meta.get("authors") or []

    creators = [
        {
            "nameType": "Personal",
            "givenName": author.get("first_name", ""),
            "familyName": author.get("last_name", ""),
        }
        for author in authors
    ]

    # De-duplicate institutions while keeping first-seen order so the payload
    # is deterministic, and skip authors with no institution so that no
    # contributor with an empty name is sent to DataCite.
    institutions = list(
        dict.fromkeys(
            inst for inst in (author.get("inst") for author in authors) if inst
        )
    )

    project_id = base_meta["projectId"]

    datacite_json = {
        "contributors": [
            {
                "contributorType": "HostingInstitution",
                "nameType": "Organizational",
                "name": institution,
            }
            for institution in institutions
        ],
        "creators": creators,
        "titles": [{"title": base_meta["title"]}],
        "publisher": "Digital Rocks Portal",
        # The year is taken from the wall clock at payload-generation time.
        "publicationYear": datetime.datetime.now().year,
        "url": f"{settings.PORTAL_PUBLICATION_DATACITE_URL_PREFIX}/{project_id}",
        "prefix": settings.PORTAL_PUBLICATION_DATACITE_SHOULDER,
    }

    return datacite_json


def upsert_datacite_json(datacite_json: dict, doi: Optional[str] = None):
    """
    Create a draft DOI in datacite with the specified metadata. If a DOI is specified,
    the metadata for that DOI is updated instead.

    `datacite_json` is the attributes dict (e.g. the output of
    `get_datacite_json`); it is not modified. When updating an existing DOI,
    `publicationYear` is left out of the request.

    Returns the decoded JSON body of the DataCite response.

    Raises `requests.HTTPError` if DataCite answers with a 4xx/5xx status.
    """
    # Work on a shallow copy so the caller's dict is never mutated.
    attributes = dict(datacite_json)
    if doi:
        attributes.pop("publicationYear", None)

    datacite_payload = {
        "data": {
            "type": "dois",
            "relationships": {
                "client": {"data": {"type": "clients", "id": "tdl.tacc"}}
            },
            "attributes": attributes,
        }
    }

    dois_url = f"{settings.DATACITE_URL.strip('/')}/dois"
    # POST to the collection creates a new DOI; PUT to /dois/<doi> updates one.
    if doi:
        send, url = requests.put, f"{dois_url}/{doi}"
    else:
        send, url = requests.post, dois_url

    res = send(
        url,
        auth=(settings.DATACITE_USER, settings.DATACITE_PASS),
        data=json.dumps(datacite_payload),
        headers={"Content-Type": "application/vnd.api+json"},
        timeout=30,
    )
    # Fail loudly on an error status (as get_doi_publication_date does) rather
    # than handing an error document back to callers that index ["data"]["id"].
    res.raise_for_status()

    return res.json()


def publish_datacite_doi(doi: str):
    """
    Set a DOI's status to `Findable` in Datacite.

    Sends the `publish` event for `doi` with a PUT to `<DATACITE_URL>/dois/<doi>`
    using the configured DataCite credentials, and returns the decoded JSON
    response body. The HTTP status code is not checked here.
    """
    endpoint = f"{settings.DATACITE_URL.strip('/')}/dois/{doi}"
    credentials = (settings.DATACITE_USER, settings.DATACITE_PASS)
    body = json.dumps(
        {"data": {"type": "dois", "attributes": {"event": "publish"}}}
    )

    response = requests.put(
        endpoint,
        auth=credentials,
        data=body,
        headers={"Content-Type": "application/vnd.api+json"},
        timeout=30,
    )
    return response.json()


def hide_datacite_doi(doi: str):
    """
    Remove a Datacite DOI from public consumption.

    Sends the `hide` event for `doi` with a PUT to `<DATACITE_URL>/dois/<doi>`
    using the configured DataCite credentials, and returns the decoded JSON
    response body. The HTTP status code is not checked here.
    """
    hide_event = {"event": "hide"}
    request_kwargs = {
        "auth": (settings.DATACITE_USER, settings.DATACITE_PASS),
        "data": json.dumps({"data": {"type": "dois", "attributes": hide_event}}),
        "headers": {"Content-Type": "application/vnd.api+json"},
        "timeout": 30,
    }

    doi_url = f"{settings.DATACITE_URL.strip('/')}/dois/{doi}"
    return requests.put(doi_url, **request_kwargs).json()


def get_doi_publication_date(doi: str) -> str:
    """
    Look up the publication date for a DOI.

    Fetches `<DATACITE_URL>/dois/<doi>` without credentials and returns the
    `created` attribute of the record.

    Raises `requests.HTTPError` if the lookup returns a 4xx/5xx status.
    """
    lookup_url = f"{settings.DATACITE_URL.strip('/')}/dois/{doi}"
    response = requests.get(lookup_url, timeout=30)
    response.raise_for_status()

    attributes = response.json()["data"]["attributes"]
    return attributes["created"]
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from portal.apps._custom.drp import constants
from portal.libs.agave.utils import user_account, service_account
from portal.apps.publications.models import Publication, PublicationRequest
from portal.apps.projects.workspace_operations.datacite_operations import get_datacite_json, upsert_datacite_json, publish_datacite_doi
from django.db import transaction
from portal.apps.projects.workspace_operations.graph_operations import remove_trash_nodes
from portal.apps.search.tasks import index_publication
Expand Down Expand Up @@ -118,7 +119,12 @@ def publish_project(self, project_id: str, version: Optional[int] = 1):
value=nx.node_link_data(publication_tree),
)

doi = 'test_doi' # Replace with actual DOI retrieval logic
# Mint a DataCite DOI
existing_doi = source_project.value.get("doi", None)

datacite_json = get_datacite_json(publication_tree)
datacite_resp = upsert_datacite_json(datacite_json, doi=existing_doi)
doi = datacite_resp["data"]["id"]

# Update project metadata with datacite doi
source_project_id = f'{settings.PORTAL_PROJECTS_SYSTEM_PREFIX}.{project_id}'
Expand All @@ -140,6 +146,9 @@ def publish_project(self, project_id: str, version: Optional[int] = 1):
defaults={"value": published_project.value, "tree": nx.node_link_data(pub_tree), "version": version},
)

if not settings.DEBUG:
publish_datacite_doi(doi)

index_publication(project_id)

# transfer files
Expand Down
15 changes: 15 additions & 0 deletions server/portal/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,21 @@
PORTAL_PUBLICATION_REVIEWERS_GROUP_NAME = settings_custom.\
_PORTAL_PUBLICATION_REVIEWERS_GROUP_NAME

# DOI prefix ("shoulder"); sent as the `prefix` attribute of the DataCite payload.
PORTAL_PUBLICATION_DATACITE_SHOULDER = settings_custom.\
_PORTAL_PUBLICATION_DATACITE_SHOULDER

# Base URL to which "/<project id>" is appended to form the `url` registered
# for each DOI.
PORTAL_PUBLICATION_DATACITE_URL_PREFIX = settings_custom.\
_PORTAL_PUBLICATION_DATACITE_URL_PREFIX

# Base URL of the DataCite REST API; requests are sent to "<DATACITE_URL>/dois".
DATACITE_URL = settings_custom.\
_DATACITE_URL

# User name for authenticating DataCite API requests (kept in the secret settings).
DATACITE_USER = settings_secret.\
_DATACITE_USER

# Password for authenticating DataCite API requests (kept in the secret settings).
DATACITE_PASS = settings_secret.\
_DATACITE_PASS

PORTAL_PROJECTS_PRIVATE_KEY = settings_secret.\
_PORTAL_PROJECTS_PRIVATE_KEY

Expand Down
5 changes: 5 additions & 0 deletions server/portal/settings/settings_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@

_PORTAL_PUBLICATION_REVIEWERS_GROUP_NAME = 'PROJECT_REVIEWER'

# Datacite
# DOI prefix sent as the `prefix` attribute when DOI metadata is submitted.
_PORTAL_PUBLICATION_DATACITE_SHOULDER = "10.80023"
# Base URL; the project id is appended to build the URL registered for a DOI.
_PORTAL_PUBLICATION_DATACITE_URL_PREFIX = "https://cep.test/data/tapis/projects/drp.project.published.test"
# Base URL of the DataCite REST API. NOTE(review): this default host is
# api.test.datacite.org - presumably overridden for production; confirm.
_DATACITE_URL = "https://api.test.datacite.org/"

########################
# Custom Portal Template Assets
# Asset path root is static files output dir.
Expand Down
3 changes: 3 additions & 0 deletions server/portal/settings/settings_secret.example.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,6 @@
"directory": "external-resources"
}
}

# Credentials used to authenticate requests to the DataCite API.
# NOTE(review): "CHANGEME" looks like a placeholder to be replaced in the real
# settings_secret.py - confirm.
_DATACITE_USER = "tdl.tacc"
_DATACITE_PASS = "CHANGEME"

0 comments on commit bf07c6a

Please sign in to comment.