Merge pull request #15 from Rappsilber-Laboratory/xiview-requests
for parallel xiVIEW HTTP requests
sureshhewabi authored Oct 29, 2024
2 parents 9da56fe + 7ac4619 commit 776535e
Showing 1 changed file with 89 additions and 6 deletions.
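The change lets xiVIEW issue its HTTP requests in parallel rather than one after another. As a rough illustration of the client side (a hedged sketch, assuming the httpx library; fetch_all and its parameters are illustrative names, and the base URL is taken from the endpoint docstrings in the diff below):

import asyncio
import httpx  # assumption: any async HTTP client would do

BASE_URL = "https://www.ebi.ac.uk/pride/archive/xiview"

async def fetch_all(project, file=None):
    # Fire all four data requests concurrently and gather the JSON payloads.
    params = {"project": project}
    if file is not None:
        params["file"] = file
    endpoints = ["get_xiview_metadata", "get_xiview_matches",
                 "get_xiview_peptides", "get_xiview_proteins"]
    async with httpx.AsyncClient(timeout=60.0) as client:
        responses = await asyncio.gather(
            *(client.get(f"{BASE_URL}/{ep}", params=params) for ep in endpoints))
    return {ep: r.json() for ep, r in zip(endpoints, responses)}

# e.g. asyncio.run(fetch_all("PXD020453", "Cullin_SDA_1pcFDR.mzid"))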
95 changes: 89 additions & 6 deletions app/routes/xiview.py
@@ -6,11 +6,12 @@

import fastapi
import psycopg2
-from fastapi import APIRouter, Depends, Request, Response
+from fastapi import APIRouter, Depends, Request, Response, HTTPException, status
import orjson
from psycopg2 import sql
from psycopg2.extras import RealDictCursor
from sqlalchemy.orm import session, Session
from typing import List, Any, Optional

from models.upload import Upload
from app.routes.shared import get_db_connection, get_most_recent_upload_ids, log_execution_time_async
@@ -19,6 +20,27 @@

xiview_data_router = APIRouter()

# async def execute_query(query: str, params: Optional[List[Any]] = None, fetch_one: bool = False):
#     """
#     Execute a query and return the result
#     :param query: the query to execute
#     :param params: the parameters to pass to the query
#     :param fetch_one: whether to fetch one result or all
#     :return: the result of the query
#     """
#     try:
#         conn = await get_db_connection()
#         with conn.cursor() as cur:
#             cur.execute(query, params)
#             result = cur.fetchone() if fetch_one else cur.fetchall()
#             conn.commit()
#             return result
#     except Exception as e:
#         logging.error(f"Database operation failed: {e}")
#         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Database operation failed")
#     finally:
#         conn.close()
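# A hedged sketch (not committed code) of how such a helper could avoid blocking
# the event loop: psycopg2 calls are synchronous, so offloading them with
# asyncio.to_thread lets parallel requests overlap. get_db_connection_sync is a
# hypothetical blocking variant of the connection factory used above.
#
# import asyncio
#
# async def execute_query_threaded(query, params=None, fetch_one=False):
#     def _work():
#         conn = get_db_connection_sync()  # hypothetical synchronous factory
#         try:
#             with conn.cursor() as cur:
#                 cur.execute(query, params)
#                 return cur.fetchone() if fetch_one else cur.fetchall()
#         finally:
#             conn.close()
#     return await asyncio.to_thread(_work)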


class EndpointFilter(logging.Filter):
"""
@@ -102,7 +124,7 @@ def visualisations(project_id: str, request: Request, session: Session = Depends
if filename not in processed_filenames:
    datafile = {
        "filename": filename,
-       "visualisation": "cross-linking",
+       "visualisation": "cross-linking",  # todo - we're not hyphenating crosslinking
        "link": (xiview_base_url + "?project=" + project_id + "&file=" +
                 str(filename))
    }
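# Note: a filename may contain characters that are not URL-safe; a cautious
# variant would percent-encode it when building the link, e.g.
#     "&file=" + urllib.parse.quote(str(filename), safe="")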
@@ -226,10 +248,45 @@ async def get_results_metadata(cur, ids):

    return metadata

@log_execution_time_async
@xiview_data_router.get('/get_xiview_metadata', tags=["xiVIEW"])
async def get_xiview_metadata(project, file=None):
    """
    Get the metadata for the xiVIEW visualisation.
    URLs have the following structure:
    https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_metadata?project=PXD020453&file=Cullin_SDA_1pcFDR.mzid
    Users may provide only a project, in which case an aggregated view is served:
    https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_metadata?project=PXD020453
    :return: json of the metadata
    """
    logger.info(f"get_xiview_metadata for {project}, file: {file}")
    most_recent_upload_ids = await get_most_recent_upload_ids(project, file)

    conn = None
    data = {}

    try:
        conn = await get_db_connection()
        cur = conn.cursor(cursor_factory=RealDictCursor)
        data = await get_results_metadata(cur, most_recent_upload_ids)
        cur.close()
    except (Exception, psycopg2.DatabaseError) as e:
        logger.error(e)
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Database error")
    finally:
        if conn is not None:
            conn.close()

    start_time = time.time()
    json_bytes = orjson.dumps(data)
    logger.info(f'metadata json dump time: {time.time() - start_time}')
    log_json_size(json_bytes, "metadata")
    return Response(json_bytes, media_type='application/json')
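# Note: returning a pre-serialised Response bypasses FastAPI's default
# jsonable_encoder / json.dumps path, so orjson's output goes straight to the
# wire; hence the dump-time logging above.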

@log_execution_time_async
async def get_matches(cur, ids):
    # todo - check what's going on with this rank = 1 and pass_threshold = True in mascot data; the rank = 1 condition seems to speed things up (but should be redundant)
    # todo - rename 'si' to 'm'
    query = """WITH submodpep AS (SELECT * FROM modifiedpeptide WHERE upload_id = ANY(%s) AND link_site1 > -1)
                SELECT si.id AS id, si.pep1_id AS pi1, si.pep2_id AS pi2,
@@ -380,7 +437,7 @@ async def get_xiview_matches(project, file=None):


@log_execution_time_async
-@xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"])
+# @xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"])
async def get_xiview_peptides(project, file=None):
    """
    Get all the peptides.
@@ -442,7 +499,7 @@ async def get_all_peptides(cur, ids):
    return cur.fetchall()

@log_execution_time_async
-@xiview_data_router.get('/get_xiview_peptides2', tags=["xiVIEW"])
+@xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"])
async def get_xiview_peptides2(project, file=None):
    """
    Get all the peptides.
@@ -542,7 +599,6 @@ async def get_xiview_proteins(project, file=None):
    log_json_size(json_bytes, "proteins")  # slows things down a little
    return Response(json_bytes, media_type='application/json')


@log_execution_time_async
async def get_all_proteins(cur, ids):
    query = """SELECT id, name, accession, sequence,
@@ -551,3 +607,30 @@ async def get_all_proteins(cur, ids):
                ;"""
    cur.execute(query, [ids])
    return cur.fetchall()
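# psycopg2 adapts a Python list to a SQL ARRAY, so passing [ids] satisfies the
# "upload_id = ANY(%s)" filters these queries use in a single round trip, e.g.:
#     cur.execute("SELECT id FROM upload WHERE id = ANY(%s)", ([42, 43],))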

@log_execution_time_async
@xiview_data_router.get('/get_datasets', tags=["xiVIEW"])
async def get_datasets():
    conn = None
    ds_rows = []
    error = None
    try:
        conn = await get_db_connection()
        cur = conn.cursor()
        query = """SELECT DISTINCT project_id, identification_file_name FROM upload;"""
        # logger.debug(query)
        cur.execute(query)
        ds_rows = cur.fetchall()
        # logger.info("finished")
        cur.close()
    except (Exception, psycopg2.DatabaseError) as e:
        logger.error(e)
        error = e
    finally:
        if conn is not None:
            conn.close()
            # logger.debug('Database connection closed.')
    if error is not None:
        raise error
    return ds_rows
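# With the default cursor each row is a (project_id, identification_file_name)
# tuple, so the endpoint responds with JSON along the lines of (illustrative
# values reused from the docstrings above):
#     [["PXD020453", "Cullin_SDA_1pcFDR.mzid"], ...]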
