From 49374485fe3979de5710888c3243e9cab8dd0223 Mon Sep 17 00:00:00 2001
From: dacolombo <daniele.colombo39@gmail.com>
Date: Mon, 7 Oct 2024 15:49:33 +0200
Subject: [PATCH 1/2] Add get_datasets to populate the index page of
 xiview-server

---
 app/routes/xiview.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/app/routes/xiview.py b/app/routes/xiview.py
index 3e4f560..4bd3cf1 100644
--- a/app/routes/xiview.py
+++ b/app/routes/xiview.py
@@ -542,6 +542,32 @@ async def get_xiview_proteins(project, file=None):
     log_json_size(json_bytes, "proteins")  # slows things down a little
     return Response(json_bytes, media_type='application/json')
 
+@log_execution_time_async
+@xiview_data_router.get('/get_datasets', tags=["xiVIEW"])
+async def get_datasets():
+    """
+    List the datasets available for the xiview-server index page.
+    :return: distinct (project_id, identification_file_name) rows from the upload table
+    :raises Exception: re-raises any error hit while querying, after closing the connection
+    """
+    conn = None
+    ds_rows = []
+    error = None
+    try:
+        conn = await get_db_connection()
+        with conn.cursor() as cur:  # closes the cursor even if the query fails
+            cur.execute("""SELECT DISTINCT project_id, identification_file_name FROM upload;""")
+            ds_rows = cur.fetchall()
+    except (Exception, psycopg2.DatabaseError) as e:
+        logger.error(e)
+        error = e
+    finally:
+        if conn is not None:
+            conn.close()
+        if error is not None:
+            raise error
+    return ds_rows
+
 
 @log_execution_time_async
 async def get_all_proteins(cur, ids):

From 7ac46199e7ac1d0e43a0aa2f707d56819a1714f7 Mon Sep 17 00:00:00 2001
From: colin combe <colin.combe@ed.ac.uk>
Date: Mon, 28 Oct 2024 09:36:18 +0000
Subject: [PATCH 2/2] for parallel xiview data requests

---
 app/routes/xiview.py | 85 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 71 insertions(+), 14 deletions(-)

diff --git a/app/routes/xiview.py b/app/routes/xiview.py
index 4bd3cf1..7e12dcf 100644
--- a/app/routes/xiview.py
+++ b/app/routes/xiview.py
@@ -6,11 +6,12 @@
 
 import fastapi
 import psycopg2
-from fastapi import APIRouter, Depends, Request, Response
+from fastapi import APIRouter, Depends, Request, Response, HTTPException, status
 import orjson
 from psycopg2 import sql
 from psycopg2.extras import RealDictCursor
 from sqlalchemy.orm import session, Session
+from typing import List, Any, Optional
 
 from models.upload import Upload
 from app.routes.shared import get_db_connection, get_most_recent_upload_ids, log_execution_time_async
@@ -19,6 +20,27 @@
 
 xiview_data_router = APIRouter()
 
+# async def execute_query(query: str, params: Optional[List[Any]] = None, fetch_one: bool = False):
+#     """
+#     Execute a query and return the result
+#     :param query: the query to execute
+#     :param params: the parameters to pass to the query
+#     :param fetch_one: whether to fetch one result or all
+#     :return: the result of the query
+#     """
+#     try:
+#         conn = await get_db_connection()
+#         with conn.cursor() as cur:
+#             cur.execute(query, params)
+#             result = cur.fetchone() if fetch_one else cur.fetchall()
+#         conn.commit()
+#         return result
+#     except Exception as e:
+#         logging.error(f"Database operation failed: {e}")
+#         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Database operation failed")
+#     finally:
+#             conn.close()
+
 
 class EndpointFilter(logging.Filter):
     """
@@ -102,7 +124,7 @@ def visualisations(project_id: str, request: Request, session: Session = Depends
         if filename not in processed_filenames:
             datafile = {
                 "filename": filename,
-                "visualisation": "cross-linking",
+                "visualisation": "cross-linking", # todo - we're not hyphenating crosslinking
                 "link": (xiview_base_url + "?project=" + project_id + "&file=" +
                          str(filename))
             }
@@ -226,10 +248,45 @@ async def get_results_metadata(cur, ids):
 
     return metadata
 
+@log_execution_time_async
+@xiview_data_router.get('/get_xiview_metadata', tags=["xiVIEW"])
+async def get_xiview_metadata(project, file=None):
+    """
+    Get the metadata for the xiVIEW visualisation.
+    URLs have the following structure:
+    https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_metadata?project=PXD020453&file=Cullin_SDA_1pcFDR.mzid
+    Users may provide only projects, meaning we need to have an aggregated view.
+    https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_metadata?project=PXD020453
+
+    :param project: project identifier, passed to get_most_recent_upload_ids
+    :param file: optional file name, passed to get_most_recent_upload_ids
+    :return: json of the metadata
+    :raises HTTPException: status 500 if the metadata cannot be read from the database
+    """
+    logger.info(f"get_xiview_metadata for {project}, file: {file}")
+    most_recent_upload_ids = await get_most_recent_upload_ids(project, file)
+
+    conn = None
+    try:
+        conn = await get_db_connection()
+        with conn.cursor(cursor_factory=RealDictCursor) as cur:
+            data = await get_results_metadata(cur, most_recent_upload_ids)
+    except (Exception, psycopg2.DatabaseError) as e:
+        logger.error(e)
+        # FastAPI does not treat a (body, status) tuple as a status code, so raise instead
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Database error") from e
+    finally:
+        if conn is not None:
+            conn.close()
+
+    start_time = time.time()
+    json_bytes = orjson.dumps(data)
+    logger.info(f'metadata json dump time: {time.time() - start_time}')
+    log_json_size(json_bytes, "metadata")
+    return Response(json_bytes, media_type='application/json')
 
 @log_execution_time_async
 async def get_matches(cur, ids):
-    # todo - check whats going on with this rank =1 and pass_threshold = True in mascot data, rank =1 condition seems to speeds things up (but should be redundant)
     # todo - rename 'si' to 'm'
     query = """WITH submodpep AS (SELECT * FROM modifiedpeptide WHERE upload_id = ANY(%s) AND link_site1 > -1)
 SELECT si.id AS id, si.pep1_id AS pi1, si.pep2_id AS pi2,
@@ -380,7 +437,7 @@ async def get_xiview_matches(project, file=None):
 
 
 @log_execution_time_async
-@xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"])
+# @xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"])
 async def get_xiview_peptides(project, file=None):
     """
     Get all the peptides.
@@ -442,7 +499,7 @@ async def get_all_peptides(cur, ids):
     return cur.fetchall()
 
 @log_execution_time_async
-@xiview_data_router.get('/get_xiview_peptides2', tags=["xiVIEW"])
+@xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"])
 async def get_xiview_peptides2(project, file=None):
     """
     Get all the peptides.
@@ -542,6 +599,15 @@ async def get_xiview_proteins(project, file=None):
     log_json_size(json_bytes, "proteins")  # slows things down a little
     return Response(json_bytes, media_type='application/json')
 
+@log_execution_time_async
+async def get_all_proteins(cur, ids):
+    """Run the dbsequence lookup for the given upload ids on cur and return all rows."""
+    cur.execute("""SELECT id, name, accession, sequence,
+                     cast(upload_id as text) AS search_id, description FROM dbsequence
+                     WHERE upload_id = ANY(%s)
+                ;""", [ids])
+    return cur.fetchall()
+
 @log_execution_time_async
 @xiview_data_router.get('/get_datasets', tags=["xiVIEW"])
 async def get_datasets():
@@ -568,12 +634,3 @@ async def get_datasets():
             raise error
     return ds_rows
 
-
-@log_execution_time_async
-async def get_all_proteins(cur, ids):
-    query = """SELECT id, name, accession, sequence,
-                     cast(upload_id as text) AS search_id, description FROM dbsequence
-                     WHERE upload_id = ANY(%s)
-                ;"""
-    cur.execute(query, [ids])
-    return cur.fetchall()