From 49374485fe3979de5710888c3243e9cab8dd0223 Mon Sep 17 00:00:00 2001 From: dacolombo Date: Mon, 7 Oct 2024 15:49:33 +0200 Subject: [PATCH 1/2] Add get_datasets to populate the index page of xiview-server --- app/routes/xiview.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/app/routes/xiview.py b/app/routes/xiview.py index 3e4f560..4bd3cf1 100644 --- a/app/routes/xiview.py +++ b/app/routes/xiview.py @@ -542,6 +542,32 @@ async def get_xiview_proteins(project, file=None): log_json_size(json_bytes, "proteins") # slows things down a little return Response(json_bytes, media_type='application/json') +@log_execution_time_async +@xiview_data_router.get('/get_datasets', tags=["xiVIEW"]) +async def get_datasets(): + conn = None + ds_rows = [] + error = None + try: + conn = await get_db_connection() + cur = conn.cursor() + query = """SELECT DISTINCT project_id, identification_file_name FROM upload;""" + # logger.debug(query) + cur.execute(query) + ds_rows = cur.fetchall() + # logger.info("finished") + cur.close() + except (Exception, psycopg2.DatabaseError) as e: + print(e) + error = e + finally: + if conn is not None: + conn.close() + # logger.debug('Database connection closed.') + if error is not None: + raise error + return ds_rows + @log_execution_time_async async def get_all_proteins(cur, ids): From 7ac46199e7ac1d0e43a0aa2f707d56819a1714f7 Mon Sep 17 00:00:00 2001 From: colin combe Date: Mon, 28 Oct 2024 09:36:18 +0000 Subject: [PATCH 2/2] for parallel xiview data requests --- app/routes/xiview.py | 85 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 14 deletions(-) diff --git a/app/routes/xiview.py b/app/routes/xiview.py index 4bd3cf1..7e12dcf 100644 --- a/app/routes/xiview.py +++ b/app/routes/xiview.py @@ -6,11 +6,12 @@ import fastapi import psycopg2 -from fastapi import APIRouter, Depends, Request, Response +from fastapi import APIRouter, Depends, Request, Response, HTTPException, status import orjson 
from psycopg2 import sql from psycopg2.extras import RealDictCursor from sqlalchemy.orm import session, Session +from typing import List, Any, Optional from models.upload import Upload from app.routes.shared import get_db_connection, get_most_recent_upload_ids, log_execution_time_async @@ -19,6 +20,27 @@ xiview_data_router = APIRouter() +# async def execute_query(query: str, params: Optional[List[Any]] = None, fetch_one: bool = False): +# """ +# Execute a query and return the result +# :param query: the query to execute +# :param params: the parameters to pass to the query +# :param fetch_one: whether to fetch one result or all +# :return: the result of the query +# """ +# try: +# conn = await get_db_connection() +# with conn.cursor() as cur: +# cur.execute(query, params) +# result = cur.fetchone() if fetch_one else cur.fetchall() +# conn.commit() +# return result +# except Exception as e: +# logging.error(f"Database operation failed: {e}") +# raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Database operation failed") +# finally: +# conn.close() + class EndpointFilter(logging.Filter): """ @@ -102,7 +124,7 @@ def visualisations(project_id: str, request: Request, session: Session = Depends if filename not in processed_filenames: datafile = { "filename": filename, - "visualisation": "cross-linking", + "visualisation": "cross-linking", # todo - we're not hyphenating crosslinking "link": (xiview_base_url + "?project=" + project_id + "&file=" + str(filename)) } @@ -226,10 +248,45 @@ async def get_results_metadata(cur, ids): return metadata +@log_execution_time_async +@xiview_data_router.get('/get_xiview_metadata', tags=["xiVIEW"]) +async def get_xiview_metadata(project, file=None): + """ + Get the metadata for the xiVIEW visualisation. 
+ URLs have the following structure: + https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_metadata?project=PXD020453&file=Cullin_SDA_1pcFDR.mzid + Users may provide only projects, meaning we need to have an aggregated view. + https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_metadata?project=PXD020453 + + :return: json of the metadata + """ + logger.info(f"get_xiview_metadata for {project}, file: {file}") + most_recent_upload_ids = await get_most_recent_upload_ids(project, file) + + conn = None + data = {} + error = None + + try: + conn = await get_db_connection() + cur = conn.cursor(cursor_factory=RealDictCursor) + data = await get_results_metadata(cur, most_recent_upload_ids) + cur.close() + except (Exception, psycopg2.DatabaseError) as e: + logger.error(e) + return {"error": "Database error"}, 500 + finally: + if conn is not None: + conn.close() + + start_time = time.time() + json_bytes = orjson.dumps(data) + logger.info(f'metadata json dump time: {time.time() - start_time}') + log_json_size(json_bytes, "metadata") + return Response(json_bytes, media_type='application/json') @log_execution_time_async async def get_matches(cur, ids): - # todo - check whats going on with this rank =1 and pass_threshold = True in mascot data, rank =1 condition seems to speeds things up (but should be redundant) # todo - rename 'si' to 'm' query = """WITH submodpep AS (SELECT * FROM modifiedpeptide WHERE upload_id = ANY(%s) AND link_site1 > -1) SELECT si.id AS id, si.pep1_id AS pi1, si.pep2_id AS pi2, @@ -380,7 +437,7 @@ async def get_xiview_matches(project, file=None): @log_execution_time_async -@xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"]) +# @xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"]) async def get_xiview_peptides(project, file=None): """ Get all the peptides. 
@@ -442,7 +499,7 @@ async def get_all_peptides(cur, ids): return cur.fetchall() @log_execution_time_async -@xiview_data_router.get('/get_xiview_peptides2', tags=["xiVIEW"]) +@xiview_data_router.get('/get_xiview_peptides', tags=["xiVIEW"]) async def get_xiview_peptides2(project, file=None): """ Get all the peptides. @@ -542,6 +599,15 @@ async def get_xiview_proteins(project, file=None): log_json_size(json_bytes, "proteins") # slows things down a little return Response(json_bytes, media_type='application/json') +@log_execution_time_async +async def get_all_proteins(cur, ids): + query = """SELECT id, name, accession, sequence, + cast(upload_id as text) AS search_id, description FROM dbsequence + WHERE upload_id = ANY(%s) + ;""" + cur.execute(query, [ids]) + return cur.fetchall() + @log_execution_time_async @xiview_data_router.get('/get_datasets', tags=["xiVIEW"]) async def get_datasets(): @@ -568,12 +634,3 @@ async def get_datasets(): raise error return ds_rows - -@log_execution_time_async -async def get_all_proteins(cur, ids): - query = """SELECT id, name, accession, sequence, - cast(upload_id as text) AS search_id, description FROM dbsequence - WHERE upload_id = ANY(%s) - ;""" - cur.execute(query, [ids]) - return cur.fetchall()