From 4932c943c5906d620cf6dafb60ac7211e33434c6 Mon Sep 17 00:00:00 2001
From: ArthurDeclercq
Date: Tue, 26 Mar 2024 14:01:59 +0100
Subject: [PATCH] add csv reader fields size limit function

---
Review note: the leftover debug print was removed from
set_csv_field_size_limit() and the back-off step now uses integer floor
division. The blob id on the _utils.py index line predates this revision.

 psm_utils/io/_utils.py         | 26 ++++++++++++++++++++++++++
 psm_utils/io/ionbot.py         | 13 ++++++++-----
 psm_utils/io/maxquant.py       |  3 +++
 psm_utils/io/msamanda.py       |  3 +++
 psm_utils/io/peptide_record.py |  3 +++
 psm_utils/io/percolator.py     |  3 +++
 psm_utils/io/sage.py           |  3 +++
 psm_utils/io/tsv.py            |  4 ++++
 8 files changed, 53 insertions(+), 5 deletions(-)
 create mode 100644 psm_utils/io/_utils.py

diff --git a/psm_utils/io/_utils.py b/psm_utils/io/_utils.py
new file mode 100644
index 0000000..1befd36
--- /dev/null
+++ b/psm_utils/io/_utils.py
@@ -0,0 +1,26 @@
+import sys
+import csv
+
+
+def set_csv_field_size_limit():
+    """
+    Set the maximum field size limit for reading CSV files.
+
+    The limit is applied with ``csv.field_size_limit``, starting from ``sys.maxsize``.
+    If that value raises an ``OverflowError``, it is divided by 10 and retried until
+    a value is accepted.
+
+    Note:
+        This function should be called before reading any CSV files to ensure that
+        the field size limit is properly set.
+
+    """
+    max_int = sys.maxsize
+
+    while max_int > 1:
+        try:
+            csv.field_size_limit(max_int)
+            break
+        except OverflowError:
+            # Floor division keeps the candidate an exact integer (no float round-trip).
+            max_int //= 10
diff --git a/psm_utils/io/ionbot.py b/psm_utils/io/ionbot.py
index 95ad274..43ad511 100644
--- a/psm_utils/io/ionbot.py
+++ b/psm_utils/io/ionbot.py
@@ -16,6 +16,9 @@
 from psm_utils.peptidoform import Peptidoform
 from psm_utils.psm import PSM
 from psm_utils.psm_list import PSMList
+from psm_utils.io._utils import set_csv_field_size_limit
+
+set_csv_field_size_limit()
 
 REQUIRED_COLUMNS = [
     "database_peptide",
@@ -89,11 +92,11 @@ def _get_peptide_spectrum_match(self, psm_dict: Dict[str, str | float]) -> PSM:
             ),
             spectrum_id=psm_dict["spectrum_title"],
             run=psm_dict["spectrum_file"],
-            is_decoy=True
-            if psm_dict["database"] == "D"
-            else False
-            if psm_dict["database"] == "T"
-            else None,
+            is_decoy=(
+                True
+                if psm_dict["database"] == "D"
+                else False if psm_dict["database"] == "T" else None
+            ),
             score=float(psm_dict["psm_score"]),
             precursor_mz=float(psm_dict["m/z"]),
             retention_time=float(psm_dict["observed_retention_time"]),
diff --git a/psm_utils/io/maxquant.py b/psm_utils/io/maxquant.py
index 99b13a1..f51aaf6 100644
--- a/psm_utils/io/maxquant.py
+++ b/psm_utils/io/maxquant.py
@@ -14,6 +14,9 @@
 from psm_utils.io._base_classes import ReaderBase
 from psm_utils.peptidoform import Peptidoform
 from psm_utils.psm import PSM
+from psm_utils.io._utils import set_csv_field_size_limit
+
+set_csv_field_size_limit()
 
 logger = logging.getLogger(__name__)
 
diff --git a/psm_utils/io/msamanda.py b/psm_utils/io/msamanda.py
index bbbeab0..67d9ccb 100644
--- a/psm_utils/io/msamanda.py
+++ b/psm_utils/io/msamanda.py
@@ -13,6 +13,9 @@
 from psm_utils.exceptions import PSMUtilsException
 from psm_utils.io._base_classes import ReaderBase
 from psm_utils.psm import PSM, Peptidoform
+from psm_utils.io._utils import set_csv_field_size_limit
+
+set_csv_field_size_limit()
 
 logger = logging.getLogger(__name__)
 
diff --git a/psm_utils/io/peptide_record.py b/psm_utils/io/peptide_record.py
index 2c1e6da..8afb97f 100644
--- a/psm_utils/io/peptide_record.py
+++ b/psm_utils/io/peptide_record.py
@@ -66,6 +66,9 @@
 from psm_utils.peptidoform import Peptidoform
 from psm_utils.psm import PSM
 from psm_utils.psm_list import PSMList
+from psm_utils.io._utils import set_csv_field_size_limit
+
+set_csv_field_size_limit()
 
 
 class _PeptideRecord:
diff --git a/psm_utils/io/percolator.py b/psm_utils/io/percolator.py
index ae7521a..586d9ee 100644
--- a/psm_utils/io/percolator.py
+++ b/psm_utils/io/percolator.py
@@ -25,6 +25,9 @@
 from psm_utils.peptidoform import Peptidoform
 from psm_utils.psm import PSM
 from psm_utils.psm_list import PSMList
+from psm_utils.io._utils import set_csv_field_size_limit
+
+set_csv_field_size_limit()
 
 
 class PercolatorTabReader(ReaderBase):
diff --git a/psm_utils/io/sage.py b/psm_utils/io/sage.py
index 25abe82..c7a849e 100644
--- a/psm_utils/io/sage.py
+++ b/psm_utils/io/sage.py
@@ -18,6 +18,9 @@
 from psm_utils.io._base_classes import ReaderBase
 from psm_utils.psm import PSM
 from psm_utils.psm_list import PSMList
+from psm_utils.io._utils import set_csv_field_size_limit
+
+set_csv_field_size_limit()
 
 
 class SageReader(ReaderBase):
diff --git a/psm_utils/io/tsv.py b/psm_utils/io/tsv.py
index 9fc0089..213358c 100644
--- a/psm_utils/io/tsv.py
+++ b/psm_utils/io/tsv.py
@@ -45,6 +45,7 @@
 
 
 """
+
 from __future__ import annotations
 
 import ast
@@ -59,6 +60,9 @@
 from psm_utils.io.exceptions import PSMUtilsIOException
 from psm_utils.psm import PSM
 from psm_utils.psm_list import PSMList
+from psm_utils.io._utils import set_csv_field_size_limit
+
+set_csv_field_size_limit()
 
 logger = logging.getLogger(__name__)
 