import csv
import sys


def set_csv_field_size_limit():
    """
    Raise the ``csv`` module's field size limit as high as the platform allows.

    The limit is first set to ``sys.maxsize``. If ``csv.field_size_limit`` rejects the value
    with an ``OverflowError``, the candidate is divided by ten and tried again, until a value
    is accepted or the candidate drops to 1.

    Note:
        ``csv.field_size_limit`` changes module-wide state, so this function should be called
        before reading any CSV file to ensure that the field size limit is properly set.
        It produces no output, which makes it safe to call at import time.

    """
    limit = sys.maxsize

    while limit > 1:
        try:
            csv.field_size_limit(limit)
            break
        except OverflowError:
            # Floor division keeps the arithmetic in exact integers; ``int(limit / 10)``
            # would round-trip through a float and lose precision near ``sys.maxsize``.
            limit //= 10
a/psm_utils/io/peptide_record.py b/psm_utils/io/peptide_record.py index 2c1e6da..8afb97f 100644 --- a/psm_utils/io/peptide_record.py +++ b/psm_utils/io/peptide_record.py @@ -66,6 +66,9 @@ from psm_utils.peptidoform import Peptidoform from psm_utils.psm import PSM from psm_utils.psm_list import PSMList +from psm_utils.io._utils import set_csv_field_size_limit + +set_csv_field_size_limit() class _PeptideRecord: diff --git a/psm_utils/io/percolator.py b/psm_utils/io/percolator.py index ae7521a..586d9ee 100644 --- a/psm_utils/io/percolator.py +++ b/psm_utils/io/percolator.py @@ -25,6 +25,9 @@ from psm_utils.peptidoform import Peptidoform from psm_utils.psm import PSM from psm_utils.psm_list import PSMList +from psm_utils.io._utils import set_csv_field_size_limit + +set_csv_field_size_limit() class PercolatorTabReader(ReaderBase): diff --git a/psm_utils/io/sage.py b/psm_utils/io/sage.py index 25abe82..c7a849e 100644 --- a/psm_utils/io/sage.py +++ b/psm_utils/io/sage.py @@ -18,6 +18,9 @@ from psm_utils.io._base_classes import ReaderBase from psm_utils.psm import PSM from psm_utils.psm_list import PSMList +from psm_utils.io._utils import set_csv_field_size_limit + +set_csv_field_size_limit() class SageReader(ReaderBase): diff --git a/psm_utils/io/tsv.py b/psm_utils/io/tsv.py index 9fc0089..213358c 100644 --- a/psm_utils/io/tsv.py +++ b/psm_utils/io/tsv.py @@ -45,6 +45,7 @@ """ + from __future__ import annotations import ast @@ -59,6 +60,9 @@ from psm_utils.io.exceptions import PSMUtilsIOException from psm_utils.psm import PSM from psm_utils.psm_list import PSMList +from psm_utils.io._utils import set_csv_field_size_limit + +set_csv_field_size_limit() logger = logging.getLogger(__name__) From 8c5eef80a4c954acfc368f760cd22486b8b6c8e0 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 26 Mar 2024 14:04:14 +0100 Subject: [PATCH 3/3] removed unused import --- psm_utils/io/sage.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git 
a/psm_utils/io/sage.py b/psm_utils/io/sage.py index c7a849e..e0cc0d1 100644 --- a/psm_utils/io/sage.py +++ b/psm_utils/io/sage.py @@ -6,7 +6,6 @@ """ - from __future__ import annotations import csv @@ -17,7 +16,6 @@ from psm_utils.io._base_classes import ReaderBase from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList from psm_utils.io._utils import set_csv_field_size_limit set_csv_field_size_limit() @@ -91,11 +89,9 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: ), spectrum_id=psm_dict["scannr"], run=Path(psm_dict["filename"]).stem, - is_decoy=True - if psm_dict["label"] == "-1" - else False - if psm_dict["label"] == "1" - else None, + is_decoy=( + True if psm_dict["label"] == "-1" else False if psm_dict["label"] == "1" else None + ), qvalue=psm_dict["spectrum_q"], score=float(psm_dict[self.score_column]), precursor_mz=self._parse_precursor_mz(psm_dict["expmass"], psm_dict["charge"]),