def parse_acc(header):
    """Return the protein accession parsed from a FASTA header string.

    The header is cut at whichever of the separators ``' '`` or ``'|'``
    occurs first. If the text before that separator is the database tag
    ``gi`` or ``sp`` (e.g. ``sp|P12345|NAME``), the accession is the second
    field obtained by splitting the header on that same separator.
    Otherwise the accession is the text before the first separator.

    Args:
        header: FASTA header text for one sequence entry.

    Returns:
        The accession string. A header that contains neither separator is
        returned unchanged.
    """
    splitters = (' ', '|')
    # Map the first position of each separator that is present to the
    # separator itself, so the earliest one can be picked out below.
    positions = {header.find(sep): sep for sep in splitters if sep in header}
    if not positions:
        # No separator at all: the whole header is the accession.
        return header

    first_pos = min(positions)
    prefix = header[:first_pos]
    if prefix in ('gi', 'sp'):
        # Tagged header: the field right after the tag holds the accession.
        # Index 1 always exists because the separator is known to occur.
        return header.split(positions[first_pos])[1]
    return prefix