Skip to content

Commit

Permalink
Merge pull request #841 from uclahs-cds/czhu-fix-parse-vep
Browse files Browse the repository at this point in the history
Fix parseVEP to handle insertions in start-inclusion format
  • Loading branch information
zhuchcn authored Feb 7, 2024
2 parents 504d949 + e9a3527 commit 2dc0ae6
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 9 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

- Updated `splitFasta` and `summarizeFasta` to accept source combinations in `--order-source`.

- Fixed parseCIRCexplorer so the exon/intron indices in variant IDs are sorted correctly.
- Fixed `parseCIRCexplorer` so the exon/intron indices in variant IDs are sorted correctly.

- Fixed `parseVEP` to handle insertions in start-inclusion format. #840

## [1.2.1] - 2023-10-05

Expand Down
15 changes: 7 additions & 8 deletions moPepGen/parser/VEPParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def __init__(
def __repr__(self)->str:
"""Return representation of the VEP record."""
consequences = '|'.join(self.consequences)
return f"< {self.feature}, {consequences}, {self.location} >"
return f"< {self.feature}, {consequences}, {self.location}, {self.allele} >"

def convert_to_variant_record(self, anno:gtf.GenomicAnnotation,
genome:dna.DNASeqDict) -> seqvar.VariantRecord:
Expand Down Expand Up @@ -174,18 +174,17 @@ def convert_to_variant_record(self, anno:gtf.GenomicAnnotation,
# Sometimes insertions are reported by VEP in end-inclusion format
# (e.g., C -> TACC), which needs to be converted into start-inclusion
# format (A -> ATAC) for variants on + strand genes.
if strand == 1:
if seq.seq[alt_start] != allele[-1]:
raise ValueError(f"Don't know how to process this variant: {self}")
ref = str(seq.seq[alt_start])
if ref == allele[-1]:
alt_start -= 1
alt_end = alt_start + 1
ref = str(seq.seq[alt_start])
alt = ref + allele[:-1]
else:
if seq.seq[alt_start] != allele[0]:
raise ValueError(f"Don't know how to process this variant: {self}")
ref = str(seq.seq[alt_start])
elif ref == allele[0]:
ref = str(ref)
alt = allele
else:
raise ValueError(f"Don't know how to process this variant: {self}")
else: # SNV
ref = str(seq.seq[alt_start])
alt = allele
Expand Down
28 changes: 28 additions & 0 deletions test/unit/test_vep_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,34 @@ def test_vep_to_variant_record_case16_insertion(self):
self.assertEqual(record.ref, 'G')
self.assertEqual(record.alt, 'GTCA')

def test_vep_to_variant_record_case17_insertion_start_inclusion(self):
""" Insertion when the location is a single spot.
In this test case, the variant is represented by VEP in a start-inclusion
format as C -> CTCA
"""
genome = create_dna_record_dict(GENOME_DATA)
anno = create_genomic_annotation(ANNOTATION_DATA)

vep_record = VEPParser.VEPRecord(
uploaded_variation='rs55971985',
location='chr1:18',
allele='CTCA',
gene='ENSG0001',
feature='ENST0001.1',
feature_type='Transcript',
consequences=['missense_variant'],
cdna_position='11',
cds_position='11',
protein_position=3,
amino_acids=('S', 'T'),
codons=('aTa', 'aCa'),
existing_variation='-',
extra={}
)
record = vep_record.convert_to_variant_record(anno, genome)
self.assertEqual(record.ref, 'C')
self.assertEqual(record.alt, 'CTCA')

def test_vep_to_variant_mnv(self):
""" error is raised for MNV """
genome = create_dna_record_dict(GENOME_DATA)
Expand Down

0 comments on commit 2dc0ae6

Please sign in to comment.