From 71b7ff57acb5108d006360d7a73a19cc93e31cbe Mon Sep 17 00:00:00 2001 From: Matthieu Barba Date: Thu, 29 Feb 2024 11:18:58 +0000 Subject: [PATCH] Move store_gene_annotations and test --- .../io/genomio/gff3/extract_annotation.py | 15 ++++++++++ .../ensembl/io/genomio/gff3/simplifier.py | 17 +---------- .../tests/gff3/test_extract_annotation.py | 29 +++++++++++++++++++ 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/python/ensembl/io/genomio/gff3/extract_annotation.py b/src/python/ensembl/io/genomio/gff3/extract_annotation.py index 736eec9ad..2c0368746 100644 --- a/src/python/ensembl/io/genomio/gff3/extract_annotation.py +++ b/src/python/ensembl/io/genomio/gff3/extract_annotation.py @@ -252,3 +252,18 @@ def to_json(self, out_path: PathLike) -> None: self.transfer_descriptions() feats_list = self._to_list() print_json(Path(out_path), feats_list) + + def store_gene(self, gene: SeqFeature) -> None: + """Record the functional_annotations of a gene and its children features.""" + self.add_feature(gene, "gene") + + cds_found = False + for transcript in gene.sub_features: + self.add_feature(transcript, "transcript", gene.id) + for feat in transcript.sub_features: + if feat.type != "CDS": + continue + # Store CDS functional annotation only once + if not cds_found: + cds_found = True + self.add_feature(feat, "translation", transcript.id) diff --git a/src/python/ensembl/io/genomio/gff3/simplifier.py b/src/python/ensembl/io/genomio/gff3/simplifier.py index 587ce4d5f..edc411a81 100644 --- a/src/python/ensembl/io/genomio/gff3/simplifier.py +++ b/src/python/ensembl/io/genomio/gff3/simplifier.py @@ -162,24 +162,9 @@ def simpler_gff3_feature(self, feat: SeqFeature) -> Optional[SeqFeature]: # Normalize, store annotation, and return the cleaned up gene gene = self.normalize_gene(gene) - self.store_gene_annotations(gene) + self.annotations.store_gene(gene) return self.clean_gene(gene) - def store_gene_annotations(self, gene: SeqFeature) -> None: - """Record the 
functional_annotations of the gene and its children features.""" - self.annotations.add_feature(gene, "gene") - - cds_found = False - for transcript in gene.sub_features: - self.annotations.add_feature(transcript, "transcript", gene.id) - for feat in transcript.sub_features: - if feat.type != "CDS": - continue - # Store CDS functional annotation only once - if not cds_found: - cds_found = True - self.annotations.add_feature(feat, "translation", transcript.id) - def clean_gene(self, gene: SeqFeature) -> SeqFeature: """Return the same gene without qualifiers unrelated to the gene structure.""" diff --git a/src/python/tests/gff3/test_extract_annotation.py b/src/python/tests/gff3/test_extract_annotation.py index a96721706..9f23e0348 100644 --- a/src/python/tests/gff3/test_extract_annotation.py +++ b/src/python/tests/gff3/test_extract_annotation.py @@ -288,3 +288,32 @@ def test_transfer_descriptions( transcs = annot.get_features("transcript") assert genes[gene_name].get("description") == out_gene_desc assert transcs[transcript_name].get("description") == out_transc_desc + + +@pytest.mark.dependency(depends=["add_feature"]) +@pytest.mark.parametrize( + "with_cds, num_genes, num_tr, num_cds", + [ + pytest.param(False, 1, 1, 0, id="Store gene without CDS"), + pytest.param(True, 1, 1, 1, id="Store gene with CDS"), + ], +) +def test_store_gene(with_cds: bool, num_genes: int, num_tr: int, num_cds: int) -> None: + """Test store_gene given a gene Feature with transcripts and optional translations. 
+ """ + annot = FunctionalAnnotations() + gene_name = "gene_A" + transcript_name = "tran_A" + one_gene = SeqFeature(type="gene", id=gene_name) + one_gene.sub_features = [] + one_transcript = SeqFeature(type="mRNA", id=transcript_name) + one_transcript.sub_features = [] + if with_cds: + one_translation = SeqFeature(type="CDS", id="cds_A") + one_transcript.sub_features.append(one_translation) + one_gene.sub_features.append(one_transcript) + + annot.store_gene(one_gene) + assert len(annot.features["gene"]) == num_genes + assert len(annot.features["transcript"]) == num_tr + assert len(annot.features["translation"]) == num_cds