From 202a808ffd8cdcb1fdab4a5b1c9c701486b47f66 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Fri, 10 May 2024 13:40:36 +0100 Subject: [PATCH 1/3] add build_version meta key, rename component to component_db and replace BRC4 by VEuPathDB (or vpdb) --- .../dumper/dump_files/dumped_genome.json | 6 ++--- .../input_dir_brc_on/gen_prep_in_meta.json | 5 ++-- .../subworkflows/genome_prepare/meta.yml | 4 +-- .../nextflow/workflows/genome_prepare/main.nf | 4 +-- scripts/brc4/check_descriptions.pl | 15 +++-------- scripts/brc4/get_metadata_from_redmine.py | 25 +++++++++--------- .../io/genomio/data/schemas/genome.json | 10 ++++--- .../ensembl/io/genomio/database/factory.py | 26 ++++++++++++------- src/python/ensembl/io/genomio/events/dump.py | 12 ++++----- .../io/genomio/genome_metadata/dump.py | 2 +- .../ensembl/io/genomio/gff3/id_allocator.py | 2 +- src/python/tests/database/test_factory.py | 15 +++++++---- .../genome_metadata/test_extend/genome.json | 5 ++-- .../test_extend/updated_genome.json | 5 ++-- src/python/tests/gff3/test_id_allocator.py | 2 +- src/python/tests/gff3/test_simplifier.py | 10 +++---- ...c4name.gff => genes_badnames_vpdbname.gff} | 0 ...enome_no_brc4.json => genome_no_vpdb.json} | 0 .../{genome_brc4.json => genome_vpdb.json} | 5 ++-- 19 files changed, 81 insertions(+), 72 deletions(-) rename src/python/tests/gff3/test_simplifier/{genes_badnames_brc4name.gff => genes_badnames_vpdbname.gff} (100%) rename src/python/tests/gff3/test_simplifier/{genome_no_brc4.json => genome_no_vpdb.json} (100%) rename src/python/tests/gff3/test_simplifier/{genome_brc4.json => genome_vpdb.json} (83%) diff --git a/data/test/pipelines/dumper/dump_files/dumped_genome.json b/data/test/pipelines/dumper/dump_files/dumped_genome.json index 8a62e447f..660d36834 100644 --- a/data/test/pipelines/dumper/dump_files/dumped_genome.json +++ b/data/test/pipelines/dumper/dump_files/dumped_genome.json @@ -7,9 +7,9 @@ "taxonomy_id": 7159, "production_name": "aedes_aegypti" }, - "BRC4": { + "veupathdb": { "organism_abbrev": "aaegL5", - "component": "VectorBase" - + "component_db": "VectorBase", + "build_version": 65 } } diff --git a/data/test/pipelines/genome_prepare/input_dir_brc_on/gen_prep_in_meta.json b/data/test/pipelines/genome_prepare/input_dir_brc_on/gen_prep_in_meta.json index 898395824..0b96d3123 100644 --- a/data/test/pipelines/genome_prepare/input_dir_brc_on/gen_prep_in_meta.json +++ b/data/test/pipelines/genome_prepare/input_dir_brc_on/gen_prep_in_meta.json @@ -1,6 +1,7 @@ { - "BRC4": { - "component": "OrganismDB", + "veupathdb": { + "build_version": 65, + "component_db": "OrganismDB", "organism_abbrev": "organAbrev123" }, "species": {}, diff --git a/pipelines/nextflow/subworkflows/genome_prepare/meta.yml b/pipelines/nextflow/subworkflows/genome_prepare/meta.yml index 275f1c352..839a0bba7 100644 --- a/pipelines/nextflow/subworkflows/genome_prepare/meta.yml +++ b/pipelines/nextflow/subworkflows/genome_prepare/meta.yml @@ -16,7 +16,7 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "genome_prepare" -description: BRC/Ensembl metazoa pipeline. Retrieve data for genome(s), obtained from INSDC and RefSeq, validate and prepare GFF3, FASTA, JSON files for each genome accession. +description: VEuPathDB/Ensembl Metazoa pipeline. Retrieve data for genome(s), obtained from INSDC and RefSeq, validate and prepare GFF3, FASTA, JSON files for each genome accession. keywords: - fasta - gff3 @@ -45,7 +45,7 @@ input: type: directory description: | MANDATORY param. User supplied input directory name containing genome json(s) storing meta information including: - genome INSDC accession, BRC 'organism_abbrev', BRC 'component'. + genome INSDC accession, VEuPathDB 'organism_abbrev', VEuPathDB 'component_db', VEuPathDB 'build_version'. pattern: "input_dir/*.json" output: - fasta_dna: diff --git a/pipelines/nextflow/workflows/genome_prepare/main.nf b/pipelines/nextflow/workflows/genome_prepare/main.nf index 9172d7b54..a7dc81977 100644 --- a/pipelines/nextflow/workflows/genome_prepare/main.nf +++ b/pipelines/nextflow/workflows/genome_prepare/main.nf @@ -45,8 +45,8 @@ def meta_from_genome_json(json_path) { prod_name = data.assembly.accession publish_dir = data.assembly.accession if ( params.brc_mode ) { - prod_name = data.BRC4.organism_abbrev - publish_dir = "${data.BRC4.component}/${data.BRC4.organism_abbrev}" + prod_name = data.veupathdb.organism_abbrev + publish_dir = "${data.veupathdb.component_db}/${data.veupathdb.organism_abbrev}" } else if ( data.species && data.species.production_name ) { prod_name = data.species.production_name publish_dir = prod_name diff --git a/scripts/brc4/check_descriptions.pl b/scripts/brc4/check_descriptions.pl index 1e6e064b4..29cf9fe99 100755 --- a/scripts/brc4/check_descriptions.pl +++ b/scripts/brc4/check_descriptions.pl @@ -72,15 +72,15 @@ sub main { my @all_species = ($opt{species}) || @{$registry->get_all_species()}; for my $species (sort @all_species) { my $ma = $registry->get_adaptor($species, "core", "MetaContainer"); - my $component = get_meta_value($ma, 'BRC4.component'); + my $component = get_meta_value($ma, 'veupathdb.component_db'); if ($opt{component} and $opt{component} ne $component) { $ma->dbc->disconnect_if_idle(); next; } my $count = check_genes($registry, $species); - my $build = get_build($ma, $species); - my $org = get_meta_value($ma, 'BRC4.organism_abbrev'); + my $build = get_meta_value($ma, 'veupathdb.build_version'); + my $org = get_meta_value($ma, 'veupathdb.organism_abbrev'); $ma->dbc->disconnect_if_idle(); @@ -110,15 +110,6 @@ sub main { } } -sub get_build { - my ($ma, $key) = @_; - - my $dbname = $ma->dbc->dbname; - if ($dbname =~ /_(\d+)_\d+_\d+$/) { - return $1; - } -} - sub get_meta_value { my ($ma, $key) = @_; diff --git a/scripts/brc4/get_metadata_from_redmine.py b/scripts/brc4/get_metadata_from_redmine.py index 14a017055..d28dd2373 100644 --- a/scripts/brc4/get_metadata_from_redmine.py +++ b/scripts/brc4/get_metadata_from_redmine.py @@ -18,8 +18,7 @@ from redminelib import Redmine import argparse import os, json, re, time -import requests -import xml.etree.ElementTree as ET + url = "https://redmine.apidb.org" default_fields = dict( @@ -63,7 +62,7 @@ def retrieve_genomes(redmine, output_dir, build=None): failed_issues.append({"issue": issue, "desc": failure}) continue - abbrev = genome["BRC4"]["organism_abbrev"] + abbrev = genome["veupathdb"]["organism_abbrev"] group = "other" if "Reference change" in extra["operations"]: group = "reference_change" @@ -110,7 +109,7 @@ def retrieve_genomes(redmine, output_dir, build=None): pass for genome in genomes: - organism = genome["BRC4"]["organism_abbrev"] + organism = genome["veupathdb"]["organism_abbrev"] organism_file = os.path.join(group_dir, organism + ".json") with open(organism_file, "w") as f: json.dump(genome, f, indent=True) @@ -147,8 +146,9 @@ def parse_genome(issue): customs = get_custom_fields(issue) genome = { - "BRC4": { - "component": "", + "veupathdb": { + "build_version": 0, + "component_db": "", "organism_abbrev": "", }, "species": {}, @@ -164,11 +164,14 @@ def parse_genome(issue): accession = check_accession(accession) genome["assembly"]["accession"] = accession - # Get BRC4 component + # Get VEuPathDB build version + genome["veupathdb"]["build_version"] = int(re.search(r"Build (\d+)", str(issue.fixed_version)).group(1)) + + # Get VEuPathDB component db if "Component DB" in customs: components = customs["Component DB"]["value"] if len(components) == 1: - genome["BRC4"]["component"] = components[0] + genome["veupathdb"]["component_db"] = components[0] elif len(components) > 1: raise Exception("More than 1 component for genome " + str(issue.id)) @@ -181,7 +184,7 @@ def parse_genome(issue): if not check_organism_abbrev(abbrev): print(f"Invalid organism_abbrev in {issue.id}: {abbrev}") else: - genome["BRC4"]["organism_abbrev"] = abbrev + genome["veupathdb"]["organism_abbrev"] = abbrev except KeyError: print(f"Can't get organism abbrev for {issue.id} because: missing organism_abbrev") return @@ -191,7 +194,6 @@ def parse_genome(issue): gff_path = customs["GFF 2 Load"]["value"] if gff_path: extra["GFF"] = True - # print("GFF2LOAD: separate gff file for %s: %s (issue %d)" % (genome["BRC4"]["organism_abbrev"], gff_path, issue.id)) except: pass @@ -199,7 +201,6 @@ def parse_genome(issue): try: if customs["Replacement genome?"]["value"].startswith("Yes"): extra["Replacement"] = True - # print("REPLACEMENT: the organism %s is a replacement (issue %d)" % (genome["BRC4"]["organism_abbrev"], issue.id)) except: pass @@ -217,7 +218,7 @@ def check_genome(genome, extra): if not genome: return "No genome parsed" - if not "organism_abbrev" in genome["BRC4"] or not genome["BRC4"]["organism_abbrev"]: + if not "organism_abbrev" in genome["veupathdb"] or not genome["veupathdb"]["organism_abbrev"]: return "No organism_abbrev defined" operations = extra["operations"] diff --git a/src/python/ensembl/io/genomio/data/schemas/genome.json b/src/python/ensembl/io/genomio/data/schemas/genome.json index 54985bb7a..2bfa6b237 100644 --- a/src/python/ensembl/io/genomio/data/schemas/genome.json +++ b/src/python/ensembl/io/genomio/data/schemas/genome.json @@ -87,15 +87,17 @@ "name" ] }, - "BRC4_info": { + "vpdb_info": { "type": "object", "additionalProperties": false, "properties": { - "component" : { "type" : "string" }, + "build_version" : { "type" : "integer" }, + "component_db" : { "type" : "string" }, "organism_abbrev" : { "type" : "string" } }, "required": [ - "component", + "build_version", + "component_db", "organism_abbrev" ] }, @@ -122,7 +124,7 @@ "annotation" : { "$ref" : "#/definitions/annotation_info" }, "genebuild" : { "$ref" : "#/definitions/genebuild_info" }, "provider" : { "$ref" : "#/definitions/provider_info" }, - "BRC4" : { "$ref" : "#/definitions/BRC4_info" }, + "veupathdb" : { "$ref" : "#/definitions/vpdb_info" }, "added_seq" : { "$ref" : "#/definitions/added_sequence_info" } }, "required" : [ diff --git a/src/python/ensembl/io/genomio/database/factory.py b/src/python/ensembl/io/genomio/database/factory.py index 21ec8508d..a312a77b7 100644 --- a/src/python/ensembl/io/genomio/database/factory.py +++ b/src/python/ensembl/io/genomio/database/factory.py @@ -35,9 +35,9 @@ def format_db_data(server_url: URL, dbs: List[str], brc_mode: bool = False) -> L Args: server: Server URL where all the databases are hosted. dbs: List of database names. - brc_mode: If true, assign ``BRC4.organism_abbrev`` as the species, and ``BRC4.component`` as the - division. Otherwise, the species will be ``species.production_name`` and the division will be - ``species.division``. + brc_mode: If true, assign `veupathdb.organism_abbrev` as the species, `veupathdb.component_db` as the + division and `veupathdb.build_version` as the project release. Otherwise, the species will be + `species.production_name` and the division will be `species.division`. Returns: List of dictionaries with 3 keys: "database", "species" and "division". @@ -55,12 +55,15 @@ def format_db_data(server_url: URL, dbs: List[str], brc_mode: bool = False) -> L project_release = core_db.get_project_release() if brc_mode: - brc_organism = core_db.get_meta_value("BRC4.organism_abbrev") - brc_component = core_db.get_meta_value("BRC4.component") - if brc_organism is not None: - species = brc_organism - if brc_component is not None: - division = brc_component + vpdb_organism = core_db.get_meta_value("veupathdb.organism_abbrev") + vpdb_component = core_db.get_meta_value("veupathdb.component_db") + vpdb_build = core_db.get_meta_value("veupathdb.build_version") + if vpdb_organism is not None: + species = vpdb_organism + if vpdb_component is not None: + division = vpdb_component + if vpdb_build is not None: + project_release = vpdb_build if not division: division = "all" @@ -137,7 +140,10 @@ def main() -> None: parser.add_argument( "--brc_mode", action="store_true", - help="Enable BRC mode, i.e. use organism_abbrev for species, component for division", + help=( + "Enable BRC mode, i.e. use organism_abbrev for species, component_db for division, " + "build_version for project release" + ), ) parser.add_log_arguments() args = parser.parse_args() diff --git a/src/python/ensembl/io/genomio/events/dump.py b/src/python/ensembl/io/genomio/events/dump.py index c502335c1..aec530c4e 100644 --- a/src/python/ensembl/io/genomio/events/dump.py +++ b/src/python/ensembl/io/genomio/events/dump.py @@ -17,7 +17,7 @@ __all__ = [ "IdsSet", "DictToIdsSet", - "BRC4_START_DATE", + "VPDB_START_DATE", "Pair", "UnsupportedEvent", "Event", @@ -38,7 +38,7 @@ from ensembl.utils.logging import init_logging_with_args -BRC4_START_DATE = datetime(2020, 5, 1) +VPDB_START_DATE = datetime(2020, 5, 1) IdsSet = Set[str] DictToIdsSet = Dict[str, IdsSet] @@ -89,7 +89,7 @@ class Event: name: Name of the event (will be updated automatically). pairs: All pair of ids for this event. - Any gene set before 2019-09 is dubbed pre-BRC4. + Any gene set before 2019-09 is dubbed pre-VPDB. """ @@ -234,14 +234,14 @@ def add_pair(self, pair: Pair) -> None: self.pairs.append(pair) def get_full_release(self) -> str: - """Returns the expanded release name, pre-BRC4 or `BRC4 = build`.""" + """Returns the expanded release name, pre-VPDB or `VPDB = build`.""" release = self.release date = self.date - if date and date > BRC4_START_DATE: + if date and date > VPDB_START_DATE: release = f"build {release}" else: - release = f"pre-BRC4 {release}" + release = f"pre-VPDB {release}" return release diff --git a/src/python/ensembl/io/genomio/genome_metadata/dump.py b/src/python/ensembl/io/genomio/genome_metadata/dump.py index 7cb7702b5..73e0dfcc3 100644 --- a/src/python/ensembl/io/genomio/genome_metadata/dump.py +++ b/src/python/ensembl/io/genomio/genome_metadata/dump.py @@ -45,7 +45,7 @@ "provider_url": str, "version": int, }, - "BRC4": {"organism_abbrev": str, "component": str}, + "veupathdb": {"organism_abbrev": str, "component_db": str, "build_version": int}, "genebuild": {"id": str, "method": str, "method_display": str, "start_date": str, "version": str}, "species": { "alias": str, diff --git a/src/python/ensembl/io/genomio/gff3/id_allocator.py b/src/python/ensembl/io/genomio/gff3/id_allocator.py index a25ca7f12..0d06678c7 100644 --- a/src/python/ensembl/io/genomio/gff3/id_allocator.py +++ b/src/python/ensembl/io/genomio/gff3/id_allocator.py @@ -43,7 +43,7 @@ class StableIDAllocator: def set_prefix(self, genome: Dict) -> None: """Sets the ID prefix using the organism abbrev if it exists in the genome metadata.""" try: - org = genome["BRC4"]["organism_abbrev"] + org = genome["veupathdb"]["organism_abbrev"] except KeyError: prefix = "TMP_PREFIX_" else: diff --git a/src/python/tests/database/test_factory.py b/src/python/tests/database/test_factory.py index 7ba8c8e7f..1312b7ee8 100644 --- a/src/python/tests/database/test_factory.py +++ b/src/python/tests/database/test_factory.py @@ -35,8 +35,9 @@ "species.production_name": "dog", "species.division": "metazoa", "assembly.accession": "GCA_000111222.3", - "BRC4.organism_abbrev": "brc_dog", - "BRC4.component": "brc_db", + "veupathdb.organism_abbrev": "brc_dog", + "veupathdb.component_db": "brc_db", + "veupathdb.build_version": 12, } @@ -124,7 +125,7 @@ def test_format_db_data( server_url: Server URL where all the databases are hosted. dbs: List of database names. brc_mode: BRC mode? - skip_keys: Return `None` instead of the assigned value for "BRC4.*" meta keys. + skip_keys: Return `None` instead of the assigned value for "veupathdb.*" meta keys. output: Expected list of dictionaries with metadata per database. """ @@ -132,7 +133,7 @@ def _get_meta_value(meta_key: str) -> Optional[str]: """Return empty string if "species.division" is requested in BRC mode, "Metazoa" otherwise.""" if (meta_key == "species.division") and brc_mode: return "" - if meta_key.startswith("BRC4.") and skip_keys: + if meta_key.startswith("veupathdb.") and skip_keys: return None return _META[meta_key] @@ -146,7 +147,11 @@ def _get_meta_value(meta_key: str) -> Optional[str]: if dbs: calls = [call("species.production_name"), call("species.division"), call("assembly.accession")] if brc_mode: - calls += [call("BRC4.organism_abbrev"), call("BRC4.component")] + calls += [ + call("veupathdb.organism_abbrev"), + call("veupathdb.component_db"), + call("veupathdb.build_version"), + ] dbconnection.get_meta_value.assert_has_calls(calls) dbconnection.get_project_release.assert_called() diff --git a/src/python/tests/genome_metadata/test_extend/genome.json b/src/python/tests/genome_metadata/test_extend/genome.json index 77d2f895d..bfbd3a570 100644 --- a/src/python/tests/genome_metadata/test_extend/genome.json +++ b/src/python/tests/genome_metadata/test_extend/genome.json @@ -1,6 +1,7 @@ { - "BRC4": { - "component": "PlasmoDB", + "veupathdb": { + "build_version": 65, + "component_db": "PlasmoDB", "organism_abbrev": "pfal3D7" }, "assembly": { diff --git a/src/python/tests/genome_metadata/test_extend/updated_genome.json b/src/python/tests/genome_metadata/test_extend/updated_genome.json index abd08f0a4..8e39d4594 100644 --- a/src/python/tests/genome_metadata/test_extend/updated_genome.json +++ b/src/python/tests/genome_metadata/test_extend/updated_genome.json @@ -1,6 +1,7 @@ { - "BRC4": { - "component": "PlasmoDB", + "veupathdb": { + "build_version": 65, + "component_db": "PlasmoDB", "organism_abbrev": "pfal3D7" }, "added_seq": { diff --git a/src/python/tests/gff3/test_id_allocator.py b/src/python/tests/gff3/test_id_allocator.py index 3fe2c82c9..efc48f66b 100644 --- a/src/python/tests/gff3/test_id_allocator.py +++ b/src/python/tests/gff3/test_id_allocator.py @@ -59,7 +59,7 @@ def _show_diff(result_path: Path, expected_path: Path) -> str: "genome, expected_prefix", [ pytest.param({}, "TMP_PREFIX_", id="Default prefix"), - pytest.param({"BRC4": {"organism_abbrev": "LOREM"}}, "TMP_LOREM_", id="Prefix from genome meta"), + pytest.param({"veupathdb": {"organism_abbrev": "LOREM"}}, "TMP_LOREM_", id="Prefix from genome meta"), ], ) def test_set_prefix(genome: Dict, expected_prefix: str) -> None: diff --git a/src/python/tests/gff3/test_simplifier.py b/src/python/tests/gff3/test_simplifier.py index 41e875cca..3a704ca15 100644 --- a/src/python/tests/gff3/test_simplifier.py +++ b/src/python/tests/gff3/test_simplifier.py @@ -415,16 +415,16 @@ def test_simpler_gff3_skip( id="Genes with bad names, no genome", ), param( - "genome_no_brc4.json", + "genome_no_vpdb.json", "genes_badnames.gff", "genes_badnames_noname.gff", - id="Genes with bad names, genome not BRC4", + id="Genes with bad names, genome not VEuPathDB", ), param( - "genome_brc4.json", + "genome_vpdb.json", "genes_badnames.gff", - "genes_badnames_brc4name.gff", - id="Genes with bad names, genome BRC4", + "genes_badnames_vpdbname.gff", + id="Genes with bad names, genome VEuPathDB", ), ], ) diff --git a/src/python/tests/gff3/test_simplifier/genes_badnames_brc4name.gff b/src/python/tests/gff3/test_simplifier/genes_badnames_vpdbname.gff similarity index 100% rename from src/python/tests/gff3/test_simplifier/genes_badnames_brc4name.gff rename to src/python/tests/gff3/test_simplifier/genes_badnames_vpdbname.gff diff --git a/src/python/tests/gff3/test_simplifier/genome_no_brc4.json b/src/python/tests/gff3/test_simplifier/genome_no_vpdb.json similarity index 100% rename from src/python/tests/gff3/test_simplifier/genome_no_brc4.json rename to src/python/tests/gff3/test_simplifier/genome_no_vpdb.json diff --git a/src/python/tests/gff3/test_simplifier/genome_brc4.json b/src/python/tests/gff3/test_simplifier/genome_vpdb.json similarity index 83% rename from src/python/tests/gff3/test_simplifier/genome_brc4.json rename to src/python/tests/gff3/test_simplifier/genome_vpdb.json index 77d2f895d..bfbd3a570 100644 --- a/src/python/tests/gff3/test_simplifier/genome_brc4.json +++ b/src/python/tests/gff3/test_simplifier/genome_vpdb.json @@ -1,6 +1,7 @@ { - "BRC4": { - "component": "PlasmoDB", + "veupathdb": { + "build_version": 65, + "component_db": "PlasmoDB", "organism_abbrev": "pfal3D7" }, "assembly": { From eade2698c182adbdb6aabd9491f9e14476da1c92 Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Fri, 10 May 2024 13:59:48 +0100 Subject: [PATCH 2/3] make pytest and mypy happy --- src/python/tests/database/test_factory.py | 4 ++-- .../tests/genome_metadata/test_extend/genome.json | 10 +++++----- .../genome_metadata/test_extend/updated_genome.json | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/python/tests/database/test_factory.py b/src/python/tests/database/test_factory.py index 1312b7ee8..99ba2571c 100644 --- a/src/python/tests/database/test_factory.py +++ b/src/python/tests/database/test_factory.py @@ -20,7 +20,7 @@ """ from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from unittest.mock import call, Mock, patch from deepdiff import DeepDiff @@ -129,7 +129,7 @@ def test_format_db_data( output: Expected list of dictionaries with metadata per database. """ - def _get_meta_value(meta_key: str) -> Optional[str]: + def _get_meta_value(meta_key: str) -> Optional[Union[int, str]]: """Return empty string if "species.division" is requested in BRC mode, "Metazoa" otherwise.""" if (meta_key == "species.division") and brc_mode: return "" diff --git a/src/python/tests/genome_metadata/test_extend/genome.json b/src/python/tests/genome_metadata/test_extend/genome.json index bfbd3a570..b6982c533 100644 --- a/src/python/tests/genome_metadata/test_extend/genome.json +++ b/src/python/tests/genome_metadata/test_extend/genome.json @@ -1,9 +1,4 @@ { - "veupathdb": { - "build_version": 65, - "component_db": "PlasmoDB", - "organism_abbrev": "pfal3D7" - }, "assembly": { "accession": "GCA_000002765.1", "provider_name": "RefSeq", @@ -17,5 +12,10 @@ "species": { "scientific_name": "Plasmodium falciparum", "taxonomy_id": 36329 + }, + "veupathdb": { + "build_version": 65, + "component_db": "PlasmoDB", + "organism_abbrev": "pfal3D7" } } \ No newline at end of file diff --git a/src/python/tests/genome_metadata/test_extend/updated_genome.json b/src/python/tests/genome_metadata/test_extend/updated_genome.json index 8e39d4594..2d72f5ee3 100644 --- a/src/python/tests/genome_metadata/test_extend/updated_genome.json +++ b/src/python/tests/genome_metadata/test_extend/updated_genome.json @@ -1,9 +1,4 @@ { - "veupathdb": { - "build_version": 65, - "component_db": "PlasmoDB", - "organism_abbrev": "pfal3D7" - }, "added_seq": { "region_name": [ "CP089275", @@ -24,5 +19,10 @@ "species": { "scientific_name": "Plasmodium falciparum", "taxonomy_id": 36329 + }, + "veupathdb": { + "build_version": 65, + "component_db": "PlasmoDB", + "organism_abbrev": "pfal3D7" } } \ No newline at end of file From 4efe3f5abb69ba12bda71d4435ced5d7301ad71e Mon Sep 17 00:00:00 2001 From: Jorge Alvarez Jarreta Date: Mon, 13 May 2024 10:33:28 +0100 Subject: [PATCH 3/3] mypy considers the output as a generic object --- src/python/tests/database/test_factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/tests/database/test_factory.py b/src/python/tests/database/test_factory.py index 99ba2571c..fac89872f 100644 --- a/src/python/tests/database/test_factory.py +++ b/src/python/tests/database/test_factory.py @@ -20,7 +20,7 @@ """ from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional from unittest.mock import call, Mock, patch from deepdiff import DeepDiff @@ -129,7 +129,7 @@ def test_format_db_data( output: Expected list of dictionaries with metadata per database. """ - def _get_meta_value(meta_key: str) -> Optional[Union[int, str]]: + def _get_meta_value(meta_key: str) -> Optional[Any]: """Return empty string if "species.division" is requested in BRC mode, "Metazoa" otherwise.""" if (meta_key == "species.division") and brc_mode: return ""