Skip to content

Commit

Permalink
Fix loading_bar in tests
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed May 5, 2024
1 parent 7d10c18 commit 01f0548
Show file tree
Hide file tree
Showing 46 changed files with 3,064 additions and 1,064 deletions.
9 changes: 6 additions & 3 deletions tests/godagtimed_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,23 @@

import os
import timeit

from goatools.test_data.godag_timed import GoDagTimed
from goatools.test_data.godag_timed import prt_hms
from goatools.base import download_go_basic_obo

REPO = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")


def test_deprecatedloc_godagtimed():
"""Test deprecated location of GoDagTimed"""
tic = timeit.default_timer()
prt_hms(tic, 'prt_hms TESTED')
prt_hms(tic, "prt_hms TESTED")

fin_go_obo = os.path.join(REPO, "go-basic.obo")
download_go_basic_obo(fin_go_obo, loading_bar=None)
download_go_basic_obo(fin_go_obo)
GoDagTimed(fin_go_obo)

if __name__ == '__main__':

if __name__ == "__main__":
test_deprecatedloc_godagtimed()
60 changes: 31 additions & 29 deletions tests/i148_semsim_lin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,57 +4,59 @@

import os
import sys

from itertools import combinations_with_replacement as combo_w_rplc
from goatools.base import get_godag
from goatools.associations import dnld_annofile

from goatools.anno.gpad_reader import GpadReader
#### from goatools.semantic import semantic_similarity
from goatools.semantic import TermCounts
#### from goatools.semantic import get_info_content
#### from goatools.semantic import deepest_common_ancestor
from goatools.semantic import resnik_sim
from goatools.associations import dnld_annofile
from goatools.base import get_godag
from goatools.semantic import lin_sim
#### from goatools.godag.consts import NS2GO
from goatools.semantic import TermCounts

REPO = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")

def test_i148_semsim_lin(prt=sys.stdout):

def test_i148_semsim_lin():
"""Test for issue 148, Lin Similarity if a term has no annotations"""
fin_gpad = os.path.join(REPO, 'goa_human.gpad')
dnld_annofile(fin_gpad, 'gpad')
fin_gpad = os.path.join(REPO, "goa_human.gpad")
dnld_annofile(fin_gpad, "gpad")

godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)
godag = get_godag(os.path.join(REPO, "go-basic.obo"))
annoobj = GpadReader(fin_gpad, godag=godag)

goids = [
'GO:0042581',
'GO:0101002',
'GO:0042582',
'GO:0070820',
'GO:0008021',
'GO:0005766',
'GO:0016591']

associations = annoobj.get_id2gos('CC')
"GO:0042581",
"GO:0101002",
"GO:0042582",
"GO:0070820",
"GO:0008021",
"GO:0005766",
"GO:0016591",
]

associations = annoobj.get_id2gos("CC")
termcounts = TermCounts(godag, associations)

# Calculate Lin values
p2v = {frozenset([a, b]): lin_sim(a, b, godag, termcounts) for a, b in combo_w_rplc(goids, 2)}
p2v = {
frozenset([a, b]): lin_sim(a, b, godag, termcounts)
for a, b in combo_w_rplc(goids, 2)
}
_prt_values(goids, p2v, prt=sys.stdout)


def _prt_values(goids, p2v, prt=sys.stdout):
"""Print values"""
prt.write(' {HDR}\n'.format(HDR=' '.join(goids)))
none = 'None '
prt.write(" {HDR}\n".format(HDR=" ".join(goids)))
none = "None "
for go_row in goids:
prt.write('{GO} '.format(GO=go_row))
prt.write("{GO} ".format(GO=go_row))
for go_col in goids:
val = p2v[frozenset([go_row, go_col])]
txt = '{L:<9.6} '.format(L=val) if val is not None else none
prt.write('{T:10} '.format(T=txt))
prt.write('\n')
txt = "{L:<9.6} ".format(L=val) if val is not None else none
prt.write("{T:10} ".format(T=txt))
prt.write("\n")


if __name__ == '__main__':
if __name__ == "__main__":
test_i148_semsim_lin()
7 changes: 5 additions & 2 deletions tests/test_altid_godag.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

from goatools.base import get_godag


def test_alt_id():
"""Ensure that alternate GO IDs."""
obo_dag = get_godag("go-basic.obo", loading_bar=None)
obo_dag = get_godag("go-basic.obo")
alt_ids = get_altids(obo_dag)
obo_goids = obo_dag.keys()
obo_goids_set = set(obo_goids)
assert len(alt_ids.intersection(obo_goids_set)) == len(alt_ids)


def get_altids(obo_dag):
"""Get all alternate GO ids for entire go-basic.obo DAG."""
alt_ids_all = set()
Expand All @@ -19,5 +21,6 @@ def get_altids(obo_dag):
alt_ids_all |= set(alt_ids_cur)
return alt_ids_all

if __name__ == '__main__':

if __name__ == "__main__":
test_alt_id()
84 changes: 48 additions & 36 deletions tests/test_anno_rd_gene2go.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,69 @@
#!/usr/bin/env python
"""Ensure NEW results are equal to OLD results: read_ncbi_gene2go."""

from __future__ import print_function

import os
import sys

from collections import defaultdict

from goatools.associations import dnld_ncbi_gene_file
from goatools.anno.genetogo_reader import Gene2GoReader


REPO = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")


def test_anno_read():
"""Test reading an NCBI gene2go annotation file."""
fin_anno = os.path.join(REPO, 'gene2go')
fin_anno = os.path.join(REPO, "gene2go")
_dnld_anno(fin_anno)
#godag = get_godag(os.path.join(REPO, 'go-basic.obo'), loading_bar=None)

print('\nTEST STORING ONLY ONE SPECIES')
print("\nTEST STORING ONLY ONE SPECIES")
obj = Gene2GoReader(fin_anno)
assert len(obj.taxid2asscs) == 1
obj.prt_summary_anno2ev()

print('\nTEST STORING ALL SPECIES')
print("\nTEST STORING ALL SPECIES")
obj = Gene2GoReader(fin_anno, taxids=True)
assert len(obj.taxid2asscs) > 1, '**EXPECTED MORE: len(taxid2asscs) == {N}'.format(
N=len(obj.taxid2asscs))
assert len(obj.taxid2asscs) > 1, "**EXPECTED MORE: len(taxid2asscs) == {N}".format(
N=len(obj.taxid2asscs)
)
obj.prt_summary_anno2ev()

print('\nTEST GETTING ASSOCIATIONS FOR ONE SPECIES')
print("\nTEST GETTING ASSOCIATIONS FOR ONE SPECIES")
print("\nTEST read_ncbi_gene2go_old: [9606]")
old_g2go_hsa = read_ncbi_gene2go_old(fin_anno, [9606])
## new_g2go_hsa = read_ncbi_gene2go(fin_anno, [9606])
new_g2go_hsa = obj.get_id2gos_nss(taxids=[9606])
assert old_g2go_hsa == new_g2go_hsa, \
'OLD({O}) != NEW({N})'.format(O=len(old_g2go_hsa), N=len(new_g2go_hsa))
assert old_g2go_hsa == new_g2go_hsa, "OLD({O}) != NEW({N})".format(
O=len(old_g2go_hsa), N=len(new_g2go_hsa)
)
print("\nTEST read_ncbi_gene2go_old: 9606")
## assert old_g2go_hsa == read_ncbi_gene2go(fin_anno, 9606)
assert old_g2go_hsa == obj.get_id2gos_nss(taxid=9606)

print('\nTEST GETTING REVERSE ASSOCIATIONS: GO2GENES')
print("\nTEST GETTING REVERSE ASSOCIATIONS: GO2GENES")
go2geneids = True
print("\nTEST read_ncbi_gene2go_old: 9606 go2geneids=True")
old_go2gs_hsa = read_ncbi_gene2go_old(fin_anno, [9606], go2geneids=go2geneids)
## new_go2gs_hsa = read_ncbi_gene2go(fin_anno, 9606, go2geneids=go2geneids)
new_go2gs_hsa = obj.get_id2gos_nss(taxid=9606, go2geneids=go2geneids)
print('OLD:', next(iter(old_go2gs_hsa.items())))
print('NEW:', next(iter(new_go2gs_hsa.items())))
assert old_go2gs_hsa == new_go2gs_hsa, \
'OLD({O}) != NEW({N})'.format(O=len(old_go2gs_hsa), N=len(new_go2gs_hsa))
print("OLD:", next(iter(old_go2gs_hsa.items())))
print("NEW:", next(iter(new_go2gs_hsa.items())))
assert old_go2gs_hsa == new_go2gs_hsa, "OLD({O}) != NEW({N})".format(
O=len(old_go2gs_hsa), N=len(new_go2gs_hsa)
)

print('\nTEST RETURNING ASSOCIATIONS FOR SELECTED EVIDENCE CODES')
evcodes = set(['ISO', 'IKR'])
print("\nTEST RETURNING ASSOCIATIONS FOR SELECTED EVIDENCE CODES")
evcodes = set(["ISO", "IKR"])
print("\nTEST read_ncbi_gene2go_old: 9606 evcodes=True")
old_gene2gos_evc = read_ncbi_gene2go_old(fin_anno, taxids=[9606], ev_include=evcodes)
old_gene2gos_evc = read_ncbi_gene2go_old(
fin_anno, taxids=[9606], ev_include=evcodes
)
## new_gene2gos_evc = read_ncbi_gene2go(fin_anno, 9606, ev_include=evcodes)
new_gene2gos_evc = obj.get_id2gos_nss(taxid=9606, ev_include=evcodes)
print('OLD:', next(iter(old_gene2gos_evc.items())))
print('NEW:', next(iter(new_gene2gos_evc.items())))
print("OLD:", next(iter(old_gene2gos_evc.items())))
print("NEW:", next(iter(new_gene2gos_evc.items())))
assert old_gene2gos_evc == new_gene2gos_evc


Expand All @@ -67,10 +72,11 @@ def _dnld_anno(file_anno):
if os.path.exists(file_anno):
assert os.path.getsize(file_anno) > 1000000, "BAD ANNO({F})".format(F=file_anno)
return
dnld_ncbi_gene_file(file_anno, loading_bar=None)
dnld_ncbi_gene_file(file_anno)
assert os.path.isfile(file_anno), "MISSING ANNO({F})".format(F=file_anno)
assert os.path.getsize(file_anno) > 1000000, "BAD ANNO({F})".format(F=file_anno)


# Formerly in goatools/associations.py file
def read_ncbi_gene2go_old(fin_gene2go, taxids=None, **kws):
"""Read NCBI's gene2go. Return gene2go data for user-specified taxids."""
Expand All @@ -79,26 +85,30 @@ def read_ncbi_gene2go_old(fin_gene2go, taxids=None, **kws):
id2gos = defaultdict(set)
# Optional detailed associations split by taxid and having both ID2GOs & GO2IDs
# e.g., taxid2asscs = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
taxid2asscs = kws.get('taxid2asscs', None)
evs = kws.get('ev_include', None)
taxid2asscs = kws.get("taxid2asscs", None)
evs = kws.get("ev_include", None)
# By default, return id2gos. User can cause go2geneids to be returned by:
# >>> read_ncbi_gene2go(..., go2geneids=True)
b_geneid2gos = not kws.get('go2geneids', False)
if taxids is None: # Default taxid is Human
b_geneid2gos = not kws.get("go2geneids", False)
if taxids is None: # Default taxid is Human
taxids = [9606]
with open(fin_gene2go) as ifstrm:
with open(fin_gene2go, encoding="utf-8") as ifstrm:
# pylint: disable=too-many-nested-blocks
for line in ifstrm:
if line[0] != '#': # Line contains data. Not a comment
line = line.rstrip() # chomp
flds = line.split('\t')
if line[0] != "#": # Line contains data. Not a comment
line = line.rstrip() # chomp
flds = line.split("\t")
if len(flds) >= 5:
taxid_curr, geneid, go_id, evidence, qualifier = flds[:5]
taxid_curr = int(taxid_curr)
# NOT: Used when gene is expected to have function F, but does NOT.
# ND : GO function not seen after exhaustive annotation attempts to the gene.
## if taxid_curr in taxids and qualifier != 'NOT' and evidence != 'ND':
if taxid_curr in taxids and 'NOT' not in qualifier and evidence != 'ND':
if (
taxid_curr in taxids
and "NOT" not in qualifier
and evidence != "ND"
):
# Optionally specify a subset of GOs based on their evidence.
if evs is None or evidence in evs:
geneid = int(geneid)
Expand All @@ -107,11 +117,13 @@ def read_ncbi_gene2go_old(fin_gene2go, taxids=None, **kws):
else:
id2gos[go_id].add(geneid)
if taxid2asscs is not None:
taxid2asscs[taxid_curr]['ID2GOs'][geneid].add(go_id)
taxid2asscs[taxid_curr]['GO2IDs'][go_id].add(geneid)
sys.stdout.write(" {N:,} items READ: {ASSC}\n".format(N=len(id2gos), ASSC=fin_gene2go))
return id2gos # return simple associations
taxid2asscs[taxid_curr]["ID2GOs"][geneid].add(go_id)
taxid2asscs[taxid_curr]["GO2IDs"][go_id].add(geneid)
sys.stdout.write(
" {N:,} items READ: {ASSC}\n".format(N=len(id2gos), ASSC=fin_gene2go)
)
return id2gos # return simple associations


if __name__ == '__main__':
if __name__ == "__main__":
test_anno_read()
Loading

0 comments on commit 01f0548

Please sign in to comment.