Skip to content

Commit

Permalink
Merge pull request #48 from dvklopfenstein/dvk
Browse files Browse the repository at this point in the history
added summarize script
  • Loading branch information
dvklopfenstein authored Dec 19, 2022
2 parents b699481 + c023ff4 commit 52f7bb3
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 36 deletions.
6 changes: 6 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ install:

py:
find src -name \*.py

e:
find src/pmidcite/eutils -name \*.py

t:
find src/tests -regextype posix-extended -regex ".*[a-z]+.py"
Expand Down Expand Up @@ -113,3 +116,6 @@ clean:
clobber_tmp:
rm -rf ./icite
rm -rf ./src/tests/icite

# Run both clobber targets (clobber_tmp and clobber_dist) in a sub-make.
# $(MAKE) is used instead of a literal `make` so that the same make binary and
# the command-line flags (e.g. -n, -j) are passed on to the recursive call.
clobber:
	$(MAKE) -f makefile clobber_tmp clobber_dist
File renamed without changes.
18 changes: 18 additions & 0 deletions src/bin/summarize_papers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python3
"""Summarize NIH citation data for requested papers from the commandline or in files"""

__copyright__ = "Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"

from pmidcite.cli.summarize_papers import SummarizePapersCli # get_argparser
from pmidcite.cfg import get_cfgparser


def main():
    """Run the paper-summary command-line interface using the pmidcite configuration"""
    # prt=None is passed through to get_cfgparser exactly as before
    cfg = get_cfgparser(prt=None)
    summarizer_cli = SummarizePapersCli(cfg)
    summarizer_cli.cli()

if __name__ == '__main__':
main()

# Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved.
75 changes: 75 additions & 0 deletions src/pmidcite/cli/summarize_papers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Summarize NIH citation data for requested papers from the commandline or in files"""

from sys import stdout
from argparse import ArgumentParser
from pmidcite.cli.utils import prt_loc_rcfile
from pmidcite.cli.utils import get_files_exists
from pmidcite.summarize_papers import SummarizePapers

__copyright__ = "Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"


class SummarizePapersCli:
    """Command-line interface that prints a one-line NIH citation summary per input file

    The configuration object must provide ``get_nihgrouper()``. The grouper it
    returns contributes its own command-line options to the parser and is
    handed to ``SummarizePapers.from_file`` for every input file.
    """

    def __init__(self, cfg):
        # cfg: pmidcite configuration object; only get_nihgrouper() is called
        # on it directly here, and it is passed whole to prt_loc_rcfile()
        self.cfg = cfg

    def get_argparser(self):
        """Build the argument parser for summarizing the citations on set(s) of papers

        Returns:
            ArgumentParser with ``-h/--help``, positional ``FILES``, the options
            added by the NIH grouper, and ``--print-rcfile``.
        """
        # add_help=False: -h/--help is declared below as a plain boolean flag
        # so that cli() decides when the help text is printed
        parser = ArgumentParser(
            description="Summarize NIH's citation on a set(s) of papers",
            add_help=False)
        # https://docs.python.org/3/library/argparse.html
        # https://docs.python.org/3/library/argparse.html#action
        parser.add_argument(
            '-h', '--help', action='store_true',
            help='print this help message and exit (also --help)')
        parser.add_argument(
            'files', metavar='FILES', type=str, nargs='*',
            help='File(s) containing NIH citation data for numerous papers with PMIDs')
        # TODO: options considered but not yet implemented:
        #   -i/--infile, -o/--outfile, -f/--force_write, --md
        self.cfg.get_nihgrouper().add_arguments(parser)
        parser.add_argument(
            '--print-rcfile', action='store_true',
            help='Print the location of the pmidcite configuration file (env var: PMIDCITECONF)')
        return parser

    def cli(self):
        """Run citation summary on a set(s) of PMIDs

        Order of precedence:
          1. ``--print-rcfile``: print the configuration-file location and stop.
          2. ``-h/--help``: print the help message and stop.
          3. No existing input file: print the help message with the reason.
          4. Otherwise print one summary line per existing input file.
        """
        # Imported locally so this method does not rely on a module-level
        # import that the file does not already have
        from sys import stderr

        argparser = self.get_argparser()
        args = argparser.parse_args()
        if args.print_rcfile:
            prt_loc_rcfile(self.cfg, stdout)
            return
        if args.help:
            argparser.print_help()
            print('\nHelp message printed because: -h or --help == True')
            return
        # Warn on stderr about missing files so that a mistyped filename is not
        # silently dropped, while stdout keeps only the summary lines
        files = get_files_exists(args.files, stderr)
        if not files:
            argparser.print_help()
            print('\nHelp message printed because: no existing FILES were given')
            return
        # NOTE(review): the grouper options parsed into args are not applied to
        # this grouper anywhere in this method -- confirm that is intended
        nih_grouper = self.cfg.get_nihgrouper()
        for filename in files:
            sumpap = SummarizePapers.from_file(filename, nih_grouper)
            print(sumpap.str_oneline())


# Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved.
23 changes: 20 additions & 3 deletions src/pmidcite/cli/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Read a file created by pmidcite and write simple text file of PMIDs"""

__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"
__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"

from os.path import exists
from os.path import split
Expand Down Expand Up @@ -48,6 +48,16 @@ def get_all(pmid_list, fin_pmids, top_cit_ref=None):
print(' MISSING: {FILE}'.format(FILE=fin))
return pmids

def get_files_exists(files, prt=None):
    """Return the filenames that exist, in input order; warn on prt about the others

    If prt is truthy, one '**WARNING' line is written to it for each filename
    that does not exist. With the default prt=None, missing names are dropped
    without any message.
    """
    found = []
    for fname in files:
        if not exists(fname):
            if prt:
                prt.write(f'**WARNING: FILE NOT EXIST({fname})\n')
            continue
        found.append(fname)
    return found

def _read_pmids(fin, top_cit_ref):
"""Read PMIDs from a file. One PMID per line."""
pmids = []
Expand Down Expand Up @@ -125,5 +135,12 @@ def _get_outfile_resolved(outfile, append_outfile):
return append_outfile
return None

def prt_loc_rcfile(cfg, prt=stdout):
    """Write a '--print-rcfile' banner to prt, ask cfg to print its config file, end with a newline"""
    banner = '\n**NOTE FROM ARG(--print-rcfile):\n'
    prt.write(banner)
    # NOTE(review): prt is not forwarded to cfg.prt_cfgfile(); where that
    # method writes is not visible from here -- confirm it matches prt
    cfg.prt_cfgfile()
    prt.write('\n')



# Copyright (C) 2019-present DV Klopfenstein. All rights reserved.
# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved.
10 changes: 5 additions & 5 deletions src/pmidcite/eutils/cmds/efetch.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Fetch items and write"""
# https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch

__author__ = 'DV Klopfenstein'
__copyright__ = "Copyright (C) 2016-present DV Klopfenstein. All rights reserved."
__author__ = 'DV Klopfenstein, PhD'
__copyright__ = "Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved."
__license__ = "GPL"

import sys
Expand All @@ -17,7 +17,7 @@ class EFetch(CommandBase):
#### def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100, **kws):
def __init__(self, rettype='medline', retmode='text', batch_size=100, **kws):
kws_base = {k:v for k, v in kws.items() if k in CommandBase.exp_kws}
print('FFFFFFFFFFFFFFFFFFFF', kws_base)
##print('FFFFFFFFFFFFFFFFFFFF', kws_base)
super(EFetch, self).__init__(**kws_base)

def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches):
Expand All @@ -29,7 +29,7 @@ def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches):
for start in range(0, num_fetches, self.batch_size):
## msg = msg_fmt.format(querykey, database, self.batch_size, start, self.desc)
## sys.stdout.write(msg)
print('SSSSSSSSSSSSSSSSSSSSSSSTART:', start)
## print('SSSSSSSSSSSSSSSSSSSSSSSTART:', start)
txt = self.efetch_txt(start, self.batch_size, database, webenv, querykey)

if txt is not None:
Expand Down Expand Up @@ -73,4 +73,4 @@ def efetch_txt(self, start, retmax, database, webenv, querykey):
sys.stdout.write(" querykey: {}\n".format(querykey))


# Copyright (C) 2016-present DV Klopfenstein. All rights reserved.
# Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved.
11 changes: 6 additions & 5 deletions src/pmidcite/icite/dnldr/pmid_dnlder.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Given a PubMed ID (PMID), download a list of publications which cite and reference it"""
# https://icite.od.nih.gov/api

__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"
__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"

from os.path import exists
from os.path import join
Expand Down Expand Up @@ -53,7 +53,8 @@ def _dnld_icites(self, pmid2foutpy):
for nih_dict in nihdicts:
s_wrpy(pmid2foutpy[nih_dict['pmid']], nih_dict)
s_get_group = self.nihgrouper.get_group
return [NIHiCiteEntry(d, s_get_group(d['nih_percentile'])) for d in nihdicts]
# pylint: disable=line-too-long
return [NIHiCiteEntry.from_jsondct(d, s_get_group(d['nih_percentile'])) for d in nihdicts]
return []

def get_icite(self, pmid):
Expand All @@ -63,7 +64,7 @@ def get_icite(self, pmid):
nih_dict = self.api.dnld_nihdict(pmid)
if nih_dict:
self._wrpy(file_pmid, nih_dict)
return NIHiCiteEntry(
return NIHiCiteEntry.from_jsondct(
nih_dict,
self.nihgrouper.get_group(nih_dict['nih_percentile']))
return self.loader.load_icite(file_pmid) # NIHiCiteEntry
Expand Down Expand Up @@ -99,4 +100,4 @@ def _load_icites(self, pmids, pmid2py):
return nihentries_loaded


# Copyright (C) 2019-present DV Klopfenstein. All rights reserved.
# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved.
11 changes: 6 additions & 5 deletions src/pmidcite/icite/dnldr/pmid_dnlder_only.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Given a PubMed ID (PMID), download a list of publications which cite and reference it"""
# https://icite.od.nih.gov/api

__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"
__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"

from pmidcite.icite.dnldr.pmid_dnlder_base import NIHiCiteDownloaderBase
from pmidcite.icite.entry import NIHiCiteEntry
Expand All @@ -24,17 +24,18 @@ def _dnld_icites(self, pmids):
nihdicts = self.api.dnld_nihdicts(pmids)
if nihdicts:
s_get_group = self.nihgrouper.get_group
return [NIHiCiteEntry(d, s_get_group(d['nih_percentile'])) for d in nihdicts]
# pylint: disable=line-too-long
return [NIHiCiteEntry.from_jsondct(d, s_get_group(d['nih_percentile'])) for d in nihdicts]
return []

def get_icite(self, pmid):
"""Load or download NIH iCite data for requested PMID"""
nih_dict = self.api.dnld_nihdict(pmid)
if nih_dict:
return NIHiCiteEntry(
return NIHiCiteEntry.from_jsondct(
nih_dict,
self.nihgrouper.get_group(nih_dict['nih_percentile']))
return None


# Copyright (C) 2019-present DV Klopfenstein. All rights reserved.
# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved.
9 changes: 5 additions & 4 deletions src/pmidcite/icite/dnldr/pmid_loader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Given a PubMed ID (PMID), return a list of publications which cite it"""

__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"
__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"

from sys import stdout
from os.path import join
Expand Down Expand Up @@ -64,7 +64,8 @@ def load_icite(self, file_pmid):
mod = module_from_spec(spec)
spec.loader.exec_module(mod)
## print('LLLLLLLLLLLLL load_icite', file_pmid)
return NIHiCiteEntry(mod.ICITE, self.nih_grouper.get_group(mod.ICITE['nih_percentile']))
# pylint: disable=line-too-long
return NIHiCiteEntry.from_jsondct(mod.ICITE, self.nih_grouper.get_group(mod.ICITE['nih_percentile']))
return None

def load_pmid(self, pmid):
Expand All @@ -84,4 +85,4 @@ def _get_pmids_linked(self, icites_top):
## return pmids_linked


# Copyright (C) 2019-present DV Klopfenstein. All rights reserved.
# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved.
34 changes: 20 additions & 14 deletions src/pmidcite/icite/entry.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Holds NIH iCite data for one PubMed ID (PMID)"""
# https://icite.od.nih.gov/api

__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"
__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"

from sys import stdout

Expand Down Expand Up @@ -56,19 +56,25 @@ class NIHiCiteEntry:
author1='authors',
title='title')

def __init__(self, icite_dct, nih_group):
self.pmid = icite_dct['pmid']
self.dct = icite_dct
def __init__(self, pmid=None, dct=None):
self.pmid = pmid
self.dct = dct

@classmethod
def from_jsondct(cls, icite_dct, nih_group_num):
"""Construct NIHiCiteEntry from jsondct downloaded from NIH using Entrez utils"""
cls_dct = icite_dct
nih_perc = icite_dct['nih_percentile']
self.dct['nih_group'] = nih_group # 0 - 5
cls_dct['nih_group'] = nih_group_num # 0 - 5
# pylint: disable=line-too-long
self.dct['num_auth'] = len(icite_dct['authors'])
self.dct['num_clin'] = len(icite_dct['cited_by_clin'])
self.dct['num_cite'] = len(icite_dct['cited_by'])
num_cites_all = len(set(self.dct['cited_by_clin']).union(self.dct['cited_by']))
self.dct['num_cites_all'] = num_cites_all
self.dct['nih_perc'] = round(nih_perc) if nih_perc is not None else 110 + num_cites_all
self.dct['num_refs'] = len(icite_dct['references'])
cls_dct['num_auth'] = len(icite_dct['authors'])
cls_dct['num_clin'] = len(icite_dct['cited_by_clin'])
cls_dct['num_cite'] = len(icite_dct['cited_by'])
num_cites_all = len(set(cls_dct['cited_by_clin']).union(cls_dct['cited_by']))
cls_dct['num_cites_all'] = num_cites_all
cls_dct['nih_perc'] = round(nih_perc) if nih_perc is not None else 110 + num_cites_all
cls_dct['num_refs'] = len(icite_dct['references'])
return cls(icite_dct['pmid'], cls_dct)

## TBD:
## def __eq__(self, rhs):
Expand Down Expand Up @@ -222,4 +228,4 @@ def __lt__(self, rhs):
return self.pmid < rhs.pmid


# Copyright (C) 2019-present DV Klopfensteinr,. All rights reserved.
# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved.
11 changes: 11 additions & 0 deletions src/pmidcite/icite/nih_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,22 @@ class NihGrouper:

ntobj = namedtuple('NtNihGroup', 'val txt')

group_chrs = ['0', '1', '2', '3', '4', 'i']

def __init__(self, group1_min=2.1, group2_min=15.7, group3_min=83.9, group4_min=97.5):
self.min1 = group1_min
self.min2 = group2_min
self.min3 = group3_min
self.min4 = group4_min
#print(f'group1_min: {group1_min}')
#print(f'group2_min: {group2_min}')
#print(f'group3_min: {group3_min}')
#print(f'group4_min: {group4_min}')

def str_group(self, nih_percentile):
    """Return the one-character label of the group that get_group assigns to nih_percentile"""
    num = self.get_group(nih_percentile)
    # Group 5 is shown as the letter 'i'; every other group is shown as str(num)
    if num == 5:
        return 'i'
    return str(num)

def get_group(self, nih_percentile):
"""Assign group numbers to the NIH percentile values using the 68-95-99.7 rule"""
Expand Down
15 changes: 15 additions & 0 deletions src/pmidcite/scripts/summarize_papers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Summarize NIH citation data for requested papers from the commandline or in files"""

__copyright__ = "Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved."
__author__ = "DV Klopfenstein, PhD"

from pmidcite.cli.summarize_papers import SummarizePapersCli # get_argparser
from pmidcite.cfg import get_cfgparser


def main():
    """Console-script entry point: build the configuration and run the summary CLI"""
    # prt=None is passed through to get_cfgparser exactly as before
    cfgparser = get_cfgparser(prt=None)
    runner = SummarizePapersCli(cfgparser)
    runner.cli()


# Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved.

0 comments on commit 52f7bb3

Please sign in to comment.