From 080f31a992f76d8c154d9c02cff7c7708dc39ebe Mon Sep 17 00:00:00 2001 From: dvklopfenstein Date: Tue, 13 Dec 2022 12:25:04 -0500 Subject: [PATCH 1/5] Added commented prints for group dividers --- src/pmidcite/icite/nih_grouper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pmidcite/icite/nih_grouper.py b/src/pmidcite/icite/nih_grouper.py index 47b86d9..d008b1c 100644 --- a/src/pmidcite/icite/nih_grouper.py +++ b/src/pmidcite/icite/nih_grouper.py @@ -16,6 +16,10 @@ def __init__(self, group1_min=2.1, group2_min=15.7, group3_min=83.9, group4_min= self.min2 = group2_min self.min3 = group3_min self.min4 = group4_min + #print(f'group1_min: {group1_min}') + #print(f'group2_min: {group2_min}') + #print(f'group3_min: {group3_min}') + #print(f'group4_min: {group4_min}') def get_group(self, nih_percentile): """Assign group numbers to the NIH percentile values using the 68-95-99.7 rule""" From 6bf292d47e8b013a9cc3a31c48497bbb83881edd Mon Sep 17 00:00:00 2001 From: dvklopfenstein Date: Sat, 17 Dec 2022 11:09:54 -0500 Subject: [PATCH 2/5] Add find src/pmidcite/eutils make target, e --- makefile | 3 +++ versioneer.py => src/archive/versioneer.py | 0 2 files changed, 3 insertions(+) rename versioneer.py => src/archive/versioneer.py (100%) diff --git a/makefile b/makefile index 026e5b0..918d279 100644 --- a/makefile +++ b/makefile @@ -5,6 +5,9 @@ install: py: find src -name \*.py + +e: + find src/pmidcite/eutils -name \*.py t: find src/tests -regextype posix-extended -regex ".*[a-z]+.py" diff --git a/versioneer.py b/src/archive/versioneer.py similarity index 100% rename from versioneer.py rename to src/archive/versioneer.py From 14af43be5bfdd2dfbb77f958ed403c855e64683e Mon Sep 17 00:00:00 2001 From: dvklopfenstein Date: Mon, 19 Dec 2022 13:05:30 -0500 Subject: [PATCH 3/5] comment out prints --- src/pmidcite/eutils/cmds/efetch.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pmidcite/eutils/cmds/efetch.py b/src/pmidcite/eutils/cmds/efetch.py index 28f9ada..b8ea1fa 100644 --- a/src/pmidcite/eutils/cmds/efetch.py +++ b/src/pmidcite/eutils/cmds/efetch.py @@ -1,8 +1,8 @@ """Fetch items and write""" # https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch -__author__ = 'DV Klopfenstein' -__copyright__ = "Copyright (C) 2016-present DV Klopfenstein. All rights reserved." +__author__ = 'DV Klopfenstein, PhD' +__copyright__ = "Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved." __license__ = "GPL" import sys @@ -17,7 +17,7 @@ class EFetch(CommandBase): #### def __init__(self, retmax=10000, rettype='medline', retmode='text', batch_size=100, **kws): def __init__(self, rettype='medline', retmode='text', batch_size=100, **kws): kws_base = {k:v for k, v in kws.items() if k in CommandBase.exp_kws} - print('FFFFFFFFFFFFFFFFFFFF', kws_base) + ##print('FFFFFFFFFFFFFFFFFFFF', kws_base) super(EFetch, self).__init__(**kws_base) def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches): @@ -29,7 +29,7 @@ def efetch_and_write(self, ostrm, database, webenv, querykey, num_fetches): for start in range(0, num_fetches, self.batch_size): ## msg = msg_fmt.format(querykey, database, self.batch_size, start, self.desc) ## sys.stdout.write(msg) - print('SSSSSSSSSSSSSSSSSSSSSSSTART:', start) + ## print('SSSSSSSSSSSSSSSSSSSSSSSTART:', start) txt = self.efetch_txt(start, self.batch_size, database, webenv, querykey) if txt is not None: @@ -73,4 +73,4 @@ def efetch_txt(self, start, retmax, database, webenv, querykey): sys.stdout.write(" querykey: {}\n".format(querykey)) -# Copyright (C) 2016-present DV Klopfenstein. All rights reserved. +# Copyright (C) 2016-present DV Klopfenstein, PhD. All rights reserved. From 555c05555f094126a44282346dddf4d086dd5d88 Mon Sep 17 00:00:00 2001 From: dvklopfenstein Date: Mon, 19 Dec 2022 13:06:02 -0500 Subject: [PATCH 4/5] Add PhD to author --- src/pmidcite/scripts/summarize_papers.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100755 src/pmidcite/scripts/summarize_papers.py diff --git a/src/pmidcite/scripts/summarize_papers.py b/src/pmidcite/scripts/summarize_papers.py new file mode 100755 index 0000000..c3e677e --- /dev/null +++ b/src/pmidcite/scripts/summarize_papers.py @@ -0,0 +1,15 @@ +"""Summarize NIH citation data for requested papers from the commandline or in files""" + +__copyright__ = "Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" + +from pmidcite.cli.summarize_papers import SummarizePapersCli # get_argparser +from pmidcite.cfg import get_cfgparser + + +def main(): + """Summarize NIH citation data for requested papers from the commandline or in files""" + SummarizePapersCli(get_cfgparser(prt=None)).cli() + + +# Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved. From c023ff495bd36c0714a3b71d491c3f697391dd2c Mon Sep 17 00:00:00 2001 From: dvklopfenstein Date: Mon, 19 Dec 2022 17:23:08 -0500 Subject: [PATCH 5/5] Add summarize fnc for groups of papers cites --- makefile | 3 + src/bin/summarize_papers.py | 18 +++++ src/pmidcite/cli/summarize_papers.py | 75 ++++++++++++++++++++ src/pmidcite/cli/utils.py | 23 +++++- src/pmidcite/icite/dnldr/pmid_dnlder.py | 11 +-- src/pmidcite/icite/dnldr/pmid_dnlder_only.py | 11 +-- src/pmidcite/icite/dnldr/pmid_loader.py | 9 +-- src/pmidcite/icite/entry.py | 34 +++++---- src/pmidcite/icite/nih_grouper.py | 7 ++ 9 files changed, 160 insertions(+), 31 deletions(-) create mode 100755 src/bin/summarize_papers.py create mode 100644 src/pmidcite/cli/summarize_papers.py diff --git a/makefile b/makefile index 918d279..4b9b8f7 100644 --- a/makefile +++ b/makefile @@ -116,3 +116,6 @@ clean: clobber_tmp: rm -rf ./icite rm -rf ./src/tests/icite + +clobber: + make -f makefile clobber_tmp clobber_dist diff --git a/src/bin/summarize_papers.py b/src/bin/summarize_papers.py new file mode 100755 index 0000000..dac3501 --- /dev/null +++ b/src/bin/summarize_papers.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Summarize NIH citation data for requested papers from the commandline or in files""" + +__copyright__ = "Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" + +from pmidcite.cli.summarize_papers import SummarizePapersCli # get_argparser +from pmidcite.cfg import get_cfgparser + + +def main(): + """Summarize NIH citation data for requested papers from the commandline or in files""" + SummarizePapersCli(get_cfgparser(prt=None)).cli() + +if __name__ == '__main__': + main() + +# Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved. diff --git a/src/pmidcite/cli/summarize_papers.py b/src/pmidcite/cli/summarize_papers.py new file mode 100644 index 0000000..08e96bb --- /dev/null +++ b/src/pmidcite/cli/summarize_papers.py @@ -0,0 +1,75 @@ +"""Summarize NIH citation data for requested papers from the commandline or in files""" + +from sys import stdout +from argparse import ArgumentParser +from pmidcite.cli.utils import prt_loc_rcfile +from pmidcite.cli.utils import get_files_exists +from pmidcite.summarize_papers import SummarizePapers + +__copyright__ = "Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" + + +class SummarizePapersCli: + """Summarize NIH citation data for requested papers from the commandline or in files""" + + def __init__(self, cfg): + self.cfg = cfg + + def get_argparser(self): + """Argument parser for summarizing the citations on set(s) of papers""" + parser = ArgumentParser( + description="Summarize NIH's citation on a set(s) of papers", + add_help=False) + ##cfg = self.cfg + # https://docs.python.org/3/library/argparse.html + # https://docs.python.org/3/library/argparse.html#action + # - PMIDs ---------------------------------------------------------------------------- + parser.add_argument( + '-h', '--help', action='store_true', + help='print this help message and exit (also --help)') + parser.add_argument( + 'files', metavar='FILES', type=str, nargs='*', + help='File(s) containing NIH citation data for numerous papers with PMIDs') + ##parser.add_argument( + ## '-i', '--infile', nargs='*', + ## help='Files containing NIH citation data for numerous papers with PMIDs') + ##parser.add_argument( + ## '-o', '--outfile', + ## help='Write current citation report to an ASCII text file.') + ##parser.add_argument( + ## '-f', '--force_write', action='store_true', + ## help='if an existing outfile file exists, overwrite it.') + + self.cfg.get_nihgrouper().add_arguments(parser) + ##parser.add_argument( + ## '--md', action='store_true', + ## help='Print using markdown table format.') + parser.add_argument( + '--print-rcfile', action='store_true', + help='Print the location of the pmidcite configuration file (env var: PMIDCITECONF)') + return parser + + + def cli(self): + """Run citation summary on a set(s) of PMIDs""" + argparser = self.get_argparser() + args = argparser.parse_args() + print('ARGS CITE SUMMARY ../pmidcite/src/pmidcite/cli/summarize_papers.py', args) + if args.print_rcfile: + prt_loc_rcfile(self.cfg, stdout) + return + files = get_files_exists(args.files) + if args.help or not files: + argparser.print_help() + print('\nHelp message printed because: -h or --help == True') + return + ##self._run(args, argparser) + nih_grouper = self.cfg.get_nihgrouper() + for filename in files: + sumpap = SummarizePapers.from_file(filename, nih_grouper) + print(sumpap.str_oneline()) + return + + +# Copyright (C) 2022-present, DV Klopfenstein, PhD. All rights reserved. diff --git a/src/pmidcite/cli/utils.py b/src/pmidcite/cli/utils.py index 6a72718..c2190c3 100644 --- a/src/pmidcite/cli/utils.py +++ b/src/pmidcite/cli/utils.py @@ -1,7 +1,7 @@ """Read a file created by pmidcite and write simple text file of PMIDs""" -__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved." -__author__ = "DV Klopfenstein" +__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" from os.path import exists from os.path import split @@ -48,6 +48,16 @@ def get_all(pmid_list, fin_pmids, top_cit_ref=None): print(' MISSING: {FILE}'.format(FILE=fin)) return pmids +def get_files_exists(files, prt=None): + """Get the files that exist""" + ret = [] + for filename in files: + if exists(filename): + ret.append(filename) + elif prt: + prt.write(f'**WARNING: FILE NOT EXIST({filename})\n') + return ret + def _read_pmids(fin, top_cit_ref): """Read PMIDs from a file. One PMID per line.""" pmids = [] @@ -125,5 +135,12 @@ def _get_outfile_resolved(outfile, append_outfile): return append_outfile return None +def prt_loc_rcfile(cfg, prt=stdout): + """Print location of configuration file""" + prt.write('\n**NOTE FROM ARG(--print-rcfile):\n') + cfg.prt_cfgfile() + prt.write('\n') + + -# Copyright (C) 2019-present DV Klopfenstein. All rights reserved. +# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved. diff --git a/src/pmidcite/icite/dnldr/pmid_dnlder.py b/src/pmidcite/icite/dnldr/pmid_dnlder.py index 0df7ba3..ccd05b5 100644 --- a/src/pmidcite/icite/dnldr/pmid_dnlder.py +++ b/src/pmidcite/icite/dnldr/pmid_dnlder.py @@ -1,8 +1,8 @@ """Given a PubMed ID (PMID), download a list of publications which cite and reference it""" # https://icite.od.nih.gov/api -__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved." -__author__ = "DV Klopfenstein" +__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" from os.path import exists from os.path import join @@ -53,7 +53,8 @@ def _dnld_icites(self, pmid2foutpy): for nih_dict in nihdicts: s_wrpy(pmid2foutpy[nih_dict['pmid']], nih_dict) s_get_group = self.nihgrouper.get_group - return [NIHiCiteEntry(d, s_get_group(d['nih_percentile'])) for d in nihdicts] + # pylint: disable=line-too-long + return [NIHiCiteEntry.from_jsondct(d, s_get_group(d['nih_percentile'])) for d in nihdicts] return [] def get_icite(self, pmid): @@ -63,7 +64,7 @@ def get_icite(self, pmid): nih_dict = self.api.dnld_nihdict(pmid) if nih_dict: self._wrpy(file_pmid, nih_dict) - return NIHiCiteEntry( + return NIHiCiteEntry.from_jsondct( nih_dict, self.nihgrouper.get_group(nih_dict['nih_percentile'])) return self.loader.load_icite(file_pmid) # NIHiCiteEntry @@ -99,4 +100,4 @@ def _load_icites(self, pmids, pmid2py): return nihentries_loaded -# Copyright (C) 2019-present DV Klopfenstein. All rights reserved. +# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved. diff --git a/src/pmidcite/icite/dnldr/pmid_dnlder_only.py b/src/pmidcite/icite/dnldr/pmid_dnlder_only.py index 5cfee65..1d5aa31 100644 --- a/src/pmidcite/icite/dnldr/pmid_dnlder_only.py +++ b/src/pmidcite/icite/dnldr/pmid_dnlder_only.py @@ -1,8 +1,8 @@ """Given a PubMed ID (PMID), download a list of publications which cite and reference it""" # https://icite.od.nih.gov/api -__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved." -__author__ = "DV Klopfenstein" +__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" from pmidcite.icite.dnldr.pmid_dnlder_base import NIHiCiteDownloaderBase from pmidcite.icite.entry import NIHiCiteEntry @@ -24,17 +24,18 @@ def _dnld_icites(self, pmids): nihdicts = self.api.dnld_nihdicts(pmids) if nihdicts: s_get_group = self.nihgrouper.get_group - return [NIHiCiteEntry(d, s_get_group(d['nih_percentile'])) for d in nihdicts] + # pylint: disable=line-too-long + return [NIHiCiteEntry.from_jsondct(d, s_get_group(d['nih_percentile'])) for d in nihdicts] return [] def get_icite(self, pmid): """Load or download NIH iCite data for requested PMID""" nih_dict = self.api.dnld_nihdict(pmid) if nih_dict: - return NIHiCiteEntry( + return NIHiCiteEntry.from_jsondct( nih_dict, self.nihgrouper.get_group(nih_dict['nih_percentile'])) return None -# Copyright (C) 2019-present DV Klopfenstein. All rights reserved. +# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved. diff --git a/src/pmidcite/icite/dnldr/pmid_loader.py b/src/pmidcite/icite/dnldr/pmid_loader.py index 9cd8ddc..468304b 100644 --- a/src/pmidcite/icite/dnldr/pmid_loader.py +++ b/src/pmidcite/icite/dnldr/pmid_loader.py @@ -1,7 +1,7 @@ """Given a PubMed ID (PMID), return a list of publications which cite it""" -__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved." -__author__ = "DV Klopfenstein" +__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" from sys import stdout from os.path import join @@ -64,7 +64,8 @@ def load_icite(self, file_pmid): mod = module_from_spec(spec) spec.loader.exec_module(mod) ## print('LLLLLLLLLLLLL load_icite', file_pmid) - return NIHiCiteEntry(mod.ICITE, self.nih_grouper.get_group(mod.ICITE['nih_percentile'])) + # pylint: disable=line-too-long + return NIHiCiteEntry.from_jsondct(mod.ICITE, self.nih_grouper.get_group(mod.ICITE['nih_percentile'])) return None def load_pmid(self, pmid): @@ -84,4 +85,4 @@ def _get_pmids_linked(self, icites_top): ## return pmids_linked -# Copyright (C) 2019-present DV Klopfenstein. All rights reserved. +# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved. diff --git a/src/pmidcite/icite/entry.py b/src/pmidcite/icite/entry.py index 65c1f83..ef40030 100644 --- a/src/pmidcite/icite/entry.py +++ b/src/pmidcite/icite/entry.py @@ -1,8 +1,8 @@ """Holds NIH iCite data for one PubMed ID (PMID)""" # https://icite.od.nih.gov/api -__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein. All rights reserved." -__author__ = "DV Klopfenstein" +__copyright__ = "Copyright (C) 2019-present, DV Klopfenstein, PhD. All rights reserved." +__author__ = "DV Klopfenstein, PhD" from sys import stdout @@ -56,19 +56,25 @@ class NIHiCiteEntry: author1='authors', title='title') - def __init__(self, icite_dct, nih_group): - self.pmid = icite_dct['pmid'] - self.dct = icite_dct + def __init__(self, pmid=None, dct=None): + self.pmid = pmid + self.dct = dct + + @classmethod + def from_jsondct(cls, icite_dct, nih_group_num): + """Construct NIHiCiteEntry from jsondct downloaded from NIH using Entrez utils""" + cls_dct = icite_dct nih_perc = icite_dct['nih_percentile'] - self.dct['nih_group'] = nih_group # 0 - 5 + cls_dct['nih_group'] = nih_group_num # 0 - 5 # pylint: disable=line-too-long - self.dct['num_auth'] = len(icite_dct['authors']) - self.dct['num_clin'] = len(icite_dct['cited_by_clin']) - self.dct['num_cite'] = len(icite_dct['cited_by']) - num_cites_all = len(set(self.dct['cited_by_clin']).union(self.dct['cited_by'])) - self.dct['num_cites_all'] = num_cites_all - self.dct['nih_perc'] = round(nih_perc) if nih_perc is not None else 110 + num_cites_all - self.dct['num_refs'] = len(icite_dct['references']) + cls_dct['num_auth'] = len(icite_dct['authors']) + cls_dct['num_clin'] = len(icite_dct['cited_by_clin']) + cls_dct['num_cite'] = len(icite_dct['cited_by']) + num_cites_all = len(set(cls_dct['cited_by_clin']).union(cls_dct['cited_by'])) + cls_dct['num_cites_all'] = num_cites_all + cls_dct['nih_perc'] = round(nih_perc) if nih_perc is not None else 110 + num_cites_all + cls_dct['num_refs'] = len(icite_dct['references']) + return cls(icite_dct['pmid'], cls_dct) ## TBD: ## def __eq__(self, rhs): @@ -222,4 +228,4 @@ def __lt__(self, rhs): return self.pmid < rhs.pmid -# Copyright (C) 2019-present DV Klopfensteinr,. All rights reserved. +# Copyright (C) 2019-present DV Klopfenstein, PhD. All rights reserved. diff --git a/src/pmidcite/icite/nih_grouper.py b/src/pmidcite/icite/nih_grouper.py index d008b1c..7e89d1d 100644 --- a/src/pmidcite/icite/nih_grouper.py +++ b/src/pmidcite/icite/nih_grouper.py @@ -11,6 +11,8 @@ class NihGrouper: ntobj = namedtuple('NtNihGroup', 'val txt') + group_chrs = ['0', '1', '2', '3', '4', 'i'] + def __init__(self, group1_min=2.1, group2_min=15.7, group3_min=83.9, group4_min=97.5): self.min1 = group1_min self.min2 = group2_min @@ -21,6 +23,11 @@ def __init__(self, group1_min=2.1, group2_min=15.7, group3_min=83.9, group4_min= #print(f'group3_min: {group3_min}') #print(f'group4_min: {group4_min}') + def str_group(self, nih_percentile): + """Get chr representing group number""" + group_num = self.get_group(nih_percentile) + return 'i' if group_num == 5 else str(group_num) + def get_group(self, nih_percentile): """Assign group numbers to the NIH percentile values using the 68-95-99.7 rule""" # No NIH percentile yet assigned. This paper should be checked out.