|
139 | 139 | "\n",
|
140 | 140 | "def search(pat, txt, flags=0):\n",
|
141 | 141 | " \"Dictionary of matched groups in `pat` within `txt`\"\n",
|
142 |
| - " return re.search(pat, txt, flags=flags).groupdict()" |
| 142 | + " res = re.search(pat, txt, flags=flags)\n", |
| 143 | + " return res.groupdict() if res else None" |
143 | 144 | ]
|
144 | 145 | },
|
145 | 146 | {
|
|
679 | 680 | "outputs": [],
|
680 | 681 | "source": [
|
681 | 682 | "#| export\n",
|
682 |
| - "def _doc(url, **kw):\n", |
| 683 | + "def _doc(kw):\n", |
683 | 684 | " \"Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs.\"\n",
|
| 685 | + " url = kw.pop('url')\n", |
684 | 686 | " re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)\n",
|
685 | 687 | " txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o)]\n",
|
686 | 688 | " return Doc('\\n'.join(txt), **kw)"
|
|
693 | 695 | "outputs": [],
|
694 | 696 | "source": [
|
695 | 697 | "#| export\n",
|
696 |
| - "def _section(nm, items):\n", |
| 698 | + "def _section(nm, items, n_workers=None):\n", |
697 | 699 | " \"Create a section containing a `Doc` object for each child.\"\n",
|
698 |
| - " return ft(nm, *[_doc(**o) for o in items])" |
| 700 | + " return ft(nm, *parallel(_doc, items, n_workers=n_workers, threadpool=True))" |
699 | 701 | ]
|
700 | 702 | },
|
701 | 703 | {
|
|
705 | 707 | "outputs": [],
|
706 | 708 | "source": [
|
707 | 709 | "#| export\n",
|
708 |
| - "def mk_ctx(d, optional=True):\n", |
| 710 | + "def mk_ctx(d, optional=True, n_workers=None):\n", |
709 | 711 | " \"Create a `Project` with a `Section` for each H2 part in `d`, optionally skipping the 'optional' section.\"\n",
|
710 | 712 | " skip = '' if optional else 'Optional'\n",
|
711 |
| - " sections = [_section(k, v) for k,v in d.sections.items() if k!=skip]\n", |
| 713 | + " sections = [_section(k, v, n_workers=n_workers) for k,v in d.sections.items() if k!=skip]\n", |
712 | 714 | " return Project(title=d.title, summary=d.summary)(d.info, *sections)"
|
713 | 715 | ]
|
714 | 716 | },
|
|
753 | 755 | {
|
754 | 756 | "data": {
|
755 | 757 | "text/plain": [
|
756 |
| - "{'docs': {'FastHTML quick start': 25803,\n", |
| 758 | + "{'docs': {'FastHTML quick start': 27376,\n", |
757 | 759 | " 'HTMX reference': 26427,\n",
|
758 | 760 | " 'Starlette quick guide': 7936},\n",
|
759 |
| - " 'examples': {'Todo list application': 18588},\n", |
| 761 | + " 'examples': {'Todo list application': 18558},\n", |
760 | 762 | " 'optional': {'Starlette full documentation': 48331}}"
|
761 | 763 | ]
|
762 | 764 | },
|
|
777 | 779 | {
|
778 | 780 | "data": {
|
779 | 781 | "text/plain": [
|
780 |
| - "128271" |
| 782 | + "129814" |
781 | 783 | ]
|
782 | 784 | },
|
783 | 785 | "execution_count": null,
|
|
796 | 798 | "outputs": [],
|
797 | 799 | "source": [
|
798 | 800 | "#| export\n",
|
799 |
| - "def create_ctx(txt, optional=False):\n", |
| 801 | + "def create_ctx(txt, optional=False, n_workers=None):\n", |
800 | 802 | " \"A `Project` with a `Section` for each H2 part in `txt`, optionally skipping the 'optional' section.\"\n",
|
801 | 803 | " d = parse_llms_file(txt)\n",
|
802 |
| - " ctx = mk_ctx(d, optional=optional)\n", |
| 804 | + " ctx = mk_ctx(d, optional=optional, n_workers=n_workers)\n", |
803 | 805 | " return to_xml(ctx, do_escape=False)"
|
804 | 806 | ]
|
805 | 807 | },
|
|
813 | 815 | "@call_parse\n",
|
814 | 816 | "def llms_txt2ctx(\n",
|
815 | 817 | " fname:str, # File name to read\n",
|
816 |
| - " optional:bool_arg=False # Include 'optional' section?\n", |
| 818 | + " optional:bool_arg=False, # Include 'optional' section?\n", |
| 819 | + " n_workers:int=None # Number of threads to use for parallel downloading\n", |
817 | 820 | "):\n",
|
818 | 821 | " \"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section.\"\n",
|
819 |
| - " print(create_ctx(Path(fname).read_text(), optional=optional))" |
| 822 | + " print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))" |
820 | 823 | ]
|
821 | 824 | },
|
822 | 825 | {
|
|
0 commit comments