Skip to content

Commit 0536f0c

Browse files
committed
fixes #6
1 parent b9d2bff commit 0536f0c

File tree

3 files changed

+30
-24
lines changed

3 files changed

+30
-24
lines changed

llms_txt/core.py

+13-10
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ def named_re(nm, pat):
2525

2626
def search(pat, txt, flags=0):
2727
"Dictionary of matched groups in `pat` within `txt`"
28-
return re.search(pat, txt, flags=flags).groupdict()
28+
res = re.search(pat, txt, flags=flags)
29+
return res.groupdict() if res else None
2930

3031
# %% ../nbs/01_core.ipynb
3132
def parse_link(txt):
@@ -65,22 +66,23 @@ def parse_llms_file(txt):
6566
from fastcore.xml import Sections,Project,Doc
6667

6768
# %% ../nbs/01_core.ipynb
68-
def _doc(url, **kw):
69+
def _doc(kw):
6970
"Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs."
71+
url = kw.pop('url')
7072
re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)
7173
txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o)]
7274
return Doc('\n'.join(txt), **kw)
7375

7476
# %% ../nbs/01_core.ipynb
75-
def _section(nm, items):
77+
def _section(nm, items, n_workers=None):
7678
"Create a section containing a `Doc` object for each child."
77-
return ft(nm, *[_doc(**o) for o in items])
79+
return ft(nm, *parallel(_doc, items, n_workers=n_workers, threadpool=True))
7880

7981
# %% ../nbs/01_core.ipynb
80-
def mk_ctx(d, optional=True):
82+
def mk_ctx(d, optional=True, n_workers=None):
8183
"Create a `Project` with a `Section` for each H2 part in `d`, optionally skipping the 'optional' section."
8284
skip = '' if optional else 'Optional'
83-
sections = [_section(k, v) for k,v in d.sections.items() if k!=skip]
85+
sections = [_section(k, v, n_workers=n_workers) for k,v in d.sections.items() if k!=skip]
8486
return Project(title=d.title, summary=d.summary)(d.info, *sections)
8587

8688
# %% ../nbs/01_core.ipynb
@@ -89,17 +91,18 @@ def get_sizes(ctx):
8991
return {o.tag:{p.title:len(p.children[0]) for p in o.children} for o in ctx.children if hasattr(o,'tag')}
9092

9193
# %% ../nbs/01_core.ipynb
92-
def create_ctx(txt, optional=False):
94+
def create_ctx(txt, optional=False, n_workers=None):
9395
"A `Project` with a `Section` for each H2 part in `txt`, optionally skipping the 'optional' section."
9496
d = parse_llms_file(txt)
95-
ctx = mk_ctx(d, optional=optional)
97+
ctx = mk_ctx(d, optional=optional, n_workers=n_workers)
9698
return to_xml(ctx, do_escape=False)
9799

98100
# %% ../nbs/01_core.ipynb
99101
@call_parse
100102
def llms_txt2ctx(
101103
fname:str, # File name to read
102-
optional:bool_arg=False # Include 'optional' section?
104+
optional:bool_arg=False, # Include 'optional' section?
105+
n_workers:int=None # Number of threads to use for parallel downloading
103106
):
104107
"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section."
105-
print(create_ctx(Path(fname).read_text(), optional=optional))
108+
print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))

nbs/01_core.ipynb

+16-13
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@
139139
"\n",
140140
"def search(pat, txt, flags=0):\n",
141141
" \"Dictionary of matched groups in `pat` within `txt`\"\n",
142-
" return re.search(pat, txt, flags=flags).groupdict()"
142+
" res = re.search(pat, txt, flags=flags)\n",
143+
" return res.groupdict() if res else None"
143144
]
144145
},
145146
{
@@ -679,8 +680,9 @@
679680
"outputs": [],
680681
"source": [
681682
"#| export\n",
682-
"def _doc(url, **kw):\n",
683+
"def _doc(kw):\n",
683684
" \"Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs.\"\n",
685+
" url = kw.pop('url')\n",
684686
" re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)\n",
685687
" txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o)]\n",
686688
" return Doc('\\n'.join(txt), **kw)"
@@ -693,9 +695,9 @@
693695
"outputs": [],
694696
"source": [
695697
"#| export\n",
696-
"def _section(nm, items):\n",
698+
"def _section(nm, items, n_workers=None):\n",
697699
" \"Create a section containing a `Doc` object for each child.\"\n",
698-
" return ft(nm, *[_doc(**o) for o in items])"
700+
" return ft(nm, *parallel(_doc, items, n_workers=n_workers, threadpool=True))"
699701
]
700702
},
701703
{
@@ -705,10 +707,10 @@
705707
"outputs": [],
706708
"source": [
707709
"#| export\n",
708-
"def mk_ctx(d, optional=True):\n",
710+
"def mk_ctx(d, optional=True, n_workers=None):\n",
709711
" \"Create a `Project` with a `Section` for each H2 part in `d`, optionally skipping the 'optional' section.\"\n",
710712
" skip = '' if optional else 'Optional'\n",
711-
" sections = [_section(k, v) for k,v in d.sections.items() if k!=skip]\n",
713+
" sections = [_section(k, v, n_workers=n_workers) for k,v in d.sections.items() if k!=skip]\n",
712714
" return Project(title=d.title, summary=d.summary)(d.info, *sections)"
713715
]
714716
},
@@ -753,10 +755,10 @@
753755
{
754756
"data": {
755757
"text/plain": [
756-
"{'docs': {'FastHTML quick start': 25803,\n",
758+
"{'docs': {'FastHTML quick start': 27376,\n",
757759
" 'HTMX reference': 26427,\n",
758760
" 'Starlette quick guide': 7936},\n",
759-
" 'examples': {'Todo list application': 18588},\n",
761+
" 'examples': {'Todo list application': 18558},\n",
760762
" 'optional': {'Starlette full documentation': 48331}}"
761763
]
762764
},
@@ -777,7 +779,7 @@
777779
{
778780
"data": {
779781
"text/plain": [
780-
"128271"
782+
"129814"
781783
]
782784
},
783785
"execution_count": null,
@@ -796,10 +798,10 @@
796798
"outputs": [],
797799
"source": [
798800
"#| export\n",
799-
"def create_ctx(txt, optional=False):\n",
801+
"def create_ctx(txt, optional=False, n_workers=None):\n",
800802
" \"A `Project` with a `Section` for each H2 part in `txt`, optionally skipping the 'optional' section.\"\n",
801803
" d = parse_llms_file(txt)\n",
802-
" ctx = mk_ctx(d, optional=optional)\n",
804+
" ctx = mk_ctx(d, optional=optional, n_workers=n_workers)\n",
803805
" return to_xml(ctx, do_escape=False)"
804806
]
805807
},
@@ -813,10 +815,11 @@
813815
"@call_parse\n",
814816
"def llms_txt2ctx(\n",
815817
" fname:str, # File name to read\n",
816-
" optional:bool_arg=False # Include 'optional' section?\n",
818+
" optional:bool_arg=False, # Include 'optional' section?\n",
819+
" n_workers:int=None # Number of threads to use for parallel downloading\n",
817820
"):\n",
818821
" \"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section.\"\n",
819-
" print(create_ctx(Path(fname).read_text(), optional=optional))"
822+
" print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))"
820823
]
821824
},
822825
{

nbs/llms.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
## Docs
66

7-
- [llms.txt proposal](https://llmstxt.org/index-commonmark.md): The proposal for llms.txt
7+
- [llms.txt proposal](https://llmstxt.org/index.md): The proposal for llms.txt
88
- [Python library docs](https://llmstxt.org/intro.html.md): Docs for `llms-txt` python lib
99
- [ed demo](https://llmstxt.org/ed-commonmark.md): Tongue-in-cheek example of how llms.txt could be used in the classic `ed` editor, used to show how editors could incorporate llms.txt in general.
1010

0 commit comments

Comments
 (0)