Skip to content

Commit 6ffd5e3

Browse files
authored
Merge #52 from justinmk/fixes
fix: taglink special-cases |(| |{| …
2 parents 30cd470 + 8c7b8ff commit 6ffd5e3

File tree

7 files changed

+3521
-3382
lines changed

7 files changed

+3521
-3382
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,10 @@ Known issues
4646
`:help lcs-tab`.
4747
- `url` doesn't handle _surrounding_ parens. E.g. `(https://example.com/#yay)` yields `word`
4848
- `url` doesn't handle _nested_ parens. E.g. `(https://example.com/(foo)#yay)`
49-
- `column_heading` currently only recognizes tilde "~" preceded by space (i.e.
50-
"foo ~" not "foo~"). This covers 99% of :help files, but the grammar should
49+
- `column_heading` currently only recognizes tilde `~` preceded by space (i.e.
50+
`foo ~` not `foo~`). This covers 99% of :help files, but the grammar should
5151
probably support "foo~" also.
52+
- `column_heading` children should be plaintext. Currently its children are parsed as `$._atom`.
5253

5354
TODO
5455
----

corpus/arguments.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -92,11 +92,11 @@ EXTERNAL *netrw-externapp* {{{2
9292
(help_file
9393
(block
9494
(line
95-
(word)
96-
(ERROR
97-
(word))
9895
(argument
99-
(word))
96+
(word)
97+
(ERROR
98+
(word)
99+
(word)))
100100
(word)
101101
(codespan
102102
(word))

corpus/optionlink.txt

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -29,12 +29,12 @@ world 'hello' world
2929
(word))))
3030

3131
================================================================================
32-
NOT optionlink: ' followed by whitespace
32+
NOT optionlink: ' or 'x
3333
================================================================================
3434
'fillchars'
35-
stl ' ' or '^' statusline
36-
wbr ' ' windowbar
37-
tricky: ' 'yes'
35+
stl ' ' or '^' statusline
36+
wbr ' ' windowbar
37+
tricky: ' 'yes' 's foo
3838

3939

4040
--------------------------------------------------------------------------------
@@ -60,12 +60,14 @@ NOT optionlink: ' followed by whitespace
6060
(word)
6161
(word)
6262
(optionlink
63-
(word)))))
63+
(word))
64+
(word)
65+
(word))))
6466

6567
================================================================================
6668
NOT optionlink #7 #14
6769
================================================================================
68-
Let's see if that works.
70+
Let's see.
6971
no! ','sqlKeyword'
7072
single-char '-' 'g' '보'
7173
non-ascii: '\"' '%)' '-bang' '.*\\.log' '.gitignore' '@{${\"foo\"}}'
@@ -80,9 +82,6 @@ number: '04' 'ISO-10646-1' 'python3'
8082
(help_file
8183
(block
8284
(line
83-
(word)
84-
(word)
85-
(word)
8685
(word)
8786
(word))
8887
(line
@@ -107,11 +106,11 @@ number: '04' 'ISO-10646-1' 'python3'
107106
(MISSING "*"))
108107
(word)
109108
(word)
110-
(word)
111-
(ERROR
112-
(word))
113109
(argument
114-
(word))
110+
(word)
111+
(ERROR
112+
(word)
113+
(word)))
115114
(word)
116115
(word))
117116
(line
@@ -155,8 +154,7 @@ x `after_codespan`'s
155154
(word)
156155
(codespan
157156
(word))
158-
(word)
159-
(ERROR))))
157+
(word))))
160158

161159
================================================================================
162160
NOT optionlink 3

corpus/taglink.txt

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@ taglink alone
1515
================================================================================
1616
taglink in text
1717
================================================================================
18+
|(|, |)|, |`|, |{|, |}|.
19+
1820
Hello |world| hello
1921

2022
|-+| +[num] line
@@ -26,6 +28,23 @@ Hello |world| hello
2628
--------------------------------------------------------------------------------
2729

2830
(help_file
31+
(block
32+
(line
33+
(taglink
34+
(word))
35+
(word)
36+
(taglink
37+
(word))
38+
(word)
39+
(taglink
40+
(word))
41+
(word)
42+
(taglink
43+
(word))
44+
(word)
45+
(taglink
46+
(word))
47+
(word)))
2948
(block
3049
(line
3150
(word)

grammar.js

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -61,8 +61,10 @@ module.exports = grammar({
6161

6262
// Explicit special cases: these are plaintext, not errors.
6363
_word_common: () => choice(
64-
// NOT optionlink: single "'".
64+
// NOT optionlink: '
6565
"'",
66+
// NOT optionlink: 'x
67+
seq("'", token.immediate(/[^'\n\t ]/)),
6668
// NOT optionlink: followed by non-lowercase char.
6769
seq("'", token.immediate(/[a-z]*[^'a-z\n\t ][a-z]*/), optional(token.immediate("'"))),
6870
// NOT optionlink: single char surrounded by "'".
@@ -199,7 +201,15 @@ module.exports = grammar({
199201
// Link to option: 'foo'. Lowercase non-digit ASCII, minimum 2 chars. #14
200202
optionlink: ($) => _word($, /[a-z][a-z]+/, "'", "'"),
201203
// Link to tag: |foo|
202-
taglink: ($) => _word($, /[^|\n\t ]+/, '|', '|'),
204+
taglink: ($) => _word($, choice(
205+
token.immediate(/[^|\n\t ]+/),
206+
// Special cases: |(| |{| …
207+
token.immediate('{'),
208+
token.immediate('}'),
209+
token.immediate('('),
210+
token.immediate(')'),
211+
token.immediate('`'),
212+
), '|', '|'),
203213
// Inline code (may contain whitespace!): `foo bar`
204214
codespan: ($) => _word($, /[^``\n]+/, '`', '`'),
205215
// Argument: {arg}
@@ -208,9 +218,10 @@ module.exports = grammar({
208218
});
209219

210220
// Word delimited by special chars.
211-
// The word_regex capture is aliased to "word" because they are semantically
212-
// the same: atoms of captured plain text.
213-
function _word($, word_regex, c1, c2, fname) {
221+
// `rule` can be a rule function or regex. It is aliased to "word" because they are
222+
// semantically the same: atoms of captured plain text.
223+
function _word($, rule, c1, c2, fname) {
224+
rule = rule.test !== undefined ? token.immediate(rule) : rule
214225
fname = fname ?? 'text';
215-
return seq(c1, field(fname, alias(token.immediate(word_regex), $.word)), token.immediate(c2));
226+
return seq(c1, field(fname, alias(rule, $.word)), token.immediate(c2));
216227
}

src/grammar.json

Lines changed: 61 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,22 @@
171171
"type": "STRING",
172172
"value": "'"
173173
},
174+
{
175+
"type": "SEQ",
176+
"members": [
177+
{
178+
"type": "STRING",
179+
"value": "'"
180+
},
181+
{
182+
"type": "IMMEDIATE_TOKEN",
183+
"content": {
184+
"type": "PATTERN",
185+
"value": "[^'\\n\\t ]"
186+
}
187+
}
188+
]
189+
},
174190
{
175191
"type": "SEQ",
176192
"members": [
@@ -837,11 +853,51 @@
837853
"content": {
838854
"type": "ALIAS",
839855
"content": {
840-
"type": "IMMEDIATE_TOKEN",
841-
"content": {
842-
"type": "PATTERN",
843-
"value": "[^|\\n\\t ]+"
844-
}
856+
"type": "CHOICE",
857+
"members": [
858+
{
859+
"type": "IMMEDIATE_TOKEN",
860+
"content": {
861+
"type": "PATTERN",
862+
"value": "[^|\\n\\t ]+"
863+
}
864+
},
865+
{
866+
"type": "IMMEDIATE_TOKEN",
867+
"content": {
868+
"type": "STRING",
869+
"value": "{"
870+
}
871+
},
872+
{
873+
"type": "IMMEDIATE_TOKEN",
874+
"content": {
875+
"type": "STRING",
876+
"value": "}"
877+
}
878+
},
879+
{
880+
"type": "IMMEDIATE_TOKEN",
881+
"content": {
882+
"type": "STRING",
883+
"value": "("
884+
}
885+
},
886+
{
887+
"type": "IMMEDIATE_TOKEN",
888+
"content": {
889+
"type": "STRING",
890+
"value": ")"
891+
}
892+
},
893+
{
894+
"type": "IMMEDIATE_TOKEN",
895+
"content": {
896+
"type": "STRING",
897+
"value": "`"
898+
}
899+
}
900+
]
845901
},
846902
"named": true,
847903
"value": "word"

0 commit comments

Comments
 (0)