-
Notifications
You must be signed in to change notification settings - Fork 24
Adds better support for Typst #149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: default
Are you sure you want to change the base?
Changes from all commits
81e42c5
272ef3f
2924bc2
a03715c
7e453a5
190e038
506cd1f
7bdba1c
6a6a443
b394060
f7b637f
1a3f6f9
22e2b7d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
local lexer = require('lexer') | ||
local token = lexer.token | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Modern lexers don't need this anymore. |
||
local P, S, B = lpeg.P, lpeg.S, lpeg.B | ||
|
||
local lex = lexer.new(...)

-- Keep things simple for now and only allow bold and italic in non-code mode.
-- The -B('\\') look-behind guard stops the rule from firing on an escaped delimiter.
local bold = -B('\\') * lex:tag(lexer.BOLD, lexer.range('*', '*'))
local italic = -B('\\') * lex:tag(lexer.ITALIC, lexer.range('_', '_'))

lex:add_rule('bold', bold)
lex:add_rule('italic', italic)
Comment on lines
+10
to
+11
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason to keep these out of `build_rules`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've tried to keep `build_rules` generic, so it has the shared rules for both scripting and text mode. As for bold/italic, these must only be applied in text mode. As an example:
In the above example if we kept the bold/italic in the build_rules, it would tag the 0 in
|
||
|
||
-- Build a rule that tags a Typst heading of the given level as
-- '<HEADING>.h<level>' (e.g. 'heading.h2').
local function header(level)
  local hspace = lexer.space - '\n'
  local marker = P('=')^level
  -- Stupid header rule for now: optional leading spaces, at least `level`
  -- '=' signs, required whitespace, then the rest of the line.
  local rule = lexer.starts_line(hspace^0 * marker * hspace^1) * (lexer.any - S('\n'))^0
  --[[ Disabled alternative that also matched headings after '[':
  local rule = (lexer.starts_line(hspace^0 * marker * hspace^1) * (lexer.any - S('#@<\n'))^0) +
    (((B('[') * hspace^0 * marker * hspace^1)) * (lexer.any - S('#@<'))^0)
  ]]
  return lex:tag(string.format('%s.h%s', lexer.HEADING, level), rule)
end
|
||
-- Build the rule set shared between text mode and the embedded scripting mode.
-- `pre` is the prefix that introduces scripting constructs: '#' in text mode,
-- '' inside embedded code (where '#' is not valid Typst).
-- Returns a table of lpeg patterns keyed by rule name.
local function build_rules(pre)
  -- A `pre`-prefixed word / keyword, rejected when the prefix is escaped.
  local hash_word = -B('\\') * pre * lexer.word
  local keyword_match = -B('\\') * pre * lex:word_match(lexer.KEYWORD)

  -- String forms, hoisted so `string` can reuse them instead of rebuilding
  -- the same patterns (previously duplicated inline).
  local in_code = -B('\\') * lexer.range('`', false, false)
  local dq_string = -B('\\') * lexer.range('"', true)

  return {
    in_code = in_code,
    dq_string = dq_string,
    string = in_code + dq_string,

    hash_word = hash_word,
    keyword_match = keyword_match,

    -- TODO: limit numeric values to only be tagged when used as args, assigned values
    -- numeric_value = (lexer.number^1 * ('.' * lexer.number^1)^-1 * lex:word_match('UNITS')^-1),
    iden = lex:tag(lexer.IDENTIFIER, hash_word),
    -- '#<keyword> <function>(' e.g. '#let f(' -- keyword, function name, opener.
    mod_func = lex:tag(lexer.KEYWORD, hash_word) * lexer.space^1 *
      lex:tag(lexer.FUNCTION, lexer.word) * lex:tag(lexer.OPERATOR, S('[(')),
    func = lex:tag(lexer.FUNCTION, hash_word) * lex:tag(lexer.OPERATOR, S('[(')),
    method = lex:tag(lexer.IDENTIFIER, hash_word) *
      lex:tag(lexer.OPERATOR, P('.')) *
      lex:tag(lexer.FUNCTION_METHOD, lexer.word) * lex:tag(lexer.OPERATOR, S('[(')),
    -- Like `method` but NOT followed by a call opener, so it is a field access.
    field = lex:tag(lexer.IDENTIFIER, hash_word) *
      lex:tag(lexer.OPERATOR, P('.')) *
      lex:tag('FIELD', lexer.word) * -S('[('),
    operator = lex:tag(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}')),
    label = -B('\\') * lex:tag(lexer.LABEL, P('<') * lexer.word * P('>')),
    label_two = -B('\\') * lex:tag(lexer.LABEL, P('@') * lexer.word),
    link = P('http') * P('s')^-1 * P(':') * (lexer.word + S('.:/'))^1,

    math = -B('\\') * lexer.range('$', false, false),
    code = lexer.range('```', '```', false),
    list = lex:tag(lexer.LIST, lexer.starts_line(lexer.digit^1 * '.' + S('+-'), true) * S(' \t')),
    -- TODO: Do we really need to not tag a number if preceded by an alpha
    comment = lex:tag(lexer.COMMENT, lexer.range('/*', '*/') + lexer.to_eol('//')),

    keyword = lex:tag(lexer.KEYWORD, keyword_match),

    -- Highest level first so e.g. '======' is not claimed by the h1 rule.
    header = header(6) + header(5) + header(4) + header(3) + header(2) + header(1)
  }
end
|
||
-- Child lexer for Typst's embedded scripting mode (code inside #{...} etc.).
local emb_lex = lexer.new('scripting') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would use a more descriptive name like 'typst_scripting'. |
||
|
||
--[[ Embedded scripting is entered via either:
#{ ... }
OR
#let x = { ... }
]]
-- This is very limited, since it would only work correctly if no nested structures (w/ brackets)
-- are found inside; otherwise (if they're found inside), the first closing bracket of that nested
-- structure would close the whole embedded script, causing the rest of the script to not be
-- treated as a part of the embedded script.
local start = (lex:tag(lexer.KEYWORD, P('#') * lex:word_match(lexer.KEYWORD)) *
    #((lexer.any - S('{;\n'))^1 * S('{') * lexer.space^0)) +
  lex:tag(lexer.OPERATOR, P('#') * S('{'))
local embed_start = lex:tag('emb_tag', start)
-- FIX: was `lexer:tag('emb_tag', S('}'))` -- a ':' call on the *module*, which
-- passes the module table as the lexer object and shifts every argument by one.
-- The tag must be created on the lexer object `lex`, as on the line above.
local embed_end = lex:tag('emb_tag', S('}'))
Comment on lines
+80
to
+81
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would suggest using |
||
|
||
-- Install the rule set produced by build_rules() on the given lexer object.
-- `lexer_obj` is either the master text-mode lexer (pre = '#') or the embedded
-- scripting lexer (pre = ''). Rule order here determines match priority.
local function add_rules(lexer_obj, pre) | ||
local rules = build_rules(pre) | ||
lexer_obj:add_rule('header', rules.header) | ||
lexer_obj:add_rule('field', rules.field) | ||
lexer_obj:add_rule('function', rules.mod_func + rules.func) | ||
lexer_obj:add_rule('method', rules.method) | ||
lexer_obj:add_rule('label', rules.label + rules.label_two) | ||
-- NOTE(review): the next four rules create their tags via the closed-over
-- `lex`, not `lexer_obj`, so when lexer_obj == emb_lex the tag names are
-- registered on the master lexer only. This appears to work because emb_lex
-- is embedded into lex, but confirm against Scintillua's embed semantics.
lexer_obj:add_rule('code', lex:tag(lexer.CODE, rules.code)) | ||
lexer_obj:add_rule('string', lex:tag(lexer.STRING, rules.string)) | ||
lexer_obj:add_rule('link', lex:tag(lexer.LINK, rules.link)) | ||
lexer_obj:add_rule('math', lex:tag('environment.math', rules.math)) | ||
lexer_obj:add_rule('keyword', rules.keyword) | ||
lexer_obj:add_rule('identifier', rules.iden) | ||
--lexer_obj:add_rule('number', lex:tag(lexer.NUMBER, rules.numeric_value)) | ||
lexer_obj:add_rule('list', rules.list) | ||
lexer_obj:add_rule('comment', rules.comment) | ||
lexer_obj:add_rule('operator', rules.operator) | ||
end | ||
Comment on lines
+83
to
+100
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like we can bring the contents of |
||
|
||
-- Keywords, functions... don't need '#' when in code: the character '#' is not
-- valid inside code mode, so the embedded lexer gets an empty prefix.
-- TODO: only enable styling and text related rules when in []
add_rules(emb_lex, '')

-- Wire the scripting lexer into the text-mode lexer between the '#'-style
-- start pattern and the closing '}'.
lex:embed(emb_lex, embed_start, embed_end)

-- Text mode itself: scripting constructs require the '#' prefix here.
add_rules(lex, '#')
Comment on lines
+105
to
+109
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, I'm completely blown away. You've partially solved the "language embedded in itself" problem. I would really like to use something like
You cannot do this now because of an endless loop, but I would totally work on a fix to try and make this viable. |
||
|
||
|
||
-- Typst keywords, matched after the `pre` prefix ('#' in text mode, none in code).
lex:set_word_list(lexer.KEYWORD, { | ||
'if', 'else', 'for', 'while', 'let', 'set', 'import', 'include', 'return', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a bit of a nitpick for now, but in my brief testing,
the I just thought I'd make a note. EDIT: I saw you already noted something similar in an earlier comment, so this is just another instance to report. |
||
'true', 'false', 'none', 'auto', 'not', 'in', 'and', 'or', 'as', 'show' | ||
}) | ||
|
||
-- Length/angle unit suffixes usable after numeric values.
lex:set_word_list('UNITS', {'em', 'in', '%', 'mm', 'deg', 'rad', 'cm', 'pt', 'fr'})

lex:add_fold_point(lexer.OPERATOR, '{', '}')
lex:add_fold_point(lexer.COMMENT, '/*', '*/')
-- FIX: code fences are tagged lexer.CODE (see add_rules), and this lexer never
-- emits lexer.PREPROCESSOR, so the old fold point could never match; register
-- it under the tag the fence tokens actually carry.
lex:add_fold_point(lexer.CODE, '```', '```')
lexer.property['scintillua.comment'] = '//'

return lex
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Modern lexers use
local lexer = lexer
now.