-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdumbParser.lua
6938 lines (5505 loc) · 235 KB
/
dumbParser.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
--[=[===========================================================
--=
--= Dumb Lua Parser - Lua parsing library
--= by Marcus 'ReFreezed' Thunström
--=
--= Tokenize Lua code or create ASTs (Abstract Syntax Trees)
--= and convert the data back to Lua.
--=
--= Version: 2.3 (2022-06-23)
--=
--= License: MIT (see the bottom of this file)
--= Website: http://refreezed.com/luaparser/
--= Documentation: http://refreezed.com/luaparser/docs/
--=
--= Supported Lua versions: 5.1, 5.2, 5.3, 5.4, LuaJIT
--=
--==============================================================
1 - Usage
2 - API
2.1 - Functions
2.2 - Constants
2.3 - Settings
3 - Tokens
4 - AST
5 - Other Objects
5.1 - Stats
5.2 - Locations
6 - Notes
1 - Usage
================================================================
local parser = require("dumbParser")
local tokens = parser.tokenizeFile("cool.lua")
local ast = parser.parse(tokens)
parser.simplify(ast)
parser.printTree(ast)
local lua = parser.toLua(ast, true)
print(lua)
2 - API
================================================================
2.1 - Functions
----------------------------------------------------------------
tokenize, tokenizeFile
newToken, updateToken, cloneToken, concatTokens
parse, parseExpression, parseFile
newNode, newNodeFast, valueToAst, cloneNode, cloneTree, getChild, setChild, addChild, removeChild
isExpression, isStatement
validateTree
traverseTree, traverseTreeReverse
updateReferences
simplify, optimize, minify
toLua
printTokens, printNode, printTree
formatMessage
findDeclaredNames, findGlobalReferences, findShadows
tokenize()
tokens = parser.tokenize( luaString [, pathForErrorMessages="?" ] )
tokens = parser.tokenize( luaString [, keepWhitespaceTokens=false, pathForErrorMessages="?" ] )
Convert a Lua string into an array of tokens. (See below for more info.)
Returns nil and a message on error.
tokenizeFile()
tokens = parser.tokenizeFile( path [, keepWhitespaceTokens=false ] )
Convert the contents of a file into an array of tokens. (See below for more info.) Uses io.open().
Returns nil and a message on error.
newToken()
token = parser.newToken( tokenType, tokenValue )
Create a new token. (See below or search for 'TokenCreation' for more info.)
updateToken()
parser.updateToken( token, tokenValue )
Update the value and representation of an existing token. (Search for 'TokenModification' for more info.)
cloneToken()
tokenClone = parser.cloneToken( token )
Clone an existing token.
concatTokens()
luaString = parser.concatTokens( tokens )
Concatenate tokens. Whitespace is added between tokens when necessary.
parse()
astNode = parser.parse( tokens )
astNode = parser.parse( luaString [, pathForErrorMessages="?" ] )
Convert tokens or Lua code into an AST representing a block of code. (See below for more info.)
Returns nil and a message on error.
parseExpression()
astNode = parser.parseExpression( tokens )
astNode = parser.parseExpression( luaString [, pathForErrorMessages="?" ] )
Convert tokens or Lua code into an AST representing a value expression. (See below for more info.)
Returns nil and a message on error.
parseFile()
astNode = parser.parseFile( path )
Convert a Lua file into an AST. (See below for more info.) Uses io.open().
Returns nil and a message on error.
newNode()
astNode = parser.newNode( nodeType, arguments... )
Create a new AST node. (Search for 'NodeCreation' for more info.)
newNodeFast()
astNode = parser.newNodeFast( nodeType, arguments... )
Same as newNode() but without any validation. (Search for 'NodeCreation' for more info.)
valueToAst()
astNode = parser.valueToAst( value [, sortTableKeys=false ] )
Convert a Lua value (number, string, boolean, nil or table) to an AST.
cloneNode()
astNode = parser.cloneNode( astNode )
Clone an existing AST node (but not any children).
cloneTree()
astNode = parser.cloneTree( astNode )
Clone an existing AST node and its children.
getChild()
childNode = parser.getChild( astNode, fieldName )
childNode = parser.getChild( astNode, fieldName, index ) -- If the node field is an array.
childNode = parser.getChild( astNode, fieldName, index, tableFieldKey ) -- If the node field is a table field array.
tableFieldKey = "key"|"value"
Get a child node. (Search for 'NodeFields' for field names.)
The result is the same as doing the following, but with more error checking:
childNode = astNode[fieldName]
childNode = astNode[fieldName][index]
childNode = astNode[fieldName][index][tableFieldKey]
setChild()
parser.setChild( astNode, fieldName, childNode )
parser.setChild( astNode, fieldName, index, childNode ) -- If the node field is an array.
parser.setChild( astNode, fieldName, index, tableFieldKey, childNode ) -- If the node field is a table field array.
tableFieldKey = "key"|"value"
Set a child node. (Search for 'NodeFields' for field names.)
The result is the same as doing the following, but with more error checking:
astNode[fieldName] = childNode
astNode[fieldName][index] = childNode
astNode[fieldName][index][tableFieldKey] = childNode
addChild()
parser.addChild( astNode, fieldName, [ index=atEnd, ] childNode )
parser.addChild( astNode, fieldName, [ index=atEnd, ] keyNode, valueNode ) -- If the node field is a table field array.
Add a child node to an array field. (Search for 'NodeFields' for field names.)
The result is the same as doing the following, but with more error checking:
table.insert(astNode[fieldName], index, childNode)
table.insert(astNode[fieldName], index, {key=keyNode, value=valueNode, generatedKey=false})
removeChild()
parser.removeChild( astNode, fieldName [, index=last ] )
Remove a child node from an array field. (Search for 'NodeFields' for field names.)
The result is the same as doing the following, but with more error checking:
table.remove(astNode[fieldName], index)
isExpression()
bool = parser.isExpression( astNode )
Returns true for expression nodes and false for statements.
Note that call nodes count as expressions for this function, i.e. return true.
isStatement()
bool = parser.isStatement( astNode )
Returns true for statements and false for expression nodes.
Note that call nodes count as statements for this function, i.e. return true.
validateTree()
isValid, errorMessages = parser.validateTree( astNode )
Check for errors in an AST (e.g. missing condition expressions for if statements).
errorMessages is a multi-line string if isValid is false.
traverseTree()
didStop = parser.traverseTree( astNode, [ leavesFirst=false, ] callback [, topNodeParent=nil, topNodeContainer=nil, topNodeKey=nil ] )
action = callback( astNode, parent, container, key )
action = "stop"|"ignorechildren"|nil -- Returning nil (or nothing) means continue traversal.
Call a function on all nodes in an AST, going from astNode out to the leaf nodes (or from leaf nodes and inwards if leavesFirst is set).
container[key] is the position of the current node in the tree and can be used to replace the node.
traverseTreeReverse()
didStop = parser.traverseTreeReverse( astNode, [ leavesFirst=false, ] callback [, topNodeParent=nil, topNodeContainer=nil, topNodeKey=nil ] )
action = callback( astNode, parent, container, key )
action = "stop"|"ignorechildren"|nil -- Returning nil (or nothing) means continue traversal.
Call a function on all nodes in reverse order in an AST, going from astNode out to the leaf nodes (or from leaf nodes and inwards if leavesFirst is set).
container[key] is the position of the current node in the tree and can be used to replace the node.
updateReferences()
parser.updateReferences( astNode [, updateTopNodePositionInfo=true ] )
Update references between nodes in the tree.
This function sets 'parent'+'container'+'key' for all nodes, 'declaration' for identifiers and vararg nodes, and 'label' for goto nodes.
If 'updateTopNodePositionInfo' is false then 'parent', 'container' and 'key' will remain as-is for 'astNode' specifically.
simplify()
stats = parser.simplify( astNode )
Simplify/fold expressions and statements involving constants ('1+2' becomes '3', 'false and func()' becomes 'false' etc.).
See the INT_SIZE constant for notes.
See below for more info about stats.
optimize()
stats = parser.optimize( astNode )
Attempt to remove nodes that aren't useful, like unused variables, or variables that are essentially constants.
Calls simplify() internally.
This function can be quite slow!
See below for more info about stats.
Note: References may be out-of-date after calling this.
minify()
stats = parser.minify( astNode [, optimize=false ] )
Replace local variable names with short names.
This function can be used to obfuscate the code to some extent.
If 'optimize' is set then optimize() is also called automatically.
See below for more info about stats.
Note: References may be out-of-date after calling this.
toLua()
luaString = parser.toLua( astNode [, prettyOutput=false, nodeCallback ] )
nodeCallback = function( node, outputBuffer )
Convert an AST to Lua, optionally call a function on each node before they are turned into Lua.
Any node in the tree with a .pretty attribute will override the 'prettyOutput' flag for itself and its children.
Nodes can also have a .prefix and/or .suffix attribute with Lua code to output before/after the node (e.g. declaration.names[1].suffix="--[[foo]]").
outputBuffer is an array of Lua code that has been output so far.
Returns nil and a message on error.
printTokens()
parser.printTokens( tokens )
Print tokens to stdout.
printNode()
parser.printNode( astNode )
Print information about an AST node to stdout.
printTree()
parser.printTree( astNode )
Print the structure of a whole AST to stdout.
formatMessage()
message = parser.formatMessage( [ prefix="Info", ] token, formatString, ... )
message = parser.formatMessage( [ prefix="Info", ] astNode, formatString, ... )
message = parser.formatMessage( [ prefix="Info", ] location, formatString, ... )
Format a message to contain a code preview window with an arrow pointing at the target token, node or location.
This is used internally for formatting error messages.
-- Example:
if identifier.name ~= "good" then
print(parser.formatMessage("Error", identifier, "This identifier is not good!"))
print(parser.formatMessage(currentStatement, "Current statement."))
end
findDeclaredNames()
identifiers = parser.findDeclaredNames( astNode )
Find all declared names in the tree (i.e. identifiers from AstDeclaration, AstFunction and AstFor nodes).
findGlobalReferences()
identifiers = parser.findGlobalReferences( astNode )
Find all identifiers not referring to local variables in the tree.
Note: updateReferences() must be called at some point before you call this - otherwise all variables will be seen as globals!
findShadows()
shadowSequences = parser.findShadows( astNode )
shadowSequences = { shadowSequence1, ... }
shadowSequence = { shadowingIdentifier, shadowedIdentifier1, ... }
Find local variable shadowing in the tree. Each shadowSequence is an array of declared identifiers where each identifier shadows the next one.
Note: updateReferences() must be called at some point before you call this - otherwise all variables will be seen as globals!
Note: Shadowing of globals cannot be detected by the function as that would require knowledge of all potential globals in your program. (See findGlobalReferences())
2.2 - Constants
----------------------------------------------------------------
INT_SIZE, MAX_INT, MIN_INT
VERSION
INT_SIZE
parser.INT_SIZE = integer
How many bits integers have. In Lua 5.3 and later this is usually 64, and in earlier versions it's 32.
The int size may affect how bitwise operations involving only constants get simplified (see simplify()),
e.g. the expression '-1>>1' becomes 2147483647 in Lua 5.2 but 9223372036854775807 in Lua 5.3.
MAX_INT
parser.MAX_INT = integer
The highest representable positive signed integer value, according to INT_SIZE.
This is the same value as math.maxinteger in Lua 5.3 and later.
This only affects simplification of some bitwise operations.
MIN_INT
parser.MIN_INT = integer
The lowest representable (most negative) signed integer value, according to INT_SIZE.
This is the same value as math.mininteger in Lua 5.3 and later.
This only affects simplification of some bitwise operations.
VERSION
parser.VERSION
The parser's version number (e.g. "1.0.2").
2.3 - Settings
----------------------------------------------------------------
printIds, printLocations
indentation
constantNameReplacementStringMaxLength
printIds
parser.printIds = bool
If AST node IDs should be printed. (All nodes get assigned a unique ID when created.)
Default: false.
printLocations
parser.printLocations = bool
If the file location (filename and line number) should be printed for each token or AST node.
Default: false.
indentation
parser.indentation = string
The indentation used when printing ASTs (with printTree()).
Default: 4 spaces.
constantNameReplacementStringMaxLength
parser.constantNameReplacementStringMaxLength = length
Normally optimize() replaces variable names that are effectively constants with their value.
The exception is if the value is a string that's longer than what this setting specifies.
Default: 200.
-- Example:
local ast = parser.parse[==[
local short = "a"
local long = "xy"
func(short, long)
]==]
parser.constantNameReplacementStringMaxLength = 1
parser.optimize(ast)
print(parser.toLua(ast)) -- local long="xy";func("a",long);
3 - Tokens
================================================================
Tokens are represented by tables.
Token fields:
type -- Token type. (See below.)
value -- Token value. All token types have a string value, except "number" tokens which have a number value.
representation -- The token's code representation. (Strings have surrounding quotes, comments start with "--" etc.)
sourceString -- The original source string, or "" if there is none.
sourcePath -- Path to the source file, or "?" if there is none.
lineStart -- Start line number in sourceString, or 0 by default.
lineEnd -- End line number in sourceString, or 0 by default.
positionStart -- Start byte position in sourceString, or 0 by default.
positionEnd -- End byte position in sourceString, or 0 by default.
Token types:
"comment" -- A comment.
"identifier" -- Word that is not a keyword.
"keyword" -- Lua keyword.
"number" -- Number literal.
"punctuation" -- Any punctuation, e.g. ".." or "(".
"string" -- String value.
"whitespace" -- Sequence of whitespace characters.
4 - AST
================================================================
AST nodes are represented by tables.
Node types:
"assignment" -- Assignment of one or more values to one or more variables.
"binary" -- Binary expression (operation with two operands, e.g. "+" or "and").
"block" -- List of statements. Blocks inside blocks are 'do...end' statements.
"break" -- Loop break statement.
"call" -- Function call.
"declaration" -- Declaration of one or more local variables, possibly with initial values.
"for" -- A 'for' loop.
"function" -- Anonymous function header and body.
"goto" -- A jump to a label.
"identifier" -- An identifier.
"if" -- If statement with a condition, a body if the condition is true, and possibly another body if the condition is false.
"label" -- Label for goto commands.
"literal" -- Number, string, boolean or nil literal.
"lookup" -- Field lookup on an object.
"repeat" -- A 'repeat' loop.
"return" -- Function/chunk return statement, possibly with values.
"table" -- Table constructor.
"unary" -- Unary expression (operation with one operand, e.g. "-" or "not").
"vararg" -- Vararg expression ("...").
"while" -- A 'while' loop.
Node fields: (Search for 'NodeFields'.)
5 - Other Objects
================================================================
5.1 - Stats
----------------------------------------------------------------
Some functions return a stats table which contains these fields:
nodeReplacements -- Array of locations. Locations of nodes that were replaced. (See below for location info.)
nodeRemovals -- Array of locations. Locations of nodes or tree branches that were removed. (See below for location info.)
nodeRemoveCount -- Number. How many nodes were removed, including subnodes of nodeRemovals.
renameCount -- Number. How many identifiers were renamed.
generatedNameCount -- Number. How many unique names were generated.
5.2 - Locations
----------------------------------------------------------------
Locations are tables with these fields:
sourceString -- The original source string, or "" if there is none.
sourcePath -- Path to the source file, or "?" if there is none.
line -- Line number in sourceString, or 0 by default.
position -- Byte position in sourceString, or 0 by default.
node -- The node the location points to, or nil if there is none.
replacement -- The node that replaced 'node', or nil if there is none. (This is set for stats.nodeReplacements.)
6 - Notes
================================================================
Special number notation rules.
The expression '-n' is parsed as a single number literal if 'n' is a
numeral (i.e. the result is a negative number).
The expression 'n/0' is parsed as a single number literal if 'n' is a
numeral. If 'n' is positive then the result is math.huge, if 'n' is
negative then the result is -math.huge, or if 'n' is 0 then the result is
NaN.
-============================================================]=]
-- Parser version string (the file header lists this release as version 2.3).
local PARSER_VERSION = "2.3.0"
-- Feature flags describing the Lua runtime that loaded this file. Both are computed once, right here.
local NORMALIZE_MINUS_ZERO, HANDLE_ENV -- Should HANDLE_ENV be a setting?
do
-- Negate a zero stored in a variable and check how the result gets printed.
local n = 0
NORMALIZE_MINUS_ZERO = tostring(-n) == "0" -- Lua 5.3+ normalizes -0 to 0.
end
do
-- pcall has to be a local here: where _ENV is supported, setting it to nil below makes every global lookup in this scope fail.
local pcall = pcall
-- Where _ENV is not supported this is just an ordinary unused local.
local _ENV = nil
HANDLE_ENV = not pcall(function() return _G end) -- Looking up the global _G will raise an error if _ENV is supported (Lua 5.2+).
end
local assert = assert
local error = error
local ipairs = ipairs
local loadstring = loadstring or load
local pairs = pairs
local print = print
local select = select
local tonumber = tonumber
local tostring = tostring
local type = type
local ioOpen = io.open
local ioWrite = io.write
local jit = jit
local mathFloor = math.floor
local mathMax = math.max
local mathMin = math.min
local mathType = math.type -- May be nil.
local F = string.format
local stringByte = string.byte
local stringChar = string.char
local stringFind = string.find
local stringGmatch = string.gmatch
local stringGsub = string.gsub
local stringMatch = string.match
local stringRep = string.rep
local stringSub = string.sub
local tableConcat = table.concat
local tableInsert = table.insert
local tableRemove = table.remove
local tableSort = table.sort
local tableUnpack = table.unpack or unpack
-- n = maybeWrapInt( n )
-- Integer adjustment function, selected once at load time depending on the runtime:
--   LuaJIT:  reduce 'n' modulo 2^32 and return the result as a plain Lua number.
--   Lua 5.2: bit32.band().
--   Others:  return 'n' unchanged.
local maybeWrapInt

if jit then
	maybeWrapInt = function(n)
		-- 'n' might be cdata (i.e. a 64-bit integer) here. We have to limit the range
		-- with mod once before we convert it to a Lua number to not lose precision,
		-- but the result might be negative (and still out of range somehow!) so we
		-- have to use mod again. Gah!
		return tonumber(n % 0x100000000) % 0x100000000 -- 0x100000000 == 2^32
	end
elseif _VERSION == "Lua 5.2" then
	maybeWrapInt = require"bit32".band
end

if not maybeWrapInt then
	-- No wrapping needed (or available).
	maybeWrapInt = function(n)
		return n
	end
end
local parser
-- set = newSet( values )
-- Build a lookup table from an array: every item in 'values' becomes a key whose value is true.
-- Reading of the array stops at the first nil item.
local function newSet(values)
	local lookup = {}
	local index  = 1
	local item   = values[index]

	while item ~= nil do
		lookup[item] = true
		index        = index + 1
		item         = values[index]
	end

	return lookup
end
-- set = newCharSet( chars )
-- Build a set of byte values: every byte in the string 'chars' becomes a key whose value is true.
local function newCharSet(chars)
	local byteValues = { stringByte(chars, 1, #chars) }
	return newSet(byteValues)
end
-- Reserved words (including 'goto').
local KEYWORDS = newSet{
	"and", "or", "not",
	"true", "false", "nil",
	"if", "then", "elseif", "else", "end",
	"for", "in", "while", "do", "repeat", "until",
	"break", "goto", "return",
	"function", "local",
}

-- All punctuation strings.
local PUNCTUATION = newSet{
	-- Arithmetic and length.
	"+", "-", "*", "/", "//", "%", "^", "#",
	-- Bitwise.
	"&", "|", "~", "<<", ">>",
	-- Comparison and assignment.
	"<", "<=", ">", ">=", "==", "~=", "=",
	-- Brackets.
	"(", ")", "{", "}", "[", "]",
	-- Separators, labels, member access, concatenation and vararg.
	",", ";", ":", "::", ".", "..", "...",
}

-- Operators that take one operand.
local OPERATORS_UNARY = newSet{
	"not", "-", "#", "~",
}

-- Operators that take two operands.
local OPERATORS_BINARY = newSet{
	-- Arithmetic.
	"+", "-", "*", "/", "//", "%", "^",
	-- Bitwise.
	"&", "|", "~", "<<", ">>",
	-- Concatenation.
	"..",
	-- Comparison.
	"<", "<=", ">", ">=", "==", "~=",
	-- Logical.
	"and", "or",
}
-- Precedence level per operator. The numbers follow the precedence list in the Lua manual,
-- going from the lowest level ('or') to the highest ('^'). All unary operators share one level.
local OPERATOR_PRECEDENCE = {
	["or"]  = 1,

	["and"] = 2,

	["<"]   = 3,
	[">"]   = 3,
	["<="]  = 3,
	[">="]  = 3,
	["~="]  = 3,
	["=="]  = 3,

	["|"]   = 4,

	["~"]   = 5,

	["&"]   = 6,

	["<<"]  = 7,
	[">>"]  = 7,

	[".."]  = 8,

	["+"]   = 9,
	["-"]   = 9,

	["*"]   = 10,
	["/"]   = 10,
	["//"]  = 10,
	["%"]   = 10,

	unary   = 11, -- "-", "not", "#", "~"

	["^"]   = 12,
}
-- Node types that are expressions. Note that "call" is in both sets.
local EXPRESSION_NODES = newSet{
	"binary",
	"call",
	"function",
	"identifier",
	"literal",
	"lookup",
	"table",
	"unary",
	"vararg",
}

-- Node types that are statements. Note that "call" is in both sets.
local STATEMENT_NODES = newSet{
	"assignment",
	"block",
	"break",
	"call",
	"declaration",
	"for",
	"goto",
	"if",
	"label",
	"repeat",
	"return",
	"while",
}
-- Sets of byte values, keyed by character category. Each set is built from the bytes of a string (see newCharSet()).
local TOKEN_BYTES = {
	-- Single characters.
	DASH   = newCharSet("-"),
	DOT    = newCharSet("."),
	SQUARE = newCharSet("["),

	-- Both kinds of quotes.
	QUOTE = newCharSet("\"'"),

	-- Decimal digits.
	NUM = newCharSet("0123456789"),

	-- ASCII letters and underscore.
	NAME_START = newCharSet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"),

	-- NOTE(review): Presumably the bytes that can begin a two-character punctuation - confirm against the tokenizer.
	-- '<' and '>' appear twice in the string, which makes no difference for a set.
	PUNCT_TWO_CHARS = newCharSet(".=~<>:/<>"),

	-- NOTE(review): Presumably the bytes that form a punctuation on their own - confirm against the tokenizer.
	PUNCT_ONE_CHAR = newCharSet("+-*/%^#<>=(){}[];:,.&~|"),
}
-- Lua patterns for numerals, anchored at the start of the subject.
-- The patterns are written with spaces between the parts for readability; the spaces are stripped at load time.
local NUMERAL_PATTERNS
do
	-- Remove all runs of spaces from a spaced-out pattern. (The parentheses drop gsub's second return value.)
	local function compact(spacedPattern)
		return (stringGsub(spacedPattern, " +", ""))
	end

	NUMERAL_PATTERNS = {
		-- Hexadecimal.
		HEX_FRAC_EXP = compact("^( 0[Xx] (%x*) %.(%x+) [Pp]([-+]?%x+) )"),
		HEX_FRAC     = compact("^( 0[Xx] (%x*) %.(%x+) )"),
		HEX_EXP      = compact("^( 0[Xx] (%x+) %.? [Pp]([-+]?%x+) )"),
		HEX          = compact("^( 0[Xx] %x+ %.? )"),

		-- Binary.
		BIN          = compact("^( 0[Bb] [01]+ )"),

		-- Decimal.
		DEC_FRAC_EXP = compact("^( %d* %.%d+ [Ee][-+]?%d+ )"),
		DEC_FRAC     = compact("^( %d* %.%d+ )"),
		DEC_EXP      = compact("^( %d+ %.? [Ee][-+]?%d+ )"),
		DEC          = compact("^( %d+ %.? )"),
	}
end
-- Integer limits of the current runtime.
--   INT_SIZE: Number of bits, derived from how many hex digits -1 is formatted as.
--   MAX_INT:  math.maxinteger if it exists, otherwise derived from the hex digits.
--   MIN_INT:  math.mininteger if it exists, otherwise -MAX_INT-1.
local INT_SIZE, MAX_INT, MIN_INT
do
	local allOnesHex = F("%x", maybeWrapInt(-1))

	-- Each hex digit represents four bits.
	INT_SIZE = 4 * #allOnesHex -- This should generally be 64 for Lua 5.3+ and 32 for earlier.

	MAX_INT = math.maxinteger
	if not MAX_INT then
		-- Replacing the first 'f' with '7' clears the highest bit.
		local maxHex = stringGsub(allOnesHex, "f", "7", 1)
		MAX_INT      = tonumber(maxHex, 16)
	end

	MIN_INT = math.mininteger
	if not MIN_INT then
		MIN_INT = -MAX_INT-1
	end
end
local nextSerialNumber = 1 -- The ID that the next populated node gets.

-- :NodeFields

-- node = populateCommonNodeFields( token, node )
-- Give the node a unique ID and fill in its source information from the token.
-- Defaults are used for the source information if there is no token.
-- The node table is modified in place and returned.
local function populateCommonNodeFields(token, node)
	local id         = nextSerialNumber
	nextSerialNumber = id + 1

	local sourceString = ""
	local sourcePath   = "?"
	local line         = 0
	local position     = 0

	if token then
		sourceString = token.sourceString  or sourceString
		sourcePath   = token.sourcePath    or sourcePath
		line         = token.lineStart     or line
		position     = token.positionStart or position
	end

	-- All nodes have these fields.
	node.id           = id
	node.sourceString = sourceString
	node.sourcePath   = sourcePath
	node.token        = token
	node.line         = line
	node.position     = position

	-- These fields are set by updateReferences():
	-- node.parent    = nil -- Refers to the node's parent in the tree.
	-- node.container = nil -- Refers to the specific table that the node is in, which could be the parent itself or a field in the parent.
	-- node.key       = nil -- Refers to the specific field in the container that the node is in (which is either a string or an integer).

	-- toLua() uses these fields if present:
	-- node.pretty = bool
	-- node.prefix = luaString
	-- node.suffix = luaString

	return node
end
-- AST expressions.
-- identifier = AstIdentifier( token, name )
-- Create an "identifier" node.
local function AstIdentifier(token, name)
	local node = {
		type        = "identifier",
		name        = name, -- String.
		attribute   = "",   -- "" | "close" | "const" -- Only used in declarations.
		declaration = nil,  -- AstIdentifier (whose parent is an AstDeclaration, AstFunction or AstFor). Updated by updateReferences(). This is nil for globals.
	}
	return populateCommonNodeFields(token, node)
end
-- vararg = AstVararg( token )
-- Create a "vararg" node.
local function AstVararg(token)
	local node = {
		type        = "vararg",
		declaration = nil,   -- AstVararg (whose parent is an AstFunction). Updated by updateReferences(). This is nil in the main chunk (or in a non-vararg function, which is probably an error).
		adjustToOne = false, -- True if parentheses surround the vararg.
	}
	return populateCommonNodeFields(token, node)
end
-- literal = AstLiteral( token, value )
-- Create a "literal" node holding the given value.
local function AstLiteral(token, v)
	local node = {
		type  = "literal",
		value = v, -- Number, string, boolean or nil.
	}
	return populateCommonNodeFields(token, node)
end
-- tableNode = AstTable( token )
-- Create a "table" node (a table constructor) with no fields.
local function AstTable(token)
	local node = {
		type   = "table",
		fields = {}, -- Array of {key=expression, value=expression, generatedKey=bool}. generatedKey is true for implicit keys (i.e. {x,y}) and false for explicit keys (i.e. {a=x,b=y}). Note that the state of generatedKey affects the output of toLua()! 'key' may be nil if generatedKey is true.
	}
	return populateCommonNodeFields(token, node)
end
-- lookup = AstLookup( token )
-- Create a "lookup" node. The object and member are left unset.
local function AstLookup(token)
	local node = {
		type   = "lookup",
		object = nil, -- Expression.
		member = nil, -- Expression.
	}
	return populateCommonNodeFields(token, node)
end
-- unary = AstUnary( token, operator )
-- Create a "unary" node. The operand expression is left unset.
local function AstUnary(token, op)
	local node = {
		type       = "unary",
		operator   = op,  -- "-" | "not" | "#" | "~"
		expression = nil, -- Expression.
	}
	return populateCommonNodeFields(token, node)
end
-- binary = AstBinary( token, operator )
-- Create a "binary" node. The operand expressions are left unset.
local function AstBinary(token, op)
	local node = {
		type     = "binary",
		operator = op,  -- "+" | "-" | "*" | "/" | "//" | "^" | "%" | "&" | "~" | "|" | ">>" | "<<" | ".." | "<" | "<=" | ">" | ">=" | "==" | "~=" | "and" | "or"
		left     = nil, -- Expression.
		right    = nil, -- Expression.
	}
	return populateCommonNodeFields(token, node)
end
-- call = AstCall( token )
-- Create a "call" node with no arguments. Calls can be both expressions and statements.
local function AstCall(token)
	local node = {
		type        = "call",
		callee      = nil,   -- Expression.
		arguments   = {},    -- Array of expressions.
		method      = false, -- True if the call is a method call. Method calls must have a callee that is a lookup with a member expression that is a string literal that can pass as an identifier.
		adjustToOne = false, -- True if parentheses surround the call.
	}
	return populateCommonNodeFields(token, node)
end
-- func = AstFunction( token )
-- Create a "function" node with no parameters. The body is left unset.
local function AstFunction(token)
	local node = {
		type       = "function",
		parameters = {},  -- Array of AstIdentifier and maybe an AstVararg at the end.
		body       = nil, -- AstBlock.
	}
	return populateCommonNodeFields(token, node)
end
-- AST statements.
-- breakNode = AstBreak( token )
-- Create a "break" node.
local function AstBreak(token)
	local node = {
		type = "break",
	}
	return populateCommonNodeFields(token, node)
end
-- returnNode = AstReturn( token )
-- Create a "return" node with no values.
local function AstReturn(token)
	local node = {
		type   = "return",
		values = {}, -- Array of expressions.
	}
	return populateCommonNodeFields(token, node)
end
-- label = AstLabel( token, name )
-- Create a "label" node.
local function AstLabel(token, name)
	local node = {
		type = "label",
		name = name, -- String. The value must be able to pass as an identifier.
	}
	return populateCommonNodeFields(token, node)
end
-- gotoNode = AstGoto( token, name )
-- Create a "goto" node targeting the named label.
local function AstGoto(token, name)
	local node = {
		type  = "goto",
		name  = name, -- String. The value must be able to pass as an identifier.
		label = nil,  -- AstLabel. Updated by updateReferences().
	}
	return populateCommonNodeFields(token, node)
end
-- block = AstBlock( token )
-- Create a "block" node with no statements.
local function AstBlock(token)
	local node = {
		type       = "block",
		statements = {}, -- Array of statements.
	}
	return populateCommonNodeFields(token, node)
end
-- declaration = AstDeclaration( token )
-- Create a "declaration" node with no names and no values.
local function AstDeclaration(token)
	local node = {
		type   = "declaration",
		names  = {}, -- Array of AstIdentifier.
		values = {}, -- Array of expressions.
	}
	return populateCommonNodeFields(token, node)
end
-- assignment = AstAssignment( token )
-- Create an "assignment" node with no targets and no values.
local function AstAssignment(token)
	local node = {
		type    = "assignment",
		targets = {}, -- Mixed array of AstIdentifier and AstLookup.
		values  = {}, -- Array of expressions.
	}
	return populateCommonNodeFields(token, node)
end
-- ifNode = AstIf( token )
-- Create an "if" node. The condition and bodies are left unset.
local function AstIf(token)
	local node = {
		type      = "if",
		condition = nil, -- Expression.
		bodyTrue  = nil, -- AstBlock.
		bodyFalse = nil, -- AstBlock or nil.
	}
	return populateCommonNodeFields(token, node)
end
-- whileLoop = AstWhile( token )
-- Create a "while" node. The condition and body are left unset.
local function AstWhile(token)
	local node = {
		type      = "while",
		condition = nil, -- Expression.
		body      = nil, -- AstBlock.
	}
	return populateCommonNodeFields(token, node)
end
-- repeatLoop = AstRepeat( token )
-- Create a "repeat" node. The body and condition are left unset.
local function AstRepeat(token)
	local node = {
		type      = "repeat",
		body      = nil, -- AstBlock.
		condition = nil, -- Expression.
	}
	return populateCommonNodeFields(token, node)
end
-- forLoop = AstFor( token, kind )
-- Create a "for" node of the given kind with no names and no values. The body is left unset.
local function AstFor(token, kind)
	local node = {
		type   = "for",
		kind   = kind, -- "numeric" | "generic"
		names  = {},   -- Array of AstIdentifier.
		values = {},   -- Array of expressions.
		body   = nil,  -- AstBlock.
	}
	return populateCommonNodeFields(token, node)
end
-- For every AST node type (the 'type' string set by the Ast*() constructors):
-- the names of the fields that hold child nodes, each tagged with how the
-- children are stored in that field:
--   "node"        - the field holds a single node (or nothing).
--   "nodearray"   - the field holds an array of nodes.
--   "tablefields" - used only for the 'fields' of table nodes; the exact layout
--                   is defined by the table node constructor (not shown here).
-- Node types without children map to an empty table.
-- Presumably consumed by the generic tree traversal code; verify against callers.
local CHILD_FIELDS = {
["identifier"] = {},
["vararg"] = {},
["literal"] = {},
["table"] = {fields="tablefields"},
["lookup"] = {object="node", member="node"},
-- NOTE(review): 'expressions' is plural while the tag is "node" and every other
-- single-node field name here is singular. Confirm this matches the field name
-- that unary nodes actually use (their constructor is outside this section).
["unary"] = {expressions="node"},
["binary"] = {left="node", right="node"},
["call"] = {callee="node", arguments="nodearray"},
["function"] = {parameters="nodearray", body="node"},
["break"] = {},
["return"] = {values="nodearray"},
["label"] = {},
["goto"] = {},
["block"] = {statements="nodearray"},
["declaration"] = {names="nodearray", values="nodearray"},
["assignment"] = {targets="nodearray", values="nodearray"},
["if"] = {condition="node", bodyTrue="node", bodyFalse="node"},
["while"] = {condition="node", body="node"},
["repeat"] = {body="node", condition="node"},
["for"] = {names="nodearray", values="nodearray", body="node"},
}
-- Create a fresh statistics record with empty lists and zeroed counters.
-- Every call returns a new table with its own list tables.
local function Stats()
	local stats = {
		-- Filled in by simplify() and optimize():
		nodeReplacements = {--[[ location1, ... ]]},
		nodeRemovals = {--[[ location1, ... ]]},
		nodeRemoveCount = 0,
		-- Filled in by minify():
		renameCount = 0,
		generatedNameCount = 0,
	}
	return stats
end
-- location = Location( sourceLocation [, extraKey, extraValue ] )
-- location = Location( sourceNode [, extraKey, extraValue ] )
-- Copy the source position fields (sourceString, sourcePath, line, position)
-- of a location or node into a new table. If the source has a truthy 'type'
-- field it is treated as a node and also stored in the 'node' field.
-- An optional extra key/value pair is added to the result.
local function Location(sourceLocOrNode, extraK, extraV)
	local sourceString = sourceLocOrNode.sourceString
	local sourcePath = sourceLocOrNode.sourcePath
	local line = sourceLocOrNode.line
	local position = sourceLocOrNode.position

	local node = nil
	if sourceLocOrNode.type then
		node = sourceLocOrNode
	end

	local loc = {
		sourceString = sourceString,
		sourcePath = sourcePath,
		line = line,
		position = position,
		node = node,
	}

	if not extraK then
		return loc
	end

	loc[extraK] = extraV
	return loc
end
-- count = countString( haystack, needle [, plain=false ] )
-- Count the non-overlapping occurrences of 'needle' in 'haystack', scanning
-- from left to right. 'needle' is a Lua pattern unless 'plain' is true.
-- A needle that can match the empty string (e.g. "" or "x*") is counted once
-- per position it matches at, instead of making the scan loop forever.
local function countString(haystack, needle, plain)
	local count = 0
	local pos = 1
	-- An empty match can still begin just past the last byte, but never later.
	-- (Some Lua versions clamp a too large start position instead of failing,
	-- so the bound has to be enforced here.)
	local lastStart = #haystack + 1

	while pos <= lastStart do
		local i1, i2 = stringFind(haystack, needle, pos, plain)
		if not i2 then return count end

		count = count + 1

		if i2 < i1 then
			-- Empty match: it ends before it starts, so step past its start to
			-- guarantee progress.
			pos = i1 + 1
		else
			pos = i2 + 1
		end
	end

	return count
end
-- count = countSubString( haystack, startPosition, endPosition, needle [, plain=false ] )
-- Count the non-overlapping occurrences of 'needle' in 'haystack' that are
-- found when scanning from startPosition and that end at or before endPosition.
-- 'needle' is a Lua pattern unless 'plain' is true.
-- A needle that can match the empty string (e.g. "" or "x*") is counted once
-- per position it matches at, instead of making the scan loop forever.
local function countSubString(haystack, pos, posEnd, needle, plain)
	local count = 0
	-- An empty match can still begin just past the last byte, but never later.
	-- (Some Lua versions clamp a too large start position instead of failing,
	-- so the bound has to be enforced here.)
	local lastStart = #haystack + 1

	while pos <= lastStart do
		local i1, i2 = stringFind(haystack, needle, pos, plain)
		if not i2 or i2 > posEnd then return count end

		count = count + 1

		if i2 < i1 then
			-- Empty match: it ends before it starts, so step past its start to
			-- guarantee progress.
			pos = i1 + 1
		else
			pos = i2 + 1
		end
	end

	return count
end
-- errorf( [ level=1, ] format, ... )
-- Raise an error with a formatted message.
-- If the first argument is a number it is the error level, counted relative to
-- the caller of errorf() (0 means no position information). Otherwise the
-- first argument is the format string and the level is 1.
local function errorf(level, s, ...)
	if type(level) ~= "number" then
		-- No level given: 'level' is the format string and 's' the first value.
		error(F(level, s, ...), 2)
	end

	-- Compensate for this function's own stack frame, except for level 0
	-- which must stay 0 to suppress the position.
	local errorLevel = 0
	if level ~= 0 then
		errorLevel = 1 + level
	end

	error(F(s, ...), errorLevel)
end
-- assertArg1( functionName, argumentNumber, value, expectedType [, level=2 ] )
-- assertArg2( functionName, argumentNumber, value, expectedType1, expectedType2 [, level=2 ] )
-- Raise a "Bad argument" error (through errorf) unless the value has the
-- expected type. 'level' is forwarded to errorf, offset by one to account for
-- this function's own stack frame.
local function assertArg1(funcName, argNum, v, expectedType, level)
	local actualType = type(v)
	if actualType ~= expectedType then
		-- Deliberately not a tail call: the stack depth matters for the error level.
		errorf(1+(level or 2), "Bad argument #%d to '%s'. (Expected %s, got %s)", argNum, funcName, expectedType, actualType)
	end
end
-- Raise a "Bad argument" error (through errorf) unless the value has one of
-- the two expected types. 'level' (default 2) is forwarded to errorf, offset
-- by one to account for this function's own stack frame.
local function assertArg2(funcName, argNum, v, expectedType1, expectedType2, level)
	local actualType = type(v)
	local isExpected = (actualType == expectedType1 or actualType == expectedType2)
	if not isExpected then
		-- Deliberately not a tail call: the stack depth matters for the error level.
		errorf(1+(level or 2), "Bad argument #%d to '%s'. (Expected %s or %s, got %s)", argNum, funcName, expectedType1, expectedType2, actualType)
	end
end
local ensurePrintable
do
	-- Bytes that get a symbolic name instead of a numeric one.
	local NAMED_BYTES = {
		["\0"] = "{NUL}",
		["\n"] = "{NL}",
		["\r"] = "{CR}",
	}

	-- Replacement for a single matched byte: its symbolic name if it has one,
	-- otherwise its byte value in braces. Only ever called with bytes matched
	-- by the pattern in ensurePrintable(), i.e. values 0-31 and 127-255.
	local function describeByte(c)
		local name = NAMED_BYTES[c]
		if name then
			return name
		end
		return F("{%d}", stringByte(c))
	end

	-- printableString = ensurePrintable( string )
	-- Replace every byte with value 0-31 or 127-255 by a readable placeholder
	-- such as "{NL}" or "{200}". Other bytes are left untouched.
	--[[local]] function ensurePrintable(s)
		local printable = stringGsub(s, "[%z\1-\31\127-\255]", describeByte)
		return printable
	end
end
-- Remove the item at index 'i' from the array 't' without preserving order:
-- the last item is moved into the freed slot. Does nothing if 'i' is outside
-- the range 1..#t.
local function removeUnordered(t, i)
	local last = #t
	if i < 1 or i > last then return end

	-- When i == last this writes the item onto itself and then clears it,
	-- which is exactly the removal we want.
	t[i] = t[last]
	t[last] = nil
end
-- Remove the first occurrence of the value 'v' from the array 't' using
-- removeUnordered(), so the order of the remaining items is not preserved.
-- Does nothing if the value is not in the array.
local function removeItemUnordered(t, v)
	local index = 1
	local count = #t

	while index <= count do
		if t[index] == v then
			removeUnordered(t, index)
			return
		end
		index = index + 1
	end
end
-- lineNumber = getLineNumber( string, position )
-- Return the 1-based number of the line that 'pos' is on, i.e. one more than
-- the number of newline characters before 'pos'.
local function getLineNumber(s, pos)
	local newlinesBefore = countSubString(s, 1, pos-1, "\n", true)
	return newlinesBefore + 1
end
local formatMessageInFile
do
	-- Number of columns a tab character occupies in the printed excerpt.
	-- The caret offset and the tab expansion of the excerpt lines must agree on
	-- this, otherwise the caret ends up under the wrong character on lines
	-- that contain tabs.
	local TAB_WIDTH = 4
	local TAB_EXPANSION = stringRep(" ", TAB_WIDTH)

	-- Walk backwards from 'pos' to the first byte of the line. If 'canBeEmpty'
	-- is false then positions on empty lines are not accepted as a line start,
	-- so empty lines get skipped. The result is never less than 1.
	local function findStartOfLine(s, pos, canBeEmpty)
		while pos > 1 do
			if stringByte(s, pos-1) == 10--[[\n]] and (canBeEmpty or stringByte(s, pos) ~= 10--[[\n]]) then break end
			pos = pos - 1
		end
		return mathMax(pos, 1)
	end

	-- Walk forwards from 'pos' to the last byte before the next newline (or to
	-- the end of the string). The result is never greater than #s.
	local function findEndOfLine(s, pos)
		while pos < #s do
			if stringByte(s, pos+1) == 10--[[\n]] then break end
			pos = pos + 1
		end
		return mathMin(pos, #s)
	end

	-- Get the number of columns the bytes pos..posEnd occupy when printed.
	-- Tabs count as TAB_WIDTH columns. A lead byte followed by continuation
	-- bytes (\128-\191) counts as one column if the whole sequence ends at or
	-- before posEnd. Any other byte counts as one column by itself.
	local function getSubTextLength(s, pos, posEnd)
		local len = 0

		while pos <= posEnd do
			if stringByte(s, pos) == 9 then -- '\t'
				len = len + TAB_WIDTH
				pos = pos + 1
			else
				local _, i2 = stringFind(s, "^[%z\1-\127\194-\253][\128-\191]*", pos)
				len = len + 1
				if i2 and i2 <= posEnd then
					pos = i2 + 1
				else
					pos = pos + 1
				end
			end
		end

		return len
	end

	-- Format one excerpt line: the bytes lineStart..lineEnd with tabs expanded.
	local function formatExcerptLine(contents, lineStart, lineEnd)
		local text = stringGsub(stringSub(contents, lineStart, lineEnd), "\t", TAB_EXPANSION)
		return F("> %s\n", text)
	end

	-- message = formatMessageInFile( prefix, contents, path, position, agent, format, ... )
	-- Build a message of the form "prefix @ path:line: [agent] text" followed by
	-- an excerpt of the line at 'pos' (preceded by up to two earlier non-empty
	-- lines) and a row of dashes ending in a caret that points at 'pos'.
	-- If 'contents' is empty then only "prefix @ path: [agent] text" is returned.
	-- 'agent' may be an empty string, in which case the "[agent] " part is omitted.
	--[[local]] function formatMessageInFile(prefix, contents, path, pos, agent, s, ...)
		if agent ~= "" then
			agent = "["..agent.."] "
		end

		s = F(s, ...)

		if contents == "" then
			return F("%s @ %s: %s%s", prefix, path, agent, s)
		end

		-- Clamp the position. Just past the last byte is a valid position.
		pos = mathMin(mathMax(pos, 1), #contents+1)
		local ln = getLineNumber(contents, pos)

		local lineStart = findStartOfLine(contents, pos, true)
		local lineEnd = findEndOfLine (contents, pos-1)
		local linePre1Start = findStartOfLine(contents, lineStart-1, false)
		local linePre1End = findEndOfLine (contents, linePre1Start-1)
		local linePre2Start = findStartOfLine(contents, linePre1Start-1, false)
		local linePre2End = findEndOfLine (contents, linePre2Start-1)
		-- print(F("pos %d | lines %d..%d, %d..%d, %d..%d", pos, linePre2Start,linePre2End+1, linePre1Start,linePre1End+1, lineStart,lineEnd+1)) -- DEBUG

		-- An earlier line is only shown if it actually exists (it starts before
		-- the line that follows it) and is not empty.
		local excerptPre2 = ""
		if linePre2Start < linePre1Start and linePre2Start <= linePre2End then
			excerptPre2 = formatExcerptLine(contents, linePre2Start, linePre2End)
		end

		local excerptPre1 = ""
		if linePre1Start < lineStart and linePre1Start <= linePre1End then
			excerptPre1 = formatExcerptLine(contents, linePre1Start, linePre1End)
		end

		-- The line at the position is always shown, even when it is empty.
		local excerptLine = ">\n"
		if lineStart <= lineEnd then
			excerptLine = formatExcerptLine(contents, lineStart, lineEnd)
		end

		return F("%s @ %s:%d: %s%s\n>\n%s%s%s>-%s^",
			prefix, path, ln, agent, s,
			excerptPre2,
			excerptPre1,
			excerptLine,
			stringRep("-", getSubTextLength(contents, lineStart, pos-1))
		)
	end
end
-- message = formatMessageAtToken( prefix, token, agent, format, ... )
-- Format a message pointing at the start of the given token. The token may be
-- nil, in which case the message has no source excerpt and the path is "?".
local function formatMessageAtToken(prefix, token, agent, s, ...)
	local contents, path, pos = "", "?", 0

	if token then
		contents = token.sourceString or ""
		path = token.sourcePath or "?"
		pos = token.positionStart or 0
	end

	local message = formatMessageInFile(prefix, contents, path, pos, agent, s, ...)
	return message
end
-- message = formatMessageAfterToken( prefix, token, agent, format, ... )
-- Format a message pointing at the position right after the given token. The
-- token may be nil, in which case the message has no source excerpt and the
-- path is "?".
local function formatMessageAfterToken(prefix, token, agent, s, ...)
	local contents, path, pos = "", "?", 0

	if token then
		contents = token.sourceString or ""
		path = token.sourcePath or "?"
		pos = token.positionEnd + 1
	end

	local message = formatMessageInFile(prefix, contents, path, pos, agent, s, ...)
	return message
end
-- message = formatMessageAtNode( prefix, node, agent, format, ... )
-- Format a message pointing at the source position stored in the given node
-- (its sourceString, sourcePath and position fields).
local function formatMessageAtNode(prefix, node, agent, s, ...)
	local contents = node.sourceString
	local path = node.sourcePath
	local pos = node.position

	local message = formatMessageInFile(prefix, contents, path, pos, agent, s, ...)
	return message
end
-- Validate the arguments of formatMessage() and produce the message.
-- 'argNumOffset' shifts the argument numbers used in error messages, so they
-- match what the caller of formatMessage() actually passed.
-- Values with a 'representation' field are formatted as tokens; everything
-- else is formatted as a node.
local function formatMessageHelper(argNumOffset, prefix, nodeOrLocOrToken, s, ...)
	-- Level 3 makes errors point at the caller of formatMessage(). These must
	-- remain plain (non-tail) calls for the level to be right.
	assertArg1("formatMessage", 1+argNumOffset, prefix, "string", 3)
	assertArg1("formatMessage", 2+argNumOffset, nodeOrLocOrToken, "table", 3)
	assertArg1("formatMessage", 3+argNumOffset, s, "string", 3)

	local formatter = formatMessageAtNode
	if nodeOrLocOrToken.representation then
		formatter = formatMessageAtToken
	end

	local message = formatter(prefix, nodeOrLocOrToken, "", s, ...)
	return message
end
-- message = formatMessage( [ prefix="Info", ] token, s, ... )
-- message = formatMessage( [ prefix="Info", ] astNode, s, ... )
-- message = formatMessage( [ prefix="Info", ] location, s, ... )
-- Format a message that points at a token, AST node or location.
-- The prefix is optional: if the first argument is not a string then it is
-- taken to be the token/node/location and "Info" is used as the prefix.
local function formatMessage(prefix, ...)
	local message

	-- Plain (non-tail) calls on purpose: the helper reports argument errors
	-- relative to its position on the call stack.
	if type(prefix) ~= "string" then
		message = formatMessageHelper(-1, "Info", prefix, ...)
	else
		message = formatMessageHelper(0, prefix, ...)
	end

	return message
end
-- Shorthands for the formatMessage*() functions with "Error" as the prefix.
-- All other arguments are passed through unchanged.

-- message = formatErrorInFile( contents, path, position, agent, format, ... )
local function formatErrorInFile(...)
	return formatMessageInFile("Error", ...)
end

-- message = formatErrorAtToken( token, agent, format, ... )
local function formatErrorAtToken(...)
	return formatMessageAtToken("Error", ...)
end

-- message = formatErrorAfterToken( token, agent, format, ... )
local function formatErrorAfterToken(...)
	return formatMessageAfterToken("Error", ...)
end

-- message = formatErrorAtNode( node, agent, format, ... )
local function formatErrorAtNode(...)
	return formatMessageAtNode("Error", ...)
end