Skip to content

Commit c931480

Browse files
authored
Performance improvements with new parser internals. (#32)
* Performance improvements with new parser internals.
* Bump PureScript version.
1 parent ab6ea97 commit c931480

File tree

10 files changed

+404
-337
lines changed

10 files changed

+404
-337
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@ jobs:
1414

1515
- uses: purescript-contrib/setup-purescript@main
1616
with:
17-
purescript: "0.14.0"
18-
spago: "0.19.0"
17+
purescript: "0.14.5"
18+
spago: "0.20.3"
1919
psa: "0.8.2"
2020
purs-tidy: "latest"
2121

@@ -30,11 +30,14 @@ jobs:
3030
- name: Build source
3131
run: spago build --purs-args '--censor-lib --strict'
3232

33+
- name: Run tests
34+
run: spago test
35+
3336
- name: Check formatting
3437
run: npm run check
3538

3639
- name: Parse package sets
3740
run: npm run parse-package-set
3841

3942
- name: Run file benchmark
40-
run: npm run bench-file test/Main.purs
43+
run: npm run bench-file src/PureScript/CST/Parser.purs

bench/ParseFile.purs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,9 +44,11 @@ main = launchAff_ do
4444
case parseModule contents of
4545
ParseSucceeded _ -> do
4646
Console.log "Parse succeeded."
47-
ParseSucceededWithErrors _ errs ->
47+
ParseSucceededWithErrors _ errs -> do
48+
Console.log "Parse succeeded with errors."
4849
for_ errs $ Console.error <<< printPositionedError
49-
ParseFailed err ->
50+
ParseFailed err -> do
51+
Console.log "Parse failed."
5052
Console.error $ printPositionedError err
5153
Nothing ->
5254
Console.log "File path required"

bench/bench.dhall

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
let conf = ../spago.dhall
22

33
in conf // {
4-
sources = conf.sources # [ "bench/**/*.purs" ],
4+
sources = [ "src/**/*.purs", "bench/**/*.purs" ],
55
dependencies =
66
[ "aff"
77
, "arrays"
@@ -12,6 +12,7 @@ in conf // {
1212
, "either"
1313
, "foldable-traversable"
1414
, "free"
15+
, "functions"
1516
, "functors"
1617
, "identity"
1718
, "integers"

parse-package-set/parse-package-set.dhall

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
let conf = ../spago.dhall
22

33
in conf // {
4-
sources = conf.sources # [ "parse-package-set/**/*.purs" ],
4+
sources = [ "src/**/*.purs", "parse-package-set/**/*.purs" ],
55
dependencies =
66
[ "aff"
77
, "arrays"
@@ -16,6 +16,7 @@ in conf // {
1616
, "filterable"
1717
, "foldable-traversable"
1818
, "free"
19+
, "functions"
1920
, "functors"
2021
, "identity"
2122
, "integers"

spago.dhall

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,22 @@
11
{ name = "language-cst-parser"
22
, dependencies =
33
[ "arrays"
4+
, "console"
45
, "const"
56
, "control"
67
, "effect"
78
, "either"
89
, "foldable-traversable"
910
, "free"
11+
, "functions"
1012
, "functors"
1113
, "identity"
1214
, "integers"
1315
, "lazy"
1416
, "lists"
1517
, "maybe"
1618
, "newtype"
19+
, "node-process"
1720
, "numbers"
1821
, "ordered-collections"
1922
, "partial"
@@ -28,5 +31,5 @@
2831
, "unsafe-coerce"
2932
]
3033
, packages = ./packages.dhall
31-
, sources = [ "src/**/*.purs" ]
34+
, sources = [ "src/**/*.purs", "test/**/Main.purs" ]
3235
}

src/PureScript/CST.purs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,8 +93,8 @@ parsePartialModule src =
9393
pure $ Module { header, body }
9494
}
9595
Right $ Tuple res state.errors
96-
ParseFail error position _ _ ->
97-
Left { error, position }
96+
ParseFail error _ ->
97+
Left error
9898

9999
printModule :: forall e. TokensOf e => Module e -> String
100100
printModule mod =

src/PureScript/CST/Lexer.purs

Lines changed: 45 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,11 @@ module PureScript.CST.Lexer
66

77
import Prelude
88

9-
import Control.Alt (class Alt, (<|>))
10-
import Data.Array as Array
9+
import Control.Alt (class Alt, alt)
10+
import Control.Monad.ST as ST
11+
import Control.Monad.ST.Ref as STRef
1112
import Data.Array.NonEmpty as NonEmptyArray
13+
import Data.Array.ST as STArray
1214
import Data.Char as Char
1315
import Data.Either (Either(..))
1416
import Data.Foldable (fold, foldl, foldMap)
@@ -32,6 +34,8 @@ import PureScript.CST.Layout (LayoutDelim(..), LayoutStack, insertLayout)
3234
import PureScript.CST.TokenStream (TokenStep(..), TokenStream(..), consTokens, step, unwindLayout)
3335
import PureScript.CST.Types (Comment(..), IntValue(..), LineFeed(..), ModuleName(..), SourcePos, SourceStyle(..), Token(..))
3436

37+
infixr 3 alt as <|>
38+
3539
data LexResult e a
3640
= LexFail e String
3741
| LexSucc a String
@@ -147,18 +151,29 @@ satisfy mkErr p = Lex \str ->
147151
LexFail (\_ -> mkErr (mkUnexpected str)) str
148152

149153
many :: forall e a. Lex e a -> Lex e (Array a)
150-
many (Lex k) = Lex \str -> do
151-
let
152-
go acc str' =
153-
case k str' of
154-
LexFail err str''
155-
| SCU.length str' == SCU.length str'' ->
156-
LexSucc acc str'
157-
| otherwise ->
158-
LexFail err str''
159-
LexSucc a str'' ->
160-
go (Array.snoc acc a) str''
161-
go [] str
154+
many (Lex k) = Lex \str -> ST.run do
155+
valuesRef <- STArray.new
156+
strRef <- STRef.new str
157+
contRef <- STRef.new true
158+
resRef <- STRef.new (LexSucc [] str)
159+
ST.while (STRef.read contRef) do
160+
str' <- STRef.read strRef
161+
case k str' of
162+
LexFail error str''
163+
| SCU.length str' == SCU.length str'' -> do
164+
values <- STArray.unsafeFreeze valuesRef
165+
_ <- STRef.write (LexSucc values str'') resRef
166+
_ <- STRef.write false contRef
167+
pure unit
168+
| otherwise -> do
169+
_ <- STRef.write (LexFail error str'') resRef
170+
_ <- STRef.write false contRef
171+
pure unit
172+
LexSucc a str'' -> do
173+
_ <- STArray.push a valuesRef
174+
_ <- STRef.write str'' strRef
175+
pure unit
176+
STRef.read resRef
162177

163178
fail :: forall a. ParseError -> Lex LexError a
164179
fail = Lex <<< LexFail <<< const
@@ -369,9 +384,9 @@ token =
369384
<|> tokenComma
370385
where
371386
parseModuleName = ado
372-
parts <- many (try (parseProper <* charDot))
387+
prefix <- parseModuleNamePrefix
373388
name <- parseName
374-
in name (toModuleName parts)
389+
in name (toModuleName prefix)
375390

376391
parseName :: Lex _ (Maybe ModuleName -> Token)
377392
parseName =
@@ -461,6 +476,9 @@ token =
461476
ident <- try $ charQuestionMark *> (parseIdent <|> parseProper)
462477
in TokHole ident
463478

479+
parseModuleNamePrefix =
480+
regex (LexExpected "module name") "(?:(?:\\p{Lu}[\\p{L}0-9_']*)\\.)*"
481+
464482
parseProper =
465483
regex (LexExpected "proper name") "\\p{Lu}[\\p{L}0-9_']*"
466484

@@ -566,8 +584,8 @@ token =
566584

567585
parseNumber = do
568586
intPart <- intPartRegex
569-
fractionPart <- optional (try (charDot *> fractionPartRegex))
570-
exponentPart <- optional (charExponent *> parseExponentPart)
587+
fractionPart <- parseNumberFractionPart
588+
exponentPart <- parseNumberExponentPart
571589
if isNothing fractionPart && isNothing exponentPart then do
572590
let intVal = stripUnderscores intPart
573591
case Int.fromString intVal of
@@ -587,6 +605,12 @@ token =
587605
Nothing ->
588606
fail $ LexNumberOutOfRange raw
589607

608+
parseNumberFractionPart =
609+
optional (try (charDot *> fractionPartRegex))
610+
611+
parseNumberExponentPart =
612+
optional (charExponent *> parseExponentPart)
613+
590614
parseExponentPart = ado
591615
sign <- optional parseExponentSign
592616
exponent <- intPartRegex
@@ -656,7 +680,7 @@ token =
656680
charAny =
657681
satisfy (LexExpected "char") (const true)
658682

659-
toModuleName :: Array String -> Maybe ModuleName
683+
toModuleName :: String -> Maybe ModuleName
660684
toModuleName = case _ of
661-
[] -> Nothing
662-
mn -> Just $ ModuleName $ String.joinWith "." mn
685+
"" -> Nothing
686+
mn -> Just $ ModuleName $ SCU.dropRight 1 mn

src/PureScript/CST/Parser.purs

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ import Data.Tuple (Tuple(..), uncurry)
2626
import Prim as P
2727
import PureScript.CST.Errors (ParseError(..), RecoveredError(..))
2828
import PureScript.CST.Layout (currentIndent)
29-
import PureScript.CST.Parser.Monad (Parser, Recovery(..), eof, lookAhead, many, optional, recover, take, try)
29+
import PureScript.CST.Parser.Monad (Parser, eof, lookAhead, many, optional, recover, take, try)
3030
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, layoutStack)
3131
import PureScript.CST.TokenStream as TokenStream
3232
import PureScript.CST.Types (Binder(..), ClassFundep(..), DataCtor(..), DataMembers(..), Declaration(..), Delimited, DoStatement(..), Export(..), Expr(..), Fixity(..), FixityOp(..), Foreign(..), Guarded(..), GuardedExpr(..), Ident(..), Import(..), ImportDecl(..), Instance(..), InstanceBinding(..), IntValue(..), Label(..), Labeled(..), LetBinding(..), Module(..), ModuleBody(..), ModuleHeader(..), ModuleName(..), Name(..), OneOrDelimited(..), Operator(..), PatternGuard(..), Proper(..), QualifiedName(..), RecordLabeled(..), RecordUpdate(..), Role(..), Row(..), Separated(..), SourceToken, Token(..), Type(..), TypeVarBinding(..), Where(..), Wrapped(..))
@@ -1146,30 +1146,37 @@ reservedKeywords = Set.fromFoldable
11461146
]
11471147

11481148
recoverIndent :: forall a. (RecoveredError -> a) -> Parser a -> Parser a
1149-
recoverIndent mkNode = recover \{ position, error } ->
1150-
map (\tokens -> mkNode (RecoveredError { position, error, tokens })) <<< recoverTokensWhile \tok indent ->
1151-
case tok.value of
1152-
TokLayoutEnd col -> col > indent
1153-
TokLayoutSep col -> col > indent
1154-
_ -> true
1155-
1156-
recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Recovery (Array SourceToken)
1149+
recoverIndent mkNode = recover \{ position, error } stream -> do
1150+
let
1151+
Tuple tokens newStream = recoverTokensWhile
1152+
( \tok indent -> case tok.value of
1153+
TokLayoutEnd col -> col > indent
1154+
TokLayoutSep col -> col > indent
1155+
_ -> true
1156+
)
1157+
stream
1158+
if Array.null tokens then
1159+
Nothing
1160+
else
1161+
Just (Tuple (mkNode (RecoveredError { position, error, tokens })) newStream)
1162+
1163+
recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Tuple (Array SourceToken) TokenStream
11571164
recoverTokensWhile p initStream = go [] initStream
11581165
where
11591166
indent :: Int
11601167
indent = maybe 0 _.column $ currentIndent $ layoutStack initStream
11611168

1162-
go :: Array SourceToken -> TokenStream -> Recovery (Array SourceToken)
1169+
go :: Array SourceToken -> TokenStream -> Tuple (Array SourceToken) TokenStream
11631170
go acc stream = case TokenStream.step stream of
1164-
TokenError errPos _ _ _ ->
1165-
Recovery acc errPos stream
1166-
TokenEOF eofPos _ ->
1167-
Recovery acc eofPos stream
1171+
TokenError _ _ _ _ ->
1172+
Tuple acc stream
1173+
TokenEOF _ _ ->
1174+
Tuple acc stream
11681175
TokenCons tok _ nextStream _ ->
11691176
if p tok indent then
11701177
go (Array.snoc acc tok) nextStream
11711178
else
1172-
Recovery acc tok.range.start stream
1179+
Tuple acc stream
11731180

11741181
recoverDecl :: RecoveryStrategy Declaration
11751182
recoverDecl = recoverIndent DeclError

0 commit comments

Comments
 (0)