Skip to content

Commit 8363f1c

Browse files
LPTK and chengluyu committed
Pre-port useful changes from Luyu's GPP branch
- Make lowering less stack hungry - Fix `of` in UCS pattern splits - Add magic `source` definitions - Lex unicode characters - Add `@` application operator - Add Iter and XML utilities - Add and update various test cases Co-authored-by: Luyu Cheng <chengluyu@live.cn>
1 parent a753cb2 commit 8363f1c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1917
-227
lines changed

hkmc2/shared/src/main/scala/hkmc2/codegen/Lowering.scala

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -198,12 +198,28 @@ class Lowering()(using Config, TL, Raise, State, Ctx):
198198
// Fail silently.
199199
false -> Term.Error
200200
val l = new TempSymbol(S(t))
201-
def rec(as: Ls[Bool -> st], asr: Ls[Arg]): Block = as match
202-
case Nil => k(Call(fr, asr.reverse)(isMlsFun, true))
203-
case (spd, a) :: as =>
204-
subTerm_nonTail(a): ar =>
205-
rec(as, Arg(spd, ar) :: asr)
206-
rec(as, Nil)
201+
// * The straightforward way to lower arguments creates too much recursion depth
202+
// * and makes Lowering stack overflow when lowering functions with lots of arguments.
203+
/*
204+
def rec(as: Ls[Bool -> st], asr: Ls[Arg]): Block = as match
205+
case Nil => k(Call(fr, asr.reverse)(isMlsFun, true))
206+
case (spd, a) :: as =>
207+
subTerm_nonTail(a): ar =>
208+
rec(as, Arg(spd, ar) :: asr)
209+
rec(as, Nil)
210+
*/
211+
var asr: Ls[Arg] = Nil
212+
def rec(as: Ls[Bool -> st]): Block = as match
213+
case Nil => End()
214+
case (spd, a) :: as =>
215+
subTerm_nonTail(a): ar =>
216+
asr ::= Arg(spd, ar)
217+
rec(as)
218+
val b = rec(as)
219+
Begin(
220+
b,
221+
k(Call(fr, asr.reverse)(isMlsFun, true))
222+
)
207223
case _ =>
208224
// Application arguments that are not tuples represent spreads, as in `f(...arg)`
209225
subTerm_nonTail(arg): ar =>

hkmc2/shared/src/main/scala/hkmc2/semantics/Elaborator.scala

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,15 @@ object Elaborator:
103103
val untyped = assumeBuiltinTpe("untyped")
104104
// println(s"Builtins: $Int, $Num, $Str, $untyped")
105105
val Predef = assumeBuiltinMod("Predef")
106+
object source:
107+
private val module = assumeBuiltinMod("source")
108+
private def assumeObject(nme: Str): BlockMemberSymbol =
109+
module.tree.definedSymbols.get(nme).getOrElse:
110+
throw new NoSuchElementException:
111+
s"builtin module symbol source.$nme. we have"
112+
val line = assumeObject("line")
113+
val name = assumeObject("name")
114+
val file = assumeObject("file")
106115
def getBuiltinOp(op: Str): Opt[Str] =
107116
if getBuiltin(op).isDefined then builtinBinOps.get(op) else N
108117
/** Classes that do not use `instanceof` in pattern matching. */
@@ -493,8 +502,18 @@ extends Importer:
493502
ErrorReport(
494503
msg"[debinding error] Method '${nme.name}' cannot be accessed without being called." -> nme.toLoc :: Nil)
495504
case S(_) | N => ()
496-
maybeApp:
497-
Term.Sel(preTrm, nme)(sym)
505+
if sym.contains(ctx.builtins.source.line) then
506+
val loc = tree.toLoc.getOrElse(???)
507+
val (line, _, _) = loc.origin.fph.getLineColAt(loc.spanStart)
508+
Term.Lit(Tree.IntLit(loc.origin.startLineNum + line))
509+
else if sym.contains(ctx.builtins.source.name) then
510+
Term.Lit(Tree.StrLit(ctx.getOuter.map(_.nme).getOrElse("")))
511+
else if sym.contains(ctx.builtins.source.file) then
512+
val loc = tree.toLoc.getOrElse(???)
513+
Term.Lit(Tree.StrLit(loc.origin.fileName.toString))
514+
else
515+
maybeApp:
516+
Term.Sel(preTrm, nme)(sym)
498517
case MemberProj(ct, nme) =>
499518
val c = cls(ct, inAppPrefix = false)
500519
val f = c.symbol.flatMap(_.asCls) match

hkmc2/shared/src/main/scala/hkmc2/semantics/ucs/Desugarer.scala

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import syntax.Literal
1010
import Keyword.{as, and, `do`, `else`, is, let, `then`}
1111
import collection.mutable.{HashMap, SortedSet}
1212
import Elaborator.{ctx, Ctxl}
13+
import scala.annotation.targetName
1314

1415
object Desugarer:
1516
extension (op: Keyword.Infix)
@@ -70,9 +71,18 @@ class Desugarer(val elaborator: Elaborator)
7071
type Ctor = SynthSel | Sel | Ident
7172

7273
extension (sequel: Sequel)
74+
@targetName("traceSequel")
7375
def traced(pre: Str, post: Split => Str): Sequel =
7476
if doTrace then ctx => trace(pre, post)(sequel(ctx)) else sequel
75-
77+
78+
extension (desugar: Split => Sequel)
79+
@targetName("traceDesugar")
80+
def traced(pre: Str, post: Split => Str): Split => Sequel =
81+
if doTrace then
82+
fallback => ctx => trace(pre, post)(desugar(fallback)(ctx))
83+
else
84+
desugar
85+
7686
extension (split: Split)
7787
/** Concatenate two splits. */
7888
def ++(fallback: Split): Split =
@@ -347,7 +357,17 @@ class Desugarer(val elaborator: Elaborator)
347357
* @param tree the `Tree` representing the pattern split
348358
* @param scrutSymbol the symbol representing the elaborated scrutinee
349359
*/
350-
def patternSplit(tree: Tree, scrutSymbol: BlockLocalSymbol): Split => Sequel = tree match
360+
def patternSplit(tree: Tree, scrutSymbol: BlockLocalSymbol): Split => Sequel =
361+
patternSplit(N, tree, scrutSymbol)
362+
363+
/** Similar to `patternSplit`, but allows a transformation on the pattern head.
364+
* @param transform the partial pattern before enter this pattern split
365+
*/
366+
def patternSplit(
367+
finish: Opt[Tree => Tree],
368+
tree: Tree,
369+
scrutSymbol: BlockLocalSymbol
370+
): Split => Sequel = tree match
351371
case blk: Block => blk.desugStmts.foldRight(default): (branch, elabFallback) =>
352372
// Terminology: _fallback_ refers to subsequent branches, _backup_ refers
353373
// to the backup plan passed from the parent split.
@@ -376,12 +396,26 @@ class Desugarer(val elaborator: Elaborator)
376396
pre = "patternSplit (alternative)",
377397
post = (res: Split) => s"patternSplit (alternative) >>> ${res.showDbg}"
378398
):
379-
patternSplit(branch, scrutSymbol)(elabFallback(backup)(ctx))(ctx)
399+
patternSplit(finish, branch, scrutSymbol)(elabFallback(backup)(ctx))(ctx)
400+
// For example, `Some of "A" then 0`, and
401+
// ```
402+
// Some of
403+
// "A" then 0
404+
// "B" then 1
405+
// ```
406+
// The precedence of `of` is higher than `then`.
407+
case app @ App(_: (Ident | Sel | SynthSel), Tup(branches)) =>
408+
patternSplit(S(tree => app.copy(rhs = Tup(tree :: Nil))), Block(branches), scrutSymbol)
409+
.traced(pre = s"patternSplit <<< partial pattern", post = (_) => s"patternSplit >>>")
380410
case patternAndMatches ~> consequent => fallback =>
381411
// There are N > 0 conjunct matches. We use `::[T]` instead of `List[T]`.
382412
// Each match is represented by a pair of a _coda_ and a _pattern_
383413
// that is yet to be elaborated.
384414
val (headPattern, _) :: tail = disaggregate(patternAndMatches)
415+
val realPattern = finish match
416+
case N => headPattern
417+
case S(f) => f(headPattern)
418+
log(s"realPattern: $realPattern")
385419
// The `consequent` serves as the innermost split, based on which we
386420
// expand from the N-th to the second match.
387421
val tailSplit =
@@ -395,7 +429,7 @@ class Desugarer(val elaborator: Elaborator)
395429
.traced(
396430
pre = s"conjunct matches <<< $tail",
397431
post = (res: Split) => s"conjunct matches >>> $res")
398-
expandMatch(scrutSymbol, headPattern, tailSplit)(fallback).traced(
432+
expandMatch(scrutSymbol, realPattern, tailSplit)(fallback).traced(
399433
pre = s"patternBranch <<< $patternAndMatches -> ${consequent.fold(_.showDbg, _.showDbg)}",
400434
post = (res: Split) => s"patternBranch >>> ${res.showDbg}")
401435
case _ =>
@@ -447,11 +481,11 @@ class Desugarer(val elaborator: Elaborator)
447481
val arity = cls.arity
448482
if arity =/= args.length then
449483
val m = args.length.toString
450-
ErrorReport:
484+
error:
451485
if arity == 0 then
452-
msg"the constructor does not take any arguments but found $m" -> app.toLoc :: Nil
486+
msg"the constructor does not take any arguments but found $m" -> app.toLoc
453487
else
454-
msg"mismatched arity: expect ${arity.toString}, found $m" -> app.toLoc :: Nil
488+
msg"mismatched arity: expect ${arity.toString}, found $m" -> app.toLoc
455489
val params = scrutSymbol.getSubScrutinees(cls)
456490
Branch(
457491
ref,

hkmc2/shared/src/main/scala/hkmc2/syntax/Keyword.scala

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,16 @@ object Keyword:
121121
val `outer` = Keyword("outer", N, N)
122122
val `pattern` = Keyword("pattern", N, N)
123123

124+
val `->` = Keyword("->", nextPrec, eqPrec)
125+
126+
val maxPrec = curPrec
127+
124128
// * The lambda operator is special:
125129
// * it should associate very strongly on the left and very loosely on the right
126130
// * so that we can write things like `f() |> x => x is 0` ie `(f()) |> (x => (x is 0))`
127-
val `=>` = Keyword("=>", nextPrec, eqPrec)
128-
val `->` = Keyword("->", curPrec, eqPrec)
131+
// * Currently, the precedence of normal operators starts at the maximum precedence of keywords,
132+
// * so the left precedence of `=>` is offset by the number of operator precedence levels to account for that.
133+
val `=>` = Keyword("=>", S(_curPrec + charPrecList.length), eqPrec)
129134

130135
val __ = Keyword("_", N, N)
131136

@@ -139,6 +144,5 @@ object Keyword:
139144

140145
type letLike = `let`.type | `set`.type
141146

142-
val maxPrec = curPrec
143147

144148

hkmc2/shared/src/main/scala/hkmc2/syntax/Lexer.scala

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,69 @@ class Lexer(origin: Origin, dbg: Bool)(using raise: Raise):
165165
case 'r' => str(i + 1, false, '\r' :: cur)
166166
case 'b' => str(i + 1, false, '\b' :: cur)
167167
case 'f' => str(i + 1, false, '\f' :: cur)
168+
case 'u' =>
169+
/**
170+
* This code handles two types of Unicode escape sequences:
171+
*
172+
* + Traditional Unicode escape: "\uXXXX"
173+
* - Consists of the characters '\' and 'u' followed by exactly
174+
* four hexadecimal digits.
175+
* - Example: "\u0041" represents the character 'A'.
176+
* + Unicode code point escape: "\u{XXXXXX}"
177+
* - Starts with "\u{" and ends with "}", allowing between 1 and
178+
* 6 hexadecimal digits in between.
179+
* - Example: "\u{1F600}" represents the grinning face emoji.
180+
*
181+
* For the braced form, the scanned code point is validated to ensure
182+
* that it falls within the allowed Unicode range (0x0 to 0x10FFFF).
183+
* If any errors occur during scanning or conversion, such as
184+
* invalid characters, missing digits, or code points out of range,
185+
* a warning is raised with a precise location.
186+
*/
187+
@tailrec
188+
def scanHexDigits(idx: Int, maxDigits: Int, value: Int, count: Int): (Int, Int, Int) =
189+
if idx < length && isHexDigit(bytes(idx)) then
190+
if count < maxDigits then
191+
scanHexDigits(idx + 1, maxDigits, (value << 4) + Character.digit(bytes(idx), 16), count + 1)
192+
else
193+
scanHexDigits(idx + 1, maxDigits, value, count + 1)
194+
else
195+
(idx, value, count)
196+
197+
if i + 1 < length && bytes(i + 1) == '{' then
198+
// Scan up to 6 hex digits after the opening brace.
199+
val (nextIdx, acc, count) = scanHexDigits(i + 2, 6, 0, 0)
200+
val result = if count == 0 then
201+
raise(WarningReport(msg"Expected at least one hexadecimal digit in Unicode escape sequence" -> S(loc(i + 1, nextIdx)) :: Nil,
202+
source = Lexing))
203+
cur
204+
else if count > 6 then
205+
raise(WarningReport(msg"Too many hexadecimal digits in Unicode escape sequence" -> S(loc(nextIdx - (count - 6), nextIdx)) :: Nil,
206+
source = Lexing))
207+
cur
208+
else if acc > 0x10FFFF then
209+
raise(WarningReport(msg"Unicode code point out of range: 0x${acc.toHexString}" -> S(loc(i + 2, nextIdx)) :: Nil,
210+
source = Lexing))
211+
cur
212+
else
213+
Character.toChars(acc).reverseIterator.toList ::: cur
214+
// Close the brace.
215+
val finalIdx = if nextIdx >= length || bytes(nextIdx) != '}' then
216+
raise(WarningReport(msg"Unterminated Unicode escape sequence: missing '}'" -> S(loc(nextIdx, nextIdx)) :: Nil,
217+
source = Lexing))
218+
nextIdx
219+
else
220+
nextIdx + 1
221+
str(finalIdx, false, result)
222+
else
223+
// Process the traditional 4-digit Unicode escape (\uXXXX).
224+
val (nextIdx, acc, count) = scanHexDigits(i + 1, 4, 0, 0)
225+
if count != 4 then
226+
raise(WarningReport(msg"Invalid Unicode escape sequence: expected 4 hexadecimal digits but got ${count.toString}" -> S(loc(i + 1, nextIdx)) :: Nil,
227+
source = Lexing))
228+
str(nextIdx, false, cur)
229+
else
230+
str(nextIdx, false, acc.toChar :: cur)
168231
case ch =>
169232
raise(WarningReport(msg"Found invalid escape character" -> S(loc(i, i + 1)) :: Nil,
170233
source = Lexing))

hkmc2/shared/src/main/scala/hkmc2/syntax/Parser.scala

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ import hkmc2.syntax.Keyword.Ellipsis
1717
import semantics.Elaborator.State
1818

1919

20+
val charPrecList: List[Str] = List(
21+
"", // `of` rhs
22+
",",
23+
// ^ for keywords
24+
";",
25+
// "=", // higher than || means `a == 1 || b` parses surprisingly
26+
"@",
27+
":",
28+
"|",
29+
"&",
30+
"=",
31+
"/ \\",
32+
"^",
33+
// "= !",
34+
"!",
35+
"< >",
36+
"+ -",
37+
// "* / %",
38+
"* %",
39+
"~",
40+
"", // Precedence of prefix operators
41+
"", // Precedence of application
42+
".",
43+
)
44+
45+
2046
object Parser:
2147

2248
type TokLoc = (Stroken, Loc)
@@ -31,30 +57,7 @@ object Parser:
3157
false
3258

3359
private val precOf: Map[Char,Int] =
34-
List(
35-
"", // `of` rhs
36-
",",
37-
// ^ for keywords
38-
";",
39-
// "=", // higher than || means `a == 1 || b` parses surprisingly
40-
"@",
41-
":",
42-
"|",
43-
"&",
44-
"=",
45-
"/ \\",
46-
"^",
47-
// "= !",
48-
"!",
49-
"< >",
50-
"+ -",
51-
// "* / %",
52-
"* %",
53-
"~",
54-
"", // Precedence of prefix operators
55-
"", // Precedence of application
56-
".",
57-
).zipWithIndex.flatMap {
60+
charPrecList.zipWithIndex.flatMap {
5861
case (cs, i) => cs.filterNot(_ === ' ').map(_ -> (i + Keyword.maxPrec.get))
5962
}.toMap.withDefaultValue(Int.MaxValue)
6063

@@ -72,7 +75,7 @@ object Parser:
7275
(Keyword.maxPrec.get, Keyword.maxPrec.get)
7376
case _ =>
7477
val r = opStr.last
75-
(precOf(opStr.head), precOf(r) - (if r === '@' || r === '/' || r === ',' || r === ':' then 1 else 0))
78+
(precOf(opStr.head), precOf(r) - (if r === '/' || r === ',' || r === ':' then 1 else 0))
7679
}
7780
val prefixOps: Set[Str] = Set("!", "+", "-", "~", "@")
7881

@@ -275,7 +278,7 @@ abstract class Parser(
275278
case Nil => Nil
276279
case (NEWLINE, _) :: _ if allowNewlines => consume; blockOf(rule, annotations, allowNewlines)
277280
case (SPACE, _) :: _ => consume; blockOf(rule, annotations, allowNewlines)
278-
case (IDENT("@", _), l0) :: _ =>
281+
case (IDENT("@", _), l0) :: rest if rest.nonEmpty =>
279282
consume
280283
blockOf(rule, simpleExpr(AppPrec) :: annotations, allowNewlines)
281284
case (tok @ (id: IDENT), loc) :: _ =>
@@ -498,7 +501,7 @@ abstract class Parser(
498501
def simpleExpr(prec: Int)(using Line): Tree = wrap(prec)(simpleExprImpl(prec))
499502
def simpleExprImpl(prec: Int): Tree =
500503
yeetSpaces match
501-
case (IDENT("@", _), l0) :: _ =>
504+
case (IDENT("@", _), l0) :: rest if rest.nonEmpty =>
502505
consume
503506
val annotation = simpleExpr(AppPrec)
504507
Annotated(annotation, simpleExpr(prec))
@@ -536,7 +539,8 @@ abstract class Parser(
536539
exprCont(
537540
Quoted(InfixApp(lhs, kw, Unquoted(rhs)).withLoc(S(loc))).withLoc(S(l ++ loc)),
538541
prec, allowNewlines = true)
539-
case (KEYWORD(kw @ (Keyword.`=>` | Keyword.`->`)), l0) :: _ =>
542+
case (KEYWORD(kw @ (Keyword.`=>` | Keyword.`->`)), l0) :: _
543+
if kw.leftPrecOrMin > prec =>
540544
consume
541545
val rhs = effectfulRhs(kw.rightPrecOrMin)
542546
val lhs = bk match
@@ -753,7 +757,8 @@ abstract class Parser(
753757
// case (KEYWORD(kw @ (Keyword.`=`)), l0) :: _ if kw.leftPrecOrMin > prec =>
754758
// consume
755759
// ???
756-
case (KEYWORD(kw @ (Keyword.`=>` | Keyword.`->`)), l0) :: _ if kw.leftPrecOrMin > prec =>
760+
case (KEYWORD(kw @ (Keyword.`=>` | Keyword.`->`)), l0) :: _
761+
if kw.leftPrecOrMin > prec =>
757762
consume
758763
val rhs = effectfulRhs(kw.rightPrecOrMin)
759764
val res = acc match
@@ -992,7 +997,7 @@ abstract class Parser(
992997
yeetSpaces match
993998
case (NEWLINE, l0) :: _ =>
994999
consume
995-
???
1000+
lastWords(s"infix on newline at ${l0.showStart}:${l0.showEnd}")
9961001
case _ =>
9971002
if verbose then printDbg("$ parsing the right-hand side")
9981003
val rhs = expr(kw.rightPrecOrMin)

hkmc2/shared/src/main/scala/hkmc2/syntax/Tree.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ enum Tree extends AutoLocated:
169169
case Open(_) => "open"
170170
case MemberProj(_, _) => "member projection"
171171
case Keywrd(kw) => s"'${kw.name}' keyword"
172+
case Unt() => "unit"
172173

173174
def deparenthesized: Tree = this match
174175
case Bra(BracketKind.Round, inner) => inner.deparenthesized

0 commit comments

Comments
 (0)