@@ -32,19 +32,24 @@ module.exports = grammar({
32
32
externals : $ => [
33
33
$ . _automatic_semicolon ,
34
34
$ . _indent ,
35
+ $ . _outdent ,
36
+ $ . _simple_string_start ,
37
+ $ . _simple_string_middle ,
38
+ $ . _simple_multiline_string_start ,
35
39
$ . _interpolated_string_middle ,
36
- $ . _interpolated_string_end ,
37
40
$ . _interpolated_multiline_string_middle ,
38
- $ . _interpolated_multiline_string_end ,
39
- $ . _outdent ,
40
- $ . _simple_multiline_string ,
41
- $ . _simple_string ,
41
+ $ . _raw_string_start ,
42
+ $ . _raw_string_middle ,
43
+ $ . _raw_string_multiline_middle ,
44
+ $ . _single_line_string_end ,
45
+ $ . _multiline_string_end ,
42
46
"else" ,
43
47
"catch" ,
44
48
"finally" ,
45
49
"extends" ,
46
50
"derives" ,
47
51
"with" ,
52
+ $ . error_sentinel ,
48
53
] ,
49
54
50
55
inline : $ => [
@@ -209,7 +214,7 @@ module.exports = grammar({
209
214
"package" ,
210
215
field ( "name" , $ . package_identifier ) ,
211
216
// This is slightly more permissive than the EBNF in that it allows any
212
- // kind of delcaration inside of the package blocks. As we're more
217
+ // kind of declaration inside of the package blocks. As we're more
213
218
// concerned with the structure rather than the validity of the program
214
219
// we'll allow it.
215
220
field ( "body" , optional ( $ . template_body ) ) ,
@@ -677,7 +682,7 @@ module.exports = grammar({
677
682
// In theory structural_type should just be added to simple_type,
678
683
// but doing so increases the state of template_body to 4000
679
684
$ . _structural_type ,
680
- // This adds _simple_type, but not the above intentionall/y .
685
+ // This intentionally adds _simple_type, but not the above.
681
686
seq ( $ . _simple_type , field ( "arguments" , $ . arguments ) ) ,
682
687
seq ( $ . _annotated_type , field ( "arguments" , $ . arguments ) ) ,
683
688
seq ( $ . compound_type , field ( "arguments" , $ . arguments ) ) ,
@@ -1540,14 +1545,14 @@ module.exports = grammar({
1540
1545
1541
1546
/**
1542
1547
* Regex patterns created to avoid matching // comments and /* comment starts.
1543
- * This could technically match illeagal tokens such as val ?// = 1
1548
+ * This could technically match illegal tokens such as val ?// = 1
1544
1549
*/
1545
1550
operator_identifier : $ =>
1546
1551
token (
1547
1552
choice (
1548
1553
// opchar minus colon, equal, at
1549
1554
// Technically speaking, Sm (Math symbols https://www.compart.com/en/unicode/category/Sm)
1550
- // should be allowed as a single-characeter opchar, however, it includes `=`,
1555
+ // should be allowed as a single-character opchar, however, it includes `=`,
1551
1556
// so we should avoid that to prevent bad parsing of `=` as infix term or type.
1552
1557
/ [ \- ! # % & * + \/ \\ < > ? \u005e \u007c ~ \u00ac \u00b1 \u00d7 \u00f7 \u2190 - \u2194 \p{ So} ] / ,
1553
1558
seq (
@@ -1616,7 +1621,7 @@ module.exports = grammar({
1616
1621
choice (
1617
1622
seq (
1618
1623
"\\" ,
1619
- choice ( / [ ^ x u ] / , / u u ? [ 0 - 9 a - f A - F ] { 4 } / , / x [ 0 - 9 a - f A - F ] { 2 } / ) ,
1624
+ choice ( / [ ^ x u ] / , / [ u U ] + [ 0 - 9 a - f A - F ] { 4 } / , / x [ 0 - 9 a - f A - F ] { 2 } / ) ,
1620
1625
) ,
1621
1626
/ [ ^ \\ ' \n ] / ,
1622
1627
) ,
@@ -1625,14 +1630,13 @@ module.exports = grammar({
1625
1630
) ,
1626
1631
) ,
1627
1632
1628
- interpolated_string_expression : $ =>
1629
- seq ( field ( "interpolator" , $ . identifier ) , $ . interpolated_string ) ,
1630
-
1631
- _interpolated_string_start : $ => '"' ,
1632
-
1633
- _interpolated_multiline_string_start : $ => '"""' ,
1633
+ interpolated_string_expression : $ =>
1634
+ choice (
1635
+ seq ( field ( "interpolator" , alias ( $ . _raw_string_start , $ . identifier ) ) , alias ( $ . _raw_string , $ . interpolated_string ) ) ,
1636
+ seq ( field ( "interpolator" , $ . identifier ) , $ . interpolated_string ) ,
1637
+ ) ,
1634
1638
1635
- _dollar_escape : $ => seq ( "$" , choice ( "$" , '"' ) ) ,
1639
+ _dollar_escape : $ => alias ( token ( seq ( "$" , choice ( "$" , '"' ) ) ) , $ . escape_sequence ) ,
1636
1640
1637
1641
_aliased_interpolation_identifier : $ =>
1638
1642
alias ( $ . _interpolation_identifier , $ . identifier ) ,
@@ -1643,28 +1647,88 @@ module.exports = grammar({
1643
1647
interpolated_string : $ =>
1644
1648
choice (
1645
1649
seq (
1646
- $ . _interpolated_string_start ,
1650
+ token . immediate ( '"' ) ,
1647
1651
repeat (
1648
1652
seq (
1649
1653
$ . _interpolated_string_middle ,
1650
- choice ( $ . _dollar_escape , $ . interpolation ) ,
1654
+ choice ( $ . _dollar_escape , $ . interpolation , $ . escape_sequence ) ,
1651
1655
) ,
1652
1656
) ,
1653
- $ . _interpolated_string_end ,
1657
+ $ . _single_line_string_end ,
1654
1658
) ,
1655
1659
seq (
1656
- $ . _interpolated_multiline_string_start ,
1660
+ token . immediate ( '"""' ) ,
1657
1661
repeat (
1658
1662
seq (
1659
1663
$ . _interpolated_multiline_string_middle ,
1664
+ // Multiline strings ignore escape sequences
1660
1665
choice ( $ . _dollar_escape , $ . interpolation ) ,
1661
1666
) ,
1662
1667
) ,
1663
- $ . _interpolated_multiline_string_end ,
1668
+ $ . _multiline_string_end ,
1669
+ ) ,
1670
+ ) ,
1671
+
1672
+ // We need to handle single-line raw strings separately from interpolated strings,
1673
+ // because raw strings are not parsed for escape sequences. For example, raw strings
1674
+ // are often used for regular expressions, which contain backslashes that would
1675
+ // be invalid if parsed as escape sequences. We do not special case multiline
1676
+ // raw strings, because multiline strings do not parse escape sequences anyway.
1677
+ // Scala handles multiline raw strings identically to other multiline interpolated strings,
1678
+ // so we could parse them as interpolated strings, but I think the code is cleaner
1679
+ // if we maintain the distinction.
1680
+ _raw_string : $ =>
1681
+ choice (
1682
+ seq (
1683
+ $ . _simple_string_start ,
1684
+ seq (
1685
+ repeat (
1686
+ seq (
1687
+ $ . _raw_string_middle ,
1688
+ choice ( $ . _dollar_escape , $ . interpolation ) ,
1689
+ ) ,
1690
+ ) ,
1691
+ $ . _single_line_string_end ,
1692
+ ) ,
1693
+ ) ,
1694
+ seq (
1695
+ $ . _simple_multiline_string_start ,
1696
+ repeat (
1697
+ seq (
1698
+ $ . _raw_string_multiline_middle ,
1699
+ choice ( $ . _dollar_escape , $ . interpolation ) ,
1700
+ )
1701
+ ) ,
1702
+ $ . _multiline_string_end ,
1664
1703
) ,
1665
1704
) ,
1666
1705
1667
- string : $ => choice ( $ . _simple_string , $ . _simple_multiline_string ) ,
1706
+ escape_sequence : _ => token . immediate ( seq (
1707
+ '\\' ,
1708
+ choice (
1709
+ / [ t b n r f " ' \\ ] / ,
1710
+ // The Java spec allows any number of u's and U's at the start of a unicode escape.
1711
+ / [ u U ] + [ 0 - 9 a - f A - F ] { 4 } / ,
1712
+ // Octals are not allowed in Scala 3, but are allowed in Scala 2. tree-sitter
1713
+ // does not have a mechanism for distinguishing between different versions of a
1714
+ // language, so I think it makes sense to allow them. Maybe in the future we
1715
+ // should move them to a `deprecated` syntax node?
1716
+ / [ 0 - 3 ] ? [ 0 - 7 ] { 1 , 2 } / ,
1717
+ ) ,
1718
+ ) ) ,
1719
+
1720
+ string : $ => choice (
1721
+ seq (
1722
+ $ . _simple_string_start ,
1723
+ repeat ( seq ( $ . _simple_string_middle , $ . escape_sequence ) ) ,
1724
+ $ . _single_line_string_end ,
1725
+ ) ,
1726
+ seq (
1727
+ $ . _simple_multiline_string_start ,
1728
+ // Multiline strings ignore escape sequences
1729
+ $ . _multiline_string_end ,
1730
+ ) ,
1731
+ ) ,
1668
1732
1669
1733
_semicolon : $ => choice ( ";" , $ . _automatic_semicolon ) ,
1670
1734
0 commit comments