starting to remove explicit space and comment

uben0 · Sep 14, 2023 · c19be8e
1 parent 3076c9d
commit c19be8e
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 56 deletions.
diff --git a/corpus/test.scm b/corpus/test.scm
@@ -5,7 +5,6 @@ a b
 ---------------------
 
 (source_file
-	(text)
 	(text))
 
 
@@ -16,7 +15,6 @@ a b#a b
 ---------------------
 
 (source_file
-	(text)
 	(text)
 	(ident)
 	(text))
@@ -45,7 +43,6 @@ Test 003
 	(branch
 		condition: (ident)
 		(block))
-	(text)
 	(text))
 
 
@@ -103,8 +100,6 @@ Hello #a + b World!
 (source_file
 	(text)
 	(ident)
-	(text)
-	(text)
 	(text))
 
 
@@ -198,7 +193,6 @@ Test 014
 
 (source_file
 	(content
-		(text)
 		(text)))
 
 
@@ -314,7 +308,6 @@ elsa {}
 	(branch
 		condition: (ident)
 		(block))
-	(text)
 	(text))
 
 
@@ -328,7 +321,6 @@ Test 023
 	(branch
 		condition: (ident)
 		(block))
-	(text)
 	(text))
 
 
@@ -342,11 +334,9 @@ d e
 ---------------------
 
 (source_file
-	(text)
 	(text)
 	(text)
 	(parbreak)
-	(text)
 	(text))
 
 
@@ -719,7 +709,6 @@ a b
 (source_file
 	(parbreak)
 	(text)
-	(text)
 	(parbreak))
 
 
@@ -817,7 +806,7 @@ Test 059
 (source_file
 	(let
 		pattern: (ident)
-		value: (comment)
+		(comment)
 		value: (number)))
 
 
@@ -829,7 +818,7 @@ Test 060
 
 (source_file
 	(let
-		pattern: (comment)
+		(comment)
 		pattern: (ident)
 		value: (number)))
 
@@ -1313,7 +1302,6 @@ Sum is #add(2, 3).
 			(ident)
 			(ident)))
 	(text)
-	(text)
 	(call
 		item: (ident)
 		(group
@@ -1330,7 +1318,6 @@ Test 095
 
 (source_file
 	(ident)
-	(text)
 	(text))
 
 
@@ -1860,7 +1847,6 @@ c, d
 	(import
 		(string)
 		(ident))
-	(text)
 	(text))
 
 
@@ -2053,7 +2039,6 @@ https://hello\ world.com Hey
 (source_file
 	(url)
 	(linebreak)
-	(text)
 	(text))
 
 
@@ -2081,7 +2066,6 @@ Test 149
 		condition: (bool)
 		(block))
 	(comment)
-	(text)
 	(text))
 
 
@@ -2108,7 +2092,6 @@ Test 151
 
 (source_file
 	(return)
-	(text)
 	(text))
 
 
@@ -2324,7 +2307,6 @@ Test 165
 
 (source_file
 	(content)
-	(text)
 	(text))
 
 
@@ -3041,7 +3023,6 @@ _ + e _
 
 (source_file
 	(emph
-		(text)
 		(text)))
 
 
@@ -3054,7 +3035,6 @@ _ #e + Hello _
 (source_file
 	(emph
 		(ident)
-		(text)
 		(text)))
 
 
@@ -3475,8 +3455,8 @@ $mat(/* hello */ delim: "[")$
 	(math
 		(call
 			item: (ident)
+			(comment)
 			(tagged
-				field: (comment)
 				field: (ident)
 				(string)))))
 
@@ -3729,7 +3709,6 @@ Test 266
 (source_file
 	(builtin)
 	(text)
-	(text)
 	(builtin)
 	(text))
 
@@ -3837,7 +3816,6 @@ Test 271
 					(ident))
 				(linebreak)
 				(emph
-					(text)
 					(text)
 					(ident))))))
 
@@ -3985,7 +3963,6 @@ if critical
 			item: (builtin)
 			(group
 				(builtin))))
-	(text)
 	(text))
 
 
@@ -4016,9 +3993,6 @@ This list is affected: #[
 ---------------------
 
 (source_file
-	(text)
-	(text)
-	(text)
 	(text)
 	(content
 		(set
@@ -4553,8 +4527,8 @@ else /*Hello*/ if true [] /* World */ Hello
 		(comment)
 		(branch
 			condition: (bool)
-			(content)
-			(comment)))
+			(content)))
+	(comment)
 	(text))
 
 
@@ -4885,3 +4859,15 @@ Test 335
 (source_file
 	(raw_blck
 		(blob)))
+
+
+=====================
+Test 336
+=====================
+/* Hello */ = World
+---------------------
+
+(source_file
+	(comment)
+	(heading
+		(text)))
diff --git a/grammar.js b/grammar.js
@@ -15,7 +15,7 @@ const ALPHANUM = /[\p{Alphabetic}\p{Nd}\p{Nl}\p{No}]/;
 
 // extras
 function ws($) {
-  return optional($._cws);
+  return optional($._ws);
 }
 
 // a line break in a content context
@@ -25,17 +25,16 @@ function content_lb($) {
 
 // document as a whole, or what is inside content delimiter
 function content($) {
-  const elem = $._line_content;
+  const elem = $._theline_content;
   const sep = content_lb($);
-  return seq(optional(sep), repeat(seq(elem, sep)), optional(elem));
-  // return repeat(choice($._line_content, content_lb($)));
+  return seq(optional(elem), repeat(seq(sep, optional(elem))));
 }
 
 // content inside emph or strong delimiters
 function inside($) {
   return seq(
     // the first line can't contain markup like headings or items
-    repeat(choice($._sp, $.comment, $._markup)),
+    repeat($._markup),
     optional(seq(
       content_lb($),
       // after the first new line, it is just regular content
@@ -89,11 +88,6 @@ module.exports = grammar({
     $._token_immediate_math_apply,
     $._token_immediate_math_field,
     $._token_immediate_math_prime,
-    // $._token_immediate_lbrk,
-    // $._token_immediate_lpar,
-    // $._token_immediate_dot,
-    // $._token_immediate_ident,
-    // $._token_immediate_math_ident,
 
     $._recovery,
   ],
@@ -105,15 +99,11 @@ module.exports = grammar({
   rules: {
     source_file: $ => content($),
 
-    _line_content: $ => choice(
-      seq($._csp, optional($._theline_content)),
-      $._theline_content
-    ),
     _theline_content: $ => choice(
       $.heading,
       $.item,
       $.term,
-      seq($._markup, repeat(choice($._sp, $.comment, $._markup))),
+      repeat1($._markup),
     ),
 
     parbreak: $ => token(seq(LB, repeat1(seq(repeat(SP), LB)))),
@@ -123,16 +113,15 @@ module.exports = grammar({
     ), $._token_immediate_set),
     url: $ => seq(/http(s?):\/\//, $._token_url),
 
-    _ws: $ => token(repeat1(WS)),
+    _ws: $ => prec(40, repeat1(LB)),
 
     // this token matches `_`, `*` and `"` when they are between alphanumeric
     // characters because, in that case, they do not count as markup
     _anti_markup: $ => token(seq(ALPHANUM, /[_*"]/, ALPHANUM)),
 
     linebreak: $ => /\\/,
     quote: $ => /"|'/,
-    _cws: $ => prec(40, repeat1(choice($.comment, $._ws))),
-    _csp: $ => prec(40, repeat1(choice($.comment, $._sp))),
+    // _cws: $ => prec(40, repeat1($._ws)),
 
     _markup: $ => choice(
       $._code,
@@ -232,8 +221,8 @@ module.exports = grammar({
       $._math_ws_suffix,
       alias($._math_token_align, $.align),
     ),
-    _math_ws_prefix:   $ => prec(8, seq($._cws, $._math_expr)),
-    _math_ws_suffix:   $ => prec(7, seq($._math_expr, $._cws)),
+    _math_ws_prefix:   $ => prec(8, seq($._ws, $._math_expr)),
+    _math_ws_suffix:   $ => prec(7, seq($._math_expr, $._ws)),
 
     _math_token_align: $ => '&',
     _math_token_colon: $ => ':',
@@ -285,7 +274,7 @@ module.exports = grammar({
     _math_tag: $ =>prec(9, choice(
       alias($._token_math_ident, $.ident),
       alias($._token_math_letter, $.ident),
-      seq($._cws, $._math_tag),
+      seq($._ws, $._math_tag),
     )),
     _math_tagged: $ => prec(9, seq(field('field', $._math_tag), $._math_token_colon, repeat1($._math_expr))),
     _math_apply: $ => prec(7, seq(
@@ -409,8 +398,8 @@ module.exports = grammar({
       $._expr_ws_suffix,
     ),
 
-    _expr_ws_prefix: $ => prec(14, seq($._cws, $._expr)),
-    _expr_ws_suffix: $ => prec(13, seq($._expr, $._cws)),
+    _expr_ws_prefix: $ => prec(14, seq($._ws, $._expr)),
+    _expr_ws_suffix: $ => prec(13, seq($._expr, $._ws)),
     _identifier: $ => /[\p{XID_Start}_][\p{XID_Continue}\-]*/,
     ident:  $ => seq($._identifier, $._token_immediate_set),
     unit:   $ => $._token_unit,
@@ -426,7 +415,7 @@ module.exports = grammar({
       optional(seq($.unit, $._token_immediate_set))
     )),
     string: $ => seq('"', repeat(choice($._token_string_blob, $.escape)), '"', $._token_immediate_set),
-    elude:  $ => prec.left(2, seq('..', optional($._expr), ws($))),
+    elude:  $ => prec.left(2, seq('..', optional(choice($._expr, $._ws)))),
     assign: $ => prec.right(4, seq(field('pattern', $._expr), alias(token(choice('=', '+=', '-=', '*=', '/=')), "assign"), field('value', $._expr))),
     lambda: $ => prec.right(5, seq(field('pattern', $._expr), '=>', field('value', $._expr))),
     or:     $ => prec.left(6, seq($._expr, 'or', $._expr)),
@@ -463,7 +452,7 @@ module.exports = grammar({
     ),
     block: $ => seq(
       '{',
-      repeat(choice($._cws, seq($._expr, $._token_blocked_expr_end))),
+      repeat(choice($._ws, seq($._expr, $._token_blocked_expr_end))),
       '}',
       $._token_immediate_set,
     ),