From 6c896325238b63b6fc09f774968be6da0b9c89c1 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Wed, 22 Aug 2018 17:40:33 -0400 Subject: No more multi-line comments. --- luxc/src/lux/lexer.clj | 17 +------ stdlib/source/lux/compiler/default/syntax.lux | 63 +----------------------- stdlib/test/test/lux/compiler/default/syntax.lux | 36 ++------------ 3 files changed, 6 insertions(+), 110 deletions(-) diff --git a/luxc/src/lux/lexer.clj b/luxc/src/lux/lexer.clj index 0a09c0619..9e1414b7f 100644 --- a/luxc/src/lux/lexer.clj +++ b/luxc/src/lux/lexer.clj @@ -106,26 +106,11 @@ (|do [[meta _ white-space] (&reader/read-regex #"^(\s+|$)")] (return (&/T [meta ($White_Space white-space)])))) -(def ^:private lex-single-line-comment +(def ^:private lex-comment (|do [_ (&reader/read-text "##") [meta _ comment] (&reader/read-regex #"^(.*)$")] (return (&/T [meta ($Comment comment)])))) -(defn- lex-multi-line-comment [_] - (|do [_ (&reader/read-text "#(") - [meta comment] (&/try-all% (&/|list (|do [[meta comment] (&reader/read-regex+ #"(?is)^(?!#\()((?!\)#).)*")] - (return (&/T [meta comment]))) - (|do [[meta pre] (&reader/read-regex+ #"(?is)^((?!#\().)*") - [_ ($Comment inner)] (lex-multi-line-comment nil) - [_ post] (&reader/read-regex+ #"(?is)^((?!\)#).)*")] - (return (&/T [meta (str pre "#(" inner ")#" post)]))))) - _ (&reader/read-text ")#")] - (return (&/T [meta ($Comment comment)])))) - -(def ^:private lex-comment - (&/try-all% (&/|list lex-single-line-comment - (lex-multi-line-comment nil)))) - (do-template [ ] (def (|do [[meta _ token] (&reader/read-regex )] diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux index 6a52687ec..3b3b3e411 100644 --- a/stdlib/source/lux/compiler/default/syntax.lux +++ b/stdlib/source/lux/compiler/default/syntax.lux @@ -132,7 +132,7 @@ ## Single-line comments can start anywhere, but only go up to the ## next new-line. -(def: (single-line-comment^ where) +(def: (comment^ where) (-> Cursor (Lexer Cursor)) (do p.Monad [_ (l.this ..single-line-comment-marker) @@ -142,67 +142,6 @@ (update@ #.line inc) (set@ #.column 0))))) -## This is just a helper parser to find text which doesn't run into -## any special character sequences for multi-line comments. -(def: multi-line-comment-start^ (l.this (format ..sigil open-form))) -(def: multi-line-comment-end^ (l.this (format close-form ..sigil))) - -(def: multi-line-comment-bound^ - (Lexer Any) - ($_ p.either - ..new-line^ - ..multi-line-comment-start^ - ..multi-line-comment-end^)) - -## Multi-line comments are bounded by #( these delimiters, #(and, they may -## also be nested)# )#. -## Multi-line comment syntax must be balanced. -## That is, any nested comment must have matched delimiters. -## Unbalanced comments ought to be rejected as invalid code. -(def: (multi-line-comment^ where) - (-> Cursor (Lexer Cursor)) - (do p.Monad - [_ ..multi-line-comment-start^] - (loop [where (update@ #.column (n/+ 2) where)] - ($_ p.either - ## These are normal chunks of commented text. - (do @ - [chunk (l.many! (l.not! multi-line-comment-bound^))] - (recur (|> where - (update@ #.column (n/+ (get@ #l.distance chunk)))))) - ## This is a special rule to handle new-lines within - ## comments properly. - (do @ - [_ ..new-line^] - (recur (|> where - (update@ #.line inc) - (set@ #.column 0)))) - ## This is the rule for handling nested sub-comments. - ## Ultimately, the whole comment is just treated as text - ## (the comment must respect the syntax structure, but the - ## output produced is just a block of text). - ## That is why the sub-comment is covered in delimiters - ## and then appended to the rest of the comment text. - (do @ - [sub-where (multi-line-comment^ where)] - (recur sub-where)) - ## Finally, this is the rule for closing the comment. - (do @ - [_ ..multi-line-comment-end^] - (wrap (update@ #.column (n/+ 2) where))) - )))) - -## This is the only parser that should be used directly by other -## parsers, since all comments must be treated as either being -## single-line or multi-line. -## That is, there is no syntactic rule prohibiting one type of comment -## from being used in any situation (alternatively, forcing one type -## of comment to be the only usable one). -(def: (comment^ where) - (-> Cursor (Lexer Cursor)) - (p.either (single-line-comment^ where) - (multi-line-comment^ where))) - ## To simplify parsing, I remove any left-padding that a Code token ## may have prior to parsing the token itself. ## Left-padding is assumed to be either white-space or a comment. diff --git a/stdlib/test/test/lux/compiler/default/syntax.lux b/stdlib/test/test/lux/compiler/default/syntax.lux index a9baa546c..1bcb9dad8 100644 --- a/stdlib/test/test/lux/compiler/default/syntax.lux +++ b/stdlib/test/test/lux/compiler/default/syntax.lux @@ -114,25 +114,16 @@ (def: comment-text^ (r.Random Text) - (let [char-gen (|> r.nat (r.filter (function (_ value) - (not (or (text.space? value) - (n/= (char "#") value) - (n/= (char "(") value) - (n/= (char ")") value))))))] + (let [char-gen (|> r.nat (r.filter (|>> (n/= (char "\n")) not)))] (do r.Monad [size (|> r.nat (r/map (n/% 20)))] (r.text char-gen size)))) (def: comment^ (r.Random Text) - (r.either (do r.Monad - [comment comment-text^] - (wrap (format "## " comment "\n"))) - (r.rec (function (_ nested^) - (do r.Monad - [comment (r.either comment-text^ - nested^)] - (wrap (format "#( " comment " )#"))))))) + (do r.Monad + [comment comment-text^] + (wrap (format "## " comment "\n")))) (context: "Multi-line text & comments." (<| (seed 12137892244981970631) @@ -189,23 +180,4 @@ (#e.Success [_ parsed]) (:: code.Equivalence = parsed sample))) - (test "Will reject unbalanced multi-line comments." - (and (case (&.read "" (dict.new text.Hash) - [default-cursor 0 - (format "#(" "#(" unbalanced-comment ")#" - (code.to-text sample))]) - (#e.Error error) - #1 - - (#e.Success [_ parsed]) - #0) - (case (&.read "" (dict.new text.Hash) - [default-cursor 0 - (format "#(" unbalanced-comment ")#" ")#" - (code.to-text sample))]) - (#e.Error error) - #1 - - (#e.Success [_ parsed]) - #0))) )))) -- cgit v1.2.3