From 368226ff76ea0a9e29b333b82f287305051593cb Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Fri, 14 Apr 2017 23:08:41 -0400 Subject: - The lexer now takes cursors into account. --- new-luxc/source/luxc/parser.lux | 345 ++++++++++++++++++++++++------------- new-luxc/test/test/luxc/parser.lux | 4 +- 2 files changed, 230 insertions(+), 119 deletions(-) diff --git a/new-luxc/source/luxc/parser.lux b/new-luxc/source/luxc/parser.lux index 010911128..10f406bc7 100644 --- a/new-luxc/source/luxc/parser.lux +++ b/new-luxc/source/luxc/parser.lux @@ -1,11 +1,15 @@ (;module: lux (lux (control monad) - (data [char] + (data [bool] + [char] [text] [number] (text ["l" lexer #+ Lexer Monad "l/" Monad] - format)))) + format) + [product] + (coll [list "L/" Functor Fold] + ["V" vector])))) (def: default-cursor Cursor @@ -13,55 +17,85 @@ #;line +0 #;column +0}) -(def: space^ - (Lexer Text) - (l;some' l;space)) +(def: (space^ where) + (-> Cursor (Lexer [Text Cursor])) + (do Monad + [head (l;some' (l;one-of "\t\v \r\f"))] + (l;either (l;after (l;one-of "\n") + (do @ + [[tail end] (space^ (|> where + (update@ #;line n.inc) + (set@ #;column +0)))] + (wrap [(format head tail) + end]))) + (wrap [head + (|> where + (update@ #;column (n.+ (text;size head))))])))) -(def: single-line-comment^ - (Lexer Text) +(def: (single-line-comment^ where) + (-> Cursor (Lexer [Text Cursor])) (l;enclosed ["##" "\n"] - (l;some' (l;none-of "\n")))) + (do Monad + [comment (l;some' (l;none-of "\n"))] + (wrap [comment + (|> where + (update@ #;line n.inc) + (set@ #;column +0))])))) -(def: multi-line-comment^ +(def: comment-bound^ (Lexer Text) - (let [bound^ (l;alt (l;text ")#") - (l;text "#("))] - (l;rec - (function [multi-line-comment^] - (do Monad - [_ (l;text "#(") - chunks (l;some (l;either (l;some' (l;not bound^)) - (do @ - [sub multi-line-comment^] - (wrap (format "#(" sub ")#"))))) - _ (l;text ")#")] - (wrap (text;join-with "" chunks))))))) + ($_ l;either + (l;text "\n") + (l;text ")#") + (l;text "#("))) -(def: comment^ - (Lexer Text) - (l;either single-line-comment^ - multi-line-comment^)) +(def: (multi-line-comment^ where) + (-> Cursor (Lexer [Text Cursor])) + (do Monad + [_ (l;text "#(") + [comment end] (loop [comment "" + where (|> where + (update@ #;column (n.+ +2)))] + ($_ l;either + (do @ + [_ (l;one-of "\n")] + (recur (format comment "\n") + (|> where + (update@ #;line n.inc) + (set@ #;column +0)))) + (do @ + [chunk (l;some' (l;not comment-bound^))] + (recur (format comment chunk) + (|> where + (update@ #;column (n.+ (text;size chunk)))))) + (do @ + [[sub-comment sub-where] (multi-line-comment^ where)] + (wrap [(format comment "#(" sub-comment ")#") + sub-where])))) + _ (l;text ")#")] + (wrap [comment + (|> end + (update@ #;column (n.+ +2)))]))) -(def: padded^ - (All [a] (-> (Lexer a) (Lexer a))) - (let [padding^ (l;either space^ comment^)] - (|>. (l;before padding^) - (l;after padding^)))) +(def: (comment^ where) + (-> Cursor (Lexer [Text Cursor])) + (l;either (single-line-comment^ where) + (multi-line-comment^ where))) (def: escaped-char^ - (Lexer Char) + (Lexer [Text Char]) (l;after (l;char #"\\") (do Monad [code l;any] (case code - #"t" (wrap #"\t") - #"v" (wrap #"\v") - #"b" (wrap #"\b") - #"n" (wrap #"\n") - #"r" (wrap #"\r") - #"f" (wrap #"\f") - #"\"" (wrap #"\"") - #"\\" (wrap #"\\") + #"t" (wrap ["\\t" #"\t"]) + #"v" (wrap ["\\v" #"\v"]) + #"b" (wrap ["\\b" #"\b"]) + #"n" (wrap ["\\n" #"\n"]) + #"r" (wrap ["\\r" #"\r"]) + #"f" (wrap ["\\f" #"\f"]) + #"\"" (wrap ["\\\"" #"\""]) + #"\\" (wrap ["\\\\" #"\\"]) #"u" (do Monad @@ -69,7 +103,7 @@ (wrap (case (:: number;Hex@Codec decode (format "+" code)) (#;Right value) - (char;char value) + [(format "\\u" code) (char;char value)] _ (undefined)))) @@ -78,70 +112,137 @@ (l;fail (format "Invalid escaping syntax: " (%c code))))))) (def: raw-char^ - (Lexer Char) - (l;either (l;none-of "\\\"\n") + (Lexer [Text Char]) + (l;either (do Monad + [char (l;none-of "\\\"\n")] + (wrap [(char;as-text char) char])) escaped-char^)) -(do-template [ ] - [(def: - (Lexer AST) +(do-template [ ] + [(def: ( where) + (-> Cursor (Lexer [AST Cursor])) (do Monad - [value ] - (wrap [default-cursor ( value)])))] + [chunk ] + (case (:: decode chunk) + (#;Left error) + (l;fail error) + + (#;Right value) + (wrap [[default-cursor ( value)] + (|> where + (update@ #;column (n.+ (text;size chunk))))]))))] [bool^ #;BoolS - (l;either (l;after (l;text "true") (l/wrap true)) - (l;after (l;text "false") (l/wrap false)))] + (l;either (l;text "true") (l;text "false")) + bool;Codec] [nat^ #;NatS - (l;codec number;Codec - (do @ - [sign (l;text "+") - digits (l;many' l;digit)] - (wrap (format sign digits))))] + (l;seq' (l;text "+") (l;many' l;digit)) + number;Codec] [int^ #;IntS - (l;codec number;Codec - (do @ - [sign (l;opt (l;text "-")) - digits (l;many' l;digit)] - (wrap (format (default "" sign) - digits))))] + (l;seq' (l;default "" (l;text "-")) + (l;many' l;digit)) + number;Codec] [real^ #;RealS - (l;codec number;Codec - (do @ - [sign (l;opt (l;text "-")) - whole (l;many' l;digit) - _ (l;text ".") - frac (l;many' l;digit)] - (wrap (format (default "" sign) - whole - "." - frac))))] + ($_ l;seq' + (l;default "" (l;text "-")) + (l;many' l;digit) + (l;text ".") + (l;many' l;digit)) + number;Codec] [deg^ #;DegS - (l;codec number;Codec - (do @ - [_ (l;text ".") - frac (l;many' l;digit)] - (wrap (format "." frac))))] - [char^ #;CharS - (l;enclosed ["#\"" "\""] raw-char^)] - [text^ #;TextS - (l;enclosed ["\"" "\""] - (l;some' raw-char^))] + (l;seq' (l;text ".") + (l;many' l;digit)) + number;Codec] ) -(do-template [ ] - [(def: ( ast^) - (-> (Lexer AST) (Lexer AST)) +(def: (char^ where) + (-> Cursor (Lexer [AST Cursor])) + (do Monad + [[chunk value] (l;enclosed ["#\"" "\""] + raw-char^)] + (wrap [[default-cursor (#;CharS value)] + (|> where + (update@ #;column (function [column] + ($_ n.+ + +3 + column + (text;size chunk)))))]))) + +(def: (text^ where) + (-> Cursor (Lexer [AST Cursor])) + (do Monad + [_ (l;text "\"") + [columns-read text-read] (loop [columns-read +1 + text-read ""] + ($_ l;either + (do @ + [normal (l;many' (l;none-of "\\\"\n"))] + (recur (n.+ columns-read (text;size normal)) + (format text-read normal))) + (do @ + [_ (l;text "\"")] + (wrap [(n.inc columns-read) + text-read])) + (do @ + [[chunk char] escaped-char^] + (wrap [(n.+ columns-read (text;size chunk)) + (format text-read (char;as-text char))]))))] + (wrap [[default-cursor (#;TextS text-read)] + (|> where + (update@ #;column (n.+ columns-read)))]))) + +(do-template [ ] + [(def: ( where ast^) + (-> Cursor + (-> Cursor (Lexer [AST Cursor])) + (Lexer [AST Cursor])) (do Monad - [elems (l;enclosed [ ] - (l;some ))] - (wrap [default-cursor ( elems)])))] + [_ (l;text ) + [elems where'] (loop [elems (: (V;Vector AST) + V;empty) + where where] + (l;either (do @ + [[elem where'] (ast^ where)] + (recur (V;add elem elems) + where')) + (do @ + [[_ where'] (l;either (space^ where) + (comment^ where)) + _ (l;text )] + (wrap [(V;to-list elems) + (|> where' + (update@ #;column n.inc))]))))] + (wrap [[default-cursor ( elems)] + where'])))] - [form^ #;FormS "(" ")" ast^] - [tuple^ #;TupleS "[" "]" ast^] - [record^ #;RecordS "{" "}" (l;seq ast^ ast^)] + [form^ #;FormS "(" ")"] + [tuple^ #;TupleS "[" "]"] ) +(def: (record^ where ast^) + (-> Cursor + (-> Cursor (Lexer [AST Cursor])) + (Lexer [AST Cursor])) + (do Monad + [_ (l;text "{") + [elems where'] (loop [elems (: (V;Vector [AST AST]) + V;empty) + where where] + (l;either (do @ + [[key where] (ast^ where) + [val where'] (ast^ where)] + (recur (V;add [key val] elems) + where')) + (do @ + [[_ where'] (l;either (space^ where) + (comment^ where)) + _ (l;text "}")] + (wrap [(V;to-list elems) + (|> where' + (update@ #;column n.inc))]))))] + (wrap [[default-cursor (#;RecordS elems)] + where']))) + (def: ident-part^ (Lexer Text) (do Monad @@ -156,49 +257,59 @@ tail)))) (def: ident^ - (Lexer Ident) + (Lexer [Ident Nat]) ($_ l;either (do Monad [_ (l;text ";;") def-name ident-part^] (l;fail "Cannot handle ;; syntax for identifiers.")) - (l;seq (l;after (l;text ";") - (l/wrap "lux")) - ident-part^) + (do Monad + [_ (l;text ";") + def-name ident-part^] + (wrap [["lux" def-name] + (n.inc (text;size def-name))])) (do Monad [first-part ident-part^] (l;either (do @ [_ (l;char #";") second-part ident-part^] - (wrap [first-part second-part])) - (wrap ["" first-part]))))) + (wrap [[first-part second-part] + ($_ n.+ + (text;size first-part) + +1 + (text;size second-part))])) + (wrap [["" first-part] + (text;size first-part)]))))) (do-template [ ] - [(def: - (Lexer AST) + [(def: ( where) + (-> Cursor (Lexer [AST Cursor])) (do Monad - [value ] - (wrap [default-cursor ( value)])))] + [[value length] ] + (wrap [[default-cursor ( value)] + (|> where + (update@ #;column (n.+ length)))])))] [symbol^ #;SymbolS ident^] [tag^ #;TagS (l;after (l;char #"#") ident^)] ) -(def: #export ast^ - (Lexer AST) - (l;rec (function [ast^] - (padded^ - ($_ l;either - bool^ - nat^ - real^ - int^ - deg^ - char^ - text^ - symbol^ - tag^ - (form^ ast^) - (tuple^ ast^) - (record^ ast^) - ))))) +(def: #export (ast^ where) + (-> Cursor (Lexer [AST Cursor])) + (do Monad + [[_ where] (l;either (space^ where) + (comment^ where))] + ($_ l;either + (bool^ where) + (nat^ where) + (real^ where) + (int^ where) + (deg^ where) + (char^ where) + (text^ where) + (symbol^ where) + (tag^ where) + (form^ where ast^) + (tuple^ where ast^) + (record^ where ast^) + ))) diff --git a/new-luxc/test/test/luxc/parser.lux b/new-luxc/test/test/luxc/parser.lux index 7ccc0c451..d6b420660 100644 --- a/new-luxc/test/test/luxc/parser.lux +++ b/new-luxc/test/test/luxc/parser.lux @@ -71,11 +71,11 @@ (test: "Lux code parser." [sample ast^] (assert "Can parse Lux code." - (|> &;ast^ + (|> (&;ast^ default-cursor) (l;run (ast;to-text sample)) (case> (#;Left error) false - (#;Right parsed) + (#;Right [parsed _]) (:: ast;Eq = parsed sample)) ))) -- cgit v1.2.3