From 8aefdf11b42ebde7dc1877b62b2f067cd52ebb10 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Fri, 14 Apr 2017 20:33:36 -0400 Subject: - Initial parser implementation (still missing correct cursors and multi-line text). --- new-luxc/source/luxc/parser.lux | 204 ++++++++++++++++++++++++++++++++++++++++ new-luxc/source/program.lux | 5 +- 2 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 new-luxc/source/luxc/parser.lux (limited to 'new-luxc') diff --git a/new-luxc/source/luxc/parser.lux b/new-luxc/source/luxc/parser.lux new file mode 100644 index 000000000..585184e6f --- /dev/null +++ b/new-luxc/source/luxc/parser.lux @@ -0,0 +1,204 @@ +(;module: + lux + (lux (control monad) + (data [char] + [text] + [number] + (text ["l" lexer #+ Lexer Monad<Lexer> "l/" Monad<Lexer>] + format)))) + +(def: default-cursor + Cursor + {#;module "" + #;line +0 + #;column +0}) + +(def: space^ + (Lexer Text) + (l;some' l;space)) + +(def: single-line-comment^ + (Lexer Text) + (l;enclosed ["##" "\n"] + (l;some' (l;none-of "\n")))) + +(def: multi-line-comment^ + (Lexer Text) + (let [bound^ (l;alt (l;text ")#") + (l;text "#("))] + (l;rec + (function [multi-line-comment^] + (do Monad<Lexer> + [_ (l;text "#(") + chunks (l;some (l;either (l;some' (l;not bound^)) + (do @ + [sub multi-line-comment^] + (wrap (format "#(" sub ")#"))))) + _ (l;text ")#")] + (wrap (text;join-with "" chunks))))))) + +(def: comment^ + (Lexer Text) + (l;either single-line-comment^ + multi-line-comment^)) + +(def: padded^ + (All [a] (-> (Lexer a) (Lexer a))) + (let [padding^ (l;either space^ comment^)] + (|>. (l;before padding^) + (l;after padding^)))) + +(def: escaped-char^ + (Lexer Char) + (l;after (l;char #"\\") + (do Monad<Lexer> + [code l;any] + (case code + #"t" (wrap #"\t") + #"v" (wrap #"\v") + #"b" (wrap #"\b") + #"n" (wrap #"\n") + #"r" (wrap #"\r") + #"f" (wrap #"\f") + #"\"" (wrap #"\"") + #"\\" (wrap #"\\") + + #"u" + (do Monad<Lexer> + [code (l;between' +1 +4 l;hex-digit)] + (wrap (case (:: number;Hex@Codec<Text,Nat> decode + (format "+" code)) + (#;Right value) + (char;char value) + + _ + (undefined)))) + + _ + (l;fail (format "Invalid escaping syntax: " (%c code))))))) + +(def: raw-char^ + (Lexer Char) + (l;either (l;none-of "\\\"\n") + escaped-char^)) + +(do-template [<name> <tag> <lexer>] + [(def: <name> + (Lexer AST) + (do Monad<Lexer> + [value <lexer>] + (wrap [default-cursor (<tag> value)])))] + + [bool^ #;BoolS + (l;either (l;after (l;text "true") (l/wrap true)) + (l;after (l;text "false") (l/wrap false)))] + [nat^ #;NatS + (l;codec number;Codec<Text,Nat> + (do @ + [sign (l;text "+") + digits (l;many' l;digit)] + (wrap (format sign digits))))] + [int^ #;IntS + (l;codec number;Codec<Text,Int> + (do @ + [sign (l;opt (l;text "-")) + digits (l;many' l;digit)] + (wrap (format (default "" sign) + digits))))] + [real^ #;RealS + (l;codec number;Codec<Text,Real> + (do @ + [sign (l;opt (l;text "-")) + whole (l;many' l;digit) + _ (l;text ".") + frac (l;many' l;digit)] + (wrap (format (default "" sign) + whole + "." + frac))))] + [deg^ #;DegS + (l;codec number;Codec<Text,Deg> + (do @ + [_ (l;text ".") + frac (l;many' l;digit)] + (wrap (format "." frac))))] + [char^ #;CharS + (l;enclosed ["#\"" "\""] raw-char^)] + [text^ #;TextS + (l;enclosed ["\"" "\""] + (l;some' raw-char^))] + ) + +(do-template [<name> <tag> <open> <close> <lexer>] + [(def: (<name> ast^) + (-> (Lexer AST) (Lexer AST)) + (do Monad<Lexer> + [elems (l;enclosed [<open> <close>] + (l;some <lexer>))] + (wrap [default-cursor (<tag> elems)])))] + + [form^ #;FormS "(" ")" ast^] + [tuple^ #;TupleS "[" "]" ast^] + [record^ #;RecordS "{" "}" (l;seq ast^ ast^)] + ) + +(def: ident-part^ + (Lexer Text) + (do Monad<Lexer> + [#let [digits "0123456789" + delimiters "()[]{}#;" + space "\t\v \n\r\f" + head-lexer (l;none-of (format digits delimiters space)) + tail-lexer (l;some' (l;none-of (format delimiters space)))] + head head-lexer + tail tail-lexer] + (wrap (format (char;as-text head) + tail)))) + +(def: ident^ + (Lexer Ident) + ($_ l;either + (do Monad<Lexer> + [_ (l;text ";;") + def-name ident-part^] + (l;fail "Cannot handle ;; syntax for identifiers.")) + (l;seq (l;after (l;text ";") + (l/wrap "lux")) + ident-part^) + (do Monad<Lexer> + [first-part ident-part^] + (l;either (do @ + [_ (l;char #";") + second-part ident-part^] + (wrap [first-part second-part])) + (wrap ["" first-part]))))) + +(do-template [<name> <tag> <lexer>] + [(def: <name> + (Lexer AST) + (do Monad<Lexer> + [value <lexer>] + (wrap [default-cursor (<tag> value)])))] + + [symbol^ #;SymbolS ident^] + [tag^ #;TagS (l;after (l;char #"#") ident^)] + ) + +(def: ast^ + (Lexer AST) + (l;rec (function [ast^] + (padded^ + ($_ l;either + bool^ + nat^ + real^ + int^ + deg^ + char^ + text^ + symbol^ + tag^ + (form^ ast^) + (tuple^ ast^) + (record^ ast^) + ))))) diff --git a/new-luxc/source/program.lux b/new-luxc/source/program.lux index dbe875d12..b1619e830 100644 --- a/new-luxc/source/program.lux +++ b/new-luxc/source/program.lux @@ -2,7 +2,8 @@ lux (lux (control monad) [io #- run] - [cli #+ program: CLI Monad<CLI>])) + [cli #+ program: CLI Monad<CLI>]) + (luxc ["&;" parser])) (type: Path Text) @@ -25,7 +26,7 @@ #sources (List Path)}) (def: (marker tokens) - (-> Text (CLI Unit)) + (-> (List Text) (CLI Unit)) (cli;after (cli;option tokens) (:: Monad<CLI> wrap []))) -- cgit v1.2.3