diff options
Diffstat (limited to '')
-rw-r--r-- | source/luxc/lexer.lux | 119 | ||||
-rw-r--r-- | source/luxc/parser.lux | 72 | ||||
-rw-r--r-- | source/luxc/util.lux (renamed from source/util.lux) | 0 |
3 files changed, 191 insertions, 0 deletions
diff --git a/source/luxc/lexer.lux b/source/luxc/lexer.lux new file mode 100644 index 000000000..ed86be68f --- /dev/null +++ b/source/luxc/lexer.lux @@ -0,0 +1,119 @@ +(use ./util #as &util #refer [do return fail try-all]) + +## [Utils] +(def (lex-regex regex) + ...) + +(def (lex-regex2 regex) + ...) + +(def (lex-prefix prefix) + ...) + +(def (escape-char escaped) + (case escaped + "\\t" (return "\t") + "\\b" (return "\b") + "\\n" (return "\n") + "\\r" (return "\r") + "\\f" (return "\f") + "\\\"" (return "\"") + "\\\\" (return "\\") + _ (fail (fold concat "" (list "[Lexer Error] Unknown escape character: " escaped))))) + +(defrec lex-text-body + (try-all (list (do [[prefix escaped] (lex-regex2 "(?s)^([^\\\"\\\\]*)(\\\\.)") + unescaped (escape-char escaped) + postfix lex-text-body] + (return (str prefix unescaped postfix))) + (lex-regex "(?s)^([^\\\"\\\\]*)^")))) + +(def +ident-re+ ...) + +## [Lexers] +(def lex-white-space + (do [white-space (lex-regex #"^(\s+)")] + (return (#White-Space white-space)))) + +(def lex-single-line-comment + (do [_ (lex-prefix "##") + comment (lex-regex #"^([^\n]*)") + _ (lex-regex #"^(\n?)")] + (return (#Comment comment)))) + +(def lex-multi-line-comment + (do [_ (lex-prefix "#(") + comment (try-all (list (lex-regex #"(?is)^((?!#\().)*?(?=\)#)") + (do [pre (lex-regex #"(?is)^(.+?(?=#\())") + [_ inner] lex-multi-line-comment + post (lex-regex #"(?is)^(.+?(?=\)#))")] + (return (fold concat "" (list pre "#(" inner ")#" post)))))) + _ (lex-prefix ")#")] + (return (#Comment comment)))) + +(def lex-comment + (try-all (list lex-single-line-comment + lex-multi-line-comment))) + +(do-template [<name> <tag> <regex>] + (def <name> + (do [token (lex-regex <regex>)] + (return (<tag> token)))) + + lex-bool #Bool #"^(true|false)" + lex-real #Real #"^(0|[1-9][0-9]*)\.[0-9]+" + lex-int #Int #"^(0|[1-9][0-9]*)" + lex-ident #Ident +ident-re+) + +(def lex-char + (do [_ (lex-prefix "#\"") + token (try-all (list (do [escaped (lex-regex #"^(\\.)")] + (escape-char escaped)) + (lex-regex #"^(.)"))) + _ (lex-prefix "\"")] + (return (#Char token)))) + +(def lex-text + (do [_ (lex-prefix "\"") + token lex-text-body + _ (lex-prefix "\"")] + (return (#Text token)))) + +(def lex-tag + (do [_ (lex-prefix "#") + token (lex-regex +ident-re+)] + (return (#Tag token)))) + +(do-template [<name> <delim> <tag>] + (def <name> + (do [_ (lex-prefix <delim>)] + (return <tag>))) + + lex-open-paren "(" #Open-Paren + lex-close-paren ")" #Close-Paren + lex-open-bracket "[" #Open-Bracket + lex-close-bracket "]" #Close-Bracket + lex-open-brace "{" #Open-Brace + lex-close-brace "}" #Close-Brace + ) + +(def lex-delimiter + (try-all (list lex-open-paren + lex-close-paren + lex-open-bracket + lex-close-bracket + lex-open-brace + lex-close-brace))) + +;; [Interface] +(def #export lex + (try-all (list lex-white-space + lex-comment + lex-bool + lex-real + lex-int + lex-char + lex-text + lex-ident + lex-tag + lex-delimiter))) diff --git a/source/luxc/parser.lux b/source/luxc/parser.lux new file mode 100644 index 000000000..35ec12b17 --- /dev/null +++ b/source/luxc/parser.lux @@ -0,0 +1,72 @@ +(use ./util #as &util #refer [do return fail try-all repeat]) +(use ./lexer #as &lexer) + +;; [Utils] +(do-template [<name> <close-token> <description> <tag>] + (def (<name> parse) + (do [elems (repeat parse) + token &lexer:lex] + (case token + <close-token> + (return (list (<tag> (fold ++ (list) elems)))) + + _ + (fail (concat (list "[Parser Error] Unbalanced " <description> ".")))))) + + parse-form #&lexer:Close-Paren "parantheses" #Form + parse-tuple #&lexer:Close-Bracket "brackets" #Tuple + ) + +(def (parse-record parse) + (do [elems* (repeat parse) + token &lexer:lex + #let [elems (fold ++ (list) elems*)]] + (case token + #&lexer:Close-Bracket + (if (odd? (size elems)) + (fail "[Parser Error] Records must have an even number of elements.") + (return (list (#Record elems)))) + + _ + (fail "[Parser Error] Unbalanced braces.")))) + +;; [Interface] +(def parse + (do [token &lexer/lex] + (match token + (#&lexer:White-Space _) + (return (list)) + + (#&lexer:Comment _) + (return (list)) + + (#&lexer:Bool ?value) + (return (list [#Bool (jvm:invokestatic Boolean "parseBoolean" [String] [?value])])) + + (#&lexer:Int ?value) + (return (list [#Int (jvm:invokestatic Integer "parseInt" [String] [?value])])) + + (#&lexer:Real ?value) + (return (list [#Real (jvm:invokestatic Float "parseFloat" [String] [?value])])) + + (#&lexer:Char ?value) + (return (list [#Char (jvm:invokevirtual String "charAt" [int] ?value [0])])) + + (#&lexer:Text ?value) + (return (list [#Text ?value])) + + (#&lexer:Ident ?value) + (return (list [#Ident ?value])) + + (#&lexer:Tag ?value) + (return (list [#Tag ?value])) + + #&lexer:Open-Paren + (parse-form parse) + + #&lexer:Open-Bracket + (parse-tuple parse) + + #&lexer:Open-Brace + (parse-record parse) + ))) diff --git a/source/util.lux b/source/luxc/util.lux index 88b035571..88b035571 100644 --- a/source/util.lux +++ b/source/luxc/util.lux |