From 38fe9e91f451d9682ff7edf65fc395b85ddde961 Mon Sep 17 00:00:00 2001
From: Eduardo Julian
Date: Tue, 10 Feb 2015 02:04:46 -0400
Subject: Super refactoring that breaks the system: Part 1

---
 source/luxc/lexer.lux | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 source/luxc/lexer.lux

(limited to 'source/luxc/lexer.lux')

diff --git a/source/luxc/lexer.lux b/source/luxc/lexer.lux
new file mode 100644
index 000000000..ed86be68f
--- /dev/null
+++ b/source/luxc/lexer.lux
@@ -0,0 +1,119 @@
+(use ./util #as &util #refer [do return fail try-all])
+
+## [Utils]
+(def (lex-regex regex)
+  ...)
+
+(def (lex-regex2 regex)
+  ...)
+
+(def (lex-prefix prefix)
+  ...)
+
+(def (escape-char escaped) ## Maps a backslash escape sequence to the character it denotes; fails on any other input.
+  (case escaped
+    "\\t" (return "\t")
+    "\\b" (return "\b")
+    "\\n" (return "\n")
+    "\\r" (return "\r")
+    "\\f" (return "\f")
+    "\\\"" (return "\"")
+    "\\\\" (return "\\")
+    _ (fail (fold concat "" (list "[Lexer Error] Unknown escape character: " escaped)))))
+
+(defrec lex-text-body ## Contents of a text literal: [unescaped run + escape sequence] segments, recursively, then a final unescaped run.
+  (try-all (list (do [[prefix escaped] (lex-regex2 "(?s)^([^\\\"\\\\]*)(\\\\.)")
+                      unescaped (escape-char escaped)
+                      postfix lex-text-body]
+                   (return (str prefix unescaped postfix))) ## The fallback below must not end in "^": that anchor only lets the empty run match.
+                 (lex-regex "(?s)^([^\\\"\\\\]*)"))))
+
+(def +ident-re+ ...)
+
+## [Lexers]
+(def lex-white-space ## Matches a run of one or more whitespace characters and tags it #White-Space.
+  (do [white-space (lex-regex #"^(\s+)")]
+    (return (#White-Space white-space))))
+
+(def lex-single-line-comment ## "##", then the rest of the line; the optional newline is matched but not kept in the token.
+  (do [_ (lex-prefix "##")
+       comment (lex-regex #"^([^\n]*)")
+       _ (lex-regex #"^(\n?)")]
+    (return (#Comment comment))))
+
+(defrec lex-multi-line-comment ## "#(" ... ")#", with nested comments re-assembled into the text; defrec (like lex-text-body) because it refers to itself.
+  (do [_ (lex-prefix "#(")
+       comment (try-all (list (lex-regex #"(?is)^((?!#\().)*?(?=\)#)")
+                              (do [pre (lex-regex #"(?is)^(.+?(?=#\())")
+                                   [_ inner] lex-multi-line-comment
+                                   post (lex-regex #"(?is)^(.+?(?=\)#))")]
+                                (return (fold concat "" (list pre "#(" inner ")#" post))))))
+       _ (lex-prefix ")#")]
+    (return (#Comment comment))))
+
+(def lex-comment ## Either comment form.
+  (try-all (list lex-single-line-comment
+                 lex-multi-line-comment)))
+
+(do-template [<name> <tag> <regex>] ## One definition per row below: <name> matches <regex> and wraps the token in <tag>.
+  (def <name>
+    (do [token (lex-regex <regex>)]
+      (return (<tag> token))))
+
+  lex-bool  #Bool  #"^(true|false)"
+  lex-real  #Real  #"^(0|[1-9][0-9]*)\.[0-9]+"
+  lex-int   #Int   #"^(0|[1-9][0-9]*)"
+  lex-ident #Ident +ident-re+)
+
+(def lex-char ## Char literal: the #" prefix, then one escape sequence (decoded by escape-char) or one raw character, then a closing quote.
+  (do [_ (lex-prefix "#\"")
+       token (try-all (list (do [escaped (lex-regex #"^(\\.)")]
+                              (escape-char escaped))
+                            (lex-regex #"^(.)")))
+       _ (lex-prefix "\"")]
+    (return (#Char token))))
+
+(def lex-text ## Text literal: lex-text-body between two double quotes.
+  (do [_ (lex-prefix "\"")
+       token lex-text-body
+       _ (lex-prefix "\"")]
+    (return (#Text token))))
+
+(def lex-tag ## Tag: "#" followed by an identifier (+ident-re+).
+  (do [_ (lex-prefix "#")
+       token (lex-regex +ident-re+)]
+    (return (#Tag token))))
+
+(do-template [<name> <text> <tag>] ## One definition per row below: <name> matches the literal <text> and returns the bare <tag>.
+  (def <name>
+    (do [_ (lex-prefix <text>)]
+      (return <tag>)))
+
+  lex-open-paren    "(" #Open-Paren
+  lex-close-paren   ")" #Close-Paren
+  lex-open-bracket  "[" #Open-Bracket
+  lex-close-bracket "]" #Close-Bracket
+  lex-open-brace    "{" #Open-Brace
+  lex-close-brace   "}" #Close-Brace
+  )
+
+(def lex-delimiter ## Any one of the six bracket tokens.
+  (try-all (list lex-open-paren
+                 lex-close-paren
+                 lex-open-bracket
+                 lex-close-bracket
+                 lex-open-brace
+                 lex-close-brace)))
+
+## [Interface]
+(def #export lex ## Lexes one token. NOTE(review): presumably try-all takes the first alternative that succeeds, so order matters (real before int, comment/char before tag) - confirm in ./util.
+  (try-all (list lex-white-space
+                 lex-comment
+                 lex-bool
+                 lex-real
+                 lex-int
+                 lex-char
+                 lex-text
+                 lex-ident
+                 lex-tag
+                 lex-delimiter)))
-- 
cgit v1.2.3