From 43c3c1bc3d09c85104cda4d9bfed2327f7f9e4f0 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Sun, 17 Jan 2016 20:18:05 -0400 Subject: - Modified the syntax for multi-line text so line-delimiters (\ \) are no longer necessary, and necessary indentation is deduced from the column of the initial double-quote (") delimiting the text. - Modified some functions in the reader so they give back information as to whether the line was fully consumed or not after each reading. --- src/lux/lexer.clj | 93 +++++++++++++++++++++++++----------------------------- src/lux/reader.clj | 12 +++---- 2 files changed, 49 insertions(+), 56 deletions(-) diff --git a/src/lux/lexer.clj b/src/lux/lexer.clj index 59f49d6a1..74efc9fc4 100644 --- a/src/lux/lexer.clj +++ b/src/lux/lexer.clj @@ -55,36 +55,35 @@ (defn ^:private clean-line [raw-line] (string/replace raw-line #"\\." escape-char*)) -(def ^:private lex-text-line - (&reader/read-regex #"^(.*) \\$")) - -(def ^:private lext-text-line-prefix - (&reader/read-regex #"^(\s*\\ )")) - -(defn ^:private lex-text-next-line [within-multiline? lex-text-body] - (&/try-all% (&/|list (if within-multiline? - (|do [[_ blank-line] (&reader/read-regex #"^()$") - next-part (lex-text-next-line within-multiline? lex-text-body)] - (return (str "\n" next-part))) - (fail "")) - (|do [[_ line-prefix] lext-text-line-prefix - next-part lex-text-body] - (return (str "\n" next-part)))))) - -(defn ^:private lex-text-body [within-multiline?] - (&/try-all% (&/|list (|do [[_ ^String this-line*] lex-text-line - :let [this-line (.substring this-line* 0 (- (.length this-line*) 2))] - next-lines (lex-text-next-line true (lex-text-body true))] - (return (str (clean-line this-line) - next-lines)) - ) - (|do [[_ ^String pre-quotes] (&reader/read-regex #"^([^\"]*)") - post-quotes (if (.endsWith pre-quotes "\\") - (|do [_ (&reader/read-regex #"^([\"])") - next-part (lex-text-body within-multiline?)] - (return (str "\"" next-part))) - (return ""))] - (return (clean-line (str pre-quotes post-quotes))))))) +(defn ^:private lex-text-body [offset] + (|do [[_ eol? ^String pre-quotes] (&reader/read-regex #"^([^\"]*)") + post-quotes (if (.endsWith pre-quotes "\\") + (if eol? + (fail "[Lexer Error] Can't leave dangling back-slash \\") + (|do [_ (&reader/read-regex #"^([\"])") + next-part (lex-text-body offset)] + (return (str "\"" next-part)))) + (if eol? + (|do [[_ _ ^String line-prefix] (&reader/read-regex #"^( +|$)") + :let [empty-line? (= "" line-prefix)] + _ (&/assert! (or empty-line? + (>= (.length line-prefix) offset)) + "Each line of a multi-line text must have an appropriate offset!") + next-part (lex-text-body offset)] + (return (str "\n" + (if empty-line? + "" + (.substring line-prefix offset)) + next-part))) + (return "")))] + (return (clean-line (str pre-quotes post-quotes))))) + +(def ^:private lex-text + (|do [[meta _ _] (&reader/read-text "\"") + :let [[_ _ _column] meta] + token (lex-text-body (inc _column)) + _ (&reader/read-text "\"")] + (return (&/T [meta (&/V $Text token)])))) (def ^:private +ident-re+ #"^([a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|'`:\~\?][0-9a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|'`:\~\?]*)" ;; #"^([^0-9\[\]\(\)\{\};#\s\"][^\[\]\(\)\{\};#\s\"]*)" @@ -92,12 +91,12 @@ ;; [Lexers] (def ^:private lex-white-space - (|do [[meta white-space] (&reader/read-regex #"^(\s+|$)")] + (|do [[meta _ white-space] (&reader/read-regex #"^(\s+|$)")] (return (&/T [meta (&/V $White_Space white-space)])))) (def ^:private lex-single-line-comment (|do [_ (&reader/read-text "##") - [meta comment] (&reader/read-regex #"^(.*)$")] + [meta _ comment] (&reader/read-regex #"^(.*)$")] (return (&/T [meta (&/V $Comment comment)])))) (defn ^:private lex-multi-line-comment [_] @@ -117,7 +116,7 @@ (do-template [ ] (def - (|do [[meta token] (&reader/read-regex )] + (|do [[meta _ token] (&reader/read-regex )] (return (&/T [meta (&/V token)])))) ^:private lex-bool $Bool #"^(true|false)" @@ -126,24 +125,18 @@ ) (def ^:private lex-char - (|do [[meta _] (&reader/read-text "#\"") - token (&/try-all% (&/|list (|do [[_ escaped] (&reader/read-regex #"^(\\.)")] + (|do [[meta _ _] (&reader/read-text "#\"") + token (&/try-all% (&/|list (|do [[_ _ escaped] (&reader/read-regex #"^(\\.)")] (escape-char escaped)) - (|do [[_ char] (&reader/read-regex #"^(.)")] + (|do [[_ _ char] (&reader/read-regex #"^(.)")] (return char)))) _ (&reader/read-text "\"")] (return (&/T [meta (&/V $Char token)])))) -(def ^:private lex-text - (|do [[meta _] (&reader/read-text "\"") - token (lex-text-body false) - _ (&reader/read-text "\"")] - (return (&/T [meta (&/V $Text token)])))) - (def ^:private lex-ident - (&/try-all% (&/|list (|do [[meta token] (&reader/read-regex +ident-re+)] + (&/try-all% (&/|list (|do [[meta _ token] (&reader/read-regex +ident-re+)] (&/try-all% (&/|list (|do [_ (&reader/read-text ";") - [_ local-token] (&reader/read-regex +ident-re+) + [_ _ local-token] (&reader/read-regex +ident-re+) ? (&module/exists? token)] (if ? (return (&/T [meta (&/T [token local-token])])) @@ -151,12 +144,12 @@ (return (&/T [meta (&/T [unaliased local-token])]))))) (return (&/T [meta (&/T ["" token])])) ))) - (|do [[meta _] (&reader/read-text ";;") - [_ token] (&reader/read-regex +ident-re+) + (|do [[meta _ _] (&reader/read-text ";;") + [_ _ token] (&reader/read-regex +ident-re+) module-name &/get-module-name] (return (&/T [meta (&/T [module-name token])]))) - (|do [[meta _] (&reader/read-text ";") - [_ token] (&reader/read-regex +ident-re+)] + (|do [[meta _ _] (&reader/read-text ";") + [_ _ token] (&reader/read-regex +ident-re+)] (return (&/T [meta (&/T ["lux" token])]))) ))) @@ -165,13 +158,13 @@ (return (&/T [meta (&/V $Symbol ident)])))) (def ^:private lex-tag - (|do [[meta _] (&reader/read-text "#") + (|do [[meta _ _] (&reader/read-text "#") [_ ident] lex-ident] (return (&/T [meta (&/V $Tag ident)])))) (do-template [ ] (def - (|do [[meta _] (&reader/read-text )] + (|do [[meta _ _] (&reader/read-text )] (return (&/T [meta (&/V &/unit-tag)])))) ^:private lex-open-paren "(" $Open_Paren diff --git a/src/lux/reader.clj b/src/lux/reader.clj index ab3aed73e..2ff8c4093 100644 --- a/src/lux/reader.clj +++ b/src/lux/reader.clj @@ -72,8 +72,8 @@ (let [match-length (.length match) column-num* (+ column-num match-length)] (if (= column-num* (.length line)) - (&/V $Done (&/T [(&/T [file-name line-num column-num]) match])) - (&/V $Yes (&/T [(&/T [(&/T [file-name line-num column-num]) match]) + (&/V $Done (&/T [(&/T [file-name line-num column-num]) true match])) + (&/V $Yes (&/T [(&/T [(&/T [file-name line-num column-num]) false match]) (&/T [(&/T [file-name line-num column-num*]) line])])))) (&/V $No (str "[Reader Error] Pattern failed: " regex)))))) @@ -84,8 +84,8 @@ (let [match-length (.length match) column-num* (+ column-num match-length)] (if (= column-num* (.length line)) - (&/V $Done (&/T [(&/T [file-name line-num column-num]) (&/T [tok1 tok2])])) - (&/V $Yes (&/T [(&/T [(&/T [file-name line-num column-num]) (&/T [tok1 tok2])]) + (&/V $Done (&/T [(&/T [file-name line-num column-num]) true (&/T [tok1 tok2])])) + (&/V $Yes (&/T [(&/T [(&/T [file-name line-num column-num]) false (&/T [tok1 tok2])]) (&/T [(&/T [file-name line-num column-num*]) line])])))) (&/V $No (str "[Reader Error] Pattern failed: " regex)))))) @@ -120,8 +120,8 @@ (let [match-length (.length text) column-num* (+ column-num match-length)] (if (= column-num* (.length line)) - (&/V $Done (&/T [(&/T [file-name line-num column-num]) text])) - (&/V $Yes (&/T [(&/T [(&/T [file-name line-num column-num]) text]) + (&/V $Done (&/T [(&/T [file-name line-num column-num]) true text])) + (&/V $Yes (&/T [(&/T [(&/T [file-name line-num column-num]) false text]) (&/T [(&/T [file-name line-num column-num*]) line])])))) (&/V $No (str "[Reader Error] Text failed: " text)))))) -- cgit v1.2.3