From 748267663bb0d1e67eb674c010bd97b00b03fff3 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Mon, 15 Feb 2016 01:24:03 -0400 Subject: - Can now use unicode characters in identifiers. - Can now escape unicode characters in both Char and Text. --- src/lux/lexer.clj | 14 ++++++++++---- src/lux/reader.clj | 21 --------------------- 2 files changed, 10 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/lux/lexer.clj b/src/lux/lexer.clj index 772ce2353..cbb351703 100644 --- a/src/lux/lexer.clj +++ b/src/lux/lexer.clj @@ -52,8 +52,13 @@ :else (assert false (str "[Lexer Error] Unknown escape character: " escaped)))) +(defn ^:private escape-unicode [unicode] + (str (char (Integer/valueOf (.substring unicode 2) 16)))) + (defn ^:private clean-line [raw-line] - (string/replace raw-line #"\\." escape-char*)) + (-> raw-line + (string/replace #"\\u[0-9a-fA-F]{4}" escape-unicode) + (string/replace #"\\." escape-char*))) (defn ^:private lex-text-body [offset] (|do [[_ eol? ^String pre-quotes] (&reader/read-regex #"^([^\"]*)") @@ -85,9 +90,8 @@ _ (&reader/read-text "\"")] (return (&/T [meta ($Text token)])))) -(def ^:private +ident-re+ #"^([a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|'`:\~\?][0-9a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|'`:\~\?]*)" - ;; #"^([^0-9\[\]\(\)\{\};#\s\"][^\[\]\(\)\{\};#\s\"]*)" - ) +(def ^:private +ident-re+ + #"^([^0-9\[\]\{\}\(\)\s\"#;][^\[\]\{\}\(\)\s\"#;]*)") ;; [Lexers] (def ^:private lex-white-space @@ -128,6 +132,8 @@ (|do [[meta _ _] (&reader/read-text "#\"") token (&/try-all% (&/|list (|do [[_ _ escaped] (&reader/read-regex #"^(\\.)")] (escape-char escaped)) + (|do [[_ _ ^String unicode] (&reader/read-regex #"^(\\u[0-9a-fA-F]{4})")] + (return (str (char (Integer/valueOf (.substring unicode 2) 16))))) (|do [[_ _ char] (&reader/read-regex #"^(.)")] (return char)))) _ (&reader/read-text "\"")] diff --git a/src/lux/reader.clj b/src/lux/reader.clj index 72d0ee11b..f0509ec19 100644 --- a/src/lux/reader.clj +++ b/src/lux/reader.clj @@ -55,15 +55,6 @@ (when (.find matcher) (.group matcher 0)))) -(defn ^:private re-find3! [^java.util.regex.Pattern regex column ^String line] - (let [matcher (doto (.matcher regex line) - (.region column (.length line)) - (.useAnchoringBounds true))] - (when (.find matcher) - (list (.group matcher 0) - (.group matcher 1) - (.group matcher 2))))) - ;; [Exports] (defn read-regex [regex] (with-line @@ -77,18 +68,6 @@ (&/T [(&/T [file-name line-num column-num*]) line])))) ($No (str "[Reader Error] Pattern failed: " regex)))))) -(defn read-regex2 [regex] - (with-line - (fn [file-name line-num column-num ^String line] - (if-let [[^String match tok1 tok2] (re-find3! regex column-num line)] - (let [match-length (.length match) - column-num* (+ column-num match-length)] - (if (= column-num* (.length line)) - ($Done (&/T [(&/T [file-name line-num column-num]) true (&/T [tok1 tok2])])) - ($Yes (&/T [(&/T [file-name line-num column-num]) false (&/T [tok1 tok2])]) - (&/T [(&/T [file-name line-num column-num*]) line])))) - ($No (str "[Reader Error] Pattern failed: " regex)))))) - (defn read-regex+ [regex] (with-lines (fn [reader] -- cgit v1.2.3