diff options
author | The Lux Programming Language | 2018-08-26 09:14:57 -0400 |
---|---|---|
committer | GitHub | 2018-08-26 09:14:57 -0400 |
commit | 2cfa4184f908054b7bb3c3cdc2372cfbeafdd5d2 (patch) | |
tree | 4223297955b046205c017b58cf31e490b26e8cea /luxc | |
parent | 58c299b90fbb3a20cf4e624fd20e4bb7f5846672 (diff) | |
parent | b614f2875fb2e98e8867399b7013503f2b1a4e4c (diff) |
Merge pull request #47 from LuxLang/faster-lexer
Faster new-luxc lexer/syntax
Diffstat (limited to '')
-rw-r--r-- | luxc/src/lux/analyser/proc/common.clj | 134 | ||||
-rw-r--r-- | luxc/src/lux/compiler/jvm/proc/common.clj | 49 | ||||
-rw-r--r-- | luxc/src/lux/compiler/jvm/rt.clj | 49 | ||||
-rw-r--r-- | luxc/src/lux/lexer.clj | 85 | ||||
-rw-r--r-- | luxc/src/lux/reader.clj | 15 |
5 files changed, 139 insertions, 193 deletions
diff --git a/luxc/src/lux/analyser/proc/common.clj b/luxc/src/lux/analyser/proc/common.clj index 8c6bd9d88..fbdf05546 100644 --- a/luxc/src/lux/analyser/proc/common.clj +++ b/luxc/src/lux/analyser/proc/common.clj @@ -6,7 +6,7 @@ [type :as &type]) (lux.analyser [base :as &&]))) -(defn ^:private analyse-lux-is [analyse exo-type ?values] +(defn- analyse-lux-is [analyse exo-type ?values] (&type/with-var (fn [$var] (|do [:let [(&/$Cons left (&/$Cons right (&/$Nil))) ?values] @@ -17,7 +17,7 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T ["lux" "is"]) (&/|list =left =right) (&/|list))))))))) -(defn ^:private analyse-lux-try [analyse exo-type ?values] +(defn- analyse-lux-try [analyse exo-type ?values] (&type/with-var (fn [$var] (|do [:let [(&/$Cons op (&/$Nil)) ?values] @@ -30,7 +30,7 @@ (&&/$proc (&/T ["lux" "try"]) (&/|list =op) (&/|list))))))))) (do-template [<name> <proc> <input-type> <output-type>] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons x (&/$Cons y (&/$Nil))) ?values] =x (&&/analyse-1 analyse <input-type> x) =y (&&/analyse-1 analyse <input-type> y) @@ -39,13 +39,13 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T <proc>) (&/|list =x =y) (&/|list))))))) - ^:private analyse-text-eq ["text" "="] &type/Text &type/Bit - ^:private analyse-text-lt ["text" "<"] &type/Text &type/Bit - ^:private analyse-text-concat ["text" "concat"] &type/Text &type/Text + analyse-text-eq ["text" "="] &type/Text &type/Bit + analyse-text-lt ["text" "<"] &type/Text &type/Bit + analyse-text-concat ["text" "concat"] &type/Text &type/Text ) (do-template [<name> <proc-name> <output-type>] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons text (&/$Cons part (&/$Cons start (&/$Nil)))) ?values] =text (&&/analyse-1 analyse &type/Text text) =part (&&/analyse-1 analyse &type/Text part) @@ -57,15 +57,15 @@ (&/|list =text =part =start) (&/|list))))))) - ^:private analyse-text-index "index" (&/$Apply &type/Nat &type/Maybe) + analyse-text-index "index" (&/$Apply &type/Nat &type/Maybe) ) -(defn ^:private analyse-text-clip [analyse exo-type ?values] +(defn- analyse-text-clip [analyse exo-type ?values] (|do [:let [(&/$Cons text (&/$Cons from (&/$Cons to (&/$Nil)))) ?values] =text (&&/analyse-1 analyse &type/Text text) =from (&&/analyse-1 analyse &type/Nat from) =to (&&/analyse-1 analyse &type/Nat to) - _ (&type/check exo-type (&/$Apply &type/Text &type/Maybe)) + _ (&type/check exo-type &type/Text) _cursor &/cursor] (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T ["text" "clip"]) @@ -73,7 +73,7 @@ (&/|list))))))) (do-template [<name> <proc>] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons text (&/$Nil)) ?values] =text (&&/analyse-1 analyse &type/Text text) _ (&type/check exo-type &type/Nat) @@ -83,14 +83,14 @@ (&/|list =text) (&/|list))))))) - ^:private analyse-text-size "size" + analyse-text-size "size" ) -(defn ^:private analyse-text-char [analyse exo-type ?values] +(defn- analyse-text-char [analyse exo-type ?values] (|do [:let [(&/$Cons text (&/$Cons idx (&/$Nil))) ?values] =text (&&/analyse-1 analyse &type/Text text) =idx (&&/analyse-1 analyse &type/Nat idx) - _ (&type/check exo-type (&/$Apply &type/Nat &type/Maybe)) + _ (&type/check exo-type &type/Nat) _cursor &/cursor] (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T ["text" "char"]) @@ -100,7 +100,7 @@ (do-template [<name> <op>] (let [inputT (&/$Apply &type/Any &type/I64) outputT &type/I64] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons mask (&/$Cons input (&/$Nil))) ?values] =mask (&&/analyse-1 analyse inputT mask) =input (&&/analyse-1 analyse inputT input) @@ -109,15 +109,15 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T ["i64" <op>]) (&/|list =input =mask) (&/|list)))))))) - ^:private analyse-i64-and "and" - ^:private analyse-i64-or "or" - ^:private analyse-i64-xor "xor" + analyse-i64-and "and" + analyse-i64-or "or" + analyse-i64-xor "xor" ) (do-template [<name> <op>] (let [inputT (&/$Apply &type/Any &type/I64) outputT &type/I64] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons shift (&/$Cons input (&/$Nil))) ?values] =shift (&&/analyse-1 analyse &type/Nat shift) =input (&&/analyse-1 analyse inputT input) @@ -126,15 +126,15 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T ["i64" <op>]) (&/|list =input =shift) (&/|list)))))))) - ^:private analyse-i64-left-shift "left-shift" - ^:private analyse-i64-arithmetic-right-shift "arithmetic-right-shift" - ^:private analyse-i64-logical-right-shift "logical-right-shift" + analyse-i64-left-shift "left-shift" + analyse-i64-arithmetic-right-shift "arithmetic-right-shift" + analyse-i64-logical-right-shift "logical-right-shift" ) (do-template [<name> <proc> <input-type> <output-type>] (let [inputT <input-type> outputT <output-type>] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons subjectC (&/$Cons paramC (&/$Nil))) ?values] subjectA (&&/analyse-1 analyse <input-type> subjectC) paramA (&&/analyse-1 analyse <input-type> paramC) @@ -143,15 +143,15 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T <proc>) (&/|list subjectA paramA) (&/|list)))))))) - ^:private analyse-i64-eq ["i64" "="] (&/$Apply &type/Any &type/I64) &type/Bit - ^:private analyse-i64-add ["i64" "+"] (&/$Apply &type/Any &type/I64) &type/I64 - ^:private analyse-i64-sub ["i64" "-"] (&/$Apply &type/Any &type/I64) &type/I64 + analyse-i64-eq ["i64" "="] (&/$Apply &type/Any &type/I64) &type/Bit + analyse-i64-add ["i64" "+"] (&/$Apply &type/Any &type/I64) &type/I64 + analyse-i64-sub ["i64" "-"] (&/$Apply &type/Any &type/I64) &type/I64 ) (do-template [<name> <proc> <input-type> <output-type>] (let [inputT <input-type> outputT <output-type>] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons x (&/$Cons y (&/$Nil))) ?values] =x (&&/analyse-1 analyse <input-type> x) =y (&&/analyse-1 analyse <input-type> y) @@ -160,22 +160,22 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T <proc>) (&/|list =x =y) (&/|list)))))))) - ^:private analyse-int-mul ["int" "*"] &type/Int &type/Int - ^:private analyse-int-div ["int" "/"] &type/Int &type/Int - ^:private analyse-int-rem ["int" "%"] &type/Int &type/Int - ^:private analyse-int-lt ["int" "<"] &type/Int &type/Bit - - ^:private analyse-frac-add ["frac" "+"] &type/Frac &type/Frac - ^:private analyse-frac-sub ["frac" "-"] &type/Frac &type/Frac - ^:private analyse-frac-mul ["frac" "*"] &type/Frac &type/Frac - ^:private analyse-frac-div ["frac" "/"] &type/Frac &type/Frac - ^:private analyse-frac-rem ["frac" "%"] &type/Frac &type/Frac - ^:private analyse-frac-eq ["frac" "="] &type/Frac &type/Bit - ^:private analyse-frac-lt ["frac" "<"] &type/Frac &type/Bit + analyse-int-mul ["int" "*"] &type/Int &type/Int + analyse-int-div ["int" "/"] &type/Int &type/Int + analyse-int-rem ["int" "%"] &type/Int &type/Int + analyse-int-lt ["int" "<"] &type/Int &type/Bit + + analyse-frac-add ["frac" "+"] &type/Frac &type/Frac + analyse-frac-sub ["frac" "-"] &type/Frac &type/Frac + analyse-frac-mul ["frac" "*"] &type/Frac &type/Frac + analyse-frac-div ["frac" "/"] &type/Frac &type/Frac + analyse-frac-rem ["frac" "%"] &type/Frac &type/Frac + analyse-frac-eq ["frac" "="] &type/Frac &type/Bit + analyse-frac-lt ["frac" "<"] &type/Frac &type/Bit ) (do-template [<encode> <encode-op> <decode> <decode-op> <type>] - (do (defn <encode> [analyse exo-type ?values] + (do (defn- <encode> [analyse exo-type ?values] (|do [:let [(&/$Cons x (&/$Nil)) ?values] =x (&&/analyse-1 analyse <type> x) _ (&type/check exo-type &type/Text) @@ -184,7 +184,7 @@ (&&/$proc (&/T <encode-op>) (&/|list =x) (&/|list))))))) (let [decode-type (&/$Apply <type> &type/Maybe)] - (defn <decode> [analyse exo-type ?values] + (defn- <decode> [analyse exo-type ?values] (|do [:let [(&/$Cons x (&/$Nil)) ?values] =x (&&/analyse-1 analyse &type/Text x) _ (&type/check exo-type decode-type) @@ -192,24 +192,24 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T <decode-op>) (&/|list =x) (&/|list))))))))) - ^:private analyse-frac-encode ["frac" "encode"] ^:private analyse-frac-decode ["frac" "decode"] &type/Frac + analyse-frac-encode ["frac" "encode"] analyse-frac-decode ["frac" "decode"] &type/Frac ) (do-template [<name> <type> <op>] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Nil) ?values] _ (&type/check exo-type <type>) _cursor &/cursor] (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T <op>) (&/|list) (&/|list))))))) - ^:private analyse-frac-smallest &type/Frac ["frac" "smallest"] - ^:private analyse-frac-min &type/Frac ["frac" "min"] - ^:private analyse-frac-max &type/Frac ["frac" "max"] + analyse-frac-smallest &type/Frac ["frac" "smallest"] + analyse-frac-min &type/Frac ["frac" "min"] + analyse-frac-max &type/Frac ["frac" "max"] ) (do-template [<name> <from-type> <to-type> <op>] - (defn <name> [analyse exo-type ?values] + (defn- <name> [analyse exo-type ?values] (|do [:let [(&/$Cons x (&/$Nil)) ?values] =x (&&/analyse-1 analyse <from-type> x) _ (&type/check exo-type <to-type>) @@ -217,22 +217,44 @@ (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T <op>) (&/|list =x) (&/|list))))))) - ^:private analyse-int-char &type/Int &type/Text ["int" "char"] - ^:private analyse-int-frac &type/Int &type/Frac ["int" "frac"] - ^:private analyse-frac-int &type/Frac &type/Int ["frac" "int"] + analyse-int-char &type/Int &type/Text ["int" "char"] + analyse-int-frac &type/Int &type/Frac ["int" "frac"] + analyse-frac-int &type/Frac &type/Int ["frac" "int"] - ^:private analyse-io-log &type/Text &type/Any ["io" "log"] - ^:private analyse-io-error &type/Text &type/Nothing ["io" "error"] - ^:private analyse-io-exit &type/Int &type/Nothing ["io" "exit"] + analyse-io-log &type/Text &type/Any ["io" "log"] + analyse-io-error &type/Text &type/Nothing ["io" "error"] + analyse-io-exit &type/Int &type/Nothing ["io" "exit"] ) -(defn ^:private analyse-io-current-time [analyse exo-type ?values] +(defn- analyse-io-current-time [analyse exo-type ?values] (|do [:let [(&/$Nil) ?values] _ (&type/check exo-type &type/Int) _cursor &/cursor] (return (&/|list (&&/|meta exo-type _cursor (&&/$proc (&/T ["io" "current-time"]) (&/|list) (&/|list))))))) +(defn- analyse-syntax-char-case! [analyse exo-type ?values] + (|do [:let [(&/$Cons ?input (&/$Cons [_ (&/$Tuple ?pairs)] (&/$Cons ?else (&/$Nil)))) ?values] + _cursor &/cursor + =input (&&/analyse-1 analyse &type/Nat ?input) + _ (assert! (even? (&/|length ?pairs)) "The number of matches must be even!") + =pairs (&/map% (fn [?pair] + (|let [[[_ (&/$Tuple ?patterns)] ?match] ?pair] + (|do [=match (&&/analyse-1 analyse exo-type ?match)] + (return (&/T [(&/|map (fn [?pattern] + (|let [[_ (&/$Text ^String ?pattern-char)] ?pattern] + (int (.charAt ?pattern-char 0)))) + ?patterns) + =match]))))) + (&/|as-pairs ?pairs)) + =else (&&/analyse-1 analyse exo-type ?else)] + (return (&/|list (&&/|meta exo-type _cursor + (&&/$proc (&/T ["lux" "syntax char case!"]) + (&/|list =input + (&&/|meta exo-type _cursor (&&/$tuple (&/|map &/|second =pairs))) + =else) + (&/|map &/|first =pairs))))))) + (defn analyse-proc [analyse exo-type proc ?values] (try (case proc "lux is" (analyse-lux-is analyse exo-type ?values) @@ -281,6 +303,10 @@ "lux frac min" (analyse-frac-min analyse exo-type ?values) "lux frac max" (analyse-frac-max analyse exo-type ?values) "lux frac int" (analyse-frac-int analyse exo-type ?values) + + ;; Special extensions for performance reasons + ;; Will be replaced by custom extensions in the future. + "lux syntax char case!" (analyse-syntax-char-case! analyse exo-type ?values) ;; else (&/fail-with-loc (str "[Analyser Error] Unknown host procedure: " proc))) diff --git a/luxc/src/lux/compiler/jvm/proc/common.clj b/luxc/src/lux/compiler/jvm/proc/common.clj index 36f23263d..5cff63d86 100644 --- a/luxc/src/lux/compiler/jvm/proc/common.clj +++ b/luxc/src/lux/compiler/jvm/proc/common.clj @@ -263,7 +263,7 @@ &&/unwrap-long (.visitInsn Opcodes/L2I))] :let [_ (doto *writer* - (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "text_clip" "(Ljava/lang/String;II)[Ljava/lang/Object;"))]] + (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "substring" "(II)Ljava/lang/String;"))]] (return nil))) (defn ^:private compile-text-index [compile ?values special-args] @@ -322,7 +322,9 @@ :let [_ (doto *writer* &&/unwrap-long (.visitInsn Opcodes/L2I) - (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "text_char" "(Ljava/lang/String;I)[Ljava/lang/Object;"))]] + (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "charAt" "(I)C") + (.visitInsn Opcodes/I2L) + &&/wrap-long)]] (return nil))) (defn ^:private compile-io-log [compile ?values special-args] @@ -369,12 +371,53 @@ &&/wrap-long)]] (return nil))) +(defn ^:private compile-syntax-char-case! [compile ?values ?patterns] + (|do [:let [(&/$Cons ?input (&/$Cons [_ (&a/$tuple ?matches)] (&/$Cons ?else (&/$Nil)))) ?values] + ^MethodVisitor *writer* &/get-writer + :let [pattern-labels (&/|map (fn [_] (new Label)) ?patterns) + matched-patterns (->> (&/zip2 ?patterns pattern-labels) + (&/flat-map (fn [?chars+?label] + (|let [[?chars ?label] ?chars+?label] + (&/|map (fn [?char] + (&/T [?char ?label])) + ?chars)))) + &/->seq + (sort-by &/|first <) + &/->list) + end-label (new Label) + else-label (new Label)] + _ (compile ?input) + :let [_ (doto *writer* + &&/unwrap-long + (.visitInsn Opcodes/L2I) + (.visitLookupSwitchInsn else-label + (int-array (&/->seq (&/|map &/|first matched-patterns))) + (into-array (&/->seq (&/|map &/|second matched-patterns)))))] + _ (&/map% (fn [?label+?match] + (|let [[?label ?match] ?label+?match] + (|do [:let [_ (doto *writer* + (.visitLabel ?label))] + _ (compile ?match) + :let [_ (doto *writer* + (.visitJumpInsn Opcodes/GOTO end-label))]] + (return nil)))) + (&/zip2 pattern-labels ?matches)) + :let [_ (doto *writer* + (.visitLabel else-label))] + _ (compile ?else) + :let [_ (doto *writer* + (.visitLabel end-label))]] + (return nil))) + (defn compile-proc [compile category proc ?values special-args] (case category "lux" (case proc "is" (compile-lux-is compile ?values special-args) - "try" (compile-lux-try compile ?values special-args)) + "try" (compile-lux-try compile ?values special-args) + ;; Special extensions for performance reasons + ;; Will be replaced by custom extensions in the future. + "syntax char case!" (compile-syntax-char-case! compile ?values special-args)) "io" (case proc diff --git a/luxc/src/lux/compiler/jvm/rt.clj b/luxc/src/lux/compiler/jvm/rt.clj index 6b9aeb680..948f08805 100644 --- a/luxc/src/lux/compiler/jvm/rt.clj +++ b/luxc/src/lux/compiler/jvm/rt.clj @@ -339,52 +339,6 @@ (.visitEnd))] nil)) -(defn ^:private compile-LuxRT-text-methods [^ClassWriter =class] - (do (let [$from (new Label) - $to (new Label) - $handler (new Label) - $end (new Label)] - (doto (.visitMethod =class (+ Opcodes/ACC_PUBLIC Opcodes/ACC_STATIC) "text_clip" "(Ljava/lang/String;II)[Ljava/lang/Object;" nil nil) - (.visitCode) - (.visitTryCatchBlock $from $to $handler "java/lang/IndexOutOfBoundsException") - (.visitLabel $from) - (.visitVarInsn Opcodes/ALOAD 0) - (.visitVarInsn Opcodes/ILOAD 1) - (.visitVarInsn Opcodes/ILOAD 2) - (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "substring" "(II)Ljava/lang/String;") - (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_some" "(Ljava/lang/Object;)[Ljava/lang/Object;") - (.visitJumpInsn Opcodes/GOTO $end) - (.visitLabel $to) - (.visitLabel $handler) - (.visitInsn Opcodes/POP) - (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_none" "()[Ljava/lang/Object;") - (.visitLabel $end) - (.visitInsn Opcodes/ARETURN) - (.visitMaxs 0 0) - (.visitEnd))) - (let [$from (new Label) - $to (new Label) - $handler (new Label)] - (doto (.visitMethod =class (+ Opcodes/ACC_PUBLIC Opcodes/ACC_STATIC) "text_char" "(Ljava/lang/String;I)[Ljava/lang/Object;" nil nil) - (.visitCode) - (.visitTryCatchBlock $from $to $handler "java/lang/IndexOutOfBoundsException") - (.visitLabel $from) - (.visitVarInsn Opcodes/ALOAD 0) - (.visitVarInsn Opcodes/ILOAD 1) - (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "codePointAt" "(I)I") - (.visitInsn Opcodes/I2L) - &&/wrap-long - (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_some" "(Ljava/lang/Object;)[Ljava/lang/Object;") - (.visitInsn Opcodes/ARETURN) - (.visitLabel $to) - (.visitLabel $handler) - (.visitInsn Opcodes/POP) - (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_none" "()[Ljava/lang/Object;") - (.visitInsn Opcodes/ARETURN) - (.visitMaxs 0 0) - (.visitEnd))) - nil)) - (def compile-LuxRT-class (|do [_ (return nil) :let [full-name &&/lux-utils-class @@ -489,7 +443,6 @@ (compile-LuxRT-pm-methods) (compile-LuxRT-adt-methods) (compile-LuxRT-int-methods) - (compile-LuxRT-frac-methods) - (compile-LuxRT-text-methods))]] + (compile-LuxRT-frac-methods))]] (&&/save-class! (second (string/split &&/lux-utils-class #"/")) (.toByteArray (doto =class .visitEnd))))) diff --git a/luxc/src/lux/lexer.clj b/luxc/src/lux/lexer.clj index f74e14dfd..8ed75b940 100644 --- a/luxc/src/lux/lexer.clj +++ b/luxc/src/lux/lexer.clj @@ -26,76 +26,12 @@ ) ;; [Utils] -(defn ^:private clean-line [^String raw-line] - "(-> Text Text)" - (let [line-length (.length raw-line) - buffer (new StringBuffer line-length)] - (loop [idx 0] - (if (< idx line-length) - (let [current-char (.charAt raw-line idx)] - (if (= \\ current-char) - (do (assert (< (+ 1 idx) line-length) (str "[Lexer Error] Text is too short for escaping: " raw-line " " idx)) - (case (.charAt raw-line (+ 1 idx)) - \t (do (.append buffer "\t") - (recur (+ 2 idx))) - \v (do (.append buffer "\u000B") - (recur (+ 2 idx))) - \b (do (.append buffer "\b") - (recur (+ 2 idx))) - \n (do (.append buffer "\n") - (recur (+ 2 idx))) - \r (do (.append buffer "\r") - (recur (+ 2 idx))) - \f (do (.append buffer "\f") - (recur (+ 2 idx))) - \" (do (.append buffer "\"") - (recur (+ 2 idx))) - \\ (do (.append buffer "\\") - (recur (+ 2 idx))) - \u (do (assert (< (+ 5 idx) line-length) (str "[Lexer Error] Text is too short for unicode-escaping: " raw-line " " idx)) - (.append buffer (char (Integer/valueOf (.substring raw-line (+ 2 idx) (+ 6 idx)) 16))) - (recur (+ 6 idx))) - ;; else - (assert false (str "[Lexer Error] Invalid escaping syntax: " raw-line " " idx)))) - (do (.append buffer current-char) - (recur (+ 1 idx))))) - (.toString buffer))))) - -(defn ^:private lex-text-body [multi-line? offset] - (|do [[_ eol? ^String pre-quotes**] (&reader/read-regex #"^([^\"]*)") - ^String pre-quotes* (if multi-line? - (|do [:let [empty-line? (and eol? (= "" pre-quotes**))] - _ (&/assert! (or empty-line? - (>= (.length pre-quotes**) offset)) - "Each line of a multi-line text must have an appropriate offset!")] - (return (if empty-line? - "\n" - (str "\n" (.substring pre-quotes** offset))))) - (return pre-quotes**)) - [pre-quotes post-quotes] (if (.endsWith pre-quotes* "\\") - (if eol? - (&/fail-with-loc "[Lexer Error] Cannot leave dangling back-slash \\") - (if (if-let [^String back-slashes (re-find #"\\+$" pre-quotes*)] - (odd? (.length back-slashes))) - (|do [[_ eol?* _] (&reader/read-regex #"^([\"])") - next-part (lex-text-body eol?* offset)] - (return (&/T [(.substring pre-quotes* 0 (dec (.length pre-quotes*))) - (str "\"" next-part)]))) - (|do [post-quotes* (lex-text-body false offset)] - (return (&/T [pre-quotes* post-quotes*]))))) - (if eol? - (|do [next-part (lex-text-body true offset)] - (return (&/T [pre-quotes* - next-part]))) - (return (&/T [pre-quotes* ""]))))] - (return (str (clean-line pre-quotes) post-quotes)))) - (def lex-text (|do [[meta _ _] (&reader/read-text "\"") :let [[_ _ _column] meta] - token (lex-text-body false (inc _column)) + [_ _ ^String content] (&reader/read-regex #"^([^\"]*)") _ (&reader/read-text "\"")] - (return (&/T [meta ($Text token)])))) + (return (&/T [meta ($Text content)])))) (def +ident-re+ #"^([^0-9\[\]\{\}\(\)\s\"#.][^\[\]\{\}\(\)\s\"#.]*)") @@ -105,26 +41,11 @@ (|do [[meta _ white-space] (&reader/read-regex #"^(\s+|$)")] (return (&/T [meta ($White_Space white-space)])))) -(def ^:private lex-single-line-comment +(def ^:private lex-comment (|do [_ (&reader/read-text "##") [meta _ comment] (&reader/read-regex #"^(.*)$")] (return (&/T [meta ($Comment comment)])))) -(defn ^:private lex-multi-line-comment [_] - (|do [_ (&reader/read-text "#(") - [meta comment] (&/try-all% (&/|list (|do [[meta comment] (&reader/read-regex+ #"(?is)^(?!#\()((?!\)#).)*")] - (return (&/T [meta comment]))) - (|do [[meta pre] (&reader/read-regex+ #"(?is)^((?!#\().)*") - [_ ($Comment inner)] (lex-multi-line-comment nil) - [_ post] (&reader/read-regex+ #"(?is)^((?!\)#).)*")] - (return (&/T [meta (str pre "#(" inner ")#" post)]))))) - _ (&reader/read-text ")#")] - (return (&/T [meta ($Comment comment)])))) - -(def ^:private lex-comment - (&/try-all% (&/|list lex-single-line-comment - (lex-multi-line-comment nil)))) - (do-template [<name> <tag> <regex>] (def <name> (|do [[meta _ token] (&reader/read-regex <regex>)] diff --git a/luxc/src/lux/reader.clj b/luxc/src/lux/reader.clj index 5f4aa8afe..14914cc2e 100644 --- a/luxc/src/lux/reader.clj +++ b/luxc/src/lux/reader.clj @@ -11,7 +11,7 @@ ("Yes" 2)) ;; [Utils] -(defn ^:private with-line [body] +(defn- with-line [body] (fn [state] (|case (&/get$ &/$source state) (&/$Nil) @@ -32,7 +32,7 @@ output)) ))) -(defn ^:private with-lines [body] +(defn- with-lines [body] (fn [state] (|case (body (&/get$ &/$source state)) (&/$Right reader* match) @@ -43,7 +43,7 @@ ((&/fail-with-loc msg) state) ))) -(defn ^:private re-find! [^java.util.regex.Pattern regex column ^String line] +(defn- re-find! [^java.util.regex.Pattern regex column ^String line] (let [matcher (doto (.matcher regex line) (.region column (.length line)) (.useAnchoringBounds true))] @@ -63,8 +63,9 @@ (&/T [(&/T [file-name line-num column-num*]) line])))) ($No (str "[Reader Error] Pattern failed: " regex)))))) -(defn read-regex? [regex] +(defn read-regex? "(-> Regex (Reader (Maybe Text)))" + [regex] (with-line (fn [file-name line-num column-num ^String line] (if-let [^String match (re-find! regex column-num line)] @@ -101,8 +102,9 @@ (&/T [(&/T [file-name line-num column-num]) prefix*])])))) (&/$Left (str "[Reader Error] Pattern failed: " regex)))))))) -(defn read-text [^String text] +(defn read-text "(-> Text (Reader Text))" + [^String text] (with-line (fn [file-name line-num column-num ^String line] (if (.startsWith line text column-num) @@ -114,8 +116,9 @@ (&/T [(&/T [file-name line-num column-num*]) line])))) ($No (str "[Reader Error] Text failed: " text)))))) -(defn read-text? [^String text] +(defn read-text? "(-> Text (Reader (Maybe Text)))" + [^String text] (with-line (fn [file-name line-num column-num ^String line] (if (.startsWith line text column-num) |