aboutsummaryrefslogtreecommitdiff
path: root/luxc
diff options
context:
space:
mode:
authorThe Lux Programming Language2018-08-26 09:14:57 -0400
committerGitHub2018-08-26 09:14:57 -0400
commit2cfa4184f908054b7bb3c3cdc2372cfbeafdd5d2 (patch)
tree4223297955b046205c017b58cf31e490b26e8cea /luxc
parent58c299b90fbb3a20cf4e624fd20e4bb7f5846672 (diff)
parentb614f2875fb2e98e8867399b7013503f2b1a4e4c (diff)
Merge pull request #47 from LuxLang/faster-lexer
Faster new-luxc lexer/syntax
Diffstat (limited to '')
-rw-r--r--luxc/src/lux/analyser/proc/common.clj134
-rw-r--r--luxc/src/lux/compiler/jvm/proc/common.clj49
-rw-r--r--luxc/src/lux/compiler/jvm/rt.clj49
-rw-r--r--luxc/src/lux/lexer.clj85
-rw-r--r--luxc/src/lux/reader.clj15
5 files changed, 139 insertions, 193 deletions
diff --git a/luxc/src/lux/analyser/proc/common.clj b/luxc/src/lux/analyser/proc/common.clj
index 8c6bd9d88..fbdf05546 100644
--- a/luxc/src/lux/analyser/proc/common.clj
+++ b/luxc/src/lux/analyser/proc/common.clj
@@ -6,7 +6,7 @@
[type :as &type])
(lux.analyser [base :as &&])))
-(defn ^:private analyse-lux-is [analyse exo-type ?values]
+(defn- analyse-lux-is [analyse exo-type ?values]
(&type/with-var
(fn [$var]
(|do [:let [(&/$Cons left (&/$Cons right (&/$Nil))) ?values]
@@ -17,7 +17,7 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T ["lux" "is"]) (&/|list =left =right) (&/|list)))))))))
-(defn ^:private analyse-lux-try [analyse exo-type ?values]
+(defn- analyse-lux-try [analyse exo-type ?values]
(&type/with-var
(fn [$var]
(|do [:let [(&/$Cons op (&/$Nil)) ?values]
@@ -30,7 +30,7 @@
(&&/$proc (&/T ["lux" "try"]) (&/|list =op) (&/|list)))))))))
(do-template [<name> <proc> <input-type> <output-type>]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons x (&/$Cons y (&/$Nil))) ?values]
=x (&&/analyse-1 analyse <input-type> x)
=y (&&/analyse-1 analyse <input-type> y)
@@ -39,13 +39,13 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T <proc>) (&/|list =x =y) (&/|list)))))))
- ^:private analyse-text-eq ["text" "="] &type/Text &type/Bit
- ^:private analyse-text-lt ["text" "<"] &type/Text &type/Bit
- ^:private analyse-text-concat ["text" "concat"] &type/Text &type/Text
+ analyse-text-eq ["text" "="] &type/Text &type/Bit
+ analyse-text-lt ["text" "<"] &type/Text &type/Bit
+ analyse-text-concat ["text" "concat"] &type/Text &type/Text
)
(do-template [<name> <proc-name> <output-type>]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons text (&/$Cons part (&/$Cons start (&/$Nil)))) ?values]
=text (&&/analyse-1 analyse &type/Text text)
=part (&&/analyse-1 analyse &type/Text part)
@@ -57,15 +57,15 @@
(&/|list =text =part =start)
(&/|list)))))))
- ^:private analyse-text-index "index" (&/$Apply &type/Nat &type/Maybe)
+ analyse-text-index "index" (&/$Apply &type/Nat &type/Maybe)
)
-(defn ^:private analyse-text-clip [analyse exo-type ?values]
+(defn- analyse-text-clip [analyse exo-type ?values]
(|do [:let [(&/$Cons text (&/$Cons from (&/$Cons to (&/$Nil)))) ?values]
=text (&&/analyse-1 analyse &type/Text text)
=from (&&/analyse-1 analyse &type/Nat from)
=to (&&/analyse-1 analyse &type/Nat to)
- _ (&type/check exo-type (&/$Apply &type/Text &type/Maybe))
+ _ (&type/check exo-type &type/Text)
_cursor &/cursor]
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T ["text" "clip"])
@@ -73,7 +73,7 @@
(&/|list)))))))
(do-template [<name> <proc>]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons text (&/$Nil)) ?values]
=text (&&/analyse-1 analyse &type/Text text)
_ (&type/check exo-type &type/Nat)
@@ -83,14 +83,14 @@
(&/|list =text)
(&/|list)))))))
- ^:private analyse-text-size "size"
+ analyse-text-size "size"
)
-(defn ^:private analyse-text-char [analyse exo-type ?values]
+(defn- analyse-text-char [analyse exo-type ?values]
(|do [:let [(&/$Cons text (&/$Cons idx (&/$Nil))) ?values]
=text (&&/analyse-1 analyse &type/Text text)
=idx (&&/analyse-1 analyse &type/Nat idx)
- _ (&type/check exo-type (&/$Apply &type/Nat &type/Maybe))
+ _ (&type/check exo-type &type/Nat)
_cursor &/cursor]
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T ["text" "char"])
@@ -100,7 +100,7 @@
(do-template [<name> <op>]
(let [inputT (&/$Apply &type/Any &type/I64)
outputT &type/I64]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons mask (&/$Cons input (&/$Nil))) ?values]
=mask (&&/analyse-1 analyse inputT mask)
=input (&&/analyse-1 analyse inputT input)
@@ -109,15 +109,15 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T ["i64" <op>]) (&/|list =input =mask) (&/|list))))))))
- ^:private analyse-i64-and "and"
- ^:private analyse-i64-or "or"
- ^:private analyse-i64-xor "xor"
+ analyse-i64-and "and"
+ analyse-i64-or "or"
+ analyse-i64-xor "xor"
)
(do-template [<name> <op>]
(let [inputT (&/$Apply &type/Any &type/I64)
outputT &type/I64]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons shift (&/$Cons input (&/$Nil))) ?values]
=shift (&&/analyse-1 analyse &type/Nat shift)
=input (&&/analyse-1 analyse inputT input)
@@ -126,15 +126,15 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T ["i64" <op>]) (&/|list =input =shift) (&/|list))))))))
- ^:private analyse-i64-left-shift "left-shift"
- ^:private analyse-i64-arithmetic-right-shift "arithmetic-right-shift"
- ^:private analyse-i64-logical-right-shift "logical-right-shift"
+ analyse-i64-left-shift "left-shift"
+ analyse-i64-arithmetic-right-shift "arithmetic-right-shift"
+ analyse-i64-logical-right-shift "logical-right-shift"
)
(do-template [<name> <proc> <input-type> <output-type>]
(let [inputT <input-type>
outputT <output-type>]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons subjectC (&/$Cons paramC (&/$Nil))) ?values]
subjectA (&&/analyse-1 analyse <input-type> subjectC)
paramA (&&/analyse-1 analyse <input-type> paramC)
@@ -143,15 +143,15 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T <proc>) (&/|list subjectA paramA) (&/|list))))))))
- ^:private analyse-i64-eq ["i64" "="] (&/$Apply &type/Any &type/I64) &type/Bit
- ^:private analyse-i64-add ["i64" "+"] (&/$Apply &type/Any &type/I64) &type/I64
- ^:private analyse-i64-sub ["i64" "-"] (&/$Apply &type/Any &type/I64) &type/I64
+ analyse-i64-eq ["i64" "="] (&/$Apply &type/Any &type/I64) &type/Bit
+ analyse-i64-add ["i64" "+"] (&/$Apply &type/Any &type/I64) &type/I64
+ analyse-i64-sub ["i64" "-"] (&/$Apply &type/Any &type/I64) &type/I64
)
(do-template [<name> <proc> <input-type> <output-type>]
(let [inputT <input-type>
outputT <output-type>]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons x (&/$Cons y (&/$Nil))) ?values]
=x (&&/analyse-1 analyse <input-type> x)
=y (&&/analyse-1 analyse <input-type> y)
@@ -160,22 +160,22 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T <proc>) (&/|list =x =y) (&/|list))))))))
- ^:private analyse-int-mul ["int" "*"] &type/Int &type/Int
- ^:private analyse-int-div ["int" "/"] &type/Int &type/Int
- ^:private analyse-int-rem ["int" "%"] &type/Int &type/Int
- ^:private analyse-int-lt ["int" "<"] &type/Int &type/Bit
-
- ^:private analyse-frac-add ["frac" "+"] &type/Frac &type/Frac
- ^:private analyse-frac-sub ["frac" "-"] &type/Frac &type/Frac
- ^:private analyse-frac-mul ["frac" "*"] &type/Frac &type/Frac
- ^:private analyse-frac-div ["frac" "/"] &type/Frac &type/Frac
- ^:private analyse-frac-rem ["frac" "%"] &type/Frac &type/Frac
- ^:private analyse-frac-eq ["frac" "="] &type/Frac &type/Bit
- ^:private analyse-frac-lt ["frac" "<"] &type/Frac &type/Bit
+ analyse-int-mul ["int" "*"] &type/Int &type/Int
+ analyse-int-div ["int" "/"] &type/Int &type/Int
+ analyse-int-rem ["int" "%"] &type/Int &type/Int
+ analyse-int-lt ["int" "<"] &type/Int &type/Bit
+
+ analyse-frac-add ["frac" "+"] &type/Frac &type/Frac
+ analyse-frac-sub ["frac" "-"] &type/Frac &type/Frac
+ analyse-frac-mul ["frac" "*"] &type/Frac &type/Frac
+ analyse-frac-div ["frac" "/"] &type/Frac &type/Frac
+ analyse-frac-rem ["frac" "%"] &type/Frac &type/Frac
+ analyse-frac-eq ["frac" "="] &type/Frac &type/Bit
+ analyse-frac-lt ["frac" "<"] &type/Frac &type/Bit
)
(do-template [<encode> <encode-op> <decode> <decode-op> <type>]
- (do (defn <encode> [analyse exo-type ?values]
+ (do (defn- <encode> [analyse exo-type ?values]
(|do [:let [(&/$Cons x (&/$Nil)) ?values]
=x (&&/analyse-1 analyse <type> x)
_ (&type/check exo-type &type/Text)
@@ -184,7 +184,7 @@
(&&/$proc (&/T <encode-op>) (&/|list =x) (&/|list)))))))
(let [decode-type (&/$Apply <type> &type/Maybe)]
- (defn <decode> [analyse exo-type ?values]
+ (defn- <decode> [analyse exo-type ?values]
(|do [:let [(&/$Cons x (&/$Nil)) ?values]
=x (&&/analyse-1 analyse &type/Text x)
_ (&type/check exo-type decode-type)
@@ -192,24 +192,24 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T <decode-op>) (&/|list =x) (&/|list)))))))))
- ^:private analyse-frac-encode ["frac" "encode"] ^:private analyse-frac-decode ["frac" "decode"] &type/Frac
+ analyse-frac-encode ["frac" "encode"] analyse-frac-decode ["frac" "decode"] &type/Frac
)
(do-template [<name> <type> <op>]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Nil) ?values]
_ (&type/check exo-type <type>)
_cursor &/cursor]
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T <op>) (&/|list) (&/|list)))))))
- ^:private analyse-frac-smallest &type/Frac ["frac" "smallest"]
- ^:private analyse-frac-min &type/Frac ["frac" "min"]
- ^:private analyse-frac-max &type/Frac ["frac" "max"]
+ analyse-frac-smallest &type/Frac ["frac" "smallest"]
+ analyse-frac-min &type/Frac ["frac" "min"]
+ analyse-frac-max &type/Frac ["frac" "max"]
)
(do-template [<name> <from-type> <to-type> <op>]
- (defn <name> [analyse exo-type ?values]
+ (defn- <name> [analyse exo-type ?values]
(|do [:let [(&/$Cons x (&/$Nil)) ?values]
=x (&&/analyse-1 analyse <from-type> x)
_ (&type/check exo-type <to-type>)
@@ -217,22 +217,44 @@
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T <op>) (&/|list =x) (&/|list)))))))
- ^:private analyse-int-char &type/Int &type/Text ["int" "char"]
- ^:private analyse-int-frac &type/Int &type/Frac ["int" "frac"]
- ^:private analyse-frac-int &type/Frac &type/Int ["frac" "int"]
+ analyse-int-char &type/Int &type/Text ["int" "char"]
+ analyse-int-frac &type/Int &type/Frac ["int" "frac"]
+ analyse-frac-int &type/Frac &type/Int ["frac" "int"]
- ^:private analyse-io-log &type/Text &type/Any ["io" "log"]
- ^:private analyse-io-error &type/Text &type/Nothing ["io" "error"]
- ^:private analyse-io-exit &type/Int &type/Nothing ["io" "exit"]
+ analyse-io-log &type/Text &type/Any ["io" "log"]
+ analyse-io-error &type/Text &type/Nothing ["io" "error"]
+ analyse-io-exit &type/Int &type/Nothing ["io" "exit"]
)
-(defn ^:private analyse-io-current-time [analyse exo-type ?values]
+(defn- analyse-io-current-time [analyse exo-type ?values]
(|do [:let [(&/$Nil) ?values]
_ (&type/check exo-type &type/Int)
_cursor &/cursor]
(return (&/|list (&&/|meta exo-type _cursor
(&&/$proc (&/T ["io" "current-time"]) (&/|list) (&/|list)))))))
+(defn- analyse-syntax-char-case! [analyse exo-type ?values]
+ (|do [:let [(&/$Cons ?input (&/$Cons [_ (&/$Tuple ?pairs)] (&/$Cons ?else (&/$Nil)))) ?values]
+ _cursor &/cursor
+ =input (&&/analyse-1 analyse &type/Nat ?input)
+ _ (assert! (even? (&/|length ?pairs)) "The number of matches must be even!")
+ =pairs (&/map% (fn [?pair]
+ (|let [[[_ (&/$Tuple ?patterns)] ?match] ?pair]
+ (|do [=match (&&/analyse-1 analyse exo-type ?match)]
+ (return (&/T [(&/|map (fn [?pattern]
+ (|let [[_ (&/$Text ^String ?pattern-char)] ?pattern]
+ (int (.charAt ?pattern-char 0))))
+ ?patterns)
+ =match])))))
+ (&/|as-pairs ?pairs))
+ =else (&&/analyse-1 analyse exo-type ?else)]
+ (return (&/|list (&&/|meta exo-type _cursor
+ (&&/$proc (&/T ["lux" "syntax char case!"])
+ (&/|list =input
+ (&&/|meta exo-type _cursor (&&/$tuple (&/|map &/|second =pairs)))
+ =else)
+ (&/|map &/|first =pairs)))))))
+
(defn analyse-proc [analyse exo-type proc ?values]
(try (case proc
"lux is" (analyse-lux-is analyse exo-type ?values)
@@ -281,6 +303,10 @@
"lux frac min" (analyse-frac-min analyse exo-type ?values)
"lux frac max" (analyse-frac-max analyse exo-type ?values)
"lux frac int" (analyse-frac-int analyse exo-type ?values)
+
+ ;; Special extensions for performance reasons
+ ;; Will be replaced by custom extensions in the future.
+ "lux syntax char case!" (analyse-syntax-char-case! analyse exo-type ?values)
;; else
(&/fail-with-loc (str "[Analyser Error] Unknown host procedure: " proc)))
diff --git a/luxc/src/lux/compiler/jvm/proc/common.clj b/luxc/src/lux/compiler/jvm/proc/common.clj
index 36f23263d..5cff63d86 100644
--- a/luxc/src/lux/compiler/jvm/proc/common.clj
+++ b/luxc/src/lux/compiler/jvm/proc/common.clj
@@ -263,7 +263,7 @@
&&/unwrap-long
(.visitInsn Opcodes/L2I))]
:let [_ (doto *writer*
- (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "text_clip" "(Ljava/lang/String;II)[Ljava/lang/Object;"))]]
+ (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "substring" "(II)Ljava/lang/String;"))]]
(return nil)))
(defn ^:private compile-text-index [compile ?values special-args]
@@ -322,7 +322,9 @@
:let [_ (doto *writer*
&&/unwrap-long
(.visitInsn Opcodes/L2I)
- (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "text_char" "(Ljava/lang/String;I)[Ljava/lang/Object;"))]]
+ (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "charAt" "(I)C")
+ (.visitInsn Opcodes/I2L)
+ &&/wrap-long)]]
(return nil)))
(defn ^:private compile-io-log [compile ?values special-args]
@@ -369,12 +371,53 @@
&&/wrap-long)]]
(return nil)))
+(defn ^:private compile-syntax-char-case! [compile ?values ?patterns]
+ (|do [:let [(&/$Cons ?input (&/$Cons [_ (&a/$tuple ?matches)] (&/$Cons ?else (&/$Nil)))) ?values]
+ ^MethodVisitor *writer* &/get-writer
+ :let [pattern-labels (&/|map (fn [_] (new Label)) ?patterns)
+ matched-patterns (->> (&/zip2 ?patterns pattern-labels)
+ (&/flat-map (fn [?chars+?label]
+ (|let [[?chars ?label] ?chars+?label]
+ (&/|map (fn [?char]
+ (&/T [?char ?label]))
+ ?chars))))
+ &/->seq
+ (sort-by &/|first <)
+ &/->list)
+ end-label (new Label)
+ else-label (new Label)]
+ _ (compile ?input)
+ :let [_ (doto *writer*
+ &&/unwrap-long
+ (.visitInsn Opcodes/L2I)
+ (.visitLookupSwitchInsn else-label
+ (int-array (&/->seq (&/|map &/|first matched-patterns)))
+ (into-array (&/->seq (&/|map &/|second matched-patterns)))))]
+ _ (&/map% (fn [?label+?match]
+ (|let [[?label ?match] ?label+?match]
+ (|do [:let [_ (doto *writer*
+ (.visitLabel ?label))]
+ _ (compile ?match)
+ :let [_ (doto *writer*
+ (.visitJumpInsn Opcodes/GOTO end-label))]]
+ (return nil))))
+ (&/zip2 pattern-labels ?matches))
+ :let [_ (doto *writer*
+ (.visitLabel else-label))]
+ _ (compile ?else)
+ :let [_ (doto *writer*
+ (.visitLabel end-label))]]
+ (return nil)))
+
(defn compile-proc [compile category proc ?values special-args]
(case category
"lux"
(case proc
"is" (compile-lux-is compile ?values special-args)
- "try" (compile-lux-try compile ?values special-args))
+ "try" (compile-lux-try compile ?values special-args)
+ ;; Special extensions for performance reasons
+ ;; Will be replaced by custom extensions in the future.
+ "syntax char case!" (compile-syntax-char-case! compile ?values special-args))
"io"
(case proc
diff --git a/luxc/src/lux/compiler/jvm/rt.clj b/luxc/src/lux/compiler/jvm/rt.clj
index 6b9aeb680..948f08805 100644
--- a/luxc/src/lux/compiler/jvm/rt.clj
+++ b/luxc/src/lux/compiler/jvm/rt.clj
@@ -339,52 +339,6 @@
(.visitEnd))]
nil))
-(defn ^:private compile-LuxRT-text-methods [^ClassWriter =class]
- (do (let [$from (new Label)
- $to (new Label)
- $handler (new Label)
- $end (new Label)]
- (doto (.visitMethod =class (+ Opcodes/ACC_PUBLIC Opcodes/ACC_STATIC) "text_clip" "(Ljava/lang/String;II)[Ljava/lang/Object;" nil nil)
- (.visitCode)
- (.visitTryCatchBlock $from $to $handler "java/lang/IndexOutOfBoundsException")
- (.visitLabel $from)
- (.visitVarInsn Opcodes/ALOAD 0)
- (.visitVarInsn Opcodes/ILOAD 1)
- (.visitVarInsn Opcodes/ILOAD 2)
- (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "substring" "(II)Ljava/lang/String;")
- (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_some" "(Ljava/lang/Object;)[Ljava/lang/Object;")
- (.visitJumpInsn Opcodes/GOTO $end)
- (.visitLabel $to)
- (.visitLabel $handler)
- (.visitInsn Opcodes/POP)
- (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_none" "()[Ljava/lang/Object;")
- (.visitLabel $end)
- (.visitInsn Opcodes/ARETURN)
- (.visitMaxs 0 0)
- (.visitEnd)))
- (let [$from (new Label)
- $to (new Label)
- $handler (new Label)]
- (doto (.visitMethod =class (+ Opcodes/ACC_PUBLIC Opcodes/ACC_STATIC) "text_char" "(Ljava/lang/String;I)[Ljava/lang/Object;" nil nil)
- (.visitCode)
- (.visitTryCatchBlock $from $to $handler "java/lang/IndexOutOfBoundsException")
- (.visitLabel $from)
- (.visitVarInsn Opcodes/ALOAD 0)
- (.visitVarInsn Opcodes/ILOAD 1)
- (.visitMethodInsn Opcodes/INVOKEVIRTUAL "java/lang/String" "codePointAt" "(I)I")
- (.visitInsn Opcodes/I2L)
- &&/wrap-long
- (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_some" "(Ljava/lang/Object;)[Ljava/lang/Object;")
- (.visitInsn Opcodes/ARETURN)
- (.visitLabel $to)
- (.visitLabel $handler)
- (.visitInsn Opcodes/POP)
- (.visitMethodInsn Opcodes/INVOKESTATIC "lux/LuxRT" "make_none" "()[Ljava/lang/Object;")
- (.visitInsn Opcodes/ARETURN)
- (.visitMaxs 0 0)
- (.visitEnd)))
- nil))
-
(def compile-LuxRT-class
(|do [_ (return nil)
:let [full-name &&/lux-utils-class
@@ -489,7 +443,6 @@
(compile-LuxRT-pm-methods)
(compile-LuxRT-adt-methods)
(compile-LuxRT-int-methods)
- (compile-LuxRT-frac-methods)
- (compile-LuxRT-text-methods))]]
+ (compile-LuxRT-frac-methods))]]
(&&/save-class! (second (string/split &&/lux-utils-class #"/"))
(.toByteArray (doto =class .visitEnd)))))
diff --git a/luxc/src/lux/lexer.clj b/luxc/src/lux/lexer.clj
index f74e14dfd..8ed75b940 100644
--- a/luxc/src/lux/lexer.clj
+++ b/luxc/src/lux/lexer.clj
@@ -26,76 +26,12 @@
)
;; [Utils]
-(defn ^:private clean-line [^String raw-line]
- "(-> Text Text)"
- (let [line-length (.length raw-line)
- buffer (new StringBuffer line-length)]
- (loop [idx 0]
- (if (< idx line-length)
- (let [current-char (.charAt raw-line idx)]
- (if (= \\ current-char)
- (do (assert (< (+ 1 idx) line-length) (str "[Lexer Error] Text is too short for escaping: " raw-line " " idx))
- (case (.charAt raw-line (+ 1 idx))
- \t (do (.append buffer "\t")
- (recur (+ 2 idx)))
- \v (do (.append buffer "\u000B")
- (recur (+ 2 idx)))
- \b (do (.append buffer "\b")
- (recur (+ 2 idx)))
- \n (do (.append buffer "\n")
- (recur (+ 2 idx)))
- \r (do (.append buffer "\r")
- (recur (+ 2 idx)))
- \f (do (.append buffer "\f")
- (recur (+ 2 idx)))
- \" (do (.append buffer "\"")
- (recur (+ 2 idx)))
- \\ (do (.append buffer "\\")
- (recur (+ 2 idx)))
- \u (do (assert (< (+ 5 idx) line-length) (str "[Lexer Error] Text is too short for unicode-escaping: " raw-line " " idx))
- (.append buffer (char (Integer/valueOf (.substring raw-line (+ 2 idx) (+ 6 idx)) 16)))
- (recur (+ 6 idx)))
- ;; else
- (assert false (str "[Lexer Error] Invalid escaping syntax: " raw-line " " idx))))
- (do (.append buffer current-char)
- (recur (+ 1 idx)))))
- (.toString buffer)))))
-
-(defn ^:private lex-text-body [multi-line? offset]
- (|do [[_ eol? ^String pre-quotes**] (&reader/read-regex #"^([^\"]*)")
- ^String pre-quotes* (if multi-line?
- (|do [:let [empty-line? (and eol? (= "" pre-quotes**))]
- _ (&/assert! (or empty-line?
- (>= (.length pre-quotes**) offset))
- "Each line of a multi-line text must have an appropriate offset!")]
- (return (if empty-line?
- "\n"
- (str "\n" (.substring pre-quotes** offset)))))
- (return pre-quotes**))
- [pre-quotes post-quotes] (if (.endsWith pre-quotes* "\\")
- (if eol?
- (&/fail-with-loc "[Lexer Error] Cannot leave dangling back-slash \\")
- (if (if-let [^String back-slashes (re-find #"\\+$" pre-quotes*)]
- (odd? (.length back-slashes)))
- (|do [[_ eol?* _] (&reader/read-regex #"^([\"])")
- next-part (lex-text-body eol?* offset)]
- (return (&/T [(.substring pre-quotes* 0 (dec (.length pre-quotes*)))
- (str "\"" next-part)])))
- (|do [post-quotes* (lex-text-body false offset)]
- (return (&/T [pre-quotes* post-quotes*])))))
- (if eol?
- (|do [next-part (lex-text-body true offset)]
- (return (&/T [pre-quotes*
- next-part])))
- (return (&/T [pre-quotes* ""]))))]
- (return (str (clean-line pre-quotes) post-quotes))))
-
(def lex-text
(|do [[meta _ _] (&reader/read-text "\"")
:let [[_ _ _column] meta]
- token (lex-text-body false (inc _column))
+ [_ _ ^String content] (&reader/read-regex #"^([^\"]*)")
_ (&reader/read-text "\"")]
- (return (&/T [meta ($Text token)]))))
+ (return (&/T [meta ($Text content)]))))
(def +ident-re+
#"^([^0-9\[\]\{\}\(\)\s\"#.][^\[\]\{\}\(\)\s\"#.]*)")
@@ -105,26 +41,11 @@
(|do [[meta _ white-space] (&reader/read-regex #"^(\s+|$)")]
(return (&/T [meta ($White_Space white-space)]))))
-(def ^:private lex-single-line-comment
+(def ^:private lex-comment
(|do [_ (&reader/read-text "##")
[meta _ comment] (&reader/read-regex #"^(.*)$")]
(return (&/T [meta ($Comment comment)]))))
-(defn ^:private lex-multi-line-comment [_]
- (|do [_ (&reader/read-text "#(")
- [meta comment] (&/try-all% (&/|list (|do [[meta comment] (&reader/read-regex+ #"(?is)^(?!#\()((?!\)#).)*")]
- (return (&/T [meta comment])))
- (|do [[meta pre] (&reader/read-regex+ #"(?is)^((?!#\().)*")
- [_ ($Comment inner)] (lex-multi-line-comment nil)
- [_ post] (&reader/read-regex+ #"(?is)^((?!\)#).)*")]
- (return (&/T [meta (str pre "#(" inner ")#" post)])))))
- _ (&reader/read-text ")#")]
- (return (&/T [meta ($Comment comment)]))))
-
-(def ^:private lex-comment
- (&/try-all% (&/|list lex-single-line-comment
- (lex-multi-line-comment nil))))
-
(do-template [<name> <tag> <regex>]
(def <name>
(|do [[meta _ token] (&reader/read-regex <regex>)]
diff --git a/luxc/src/lux/reader.clj b/luxc/src/lux/reader.clj
index 5f4aa8afe..14914cc2e 100644
--- a/luxc/src/lux/reader.clj
+++ b/luxc/src/lux/reader.clj
@@ -11,7 +11,7 @@
("Yes" 2))
;; [Utils]
-(defn ^:private with-line [body]
+(defn- with-line [body]
(fn [state]
(|case (&/get$ &/$source state)
(&/$Nil)
@@ -32,7 +32,7 @@
output))
)))
-(defn ^:private with-lines [body]
+(defn- with-lines [body]
(fn [state]
(|case (body (&/get$ &/$source state))
(&/$Right reader* match)
@@ -43,7 +43,7 @@
((&/fail-with-loc msg) state)
)))
-(defn ^:private re-find! [^java.util.regex.Pattern regex column ^String line]
+(defn- re-find! [^java.util.regex.Pattern regex column ^String line]
(let [matcher (doto (.matcher regex line)
(.region column (.length line))
(.useAnchoringBounds true))]
@@ -63,8 +63,9 @@
(&/T [(&/T [file-name line-num column-num*]) line]))))
($No (str "[Reader Error] Pattern failed: " regex))))))
-(defn read-regex? [regex]
+(defn read-regex?
"(-> Regex (Reader (Maybe Text)))"
+ [regex]
(with-line
(fn [file-name line-num column-num ^String line]
(if-let [^String match (re-find! regex column-num line)]
@@ -101,8 +102,9 @@
(&/T [(&/T [file-name line-num column-num]) prefix*])]))))
(&/$Left (str "[Reader Error] Pattern failed: " regex))))))))
-(defn read-text [^String text]
+(defn read-text
"(-> Text (Reader Text))"
+ [^String text]
(with-line
(fn [file-name line-num column-num ^String line]
(if (.startsWith line text column-num)
@@ -114,8 +116,9 @@
(&/T [(&/T [file-name line-num column-num*]) line]))))
($No (str "[Reader Error] Text failed: " text))))))
-(defn read-text? [^String text]
+(defn read-text?
"(-> Text (Reader (Maybe Text)))"
+ [^String text]
(with-line
(fn [file-name line-num column-num ^String line]
(if (.startsWith line text column-num)