about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eduardo Julian, 2015-08-30 18:36:17 -0400
committer: Eduardo Julian, 2015-08-30 18:36:17 -0400
commit: 817d244adff361104ae0aa6ce53efe6c2bc07552 (patch)
tree: 816a3d40b74994b51f9acf56fbf6342506aa65aa
parent: 0a0fab3581eedbc13df2af40e3db8bc2d2fd8178 (diff)
- Added unit-tests for lexer.
- Fixed a bug when lexing multi-line comments.
-rw-r--r-- src/lux/lexer.clj 20
-rw-r--r-- src/lux/reader.clj 27
-rw-r--r-- test/test/lux/lexer.clj 264
-rw-r--r-- test/test/lux/reader.clj 4
4 files changed, 281 insertions, 34 deletions
diff --git a/src/lux/lexer.clj b/src/lux/lexer.clj
index 4c7741769..b3a47f3e0 100644
--- a/src/lux/lexer.clj
+++ b/src/lux/lexer.clj
@@ -64,20 +64,12 @@
(defn ^:private lex-multi-line-comment [_]
(|do [_ (&reader/read-text "#(")
- [meta comment] (&/try-all% (&/|list (|do [[meta comment] (&reader/read-regex #"(?is)^(?!#\()(.*?(?=\)#))")
- ;; :let [_ (prn 'immediate comment)]
- _ (&reader/read-text ")#")]
+ [meta comment] (&/try-all% (&/|list (|do [[meta comment] (&reader/read-regex+ #"(?is)^(?!#\()((?!\)#).)*")]
(return (&/T meta comment)))
- (|do [;; :let [_ (prn 'pre/_0)]
- [meta pre] (&reader/read-regex+ #"(?is)^(.*?)(#\(|$)")
- ;; :let [_ (prn 'pre pre)]
- [_ inner] (lex-multi-line-comment nil)
- ;; :let [_ (prn 'inner inner)]
- [_ post] (&reader/read-regex #"(?is)^(.+?(?=\)#))")
- ;; :let [_ (prn 'post post (str pre "#(" inner ")#" post))]
- ]
+ (|do [[meta pre] (&reader/read-regex+ #"(?is)^((?!#\().)*")
+ [_ ($Comment inner)] (lex-multi-line-comment nil)
+ [_ post] (&reader/read-regex+ #"(?is)^((?!\)#).)*")]
(return (&/T meta (str pre "#(" inner ")#" post))))))
- ;; :let [_ (prn 'lex-multi-line-comment (str comment ")#"))]
_ (&reader/read-text ")#")]
(return (&/T meta (&/V $Comment comment)))))
@@ -91,8 +83,8 @@
(return (&/T meta (&/V <tag> token)))))
^:private lex-bool $Bool #"^(true|false)"
- ^:private lex-int $Int #"^(-?0|-?[1-9][0-9]*)"
- ^:private lex-real $Real #"^-?(-?0\.[0-9]+|-?[1-9][0-9]*\.[0-9]+)"
+ ^:private lex-int $Int #"^-?(0|[1-9][0-9]*)"
+ ^:private lex-real $Real #"^-?(0\.[0-9]+|[1-9][0-9]*\.[0-9]+)"
)
(def ^:private lex-char
diff --git a/src/lux/reader.clj b/src/lux/reader.clj
index af6c1ecc3..7b1559f07 100644
--- a/src/lux/reader.clj
+++ b/src/lux/reader.clj
@@ -55,13 +55,6 @@
(when (.find matcher)
(.group matcher 0))))
-(defn ^:private re-find1! [^java.util.regex.Pattern regex column ^String line]
- (let [matcher (doto (.matcher regex line)
- (.region column (.length line))
- (.useAnchoringBounds true))]
- (when (.find matcher)
- (.group matcher 1))))
-
(defn ^:private re-find3! [^java.util.regex.Pattern regex column ^String line]
(let [matcher (doto (.matcher regex line)
(.region column (.length line))
@@ -75,11 +68,8 @@
(defn read-regex [regex]
(with-line
(fn [file-name line-num column-num ^String line]
- ;; (prn 'read-regex [file-name line-num column-num regex line])
- (if-let [^String match (do ;; (prn '[regex line] [regex line])
- (re-find! regex column-num line))]
- (let [;; _ (prn 'match match)
- match-length (.length match)
+ (if-let [^String match (re-find! regex column-num line)]
+ (let [match-length (.length match)
column-num* (+ column-num match-length)]
(if (= column-num* (.length line))
(&/V $Done (&/T (&/T file-name line-num column-num) match))
@@ -90,7 +80,6 @@
(defn read-regex2 [regex]
(with-line
(fn [file-name line-num column-num ^String line]
- ;; (prn 'read-regex2 [file-name line-num column-num regex line])
(if-let [[^String match tok1 tok2] (re-find3! regex column-num line)]
(let [match-length (.length match)
column-num* (+ column-num match-length)]
@@ -111,15 +100,17 @@
(&/$Cons [[file-name line-num column-num] ^String line]
reader**)
- (if-let [^String match (do ;; (prn 'read-regex+ regex line)
- (re-find1! regex column-num line))]
+ (if-let [^String match (re-find! regex column-num line)]
(let [match-length (.length match)
- column-num* (+ column-num match-length)]
+ column-num* (+ column-num match-length)
+ prefix* (if (= 0 column-num)
+ (str prefix "\n" match)
+ (str prefix match))]
(if (= column-num* (.length line))
- (recur (str prefix match "\n") reader**)
+ (recur prefix* reader**)
(&/V &/$Right (&/T (&/Cons$ (&/T (&/T file-name line-num column-num*) line)
reader**)
- (&/T (&/T file-name line-num column-num) (str prefix match))))))
+ (&/T (&/T file-name line-num column-num) prefix*)))))
(&/V &/$Left (str "[Reader Error] Pattern failed: " regex))))))))
(defn read-text [^String text]
diff --git a/test/test/lux/lexer.clj b/test/test/lux/lexer.clj
new file mode 100644
index 000000000..c36e4ea65
--- /dev/null
+++ b/test/test/lux/lexer.clj
@@ -0,0 +1,264 @@
+(ns test.lux.lexer
+ (:use clojure.test)
+ (:require (lux [base :as & :refer [deftags |do return* return fail fail* |let |case]]
+ [reader :as &reader]
+ [lexer :as &lexer])
+ [lux.analyser.module :as &a-module]
+ :reload-all))
+
+;; [Utils]
+(def ^:private module-name "test")
+
+(defn ^:private make-state [source-code]
+ (&/set$ &/$source (&reader/from module-name source-code)
+ (&/init-state nil)))
+
+;; [Tests]
+(deftest lex-white-space
+ (let [input " \t"]
+ (|case (&/run-state &lexer/lex (make-state input))
+ (&/$Right state [cursor (&lexer/$White_Space output)])
+ (is (= input output))
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-comment
+ ;; Should be capable of recognizing both single-line & multi-line comments.
+ (let [input1 " YOLO"
+ input2 "\nLOL\n"
+ input3 " NYAN\n#(\nCAT )#\n"]
+ (|case (&/run-state (|do [[_ single-line] &lexer/lex
+ [_ multi-line] &lexer/lex
+ [_ multi-line-embedded] &lexer/lex]
+ (return (&/T single-line multi-line multi-line-embedded)))
+ (make-state (str "##" input1 "\n" "#(" input2 ")#" "\n" "#(" input3 ")#")))
+ (&/$Right state [(&lexer/$Comment output1)
+ (&lexer/$Comment output2)
+ (&lexer/$Comment output3)])
+ (are [input output] (= input output)
+ input1 output1
+ input2 output2
+ input3 output3)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-bool
+ (let [input1 "true"
+ input2 "false"]
+ (|case (&/run-state (|do [[_ output1] &lexer/lex
+ [_ output2] &lexer/lex]
+ (return (&/T output1 output2)))
+ (make-state (str input1 "\n" input2)))
+ (&/$Right state [(&lexer/$Bool output1)
+ (&lexer/$Bool output2)])
+ (are [input output] (= input output)
+ input1 output1
+ input2 output2)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-int
+ (let [input1 "0"
+ input2 "12"
+ input3 "-123"]
+ (|case (&/run-state (|do [[_ output1] &lexer/lex
+ [_ output2] &lexer/lex
+ [_ output3] &lexer/lex]
+ (return (&/T output1 output2 output3)))
+ (make-state (str input1 "\n" input2 "\n" input3)))
+ (&/$Right state [(&lexer/$Int output1)
+ (&lexer/$Int output2)
+ (&lexer/$Int output3)])
+ (are [input output] (= input output)
+ input1 output1
+ input2 output2
+ input3 output3)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-real
+ (let [input1 "0.00123"
+ input2 "12.01020300"
+ input3 "-12.3"]
+ (|case (&/run-state (|do [[_ output1] &lexer/lex
+ [_ output2] &lexer/lex
+ [_ output3] &lexer/lex]
+ (return (&/T output1 output2 output3)))
+ (make-state (str input1 "\n" input2 "\n" input3)))
+ (&/$Right state [(&lexer/$Real output1)
+ (&lexer/$Real output2)
+ (&lexer/$Real output3)])
+ (are [input output] (= input output)
+ input1 output1
+ input2 output2
+ input3 output3)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-char
+ (let [input1 "a"
+ input2 "\\n"
+ input3 " "
+ input4 "\\t"
+ input5 "\\b"
+ input6 "\\r"
+ input7 "\\f"
+ input8 "\\\""
+ input9 "\\\\"]
+ (|case (&/run-state (|do [[_ output1] &lexer/lex
+ [_ output2] &lexer/lex
+ [_ output3] &lexer/lex
+ [_ output4] &lexer/lex
+ [_ output5] &lexer/lex
+ [_ output6] &lexer/lex
+ [_ output7] &lexer/lex
+ [_ output8] &lexer/lex
+ [_ output9] &lexer/lex]
+ (return (&/T output1 output2 output3 output4 output5 output6 output7 output8 output9)))
+ (make-state (str "#\"" input1 "\"" "\n" "#\"" input2 "\"" "\n" "#\"" input3 "\""
+ "\n" "#\"" input4 "\"" "\n" "#\"" input5 "\"" "\n" "#\"" input6 "\""
+ "\n" "#\"" input7 "\"" "\n" "#\"" input8 "\"" "\n" "#\"" input9 "\"")))
+ (&/$Right state [(&lexer/$Char output1)
+ (&lexer/$Char output2)
+ (&lexer/$Char output3)
+ (&lexer/$Char output4)
+ (&lexer/$Char output5)
+ (&lexer/$Char output6)
+ (&lexer/$Char output7)
+ (&lexer/$Char output8)
+ (&lexer/$Char output9)])
+ (are [input output] (= input output)
+ input1 output1
+ "\n" output2
+ input3 output3
+ "\t" output4
+ "\b" output5
+ "\r" output6
+ "\f" output7
+ "\"" output8
+ "\\" output9)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-text
+ (let [input1 ""
+ input2 "abc"
+ input3 "yolo\\nlol\\tmeme"]
+ (|case (&/run-state (|do [[_ output1] &lexer/lex
+ [_ output2] &lexer/lex
+ [_ output3] &lexer/lex]
+ (return (&/T output1 output2 output3)))
+ (make-state (str "\"" input1 "\"" "\n" "\"" input2 "\"" "\n" "\"" input3 "\"")))
+ (&/$Right state [(&lexer/$Text output1)
+ (&lexer/$Text output2)
+ (&lexer/$Text output3)])
+ (are [input output] (= input output)
+ input1 output1
+ input2 output2
+ "yolo\nlol\tmeme" output3)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-symbol
+ (let [input1 "foo"
+ input2 "test;bar0123456789"
+ input3 ";b1a2z3"
+ input4 ";;quux"
+ input5 "!_@$%^&*-+=.<>?/|\\~`':"]
+ (|case (&/run-state (|do [_ (&a-module/enter-module module-name)
+ [_ output1] &lexer/lex
+ [_ output2] &lexer/lex
+ [_ output3] &lexer/lex
+ [_ output4] &lexer/lex
+ [_ output5] &lexer/lex]
+ (return (&/T output1 output2 output3 output4 output5)))
+ (make-state (str input1 "\n" input2 "\n" input3 "\n" input4 "\n" input5)))
+ (&/$Right state [(&lexer/$Symbol output1)
+ (&lexer/$Symbol output2)
+ (&lexer/$Symbol output3)
+ (&lexer/$Symbol output4)
+ (&lexer/$Symbol output5)])
+ (are [input output] (&/ident= input output)
+ (&/T "" "foo") output1
+ (&/T "test" "bar0123456789") output2
+ (&/T "lux" "b1a2z3") output3
+ (&/T "test" "quux") output4
+ (&/T "" "!_@$%^&*-+=.<>?/|\\~`':") output5)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-tag
+ (let [input1 "foo"
+ input2 "test;bar0123456789"
+ input3 ";b1a2z3"
+ input4 ";;quux"
+ input5 "!_@$%^&*-+=.<>?/|\\~`':"]
+ (|case (&/run-state (|do [_ (&a-module/enter-module module-name)
+ [_ output1] &lexer/lex
+ [_ output2] &lexer/lex
+ [_ output3] &lexer/lex
+ [_ output4] &lexer/lex
+ [_ output5] &lexer/lex]
+ (return (&/T output1 output2 output3 output4 output5)))
+ (make-state (str "#" input1 "\n" "#" input2 "\n" "#" input3 "\n" "#" input4 "\n" "#" input5)))
+ (&/$Right state [(&lexer/$Tag output1)
+ (&lexer/$Tag output2)
+ (&lexer/$Tag output3)
+ (&lexer/$Tag output4)
+ (&lexer/$Tag output5)])
+ (are [input output] (&/ident= input output)
+ (&/T "" "foo") output1
+ (&/T "test" "bar0123456789") output2
+ (&/T "lux" "b1a2z3") output3
+ (&/T "test" "quux") output4
+ (&/T "" "!_@$%^&*-+=.<>?/|\\~`':") output5)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+(deftest lex-delimiter
+ (let [input1 "("
+ input2 ")"
+ input3 "["
+ input4 "]"
+ input5 "{"
+ input6 "}"]
+ (|case (&/run-state (|do [_ (&a-module/enter-module module-name)
+ [_ output1] &lexer/lex
+ [_ output2] &lexer/lex
+ [_ output3] &lexer/lex
+ [_ output4] &lexer/lex
+ [_ output5] &lexer/lex
+ [_ output6] &lexer/lex]
+ (return (&/T output1 output2 output3 output4 output5 output6)))
+ (make-state (str input1 "\n" input2 "\n" input3 "\n" input4 "\n" input5 "\n" input6)))
+ (&/$Right state [(&lexer/$Open_Paren)
+ (&lexer/$Close_Paren)
+ (&lexer/$Open_Bracket)
+ (&lexer/$Close_Bracket)
+ (&lexer/$Open_Brace)
+ (&lexer/$Close_Brace)])
+ (is true)
+
+ _
+ (is false "Couldn't read.")
+ )))
+
+;; (run-all-tests)
diff --git a/test/test/lux/reader.clj b/test/test/lux/reader.clj
index 9b4954c5a..6d3ee0e06 100644
--- a/test/test/lux/reader.clj
+++ b/test/test/lux/reader.clj
@@ -1,11 +1,11 @@
-(ns text.lux.reader
+(ns test.lux.reader
(:use clojure.test)
(:require (lux [base :as & :refer [deftags |do return* return fail fail* |let |case]]
[reader :as &reader])
:reload-all))
;; [Utils]
-(def source (&reader/from "yolo" "lol\nmeme\nnyan cat\n\nlolcat"))
+(def source (&reader/from "test" "lol\nmeme\nnyan cat\n\nlolcat"))
(def init-state (&/set$ &/$source source (&/init-state nil)))
;; [Tests]