From 817d244adff361104ae0aa6ce53efe6c2bc07552 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Sun, 30 Aug 2015 18:36:17 -0400 Subject: - Added unit-tests for lexer. - Fixed a bug when lexing multi-line comments. --- src/lux/lexer.clj | 20 ++-- src/lux/reader.clj | 27 ++--- test/test/lux/lexer.clj | 264 +++++++++++++++++++++++++++++++++++++++++++++++ test/test/lux/reader.clj | 4 +- 4 files changed, 281 insertions(+), 34 deletions(-) create mode 100644 test/test/lux/lexer.clj diff --git a/src/lux/lexer.clj b/src/lux/lexer.clj index 4c7741769..b3a47f3e0 100644 --- a/src/lux/lexer.clj +++ b/src/lux/lexer.clj @@ -64,20 +64,12 @@ (defn ^:private lex-multi-line-comment [_] (|do [_ (&reader/read-text "#(") - [meta comment] (&/try-all% (&/|list (|do [[meta comment] (&reader/read-regex #"(?is)^(?!#\()(.*?(?=\)#))") - ;; :let [_ (prn 'immediate comment)] - _ (&reader/read-text ")#")] + [meta comment] (&/try-all% (&/|list (|do [[meta comment] (&reader/read-regex+ #"(?is)^(?!#\()((?!\)#).)*")] (return (&/T meta comment))) - (|do [;; :let [_ (prn 'pre/_0)] - [meta pre] (&reader/read-regex+ #"(?is)^(.*?)(#\(|$)") - ;; :let [_ (prn 'pre pre)] - [_ inner] (lex-multi-line-comment nil) - ;; :let [_ (prn 'inner inner)] - [_ post] (&reader/read-regex #"(?is)^(.+?(?=\)#))") - ;; :let [_ (prn 'post post (str pre "#(" inner ")#" post))] - ] + (|do [[meta pre] (&reader/read-regex+ #"(?is)^((?!#\().)*") + [_ ($Comment inner)] (lex-multi-line-comment nil) + [_ post] (&reader/read-regex+ #"(?is)^((?!\)#).)*")] (return (&/T meta (str pre "#(" inner ")#" post)))))) - ;; :let [_ (prn 'lex-multi-line-comment (str comment ")#"))] _ (&reader/read-text ")#")] (return (&/T meta (&/V $Comment comment))))) @@ -91,8 +83,8 @@ (return (&/T meta (&/V token))))) ^:private lex-bool $Bool #"^(true|false)" - ^:private lex-int $Int #"^(-?0|-?[1-9][0-9]*)" - ^:private lex-real $Real #"^-?(-?0\.[0-9]+|-?[1-9][0-9]*\.[0-9]+)" + ^:private lex-int $Int #"^-?(0|[1-9][0-9]*)" + ^:private lex-real $Real #"^-?(0\.[0-9]+|[1-9][0-9]*\.[0-9]+)" ) (def ^:private lex-char diff --git a/src/lux/reader.clj b/src/lux/reader.clj index af6c1ecc3..7b1559f07 100644 --- a/src/lux/reader.clj +++ b/src/lux/reader.clj @@ -55,13 +55,6 @@ (when (.find matcher) (.group matcher 0)))) -(defn ^:private re-find1! [^java.util.regex.Pattern regex column ^String line] - (let [matcher (doto (.matcher regex line) - (.region column (.length line)) - (.useAnchoringBounds true))] - (when (.find matcher) - (.group matcher 1)))) - (defn ^:private re-find3! [^java.util.regex.Pattern regex column ^String line] (let [matcher (doto (.matcher regex line) (.region column (.length line)) @@ -75,11 +68,8 @@ (defn read-regex [regex] (with-line (fn [file-name line-num column-num ^String line] - ;; (prn 'read-regex [file-name line-num column-num regex line]) - (if-let [^String match (do ;; (prn '[regex line] [regex line]) - (re-find! regex column-num line))] - (let [;; _ (prn 'match match) - match-length (.length match) + (if-let [^String match (re-find! regex column-num line)] + (let [match-length (.length match) column-num* (+ column-num match-length)] (if (= column-num* (.length line)) (&/V $Done (&/T (&/T file-name line-num column-num) match)) @@ -90,7 +80,6 @@ (defn read-regex2 [regex] (with-line (fn [file-name line-num column-num ^String line] - ;; (prn 'read-regex2 [file-name line-num column-num regex line]) (if-let [[^String match tok1 tok2] (re-find3! regex column-num line)] (let [match-length (.length match) column-num* (+ column-num match-length)] @@ -111,15 +100,17 @@ (&/$Cons [[file-name line-num column-num] ^String line] reader**) - (if-let [^String match (do ;; (prn 'read-regex+ regex line) - (re-find1! regex column-num line))] + (if-let [^String match (re-find! regex column-num line)] (let [match-length (.length match) - column-num* (+ column-num match-length)] + column-num* (+ column-num match-length) + prefix* (if (= 0 column-num) + (str prefix "\n" match) + (str prefix match))] (if (= column-num* (.length line)) - (recur (str prefix match "\n") reader**) + (recur prefix* reader**) (&/V &/$Right (&/T (&/Cons$ (&/T (&/T file-name line-num column-num*) line) reader**) - (&/T (&/T file-name line-num column-num) (str prefix match)))))) + (&/T (&/T file-name line-num column-num) prefix*))))) (&/V &/$Left (str "[Reader Error] Pattern failed: " regex)))))))) (defn read-text [^String text] diff --git a/test/test/lux/lexer.clj b/test/test/lux/lexer.clj new file mode 100644 index 000000000..c36e4ea65 --- /dev/null +++ b/test/test/lux/lexer.clj @@ -0,0 +1,264 @@ +(ns test.lux.lexer + (:use clojure.test) + (:require (lux [base :as & :refer [deftags |do return* return fail fail* |let |case]] + [reader :as &reader] + [lexer :as &lexer]) + [lux.analyser.module :as &a-module] + :reload-all)) + +;; [Utils] +(def ^:private module-name "test") + +(defn ^:private make-state [source-code] + (&/set$ &/$source (&reader/from module-name source-code) + (&/init-state nil))) + +;; [Tests] +(deftest lex-white-space + (let [input " \t"] + (|case (&/run-state &lexer/lex (make-state input)) + (&/$Right state [cursor (&lexer/$White_Space output)]) + (is (= input output)) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-comment + ;; Should be capable of recognizing both single-line & multi-line comments. + (let [input1 " YOLO" + input2 "\nLOL\n" + input3 " NYAN\n#(\nCAT )#\n"] + (|case (&/run-state (|do [[_ single-line] &lexer/lex + [_ multi-line] &lexer/lex + [_ multi-line-embedded] &lexer/lex] + (return (&/T single-line multi-line multi-line-embedded))) + (make-state (str "##" input1 "\n" "#(" input2 ")#" "\n" "#(" input3 ")#"))) + (&/$Right state [(&lexer/$Comment output1) + (&lexer/$Comment output2) + (&lexer/$Comment output3)]) + (are [input output] (= input output) + input1 output1 + input2 output2 + input3 output3) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-bool + (let [input1 "true" + input2 "false"] + (|case (&/run-state (|do [[_ output1] &lexer/lex + [_ output2] &lexer/lex] + (return (&/T output1 output2))) + (make-state (str input1 "\n" input2))) + (&/$Right state [(&lexer/$Bool output1) + (&lexer/$Bool output2)]) + (are [input output] (= input output) + input1 output1 + input2 output2) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-int + (let [input1 "0" + input2 "12" + input3 "-123"] + (|case (&/run-state (|do [[_ output1] &lexer/lex + [_ output2] &lexer/lex + [_ output3] &lexer/lex] + (return (&/T output1 output2 output3))) + (make-state (str input1 "\n" input2 "\n" input3))) + (&/$Right state [(&lexer/$Int output1) + (&lexer/$Int output2) + (&lexer/$Int output3)]) + (are [input output] (= input output) + input1 output1 + input2 output2 + input3 output3) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-real + (let [input1 "0.00123" + input2 "12.01020300" + input3 "-12.3"] + (|case (&/run-state (|do [[_ output1] &lexer/lex + [_ output2] &lexer/lex + [_ output3] &lexer/lex] + (return (&/T output1 output2 output3))) + (make-state (str input1 "\n" input2 "\n" input3))) + (&/$Right state [(&lexer/$Real output1) + (&lexer/$Real output2) + (&lexer/$Real output3)]) + (are [input output] (= input output) + input1 output1 + input2 output2 + input3 output3) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-char + (let [input1 "a" + input2 "\\n" + input3 " " + input4 "\\t" + input5 "\\b" + input6 "\\r" + input7 "\\f" + input8 "\\\"" + input9 "\\\\"] + (|case (&/run-state (|do [[_ output1] &lexer/lex + [_ output2] &lexer/lex + [_ output3] &lexer/lex + [_ output4] &lexer/lex + [_ output5] &lexer/lex + [_ output6] &lexer/lex + [_ output7] &lexer/lex + [_ output8] &lexer/lex + [_ output9] &lexer/lex] + (return (&/T output1 output2 output3 output4 output5 output6 output7 output8 output9))) + (make-state (str "#\"" input1 "\"" "\n" "#\"" input2 "\"" "\n" "#\"" input3 "\"" + "\n" "#\"" input4 "\"" "\n" "#\"" input5 "\"" "\n" "#\"" input6 "\"" + "\n" "#\"" input7 "\"" "\n" "#\"" input8 "\"" "\n" "#\"" input9 "\""))) + (&/$Right state [(&lexer/$Char output1) + (&lexer/$Char output2) + (&lexer/$Char output3) + (&lexer/$Char output4) + (&lexer/$Char output5) + (&lexer/$Char output6) + (&lexer/$Char output7) + (&lexer/$Char output8) + (&lexer/$Char output9)]) + (are [input output] (= input output) + input1 output1 + "\n" output2 + input3 output3 + "\t" output4 + "\b" output5 + "\r" output6 + "\f" output7 + "\"" output8 + "\\" output9) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-text + (let [input1 "" + input2 "abc" + input3 "yolo\\nlol\\tmeme"] + (|case (&/run-state (|do [[_ output1] &lexer/lex + [_ output2] &lexer/lex + [_ output3] &lexer/lex] + (return (&/T output1 output2 output3))) + (make-state (str "\"" input1 "\"" "\n" "\"" input2 "\"" "\n" "\"" input3 "\""))) + (&/$Right state [(&lexer/$Text output1) + (&lexer/$Text output2) + (&lexer/$Text output3)]) + (are [input output] (= input output) + input1 output1 + input2 output2 + "yolo\nlol\tmeme" output3) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-symbol + (let [input1 "foo" + input2 "test;bar0123456789" + input3 ";b1a2z3" + input4 ";;quux" + input5 "!_@$%^&*-+=.<>?/|\\~`':"] + (|case (&/run-state (|do [_ (&a-module/enter-module module-name) + [_ output1] &lexer/lex + [_ output2] &lexer/lex + [_ output3] &lexer/lex + [_ output4] &lexer/lex + [_ output5] &lexer/lex] + (return (&/T output1 output2 output3 output4 output5))) + (make-state (str input1 "\n" input2 "\n" input3 "\n" input4 "\n" input5))) + (&/$Right state [(&lexer/$Symbol output1) + (&lexer/$Symbol output2) + (&lexer/$Symbol output3) + (&lexer/$Symbol output4) + (&lexer/$Symbol output5)]) + (are [input output] (&/ident= input output) + (&/T "" "foo") output1 + (&/T "test" "bar0123456789") output2 + (&/T "lux" "b1a2z3") output3 + (&/T "test" "quux") output4 + (&/T "" "!_@$%^&*-+=.<>?/|\\~`':") output5) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-tag + (let [input1 "foo" + input2 "test;bar0123456789" + input3 ";b1a2z3" + input4 ";;quux" + input5 "!_@$%^&*-+=.<>?/|\\~`':"] + (|case (&/run-state (|do [_ (&a-module/enter-module module-name) + [_ output1] &lexer/lex + [_ output2] &lexer/lex + [_ output3] &lexer/lex + [_ output4] &lexer/lex + [_ output5] &lexer/lex] + (return (&/T output1 output2 output3 output4 output5))) + (make-state (str "#" input1 "\n" "#" input2 "\n" "#" input3 "\n" "#" input4 "\n" "#" input5))) + (&/$Right state [(&lexer/$Tag output1) + (&lexer/$Tag output2) + (&lexer/$Tag output3) + (&lexer/$Tag output4) + (&lexer/$Tag output5)]) + (are [input output] (&/ident= input output) + (&/T "" "foo") output1 + (&/T "test" "bar0123456789") output2 + (&/T "lux" "b1a2z3") output3 + (&/T "test" "quux") output4 + (&/T "" "!_@$%^&*-+=.<>?/|\\~`':") output5) + + _ + (is false "Couldn't read.") + ))) + +(deftest lex-delimiter + (let [input1 "(" + input2 ")" + input3 "[" + input4 "]" + input5 "{" + input6 "}"] + (|case (&/run-state (|do [_ (&a-module/enter-module module-name) + [_ output1] &lexer/lex + [_ output2] &lexer/lex + [_ output3] &lexer/lex + [_ output4] &lexer/lex + [_ output5] &lexer/lex + [_ output6] &lexer/lex] + (return (&/T output1 output2 output3 output4 output5 output6))) + (make-state (str input1 "\n" input2 "\n" input3 "\n" input4 "\n" input5 "\n" input6))) + (&/$Right state [(&lexer/$Open_Paren) + (&lexer/$Close_Paren) + (&lexer/$Open_Bracket) + (&lexer/$Close_Bracket) + (&lexer/$Open_Brace) + (&lexer/$Close_Brace)]) + (is true) + + _ + (is false "Couldn't read.") + ))) + +;; (run-all-tests) diff --git a/test/test/lux/reader.clj b/test/test/lux/reader.clj index 9b4954c5a..6d3ee0e06 100644 --- a/test/test/lux/reader.clj +++ b/test/test/lux/reader.clj @@ -1,11 +1,11 @@ -(ns text.lux.reader +(ns test.lux.reader (:use clojure.test) (:require (lux [base :as & :refer [deftags |do return* return fail fail* |let |case]] [reader :as &reader]) :reload-all)) ;; [Utils] -(def source (&reader/from "yolo" "lol\nmeme\nnyan cat\n\nlolcat")) +(def source (&reader/from "test" "lol\nmeme\nnyan cat\n\nlolcat")) (def init-state (&/set$ &/$source source (&/init-state nil))) ;; [Tests] -- cgit v1.2.3