From 117124707863ed7d89ef13db417f883a76da041c Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Tue, 20 Jan 2015 01:57:59 -0400 Subject: [Enhancements] - Simplified lexer. - Parser now does cleanup of comments & white-space, plus balancing of parens/brackets/braces and checking for even number of elements in records. - Lexer & parser now share state. --- src/lux.clj | 1 + src/lux/analyser.clj | 21 +++++---- src/lux/compiler.clj | 28 ++++++++---- src/lux/lexer.clj | 115 +++++++++++++++++++--------------------------- src/lux/parser.clj | 127 ++++++++++++++++++++++++++++++--------------------- 5 files changed, 154 insertions(+), 138 deletions(-) (limited to 'src') diff --git a/src/lux.clj b/src/lux.clj index 7553e1845..d5c76cea9 100644 --- a/src/lux.clj +++ b/src/lux.clj @@ -23,6 +23,7 @@ ;; TODO: monitor enter & monitor exit. ;; TODO: Reinplement "if" as a macro on top of case. ;; TODO: Remember to optimized calling global functions. + ;; TODO: Reader macros. ;; TODO: (&compiler/compile-all ["lux" "test2"]) diff --git a/src/lux/analyser.clj b/src/lux/analyser.clj index ce84c7310..fd7a5a5d0 100644 --- a/src/lux/analyser.clj +++ b/src/lux/analyser.clj @@ -196,7 +196,7 @@ (if-let [global|import (or (get-in state [:defs-env ident]) (get-in state [:imports ident]))] [::&util/ok [state global|import]] - [::&util/failure (str "Unresolved identifier: " ident)]) + [::&util/failure (str "[Analyser Error] Unresolved identifier: " ident)]) :else (let [[=local inner*] (reduce (fn [[register new-inner] [frame scope]] @@ -217,8 +217,9 @@ (match token# ~match (~return (assoc state# :forms left#)) + _# - (fail* (str "Unmatched token: " token#)))))) + (fail* (str "[Analyser Error] Unmatched token: " token#)))))) (defn analyse-form* [form] (fn [state] @@ -298,14 +299,14 @@ [::class ?full-name] (return (Class/forName ?full-name)) _ - (fail "Unknown class."))) + (fail "[Analyser Error] Unknown class."))) (let [full-name* (str "java.lang." class)] (if-let [full-name (try (Class/forName full-name*) full-name* (catch Exception e nil))] (return (Class/forName full-name)) - (fail "Unknown class.")))])))) + (fail "[Analyser Error] Unknown class.")))])))) (defn extract-jvm-param [token] (match token @@ -344,7 +345,7 @@ [(.getDeclaringClass =field) (.getType =field)]))] (exec [=type (&type/class->type type)] (return [(.getName owner) =type])) - (fail (str "Field does not exist: " target field mode)))) + (fail (str "[Analyser Error] Field does not exist: " target field mode)))) (defn lookup-method [mode target method args] (if-let [methods (seq (for [=method (.getMethods (Class/forName target)) @@ -357,7 +358,7 @@ (exec [=method (&type/method->type method)] (return [(.getName owner) =method]))) methods) - (fail (str "Method does not exist: " target method mode)))) + (fail (str "[Analyser Error] Method does not exist: " target method mode)))) (defn lookup-static-field [target field] (if-let [type* (first (for [=field (.getFields target) @@ -367,7 +368,7 @@ (.getType =field)))] (exec [=type (&type/class->type type*)] (return =type)) - (fail (str "Field does not exist: " target field)))) + (fail (str "[Analyser Error] Field does not exist: " target field)))) (defn lookup-virtual-method [target method-name args] (if-let [method (first (for [=method (.getMethods target) @@ -377,7 +378,7 @@ =method))] (exec [=method (&type/method->type method)] (&type/return-type =method)) - (fail (str "Virtual method does not exist: " target method-name)))) + (fail (str "[Analyser Error] Virtual method does not exist: " target method-name)))) (defn full-class-name [class] (if (.contains class ".") @@ -387,14 +388,14 @@ [::class ?full-name] (return ?full-name) _ - (fail "Unknown class."))) + (fail "[Analyser Error] Unknown class."))) (let [full-name* (str "java.lang." class)] (if-let [full-name (try (Class/forName full-name*) full-name* (catch Exception e nil))] (return full-name) - (fail "Unknown class.")))]))) + (fail "[Analyser Error] Unknown class.")))]))) (defanalyser analyse-jvm-getstatic [::&parser/form ([[::&parser/ident "jvm/getstatic"] [::&parser/ident ?class] [::&parser/ident ?field]] :seq)] diff --git a/src/lux/compiler.clj b/src/lux/compiler.clj index 9f6a6cd6c..6d8cd08ff 100644 --- a/src/lux/compiler.clj +++ b/src/lux/compiler.clj @@ -20,7 +20,7 @@ MethodVisitor))) (declare compile-form - compile) + compile-file) (def +prefix+ "lux") @@ -961,10 +961,12 @@ (defcompiler compile-use [::&analyser/use ?file ?alias] (let [module-name (re-find #"[^/]+$" ?file) - source-code (slurp (str "source/" module-name ".lux")) - tokens (&lexer/lex source-code) - syntax (&parser/parse tokens) - bytecode (compile module-name syntax)] + ;; source-code (slurp (str "source/" module-name ".lux")) + ;; tokens (&lexer/lex source-code) + ;; syntax (&parser/parse tokens) + ;; bytecode (compile module-name syntax) + ] + (compile-file module-name) nil)) (let [+int-class+ (->class "java.lang.Integer")] @@ -1075,10 +1077,14 @@ )))) (defn compile-file [name] - (->> (slurp (str "source/" name ".lux")) - &lexer/lex - &parser/parse - (compile name))) + (match ((&parser/parse-all) {::&lexer/source (slurp (str "source/" name ".lux"))}) + [::&util/ok [?state ?forms]] + (let [?forms* (filter identity ?forms)] + (prn '?forms ?forms*) + (compile name ?forms*)) + + [::&util/failure ?message] + (assert false ?message))) (defn compile-all [files] (reset! !state {:name nil @@ -1091,3 +1097,7 @@ :env (list (&analyser/fresh-env 0)) :types &type/+init+}) (dorun (map compile-file files))) + +(comment + (compile-all ["lux"]) + ) diff --git a/src/lux/lexer.clj b/src/lux/lexer.clj index 8f7bdbb1d..78b9dc304 100644 --- a/src/lux/lexer.clj +++ b/src/lux/lexer.clj @@ -1,29 +1,26 @@ (ns lux.lexer (:require [clojure.template :refer [do-template]] - [clojure.core.match :refer [match]] [lux.util :as &util :refer [exec return* return fail fail* repeat-m try-m try-all-m]])) -(declare lex-forms lex-list lex-tuple lex-record lex-tag) - ;; [Utils] (defn ^:private lex-regex [regex] - (fn [text] - (if-let [[match] (re-find regex text)] - (return* (.substring text (.length match)) match) - (fail* (str "Pattern failed: " regex " -- " text))))) + (fn [state] + (if-let [[match] (re-find regex (::source state))] + (return* (update-in state [::source] #(.substring % (.length match))) match) + (fail* (str "[Lexer Error] Pattern failed: " regex))))) (defn ^:private lex-regex2 [regex] - (fn [text] - (if-let [[match tok1 tok2] (re-find regex text)] - (return* (.substring text (.length match)) [tok1 tok2]) - (fail* (str "Pattern failed: " regex " -- " text))))) + (fn [state] + (if-let [[match tok1 tok2] (re-find regex (::source state))] + (return* (update-in state [::source] #(.substring % (.length match))) [tok1 tok2]) + (fail* (str "[Lexer Error] Pattern failed: " regex))))) (defn ^:private lex-str [prefix] - (fn [text] - (if (.startsWith text prefix) - (return* (.substring text (.length prefix)) prefix) - (fail* (str "String failed: " prefix " -- " text))))) + (fn [state] + (if (.startsWith (::source state) prefix) + (return* (update-in state [::source] #(.substring % (.length prefix))) prefix) + (fail* (str "[Lexer Error] Text failed: " prefix))))) (defn ^:private escape-char [escaped] (condp = escaped @@ -35,7 +32,7 @@ "\\\"" (return "\"") "\\\\" (return "\\") ;; else - (fail (str "Unknown escape character: " escaped)))) + (fail (str "[Lexer Error] Unknown escape character: " escaped)))) (def ^:private lex-string-body (try-all-m [(exec [[prefix escaped] (lex-regex2 #"(?s)^([^\"\\]*)(\\.)") @@ -47,7 +44,9 @@ (def ^:private +ident-re+ #"^([a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|':\~\?][0-9a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|':\~\?]*)") ;; [Lexers] -(def ^:private lex-white-space (lex-regex #"^(\s+)")) +(def ^:private lex-white-space + (exec [white-space (lex-regex #"^(\s+)")] + (return [::white-space white-space]))) (do-template [ ] (def @@ -98,56 +97,36 @@ token (lex-regex +ident-re+)] (return [::tag token]))) -(def ^:private lex-form - (exec [_ (try-m lex-white-space) - form (try-all-m [lex-bool - lex-real - lex-int - lex-char - lex-text - lex-ident - lex-tag - lex-list - lex-tuple - lex-record - lex-comment]) - _ (try-m lex-white-space)] - (return form))) - -(def lex-forms - (exec [forms (repeat-m lex-form)] - (return (filter #(match % - [::comment _] - false - _ - true) - forms)))) - -(def ^:private lex-list - (exec [_ (lex-str "(") - members lex-forms - _ (lex-str ")")] - (return [::list members]))) - -(def ^:private lex-tuple - (exec [_ (lex-str "[") - members lex-forms - _ (lex-str "]")] - (return [::tuple members]))) - -(def ^:private lex-record - (exec [_ (lex-str "{") - members lex-forms - _ (lex-str "}")] - (return [::record members]))) +(do-template [ ] + (def + (exec [_ (lex-str )] + (return []))) + + ^:private lex-open-paren "(" ::open-paren + ^:private lex-close-paren ")" ::close-paren + ^:private lex-open-bracket "[" ::open-bracket + ^:private lex-close-bracket "]" ::close-bracket + ^:private lex-open-brace "{" ::open-brace + ^:private lex-close-brace "}" ::close-brace + ) + +(def ^:private lex-delimiter + (try-all-m [lex-open-paren + lex-close-paren + lex-open-bracket + lex-close-bracket + lex-open-brace + lex-close-brace])) ;; [Interface] -(defn lex [text] - (match (lex-forms text) - [::&util/ok [?state ?forms]] - (if (empty? ?state) - ?forms - (assert false (str "Unconsumed input: " ?state))) - - [::&util/failure ?message] - (assert false ?message))) +(def lex + (try-all-m [lex-white-space + lex-bool + lex-real + lex-int + lex-char + lex-text + lex-ident + lex-tag + lex-comment + lex-delimiter])) diff --git a/src/lux/parser.clj b/src/lux/parser.clj index bb7b0f212..e3a5a08a9 100644 --- a/src/lux/parser.clj +++ b/src/lux/parser.clj @@ -4,20 +4,19 @@ (lux [util :as &util :refer [exec return* return fail fail* repeat-m try-m try-all-m map-m apply-m]] - [lexer :as &lexer] - [type :as &type]))) + [lexer :as &lexer]))) -(declare parse-token) +(declare parse) ;; [Utils] (defmacro ^:private defparser [name match return] - `(def ~name - (fn [[token# & left#]] - (match token# - ~match - (~return left#) - _# - (fail* (str "Unmatched token: " token#)))))) + `(defn ~name [token#] + (match token# + ~match + ~return + + _# + (fail (str "[Parser Error] Unmatched token: " token#))))) ;; [Parsers] (let [first-char #(.charAt % 0)] @@ -34,51 +33,77 @@ ^:private parse-ident ::&lexer/ident ::ident identity )) -(defparser ^:private parse-tuple - [::&lexer/tuple ?parts] - (exec [=parts (map-m (fn [arg] (apply-m parse-token (list arg))) - ?parts)] - (return [::tuple =parts]))) +(defparser parse-comment + [::&lexer/comment _] + (return nil)) -(defparser ^:private parse-record - [::&lexer/record ?parts] - (exec [=kvs (do (assert (even? (count ?parts))) - (map-m #(match % - ([[::&lexer/tag ?label] ?value] :seq) - (exec [=value (apply-m parse-token (list ?value))] - (return [?label =value]))) - (partition 2 ?parts)))] - (return [::record =kvs]))) +(defparser parse-whitespace + [::&lexer/white-space _] + (return nil)) (defparser ^:private parse-tag [::&lexer/tag ?tag] (return [::tag ?tag])) (defparser ^:private parse-form - [::&lexer/list ?elems] - (exec [=elems (map-m (fn [arg] (apply-m parse-token (list arg))) - ?elems)] - (return [::form =elems]))) - -(def ^:private parse-token - (try-all-m [parse-bool - parse-int - parse-real - parse-char - parse-text - parse-ident - parse-tuple - parse-record - parse-tag - parse-form])) - -;; [Interface] -(defn parse [text] - (match ((repeat-m parse-token) text) - [::&util/ok [?state ?forms]] - (if (empty? ?state) - ?forms - (assert false (str "Unconsumed input: " (pr-str ?state)))) - - [::&util/failure ?message] - (assert false ?message))) + [::&lexer/open-paren] + (exec [elems (repeat-m parse) + token &lexer/lex] + (if (= [::&lexer/close-paren] token) + (return [::form (filter identity elems)]) + (fail "[Parser Error] Unbalanced parantheses.")))) + +(do-template [ ] + (defparser + [] + (exec [elems (repeat-m parse) + token &lexer/lex] + (if (= [] token) + (return [ (filter identity elems)]) + (fail (str "[Parser Error] Unbalanced " "."))))) + + ^:private parse-form ::&lexer/open-paren ::&lexer/close-paren "parantheses" ::form + ^:private parse-tuple ::&lexer/open-bracket ::&lexer/close-bracket "brackets" ::tuple + ) + +(defparser ^:private parse-record + [::&lexer/open-brace] + (exec [elems* (repeat-m parse) + token &lexer/lex + :let [elems (filter identity elems*)]] + (cond (not= [::&lexer/close-brace] token) + (fail (str "[Parser Error] Unbalanced braces.")) + + (odd? (count elems)) + (fail (str "[Parser Error] Records must have an even number of elements.")) + + :else + (return [::record (filter identity elems)])))) + +(let [parsers [parse-comment + parse-whitespace + parse-bool + parse-int + parse-real + parse-char + parse-text + parse-tag + parse-ident + parse-form + parse-tuple + parse-record]] + (defn ^:private parse-token [token] + (try-all-m (map #(% token) parsers)))) + +(def ^:private parse + (exec [token &lexer/lex] + (parse-token token))) + +(defn parse-all [] + (exec [ast parse] + (fn [state] + (if (empty? (::&lexer/source state)) + (return* state (if ast (list ast) '())) + ((exec [asts (parse-all)] + (return (cons ast asts))) + state))))) -- cgit v1.2.3