diff options
Diffstat (limited to '')
-rw-r--r-- | src/lang/compiler.clj | 136 | ||||
-rw-r--r-- | src/lang/interpreter.clj | 98 | ||||
-rw-r--r-- | src/lang/lexer.clj | 55 | ||||
-rw-r--r-- | src/lang/parser.clj | 54 | ||||
-rw-r--r-- | src/lang/util.clj | 19 |
5 files changed, 301 insertions, 61 deletions
diff --git a/src/lang/compiler.clj b/src/lang/compiler.clj new file mode 100644 index 000000000..21ef925b5 --- /dev/null +++ b/src/lang/compiler.clj @@ -0,0 +1,136 @@ +(ns lang.compiler + (:refer-clojure :exclude [compile]) + (:require [clojure.core.match :refer [match]] + (lang [util :as &util :refer [exec return* return fail fail* + repeat-m try-m try-all-m map-m + apply-m]] + [parser :as &parser] + [lexer :as &lexer]) + :reload)) + +(declare compile-form) + +;; [Utils] +(def ^:private +state+ + {:globals {} + :stack {} + :forms '()}) + +(defn wrap [x] + (update-in +state+ [:forms] conj x)) + +(defn wrap* [env x] + (-> +state+ + (update-in [:stack] merge env) + (update-in [:forms] conj x))) + +(defmacro ^:private defcompiler [name match return] + `(def ~name + (fn [state#] + (let [~'*token* (first (:forms state#))] + ;; (prn '~name ~'*token*) + (match ~'*token* + ~match + (let [output# (~return (update-in state# [:forms] rest))] + ;; (prn "output#" output#) + output#) + _# + (fail* (str "Unknown syntax: " (pr-str ~'*token*)))))))) + +(defn unwrap-ident [ident] + (match ident + [::&parser/ident ?label] + ?label)) + +(defn unwrap-tagged [ident] + (match ident + [::&parser/tagged ?tag ?data] + [?tag ?data])) + +(defcompiler compile-int + [::&parser/int ?int] + (return ?int)) + +(defcompiler compile-ident + [::&parser/ident ?name] + (return (symbol ?name))) + +(defcompiler compile-tuple + [::&parser/tuple ?elems] + (exec [=elems (map-m (fn [elem] (apply-m compile-form (wrap elem))) + ?elems)] + (return (vec =elems)))) + +(defcompiler compile-tagged + [::&parser/tagged ?tag ?data] + (exec [=data (apply-m compile-form (wrap ?data))] + (return {:tag ?tag :data =data}))) + +(defcompiler compile-fn-call + [::&parser/fn-call ?fn ?args] + (exec [=fn (apply-m compile-form (wrap ?fn)) + =args (map-m (fn [arg] (apply-m compile-form (wrap arg))) + ?args)] + (return (reduce (fn [f a] `(~f ~a)) + =fn =args)))) + +(defcompiler compile-if + [::&parser/if ?test ?then ?else] + (exec [=test (apply-m compile-form (wrap ?test)) + =then (apply-m compile-form (wrap ?then)) + =else (apply-m compile-form (wrap ?else))] + (return `(if ~=test ~=then ~=else)))) + +(defcompiler compile-def + [::&parser/def ?form ?body] + (match ?form + [::&parser/fn-call ?name ?args] + (exec [:let [=name (symbol (unwrap-ident ?name)) + =args (map (comp symbol unwrap-ident) ?args) + fn-env (into {} (for [a =args] [a nil]))] + =body (apply-m compile-form (wrap* fn-env ?body)) + :let [curled-body (reduce (fn [inner arg] `(fn [~arg] ~inner)) + =body (reverse =args)) + ;; _ (prn 'curled-body curled-body) + fn-def (let [[_ ?arg ?body] curled-body] + `(fn ~=name ~?arg ~?body)) + ;; _ (prn 'fn-def fn-def) + ]] + (return fn-def)))) + +(defcompiler compile-defdata + [::&parser/defdata ?form ?cases] + (match ?form + [::&parser/fn-call ?name ?args] + (let [=name (unwrap-ident ?name) + ;; _ (prn '=name =name) + =args (map unwrap-ident ?args) + ;; _ (prn '=args =args) + =cases (map unwrap-tagged ?cases) + ;; _ (prn '=cases =cases) + ] + (return `(fn ~(symbol =name) ~(mapv symbol =args)))))) + +(def compile-form + (try-all-m [compile-int + compile-ident + compile-tuple + compile-tagged + compile-if + compile-def + compile-defdata + compile-fn-call])) + +(defn compile [inputs] + (match ((repeat-m compile-form) inputs) + [::&util/ok [?state ?forms]] + (if (empty? (:forms ?state)) + ?forms + (assert false (str "Unconsumed input: " ?state))) + + [::&util/failure ?message] + (assert false ?message))) + +(comment + + ) diff --git a/src/lang/interpreter.clj b/src/lang/interpreter.clj index e79ec44d0..19fe71106 100644 --- a/src/lang/interpreter.clj +++ b/src/lang/interpreter.clj @@ -1,15 +1,24 @@ (ns lang.interpreter - (:refer-clojure :exclude [eval resolve]) + (:refer-clojure :exclude [eval resolve -' *']) (:require [clojure.core.match :refer [match]] (lang [util :as &util :refer [exec return* return fail fail* repeat-m try-m try-all-m map-m apply-m]] [parser :as &parser] - [lexer :as &lexer]) - :reload)) + [lexer :as &lexer] + [compiler :as &compiler]) + :reload) + ) (declare eval-form) +;; (defonce _init_ +;; (do (alter-var-root #'clojure.core/prn +;; (constantly #(.println System/out (apply pr-str %&)))))) + +(def <=' (fn [x] (fn [y] (<= x y)))) +(def -' (fn [x] (fn [y] (- x y)))) + ;; [Utils] (def ^:private +state+ {:globals {"*" (fn [x] (fn [y] (* x y)))} @@ -19,25 +28,42 @@ (defn wrap [x] (update-in +state+ [:forms] conj x)) +(defn wrap-in [state x] + (assoc state :forms (list x))) + (defn resolve [ident] (fn [state] + ;; (prn 'resolve ident (get-in state [:globals ident]) (get-in state [:globals])) (if-let [value (get-in state [:globals ident])] (return* state value) (fail* (str "Unrecognized identifier: " ident))))) +(defn define [name value] + (fn [state] + ;; (prn 'define name value (assoc-in state [:globals name] value)) + (return* (assoc-in state [:globals name] value) nil))) + (defn fn-call [f args] - (return (reduce #(%1 %2) f args))) + ;; (prn 'fn-call/call f args (first args) (second args)) + ;; (prn 'fn-call/output* (f (first args))) + ;; (prn 'fn-call/output* ((f (first args)) (second args))) + (let [output (reduce #(%1 %2) f args)] + ;; (prn 'fn-call/output output) + (return output))) (defmacro ^:private defeval [name match return] `(def ~name (fn [state#] - (let [token# (first (:forms state#))] - ;; (prn '~name token#) - (match token# + (let [~'*token* (first (:forms state#))] + ;; (prn '~name ~'*token*) + ;; (prn '~name state#) + (match ~'*token* ~match - (~return (update-in state# [:forms] rest)) + (let [output# (~return (update-in state# [:forms] rest))] + ;; (prn "output#" output#) + output#) _# - (fail* (str "Unknown syntax: " (pr-str token#)))))))) + (fail* (str "Unknown syntax: " (pr-str ~'*token*)))))))) (defeval eval-ident [::&parser/ident ?ident] @@ -49,16 +75,34 @@ (defeval eval-def [::&parser/def ?form ?body] - (match ?form - [::&parser/fn-call ?name ?args] - (exec [=body (apply-m eval-form (wrap ?body)) - =args (map-m (fn [arg] (apply-m eval-form (wrap arg))) - ?args)] - (return `(fn ~(vec =args) ~=body))))) + (exec [;; :let [_ (prn 'eval-defdata ?form ?cases)] + =value (apply-m &compiler/compile-form (wrap *token*)) + ;; :let [_ (prn 'eval-def 'DONE =value)] + :let [=name (match ?form + [::&parser/fn-call [::&parser/ident ?name] ?args] + ?name) + =value* (clojure.core/eval =value) + ;; _ (prn '=value* =value*) + ] + ] + (define =name =value*))) + +(defeval eval-defdata + [::&parser/defdata ?form & ?cases] + (exec [;; :let [_ (prn 'eval-defdata ?form ?cases)] + _ (apply-m &compiler/compile-form (wrap `[::&parser/defdata ~?form ~@?cases])) + ;; :let [_ (prn 'eval-defdata 'DONE)] + ] + (return nil))) + +;; [:lang.parser/defdata [:lang.parser/fn-call [:lang.parser/ident "List"] ([:lang.parser/ident "x"])] +;; ([:lang.parser/tagged "Nil" [:lang.parser/tuple ()]] +;; [:lang.parser/tagged "Cons" [:lang.parser/tuple ([:lang.parser/ident "x"] [:lang.parser/fn-call [:lang.parser/ident "List"] ([:lang.parser/ident "x"])])]])] (defeval eval-fn-call [::&parser/fn-call ?fn ?args] - (exec [=fn (apply-m eval-form (wrap ?fn)) + (exec [state &util/get-state + =fn (apply-m eval-form (wrap-in state ?fn)) =args (map-m (fn [arg] (apply-m eval-form (wrap arg))) ?args)] (fn-call =fn =args))) @@ -67,6 +111,7 @@ (try-all-m [eval-ident eval-int eval-def + eval-defdata eval-fn-call])) ;; [::def [::fn-call [::ident "**"] ([::ident "base"] [::ident "exp"])] @@ -75,11 +120,11 @@ ;; [::fn-call [::ident "repeat"] ([::ident "exp"] ;; [::ident "base"])])]] -(defn eval [state] - (match (eval-form state) - [::&util/ok [?state ?datum]] +(defn eval [text] + (match ((repeat-m eval-form) text) + [::&util/ok [?state ?forms]] (if (empty? (:forms ?state)) - ?datum + ?forms (assert false (str "Unconsumed input: " ?state))) [::&util/failure ?message] @@ -88,9 +133,14 @@ (comment (let [source-code (slurp "src/example/test1.lang") tokens (&lexer/lex source-code) - syntax (&parser/parse (list tokens))] - ;; (prn 'syntax syntax) - (eval (update-in +state+ [:forms] concat (list syntax)))) + _ (prn 'tokens tokens) + syntax (&parser/parse tokens) + _ (prn 'syntax syntax)] + (eval (update-in +state+ [:forms] concat syntax))) + + + + ;; (clojure.core/fn [base exp] (fold * 1 (repeat exp base))) @@ -112,4 +162,6 @@ ;; (def (** base exp) ;; (fold * 1 (repeat exp base))) + + ) diff --git a/src/lang/lexer.clj b/src/lang/lexer.clj index 191686cb8..bbb92da95 100644 --- a/src/lang/lexer.clj +++ b/src/lang/lexer.clj @@ -1,5 +1,6 @@ (ns lang.lexer - (:require [clojure.core.match :refer [match]] + (:require [clojure.template :refer [do-template]] + [clojure.core.match :refer [match]] [lang.util :as &util :refer [exec return* return fail fail* repeat-m try-m try-all-m]])) @@ -10,13 +11,13 @@ (fn [text] (if-let [[match] (re-find regex text)] (return* (.substring text (.length match)) match) - (fail* "Pattern failed.")))) + (fail* (str "Pattern failed: " regex " -- " text))))) -(defn ^:private lex-str [str] +(defn ^:private lex-str [prefix] (fn [text] - (if (.startsWith text str) - (return* (.substring text (.length str)) str) - (fail* "String failed.")))) + (if (.startsWith text prefix) + (return* (.substring text (.length prefix)) prefix) + (fail* (str "String failed: " prefix " -- " text))))) ;; [Lexers] (def ^:private lex-white-space (lex-regex #"^(\s+)")) @@ -27,33 +28,43 @@ _ (lex-str ")")] (return [::list members]))) -(def ^:private lex-ident - (exec [token (lex-regex #"^([a-zA-Z!@$%^&*<>\.,/\\\|][a-zA-Z0-9!@$%^&*<>\.,/\\\|]*)")] - (return [::ident token]))) +(def ^:private lex-tuple + (exec [_ (lex-str "[") + members (repeat-m lex-form) + _ (lex-str "]")] + (return [::tuple members]))) + +(def +ident-re+ #"^([a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|'][a-zA-Z0-9\-\+\_\=!@$%^&*<>\.,/\\\|']*)") + +(do-template [<name> <tag> <regex>] + (def <name> + (exec [token (lex-regex <regex>)] + (return [<tag> token]))) + + ^:private lex-int ::int #"^(0|[1-9][0-9]*)" + ^:private lex-ident ::ident +ident-re+) -(def ^:private lex-int - (exec [token (lex-regex #"^([1-9][0-9]*)")] - (return [::int token]))) +(def ^:private lex-tag + (exec [_ (lex-str "#") + token (lex-regex +ident-re+)] + (return [::tag token]))) (def ^:private lex-form (exec [_ (try-m lex-white-space) - form (try-all-m [lex-list + form (try-all-m [lex-int lex-ident - lex-int - ]) + lex-tag + lex-list + lex-tuple]) _ (try-m lex-white-space)] (return form))) ;; [Interface] (defn lex [text] - (match ((exec [_ (try-m lex-white-space) - form lex-list - _ (try-m lex-white-space)] - (return form)) - text) - [::&util/ok [?state ?datum]] + (match ((repeat-m lex-form) text) + [::&util/ok [?state ?forms]] (if (empty? ?state) - ?datum + ?forms (assert false (str "Unconsumed input: " ?state))) [::&util/failure ?message] diff --git a/src/lang/parser.clj b/src/lang/parser.clj index 289453999..985a17861 100644 --- a/src/lang/parser.clj +++ b/src/lang/parser.clj @@ -15,16 +15,22 @@ ~match (~return left#) _# - (fail* "Unmatched token."))))) + (fail* (str "Unmatched token: " token#)))))) ;; [Parsers] +(defparser ^:private parse-int + [::&lexer/int ?int] + (return [::int (Long/parseLong ?int)])) + (defparser ^:private parse-ident [::&lexer/ident ?ident] (return [::ident ?ident])) -(defparser ^:private parse-int - [::&lexer/int ?int] - (return [::int (Long/parseLong ?int)])) +(defparser ^:private parse-tuple + [::&lexer/tuple ?parts] + (exec [=parts (map-m (fn [arg] (apply-m parse-form (list arg))) + ?parts)] + (return [::tuple =parts]))) (defparser ^:private parse-def [::&lexer/list ([[::&lexer/ident "def"] ?name ?body] :seq)] @@ -32,6 +38,30 @@ =body (apply-m parse-form (list ?body))] (return [::def =name =body]))) +(defparser ^:private parse-defdata + [::&lexer/list ([[::&lexer/ident "defdata"] ?type & ?cases] :seq)] + (exec [=type (apply-m parse-form (list ?type)) + =cases (map-m (fn [arg] + (match arg + [::&lexer/list ([[::&lexer/tag ?tag] ?data] :seq)] + (exec [=data (apply-m parse-form (list ?data))] + (return [::tagged ?tag =data])) + )) + ?cases)] + (return [::defdata =type =cases]))) + +(defparser ^:private parse-if + [::&lexer/list ([[::&lexer/ident "if"] ?test ?then ?else] :seq)] + (exec [=test (apply-m parse-form (list ?test)) + =then (apply-m parse-form (list ?then)) + =else (apply-m parse-form (list ?else))] + (return [::if =test =then =else]))) + +(defparser ^:private parse-tagged + [::&lexer/list ([[::&lexer/tag ?tag] ?data] :seq)] + (exec [=data (apply-m parse-form (list ?data))] + (return [::tagged ?tag =data]))) + (defparser ^:private parse-fn-call [::&lexer/list ([?f & ?args] :seq)] (exec [=f (apply-m parse-form (list ?f)) @@ -40,17 +70,21 @@ (return [::fn-call =f =args]))) (def ^:private parse-form - (try-all-m [parse-ident - parse-int + (try-all-m [parse-int + parse-ident + parse-tuple parse-def + parse-defdata + parse-if + parse-tagged parse-fn-call])) ;; [Interface] -(defn parse [tokens] - (match (parse-form tokens) - [::&util/ok [?state ?datum]] +(defn parse [text] + (match ((repeat-m parse-form) text) + [::&util/ok [?state ?forms]] (if (empty? ?state) - ?datum + ?forms (assert false (str "Unconsumed input: " ?state))) [::&util/failure ?message] diff --git a/src/lang/util.clj b/src/lang/util.clj index 1290d91df..d9745e9ab 100644 --- a/src/lang/util.clj +++ b/src/lang/util.clj @@ -58,12 +58,15 @@ (fn [state] (match (monad state) [::ok [?state ?head]] - ((exec [tail (repeat-m monad)] - (return (cons ?head tail))) - ?state) + (do ;; (prn 'repeat-m/?state ?state) + (match ((repeat-m monad) ?state) + [::ok [?state* ?tail]] + (do ;; (prn 'repeat-m/?state* ?state*) + (return* ?state* (cons ?head ?tail))))) - [::failure _] - (return* state '())))) + [::failure ?message] + (do ;; (println "Failed at last:" ?message) + (return* state '()))))) (defn try-all-m [monads] (fn [state] @@ -90,7 +93,7 @@ (fn [state] ;; (prn 'apply-m monad call-state) (let [output (monad call-state)] - ;; (prn 'output output) + ;; (prn 'apply-m/output output) (match output [::ok [?state ?datum]] [::ok [state ?datum]] @@ -105,3 +108,7 @@ (defn pass [m-value] (fn [state] m-value)) + +(def get-state + (fn [state] + (return* state state))) |