From 5bf50ef978eb88e2e61c40f3bb0fea523115e770 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Tue, 25 Nov 2014 14:43:44 -0400 Subject: + Can lex basic tokens. + Can parse basic syntax. + Can eval integer multiplication. --- .gitignore | 9 ++++ project.clj | 7 +++ src/example/test1.lang | 6 +++ src/lang/interpreter.clj | 115 +++++++++++++++++++++++++++++++++++++++++++++++ src/lang/lexer.clj | 60 +++++++++++++++++++++++++ src/lang/parser.clj | 76 +++++++++++++++++++++++++++++++ src/lang/util.clj | 107 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 380 insertions(+) create mode 100644 .gitignore create mode 100644 project.clj create mode 100644 src/example/test1.lang create mode 100644 src/lang/interpreter.clj create mode 100644 src/lang/lexer.clj create mode 100644 src/lang/parser.clj create mode 100644 src/lang/util.clj diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..e04714ba8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/target +/classes +/checkouts +pom.xml +pom.xml.asc +*.jar +*.class +/.lein-* +/.nrepl-port diff --git a/project.clj b/project.clj new file mode 100644 index 000000000..6f07b5b15 --- /dev/null +++ b/project.clj @@ -0,0 +1,7 @@ +(defproject lang "0.1.0-SNAPSHOT" + :description "FIXME: write description" + :url "http://example.com/FIXME" + :license {:name "Eclipse Public License" + :url "http://www.eclipse.org/legal/epl-v10.html"} + :dependencies [[org.clojure/clojure "1.6.0"] + [org.clojure/core.match "0.2.1"]]) diff --git a/src/example/test1.lang b/src/example/test1.lang new file mode 100644 index 000000000..f6f244a57 --- /dev/null +++ b/src/example/test1.lang @@ -0,0 +1,6 @@ + +(* 5 6) + +(defdata (List x) + (#Nil []) + (#Cons [x] (List x))) diff --git a/src/lang/interpreter.clj b/src/lang/interpreter.clj new file mode 100644 index 000000000..e79ec44d0 --- /dev/null +++ b/src/lang/interpreter.clj @@ -0,0 +1,115 @@ +(ns lang.interpreter + (:refer-clojure :exclude [eval resolve]) + (:require [clojure.core.match :refer [match]] + (lang [util :as &util :refer [exec return* return fail fail* + repeat-m try-m try-all-m map-m + apply-m]] + [parser :as &parser] + [lexer :as &lexer]) + :reload)) + +(declare eval-form) + +;; [Utils] +(def ^:private +state+ + {:globals {"*" (fn [x] (fn [y] (* x y)))} + :stack {} + :forms '()}) + +(defn wrap [x] + (update-in +state+ [:forms] conj x)) + +(defn resolve [ident] + (fn [state] + (if-let [value (get-in state [:globals ident])] + (return* state value) + (fail* (str "Unrecognized identifier: " ident))))) + +(defn fn-call [f args] + (return (reduce #(%1 %2) f args))) + +(defmacro ^:private defeval [name match return] + `(def ~name + (fn [state#] + (let [token# (first (:forms state#))] + ;; (prn '~name token#) + (match token# + ~match + (~return (update-in state# [:forms] rest)) + _# + (fail* (str "Unknown syntax: " (pr-str token#)))))))) + +(defeval eval-ident + [::&parser/ident ?ident] + (resolve ?ident)) + +(defeval eval-int + [::&parser/int ?int] + (return ?int)) + +(defeval eval-def + [::&parser/def ?form ?body] + (match ?form + [::&parser/fn-call ?name ?args] + (exec [=body (apply-m eval-form (wrap ?body)) + =args (map-m (fn [arg] (apply-m eval-form (wrap arg))) + ?args)] + (return `(fn ~(vec =args) ~=body))))) + +(defeval eval-fn-call + [::&parser/fn-call ?fn ?args] + (exec [=fn (apply-m eval-form (wrap ?fn)) + =args (map-m (fn [arg] (apply-m eval-form (wrap arg))) + ?args)] + (fn-call =fn =args))) + +(def eval-form + (try-all-m [eval-ident + eval-int + eval-def + eval-fn-call])) + +;; [::def [::fn-call [::ident "**"] ([::ident "base"] [::ident "exp"])] +;; [::fn-call [::ident "reduce"] ([::ident "*"] +;; [::int 1] +;; [::fn-call [::ident "repeat"] ([::ident "exp"] +;; [::ident "base"])])]] + +(defn eval [state] + (match (eval-form state) + [::&util/ok [?state ?datum]] + (if (empty? (:forms ?state)) + ?datum + (assert false (str "Unconsumed input: " ?state))) + + [::&util/failure ?message] + (assert false ?message))) + +(comment + (let [source-code (slurp "src/example/test1.lang") + tokens (&lexer/lex source-code) + syntax (&parser/parse (list tokens))] + ;; (prn 'syntax syntax) + (eval (update-in +state+ [:forms] concat (list syntax)))) + + ;; (clojure.core/fn [base exp] (fold * 1 (repeat exp base))) + + ;; (* 5 6) + + ;; (defdata (List x) + ;; (#Nil []) + ;; (#Cons [x] (List x))) + + ;; (def (repeat n val) + ;; (if (> v n) + ;; (#Nil []) + ;; (#Cons [val (repeat (- n 1) val)]))) + + ;; (def (fold f init inputs) + ;; (case input + ;; (#Nil _) init + ;; (#Cons [head tail]) (fold f (f init head) tail))) + + ;; (def (** base exp) + ;; (fold * 1 (repeat exp base))) + ) diff --git a/src/lang/lexer.clj b/src/lang/lexer.clj new file mode 100644 index 000000000..191686cb8 --- /dev/null +++ b/src/lang/lexer.clj @@ -0,0 +1,60 @@ +(ns lang.lexer + (:require [clojure.core.match :refer [match]] + [lang.util :as &util :refer [exec return* return fail fail* + repeat-m try-m try-all-m]])) + +(declare lex-form) + +;; [Utils] +(defn ^:private lex-regex [regex] + (fn [text] + (if-let [[match] (re-find regex text)] + (return* (.substring text (.length match)) match) + (fail* "Pattern failed.")))) + +(defn ^:private lex-str [str] + (fn [text] + (if (.startsWith text str) + (return* (.substring text (.length str)) str) + (fail* "String failed.")))) + +;; [Lexers] +(def ^:private lex-white-space (lex-regex #"^(\s+)")) + +(def ^:private lex-list + (exec [_ (lex-str "(") + members (repeat-m lex-form) + _ (lex-str ")")] + (return [::list members]))) + +(def ^:private lex-ident + (exec [token (lex-regex #"^([a-zA-Z!@$%^&*<>\.,/\\\|][a-zA-Z0-9!@$%^&*<>\.,/\\\|]*)")] + (return [::ident token]))) + +(def ^:private lex-int + (exec [token (lex-regex #"^([1-9][0-9]*)")] + (return [::int token]))) + +(def ^:private lex-form + (exec [_ (try-m lex-white-space) + form (try-all-m [lex-list + lex-ident + lex-int + ]) + _ (try-m lex-white-space)] + (return form))) + +;; [Interface] +(defn lex [text] + (match ((exec [_ (try-m lex-white-space) + form lex-list + _ (try-m lex-white-space)] + (return form)) + text) + [::&util/ok [?state ?datum]] + (if (empty? ?state) + ?datum + (assert false (str "Unconsumed input: " ?state))) + + [::&util/failure ?message] + (assert false ?message))) diff --git a/src/lang/parser.clj b/src/lang/parser.clj new file mode 100644 index 000000000..289453999 --- /dev/null +++ b/src/lang/parser.clj @@ -0,0 +1,76 @@ +(ns lang.parser + (:require [clojure.core.match :refer [match]] + (lang [util :as &util :refer [exec return* return fail fail* + repeat-m try-m try-all-m map-m + apply-m]] + [lexer :as &lexer]))) + +(declare parse-form) + +;; [Utils] +(defmacro ^:private defparser [name match return] + `(def ~name + (fn [[token# & left#]] + (match token# + ~match + (~return left#) + _# + (fail* "Unmatched token."))))) + +;; [Parsers] +(defparser ^:private parse-ident + [::&lexer/ident ?ident] + (return [::ident ?ident])) + +(defparser ^:private parse-int + [::&lexer/int ?int] + (return [::int (Long/parseLong ?int)])) + +(defparser ^:private parse-def + [::&lexer/list ([[::&lexer/ident "def"] ?name ?body] :seq)] + (exec [=name (apply-m parse-form (list ?name)) + =body (apply-m parse-form (list ?body))] + (return [::def =name =body]))) + +(defparser ^:private parse-fn-call + [::&lexer/list ([?f & ?args] :seq)] + (exec [=f (apply-m parse-form (list ?f)) + =args (map-m (fn [arg] (apply-m parse-form (list arg))) + ?args)] + (return [::fn-call =f =args]))) + +(def ^:private parse-form + (try-all-m [parse-ident + parse-int + parse-def + parse-fn-call])) + +;; [Interface] +(defn parse [tokens] + (match (parse-form tokens) + [::&util/ok [?state ?datum]] + (if (empty? ?state) + ?datum + (assert false (str "Unconsumed input: " ?state))) + + [::&util/failure ?message] + (assert false ?message))) + +(comment + ((comp parse list &lexer/lex) (slurp "src/example/test1.lang")) + + (&lexer/lex (slurp "src/example/test1.lang")) + "\n(def (** base exp)\n (reduce * 1 (repeat exp base)))\n" + + [::list ([::ident "def"] + [::list ([::ident "**"] [::ident "base"] [::ident "exp"])] + [::list ([::ident "reduce"] + [::ident "*"] + [::int "1"] + [::list ([::ident "repeat"] + [::ident "exp"] + [::ident "base"])])])] + + (re-find #"^([a-zA-Z!@$%^&*<>\.,/\\\|][a-zA-Z0-9!@$%^&*<>\.,/\\\|]*)" "a9") + (re-find #"^([1-9][0-9]*)" "9") + ) diff --git a/src/lang/util.clj b/src/lang/util.clj new file mode 100644 index 000000000..1290d91df --- /dev/null +++ b/src/lang/util.clj @@ -0,0 +1,107 @@ +(ns lang.util + (:require [clojure.string :as string] + [clojure.core.match :refer [match]])) + +;; [Interface] +;; [Interface/Utils] +(defn fail* [message] + [::failure message]) + +(defn return* [state value] + [::ok [state value]]) + +;; [Interface/Monads] +(defn fail [message] + (fn [_] + [::failure message])) + +(defn return [value] + (fn [state] + [::ok [state value]])) + +(defn bind [m-value step] + #(let [inputs (m-value %)] + ;; (prn 'bind/inputs inputs) + (match inputs + [::ok [?state ?datum]] + ((step ?datum) ?state) + + [::failure _] + inputs))) + +(defmacro exec [steps return] + (assert (not= 0 (count steps)) "The steps can't be empty!") + (assert (= 0 (rem (count steps) 2)) "The number of steps must be even!") + (reduce (fn [inner [label computation]] + (case label + :let `(let ~computation ~inner) + ;; :when (assert false "Can't use :when") + :when `(if ~computation + ~inner + zero) + ;; else + `(bind ~computation (fn [~label] ~inner)))) + return + (reverse (partition 2 steps)))) + +;; [Interface/Combinators] +(defn try-m [monad] + (fn [state] + (match (monad state) + [::ok [?state ?datum]] + (return* ?state ?datum) + + [::failure _] + (return* state nil)))) + +(defn repeat-m [monad] + (fn [state] + (match (monad state) + [::ok [?state ?head]] + ((exec [tail (repeat-m monad)] + (return (cons ?head tail))) + ?state) + + [::failure _] + (return* state '())))) + +(defn try-all-m [monads] + (fn [state] + (if (empty? monads) + (fail* "No alternative worked!") + (let [output ((first monads) state)] + (match output + [::ok _] + output + :else + (if-let [monads* (seq (rest monads))] + ((try-all-m monads*) state) + output) + ))))) + +(defn map-m [f inputs] + (if (empty? inputs) + (return '()) + (exec [output (f (first inputs)) + outputs (map-m f (rest inputs))] + (return (conj outputs output))))) + +(defn apply-m [monad call-state] + (fn [state] + ;; (prn 'apply-m monad call-state) + (let [output (monad call-state)] + ;; (prn 'output output) + (match output + [::ok [?state ?datum]] + [::ok [state ?datum]] + + [::failure _] + output)))) + +(defn comp-m [f-m g-m] + (exec [temp g-m] + (f-m temp))) + +(defn pass [m-value] + (fn [state] + m-value)) -- cgit v1.2.3