aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEduardo Julian2014-11-25 14:43:44 -0400
committerEduardo Julian2014-11-25 14:43:44 -0400
commit5bf50ef978eb88e2e61c40f3bb0fea523115e770 (patch)
tree4651d17fb27e5d04498547456ed1d216c083ca1c
+ Can lex basic tokens.
+ Can parse basic syntax. + Can eval integer multiplication.
-rw-r--r--.gitignore9
-rw-r--r--project.clj7
-rw-r--r--src/example/test1.lang6
-rw-r--r--src/lang/interpreter.clj115
-rw-r--r--src/lang/lexer.clj60
-rw-r--r--src/lang/parser.clj76
-rw-r--r--src/lang/util.clj107
7 files changed, 380 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..e04714ba8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+/target
+/classes
+/checkouts
+pom.xml
+pom.xml.asc
+*.jar
+*.class
+/.lein-*
+/.nrepl-port
diff --git a/project.clj b/project.clj
new file mode 100644
index 000000000..6f07b5b15
--- /dev/null
+++ b/project.clj
@@ -0,0 +1,7 @@
+(defproject lang "0.1.0-SNAPSHOT"
+ :description "FIXME: write description"
+ :url "http://example.com/FIXME"
+ :license {:name "Eclipse Public License"
+ :url "http://www.eclipse.org/legal/epl-v10.html"}
+ :dependencies [[org.clojure/clojure "1.6.0"]
+ [org.clojure/core.match "0.2.1"]])
diff --git a/src/example/test1.lang b/src/example/test1.lang
new file mode 100644
index 000000000..f6f244a57
--- /dev/null
+++ b/src/example/test1.lang
@@ -0,0 +1,6 @@
+
+(* 5 6)
+
+(defdata (List x)
+ (#Nil [])
+ (#Cons [x] (List x)))
diff --git a/src/lang/interpreter.clj b/src/lang/interpreter.clj
new file mode 100644
index 000000000..e79ec44d0
--- /dev/null
+++ b/src/lang/interpreter.clj
@@ -0,0 +1,115 @@
+(ns lang.interpreter
+ (:refer-clojure :exclude [eval resolve])
+ (:require [clojure.core.match :refer [match]]
+ (lang [util :as &util :refer [exec return* return fail fail*
+ repeat-m try-m try-all-m map-m
+ apply-m]]
+ [parser :as &parser]
+ [lexer :as &lexer])
+ :reload))
+
+(declare eval-form)
+
+;; [Utils]
+(def ^:private +state+
+ {:globals {"*" (fn [x] (fn [y] (* x y)))}
+ :stack {}
+ :forms '()})
+
+(defn wrap [x]
+ (update-in +state+ [:forms] conj x))
+
+(defn resolve [ident]
+ (fn [state]
+ (if-let [value (get-in state [:globals ident])]
+ (return* state value)
+ (fail* (str "Unrecognized identifier: " ident)))))
+
+(defn fn-call [f args]
+ (return (reduce #(%1 %2) f args)))
+
+(defmacro ^:private defeval [name match return]
+ `(def ~name
+ (fn [state#]
+ (let [token# (first (:forms state#))]
+ ;; (prn '~name token#)
+ (match token#
+ ~match
+ (~return (update-in state# [:forms] rest))
+ _#
+ (fail* (str "Unknown syntax: " (pr-str token#))))))))
+
+(defeval eval-ident
+ [::&parser/ident ?ident]
+ (resolve ?ident))
+
+(defeval eval-int
+ [::&parser/int ?int]
+ (return ?int))
+
+(defeval eval-def
+ [::&parser/def ?form ?body]
+ (match ?form
+ [::&parser/fn-call ?name ?args]
+ (exec [=body (apply-m eval-form (wrap ?body))
+ =args (map-m (fn [arg] (apply-m eval-form (wrap arg)))
+ ?args)]
+ (return `(fn ~(vec =args) ~=body)))))
+
+(defeval eval-fn-call
+ [::&parser/fn-call ?fn ?args]
+ (exec [=fn (apply-m eval-form (wrap ?fn))
+ =args (map-m (fn [arg] (apply-m eval-form (wrap arg)))
+ ?args)]
+ (fn-call =fn =args)))
+
+(def eval-form
+ (try-all-m [eval-ident
+ eval-int
+ eval-def
+ eval-fn-call]))
+
+;; [::def [::fn-call [::ident "**"] ([::ident "base"] [::ident "exp"])]
+;; [::fn-call [::ident "reduce"] ([::ident "*"]
+;; [::int 1]
+;; [::fn-call [::ident "repeat"] ([::ident "exp"]
+;; [::ident "base"])])]]
+
+(defn eval [state]
+ (match (eval-form state)
+ [::&util/ok [?state ?datum]]
+ (if (empty? (:forms ?state))
+ ?datum
+ (assert false (str "Unconsumed input: " ?state)))
+
+ [::&util/failure ?message]
+ (assert false ?message)))
+
+(comment
+ (let [source-code (slurp "src/example/test1.lang")
+ tokens (&lexer/lex source-code)
+ syntax (&parser/parse (list tokens))]
+ ;; (prn 'syntax syntax)
+ (eval (update-in +state+ [:forms] concat (list syntax))))
+
+ ;; (clojure.core/fn [base exp] (fold * 1 (repeat exp base)))
+
+ ;; (* 5 6)
+
+ ;; (defdata (List x)
+ ;; (#Nil [])
+ ;; (#Cons [x] (List x)))
+
+ ;; (def (repeat n val)
+ ;; (if (> v n)
+ ;; (#Nil [])
+ ;; (#Cons [val (repeat (- n 1) val)])))
+
+ ;; (def (fold f init inputs)
+ ;; (case input
+ ;; (#Nil _) init
+ ;; (#Cons [head tail]) (fold f (f init head) tail)))
+
+ ;; (def (** base exp)
+ ;; (fold * 1 (repeat exp base)))
+ )
diff --git a/src/lang/lexer.clj b/src/lang/lexer.clj
new file mode 100644
index 000000000..191686cb8
--- /dev/null
+++ b/src/lang/lexer.clj
@@ -0,0 +1,60 @@
+(ns lang.lexer
+ (:require [clojure.core.match :refer [match]]
+ [lang.util :as &util :refer [exec return* return fail fail*
+ repeat-m try-m try-all-m]]))
+
+(declare lex-form)
+
+;; [Utils]
+(defn ^:private lex-regex [regex]
+ (fn [text]
+ (if-let [[match] (re-find regex text)]
+ (return* (.substring text (.length match)) match)
+ (fail* "Pattern failed."))))
+
+(defn ^:private lex-str [str]
+ (fn [text]
+ (if (.startsWith text str)
+ (return* (.substring text (.length str)) str)
+ (fail* "String failed."))))
+
+;; [Lexers]
+(def ^:private lex-white-space (lex-regex #"^(\s+)"))
+
+(def ^:private lex-list
+ (exec [_ (lex-str "(")
+ members (repeat-m lex-form)
+ _ (lex-str ")")]
+ (return [::list members])))
+
+(def ^:private lex-ident
+ (exec [token (lex-regex #"^([a-zA-Z!@$%^&*<>\.,/\\\|][a-zA-Z0-9!@$%^&*<>\.,/\\\|]*)")]
+ (return [::ident token])))
+
+(def ^:private lex-int
+ (exec [token (lex-regex #"^([1-9][0-9]*)")]
+ (return [::int token])))
+
+(def ^:private lex-form
+ (exec [_ (try-m lex-white-space)
+ form (try-all-m [lex-list
+ lex-ident
+ lex-int
+ ])
+ _ (try-m lex-white-space)]
+ (return form)))
+
+;; [Interface]
+(defn lex [text]
+ (match ((exec [_ (try-m lex-white-space)
+ form lex-list
+ _ (try-m lex-white-space)]
+ (return form))
+ text)
+ [::&util/ok [?state ?datum]]
+ (if (empty? ?state)
+ ?datum
+ (assert false (str "Unconsumed input: " ?state)))
+
+ [::&util/failure ?message]
+ (assert false ?message)))
diff --git a/src/lang/parser.clj b/src/lang/parser.clj
new file mode 100644
index 000000000..289453999
--- /dev/null
+++ b/src/lang/parser.clj
@@ -0,0 +1,76 @@
+(ns lang.parser
+ (:require [clojure.core.match :refer [match]]
+ (lang [util :as &util :refer [exec return* return fail fail*
+ repeat-m try-m try-all-m map-m
+ apply-m]]
+ [lexer :as &lexer])))
+
+(declare parse-form)
+
+;; [Utils]
+(defmacro ^:private defparser [name match return]
+ `(def ~name
+ (fn [[token# & left#]]
+ (match token#
+ ~match
+ (~return left#)
+ _#
+ (fail* "Unmatched token.")))))
+
+;; [Parsers]
+(defparser ^:private parse-ident
+ [::&lexer/ident ?ident]
+ (return [::ident ?ident]))
+
+(defparser ^:private parse-int
+ [::&lexer/int ?int]
+ (return [::int (Long/parseLong ?int)]))
+
+(defparser ^:private parse-def
+ [::&lexer/list ([[::&lexer/ident "def"] ?name ?body] :seq)]
+ (exec [=name (apply-m parse-form (list ?name))
+ =body (apply-m parse-form (list ?body))]
+ (return [::def =name =body])))
+
+(defparser ^:private parse-fn-call
+ [::&lexer/list ([?f & ?args] :seq)]
+ (exec [=f (apply-m parse-form (list ?f))
+ =args (map-m (fn [arg] (apply-m parse-form (list arg)))
+ ?args)]
+ (return [::fn-call =f =args])))
+
+(def ^:private parse-form
+ (try-all-m [parse-ident
+ parse-int
+ parse-def
+ parse-fn-call]))
+
+;; [Interface]
+(defn parse [tokens]
+ (match (parse-form tokens)
+ [::&util/ok [?state ?datum]]
+ (if (empty? ?state)
+ ?datum
+ (assert false (str "Unconsumed input: " ?state)))
+
+ [::&util/failure ?message]
+ (assert false ?message)))
+
+(comment
+ ((comp parse list &lexer/lex) (slurp "src/example/test1.lang"))
+
+ (&lexer/lex (slurp "src/example/test1.lang"))
+ "\n(def (** base exp)\n (reduce * 1 (repeat exp base)))\n"
+
+ [::list ([::ident "def"]
+ [::list ([::ident "**"] [::ident "base"] [::ident "exp"])]
+ [::list ([::ident "reduce"]
+ [::ident "*"]
+ [::int "1"]
+ [::list ([::ident "repeat"]
+ [::ident "exp"]
+ [::ident "base"])])])]
+
+ (re-find #"^([a-zA-Z!@$%^&*<>\.,/\\\|][a-zA-Z0-9!@$%^&*<>\.,/\\\|]*)" "a9")
+ (re-find #"^([1-9][0-9]*)" "9")
+ )
diff --git a/src/lang/util.clj b/src/lang/util.clj
new file mode 100644
index 000000000..1290d91df
--- /dev/null
+++ b/src/lang/util.clj
@@ -0,0 +1,107 @@
+(ns lang.util
+ (:require [clojure.string :as string]
+ [clojure.core.match :refer [match]]))
+
+;; [Interface]
+;; [Interface/Utils]
+(defn fail* [message]
+ [::failure message])
+
+(defn return* [state value]
+ [::ok [state value]])
+
+;; [Interface/Monads]
+(defn fail [message]
+ (fn [_]
+ [::failure message]))
+
+(defn return [value]
+ (fn [state]
+ [::ok [state value]]))
+
+(defn bind [m-value step]
+ #(let [inputs (m-value %)]
+ ;; (prn 'bind/inputs inputs)
+ (match inputs
+ [::ok [?state ?datum]]
+ ((step ?datum) ?state)
+
+ [::failure _]
+ inputs)))
+
+(defmacro exec [steps return]
+ (assert (not= 0 (count steps)) "The steps can't be empty!")
+ (assert (= 0 (rem (count steps) 2)) "The number of steps must be even!")
+ (reduce (fn [inner [label computation]]
+ (case label
+ :let `(let ~computation ~inner)
+ ;; :when (assert false "Can't use :when")
+ :when `(if ~computation
+ ~inner
+ zero)
+ ;; else
+ `(bind ~computation (fn [~label] ~inner))))
+ return
+ (reverse (partition 2 steps))))
+
+;; [Interface/Combinators]
+(defn try-m [monad]
+ (fn [state]
+ (match (monad state)
+ [::ok [?state ?datum]]
+ (return* ?state ?datum)
+
+ [::failure _]
+ (return* state nil))))
+
+(defn repeat-m [monad]
+ (fn [state]
+ (match (monad state)
+ [::ok [?state ?head]]
+ ((exec [tail (repeat-m monad)]
+ (return (cons ?head tail)))
+ ?state)
+
+ [::failure _]
+ (return* state '()))))
+
+(defn try-all-m [monads]
+ (fn [state]
+ (if (empty? monads)
+ (fail* "No alternative worked!")
+ (let [output ((first monads) state)]
+ (match output
+ [::ok _]
+ output
+ :else
+ (if-let [monads* (seq (rest monads))]
+ ((try-all-m monads*) state)
+ output)
+ )))))
+
+(defn map-m [f inputs]
+ (if (empty? inputs)
+ (return '())
+ (exec [output (f (first inputs))
+ outputs (map-m f (rest inputs))]
+ (return (conj outputs output)))))
+
+(defn apply-m [monad call-state]
+ (fn [state]
+ ;; (prn 'apply-m monad call-state)
+ (let [output (monad call-state)]
+ ;; (prn 'output output)
+ (match output
+ [::ok [?state ?datum]]
+ [::ok [state ?datum]]
+
+ [::failure _]
+ output))))
+
+(defn comp-m [f-m g-m]
+ (exec [temp g-m]
+ (f-m temp)))
+
+(defn pass [m-value]
+ (fn [state]
+ m-value))