aboutsummaryrefslogtreecommitdiff
path: root/src/lang/lexer.clj
diff options
context:
space:
mode:
author Eduardo Julian 2015-01-03 11:24:14 -0400
committer Eduardo Julian 2015-01-03 11:24:14 -0400
commit 661c70e4d786e7b2188564beddc586f1a50e4656 (patch)
tree fe3aff74ffa526d455b7e22d7015573dc926f5f1 /src/lang/lexer.clj
parent 212dd66966a873e3d7183b071f719ef58e4d88fe (diff)
The language officially has a name: Lux (stylized as "lux").
Diffstat (limited to 'src/lang/lexer.clj')
-rw-r--r-- src/lang/lexer.clj 172
1 files changed, 0 insertions, 172 deletions
diff --git a/src/lang/lexer.clj b/src/lang/lexer.clj
deleted file mode 100644
index 7b23c5947..000000000
--- a/src/lang/lexer.clj
+++ /dev/null
@@ -1,172 +0,0 @@
-(ns lang.lexer
- (:require [clojure.template :refer [do-template]]
- [clojure.core.match :refer [match]]
- [lang.util :as &util :refer [exec return* return fail fail*
- repeat-m try-m try-all-m]]))
-
-(declare lex-forms lex-list lex-tuple lex-record lex-tag)
-
-;; [Utils]
-;; Builds a lexer that matches `regex` at the very start of the input.
-;;
-;; The returned function takes the remaining input `text`. On a match it
-;; calls `return*` with the input left after the matched prefix and the whole
-;; matched string; otherwise it calls `fail*` with a message naming the
-;; pattern and the input.
-(defn ^:private lex-regex [regex]
-  (fn [text]
-    ;; A Matcher is used instead of destructuring the result of `re-find`:
-    ;; `re-find` yields a plain string for patterns without capture groups,
-    ;; so `[[match] ...]` would bind only that string's first character.
-    ;; `.lookingAt` also pins the match to offset 0, which the `.substring`
-    ;; call below relies on to drop exactly the matched prefix.
-    (let [matcher (re-matcher regex text)]
-      (if (.lookingAt matcher)
-        (let [matched (.group matcher)]
-          (return* (.substring text (.length matched)) matched))
-        (fail* (str "Pattern failed: " regex " -- " text))))))
-
-;; Builds a lexer for a pattern with two capture groups.
-;;
-;; On a match the lexer calls `return*` with the input that follows the whole
-;; match and a pair holding the two captured groups; on a miss it calls
-;; `fail*` with a message naming the pattern and the input.
-(defn ^:private lex-regex2 [regex]
-  (fn [text]
-    (let [groups (re-find regex text)]
-      (if-not groups
-        (fail* (str "Pattern failed: " regex " -- " text))
-        (let [[whole fst snd] groups
-              remaining       (subs text (.length whole))]
-          (return* remaining [fst snd]))))))
-
-;; Builds a lexer for the literal string `prefix`.
-;;
-;; When the input starts with `prefix`, the lexer calls `return*` with the
-;; input after it and `prefix` itself as the value; otherwise it calls
-;; `fail*` with a message naming the expected string and the input.
-(defn ^:private lex-str [prefix]
-  (fn [text]
-    (if-not (.startsWith text prefix)
-      (fail* (str "String failed: " prefix " -- " text))
-      (let [remaining (subs text (count prefix))]
-        (return* remaining prefix)))))
-
-;; Translates a backslash escape sequence (a backslash followed by one
-;; character, given as a two-character string) into the string it denotes.
-;; Yields the translation through `return`, or a `fail` naming the sequence
-;; when it is not one of the supported escapes.
-(defn ^:private escape-char [escaped]
-  (let [translations {"\\t"  "\t"
-                      "\\b"  "\b"
-                      "\\n"  "\n"
-                      "\\r"  "\r"
-                      "\\f"  "\f"
-                      "\\\"" "\""
-                      "\\\\" "\\"}
-        unescaped    (get translations escaped)]
-    (if unescaped
-      (return unescaped)
-      (fail (str "Unknown escape character: " escaped)))))
-
-;; Lexes the contents of a string literal, stopping before the closing quote.
-;;
-;; First alternative: a run of characters that are neither `"` nor `\`,
-;; followed by one backslash escape; the escape is decoded with `escape-char`
-;; and the rest of the body is lexed recursively, then the three pieces are
-;; concatenated. Second alternative: a (possibly empty) run of characters
-;; that are neither `"` nor `\`.
-(def ^:private lex-string-body
-  (try-all-m [(exec [[literal escape-seq] (lex-regex2 #"(?s)^([^\"\\]*)(\\.)")
-                     decoded (escape-char escape-seq)
-                     remainder lex-string-body]
-                (return (str literal decoded remainder)))
-              (lex-regex #"(?s)^([^\"\\]*)")]))
-
-;; [Lexers]
-;; Lexer that consumes a run of one or more whitespace characters (`\s+`) at
-;; the start of the input and yields the consumed text.
-(def ^:private lex-white-space (lex-regex #"^(\s+)"))
-
-;; Pattern for identifiers, anchored at the start of the input. The first
-;; character is an ASCII letter or one of the symbol characters listed in the
-;; class; the following characters may additionally be ASCII digits.
-(def +ident-re+ #"^([a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|':\~\?][0-9a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|':\~\?]*)")
-
-;; Stamps out one regex-driven lexer per row below. Each lexer matches its
-;; pattern with `lex-regex` and yields the matched text as a [kind text] pair.
-(do-template [<lexer> <kind> <pattern>]
-  (def <lexer>
-    (exec [matched (lex-regex <pattern>)]
-      (return [<kind> matched])))
-
-  ^:private lex-boolean ::boolean #"^(true|false)"
-  ^:private lex-float   ::float   #"^(0|[1-9][0-9]*)\.[0-9]+"
-  ^:private lex-int     ::int     #"^(0|[1-9][0-9]*)"
-  ^:private lex-ident   ::ident   +ident-re+)
-
-;; Lexes a character literal written as `#"c"`, where `c` is either a
-;; backslash escape (decoded with `escape-char`) or any single character
-;; matched by `.`. Yields [::char c].
-(def ^:private lex-char
-  (exec [_ (lex-str "#\"")
-         chr (try-all-m [(exec [escape-seq (lex-regex #"^(\\.)")]
-                           (escape-char escape-seq))
-                         (lex-regex #"^(.)")])
-         _ (lex-str "\"")]
-    (return [::char chr])))
-
-;; Lexes a double-quoted string literal: an opening quote, the body as lexed
-;; by `lex-string-body`, and a closing quote. Yields [::string body].
-(def ^:private lex-string
-  (exec [_ (lex-str "\"")
-         body lex-string-body
-         _ (lex-str "\"")]
-    (return [::string body])))
-
-;; Lexes a `##` comment: everything after the marker up to the end of the
-;; line becomes the comment text, and the line's newline (if there is one) is
-;; consumed as well. Yields [::comment text].
-(def ^:private lex-single-line-comment
-  (exec [_ (lex-str "##")
-         line (lex-regex #"^([^\n]*)")
-         _ (lex-regex #"^(\n?)")]
-    (return [::comment line])))
-
-;; Lexes a `#( ... )#` block comment, which may contain nested block
-;; comments, and yields [::comment text] where `text` is everything between
-;; the outer delimiters (nested comments are kept verbatim, delimiters
-;; included).
-;;
-;; The body is tried in two ways:
-;;  1. Flat: the shortest run of characters, none of which starts a `#(`,
-;;     that is directly followed by `)#`.
-;;  2. Nested: a repetition of pieces, where each piece is either a nested
-;;     block comment or a non-empty run of characters that starts neither
-;;     `#(` nor `)#`. This lets a nested comment sit directly against the
-;;     outer delimiters (e.g. `#(#( a )#)#`) and lets several nested comments
-;;     appear side by side; the previous `.+?`-based patterns required at
-;;     least one character on both sides of a single nested comment.
-;;     NOTE(review): assumes `repeat-m` collects results until its parser
-;;     fails, the way `lex-forms` uses it -- confirm against lang.util.
-(def ^:private lex-multi-line-comment
-  (exec [_ (lex-str "#(")
-         comment (try-all-m [(lex-regex #"(?is)^((?!#\().)*?(?=\)#)")
-                             (exec [pieces (repeat-m
-                                            (try-all-m [(exec [[_ inner] lex-multi-line-comment]
-                                                          (return (str "#(" inner ")#")))
-                                                        (lex-regex #"(?is)^((?!#\(|\)#).)+")]))]
-                               (return (apply str pieces)))])
-         _ (lex-str ")#")]
-    (return [::comment comment])))
-
-;; Lexes a tag: a `#` immediately followed by text matching `+ident-re+`.
-;; Yields [::tag text], where `text` excludes the leading `#`.
-(def ^:private lex-tag
-  (exec [_ (lex-str "#")
-         label (lex-regex +ident-re+)]
-    (return [::tag label])))
-
-;; Lexes a single form, with `lex-white-space` attempted through `try-m`
-;; before and after it. The alternatives are listed in the order they are
-;; handed to `try-all-m`; that order is kept exactly as it was.
-(def ^:private lex-form
-  (exec [_ (try-m lex-white-space)
-         token (try-all-m [lex-boolean lex-float lex-int
-                           lex-char lex-string
-                           lex-ident lex-tag
-                           lex-list lex-tuple lex-record
-                           lex-single-line-comment lex-multi-line-comment])
-         _ (try-m lex-white-space)]
-    (return token)))
-
-;; Lexes the forms collected by `(repeat-m lex-form)` and yields them with
-;; every [::comment _] token dropped.
-(def lex-forms
-  (exec [tokens (repeat-m lex-form)]
-    (return (remove (fn [token]
-                      (match token
-                        [::comment _] true
-                        _ false))
-                    tokens))))
-
-;; Stamps out the three bracketed-collection lexers. Each one expects its
-;; opening delimiter, the forms lexed by `lex-forms`, and its closing
-;; delimiter, and yields [tag members].
-(do-template [<lexer> <tag> <open> <close>]
-  (def <lexer>
-    (exec [_ (lex-str <open>)
-           members lex-forms
-           _ (lex-str <close>)]
-      (return [<tag> members])))
-
-  ^:private lex-list   ::list   "(" ")"
-  ^:private lex-tuple  ::tuple  "[" "]"
-  ^:private lex-record ::record "{" "}")
-
-;; [Interface]
-;; Lexes the whole of `text` with `lex-forms` and returns the resulting
-;; forms.
-;;
-;; Throws AssertionError when input is left over after lexing, or when
-;; `lex-forms` reports a failure. The errors are thrown explicitly rather
-;; than through `(assert false ...)`, because `assert` expands to nothing
-;; when `*assert*` is false and the failure would then be silently turned
-;; into a nil return value.
-(defn lex [text]
-  (match (lex-forms text)
-    [::&util/ok [?state ?forms]]
-    (if (empty? ?state)
-      ?forms
-      (throw (AssertionError. (str "Unconsumed input: " ?state))))
-
-    [::&util/failure ?message]
-    (throw (AssertionError. (str ?message)))))