aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorEduardo Julian2014-12-16 00:58:36 -0400
committerEduardo Julian2014-12-16 00:58:36 -0400
commit70fe9266ce1ccb4fe964b831a7ab61f0d6262111 (patch)
treeeab9cca12d932a29b08a9ed0961b8d2600985c75 /src
parent08de30a36ff6599e583d22c4305bfee9878f92ea (diff)
+ Strings now have escape characters.
+ Characters have been added. % Unicode support is missing...
Diffstat (limited to 'src')
-rw-r--r--src/lang.clj6
-rw-r--r--src/lang/analyser.clj10
-rw-r--r--src/lang/compiler.clj13
-rw-r--r--src/lang/lexer.clj50
-rw-r--r--src/lang/parser.clj25
5 files changed, 78 insertions, 26 deletions
diff --git a/src/lang.clj b/src/lang.clj
index 2c0ca47e6..0aaba1b81 100644
--- a/src/lang.clj
+++ b/src/lang.clj
@@ -12,7 +12,6 @@
(comment
;; TODO: Add pattern-matching.
- ;; TODO: Allow strings to have escape characters.
;; TODO: Fold all closure classes into one.
;; TODO: When doing partial application, skip "apply" and just call constructor appropiatedly.
;; TODO: Add extra arities (apply2, apply3, ..., apply16)
@@ -28,7 +27,7 @@
;; TODO: Do tail-call optimization.
;; TODO: Adding metadata to global vars.
;; TODO: Add records.
- ;; TODO:
+ ;; TODO: throw, try, catch, finally
;; TODO:
;; TODO:
@@ -41,7 +40,10 @@
;; _ (prn 'ann-syntax ann-syntax)
class-data (&compiler/compile "test2" ann-syntax)]
(write-file "test2.class" class-data))
+
+
+ ;; ## (_. (_.. System out) (println "this\tis a\nstring"))
;; jar cvf test2.jar *.class test2 && java -cp "test2.jar" test2
)
diff --git a/src/lang/analyser.clj b/src/lang/analyser.clj
index 9251b9523..0cab85f66 100644
--- a/src/lang/analyser.clj
+++ b/src/lang/analyser.clj
@@ -115,17 +115,10 @@
analyse-boolean ::&parser/boolean "java.lang.Boolean"
analyse-int ::&parser/int "java.lang.Integer"
analyse-float ::&parser/float "java.lang.Float"
+ analyse-char ::&parser/char "java.lang.Character"
analyse-string ::&parser/string "java.lang.String"
)
-(defanalyser analyse-boolean
- [::&parser/boolean ?boolean]
- (return (annotated [::literal ?boolean] [::&type/object "java.lang.Boolean" []])))
-
-(defanalyser analyse-string
- [::&parser/string ?string]
- (return (annotated [::literal ?string] [::&type/object "java.lang.String" []])))
-
(defanalyser analyse-variant
[::&parser/tagged ?tag ?value]
(exec [=value (analyse-form* ?value)]
@@ -284,6 +277,7 @@
(try-all-m [analyse-boolean
analyse-int
analyse-float
+ analyse-char
analyse-string
analyse-variant
analyse-tuple
diff --git a/src/lang/compiler.clj b/src/lang/compiler.clj
index e7d1ed582..b29bc38d1 100644
--- a/src/lang/compiler.clj
+++ b/src/lang/compiler.clj
@@ -73,15 +73,24 @@
(cond (instance? java.lang.Integer ?literal)
(doto *writer*
(.visitTypeInsn Opcodes/NEW (->class "java.lang.Integer"))
- (.visitInsn Opcodes/DUP)(.visitLdcInsn ?literal)
+ (.visitInsn Opcodes/DUP)
+ (.visitLdcInsn ?literal)
(.visitMethodInsn Opcodes/INVOKESPECIAL (->class "java.lang.Integer") "<init>" "(I)V"))
(instance? java.lang.Float ?literal)
(doto *writer*
(.visitTypeInsn Opcodes/NEW (->class "java.lang.Float"))
- (.visitInsn Opcodes/DUP)(.visitLdcInsn ?literal)
+ (.visitInsn Opcodes/DUP)
+ (.visitLdcInsn ?literal)
(.visitMethodInsn Opcodes/INVOKESPECIAL (->class "java.lang.Float") "<init>" "(F)V"))
+ (instance? java.lang.Character ?literal)
+ (doto *writer*
+ (.visitTypeInsn Opcodes/NEW (->class "java.lang.Character"))
+ (.visitInsn Opcodes/DUP)
+ (.visitLdcInsn ?literal)
+ (.visitMethodInsn Opcodes/INVOKESPECIAL (->class "java.lang.Character") "<init>" "(C)V"))
+
(instance? java.lang.Boolean ?literal)
(if ?literal
;; (.visitLdcInsn *writer* (int 1))
diff --git a/src/lang/lexer.clj b/src/lang/lexer.clj
index c88f54d57..93bdf51aa 100644
--- a/src/lang/lexer.clj
+++ b/src/lang/lexer.clj
@@ -13,12 +13,42 @@
(return* (.substring text (.length match)) match)
(fail* (str "Pattern failed: " regex " -- " text)))))
+(defn ^:private lex-regex2 [regex]
+ (fn [text]
+ (if-let [[match tok1 tok2] (re-find regex text)]
+ (return* (.substring text (.length match)) [tok1 tok2])
+ (fail* (str "Pattern failed: " regex " -- " text)))))
+
(defn ^:private lex-str [prefix]
(fn [text]
(if (.startsWith text prefix)
(return* (.substring text (.length prefix)) prefix)
(fail* (str "String failed: " prefix " -- " text)))))
+(defn ^:private escape-char [escaped]
+ (condp = escaped
+ "\\t" (return "\t")
+ "\\b" (return "\b")
+ "\\n" (return "\n")
+ "\\r" (return "\r")
+ "\\f" (return "\f")
+ "\\\"" (return "\"")
+ "\\\\" (return "\\")
+ ;; else
+ (fail (str "Unknown escape character: " escaped))))
+
+(def ^:private lex-string-body
+ (try-all-m [(exec [[prefix escaped] (lex-regex2 #"(?s)^([^\"\\]*)(\\.)")
+ ;; :let [_ (prn '[prefix escaped] [prefix escaped])]
+ unescaped (escape-char escaped)
+ ;; :let [_ (prn 'unescaped unescaped)]
+ postfix lex-string-body
+ ;; :let [_ (prn 'postfix postfix)]
+ ;; :let [_ (prn 'FULL (str prefix unescaped postfix))]
+ ]
+ (return (str prefix unescaped postfix)))
+ (lex-regex #"(?s)^([^\"\\]*)")]))
+
;; [Lexers]
(def ^:private lex-white-space (lex-regex #"^(\s+)"))
@@ -34,11 +64,22 @@
^:private lex-int ::int #"^(0|[1-9][0-9]*)"
^:private lex-ident ::ident +ident-re+)
-(def lex-string
+(def ^:private lex-char
+ (exec [_ (lex-str "#\"")
+ token (try-all-m [(exec [escaped (lex-regex #"^(\\.)")]
+ (escape-char escaped))
+ (lex-regex #"^(.)")])
+ _ (lex-str "\"")]
+ (return [::char token])))
+
+(def ^:private lex-string
(exec [_ (lex-str "\"")
- token (lex-regex #"^(.+?(?=\"))")
+ state &util/get-state
+ :let [_ (prn 'PRE state)]
+ token lex-string-body
_ (lex-str "\"")
- ]
+ state &util/get-state
+ :let [_ (prn 'POST state)]]
(return [::string token])))
(def ^:private lex-single-line-comment
@@ -59,7 +100,7 @@
[_ inner] lex-multi-line-comment
;; :let [_ (prn 'INNER inner)]
post (lex-regex #"(?is)^(.+?(?=\)#))")
- ;:let [_ (prn 'POST post)]
+ ;:let [_ (prn 'POST post)]
]
(return (str pre "#(" inner ")#" post)))])
;; :let [_ (prn 'COMMENT comment)]
@@ -79,6 +120,7 @@
form (try-all-m [lex-boolean
lex-float
lex-int
+ lex-char
lex-string
lex-ident
lex-tag
diff --git a/src/lang/parser.clj b/src/lang/parser.clj
index e82a7ef75..90db91172 100644
--- a/src/lang/parser.clj
+++ b/src/lang/parser.clj
@@ -1,5 +1,6 @@
(ns lang.parser
- (:require [clojure.core.match :refer [match]]
+ (:require [clojure.template :refer [do-template]]
+ [clojure.core.match :refer [match]]
(lang [util :as &util :refer [exec return* return fail fail*
repeat-m try-m try-all-m map-m
apply-m]]
@@ -19,17 +20,20 @@
(fail* (str "Unmatched token: " token#))))))
;; [Parsers]
-(defparser ^:private parse-boolean
- [::&lexer/boolean ?boolean]
- (return [::boolean (Boolean/parseBoolean ?boolean)]))
+(do-template [<name> <input-tag> <output-tag> <method>]
+ (defparser <name>
+ [<input-tag> ?value]
+ (return [<output-tag> (<method> ?value)]))
-(defparser ^:private parse-int
- [::&lexer/int ?int]
- (return [::int (Integer/parseInt ?int)]))
+
+ ^:private parse-boolean ::&lexer/boolean ::boolean Boolean/parseBoolean
+ ^:private parse-int ::&lexer/int ::int Integer/parseInt
+ ^:private parse-float ::&lexer/float ::float Float/parseFloat
+ )
-(defparser ^:private parse-float
- [::&lexer/float ?float]
- (return [::float (Float/parseFloat ?float)]))
+(defparser ^:private parse-char
+ [::&lexer/char ?value]
+ (return [::char (.charAt ?value 0)]))
(defn ident->string [ident]
(match ident
@@ -199,6 +203,7 @@
(try-all-m [parse-boolean
parse-int
parse-float
+ parse-char
parse-string
parse-ident
parse-tuple