From 70fe9266ce1ccb4fe964b831a7ab61f0d6262111 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Tue, 16 Dec 2014 00:58:36 -0400 Subject: + Strings now have escape characters. + Characters have been added. % Unicode support is missing... --- src/lang.clj | 6 ++++-- src/lang/analyser.clj | 10 ++-------- src/lang/compiler.clj | 13 +++++++++++-- src/lang/lexer.clj | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- src/lang/parser.clj | 25 +++++++++++++++---------- 5 files changed, 78 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/lang.clj b/src/lang.clj index 2c0ca47e6..0aaba1b81 100644 --- a/src/lang.clj +++ b/src/lang.clj @@ -12,7 +12,6 @@ (comment ;; TODO: Add pattern-matching. - ;; TODO: Allow strings to have escape characters. ;; TODO: Fold all closure classes into one. ;; TODO: When doing partial application, skip "apply" and just call constructor appropiatedly. ;; TODO: Add extra arities (apply2, apply3, ..., apply16) @@ -28,7 +27,7 @@ ;; TODO: Do tail-call optimization. ;; TODO: Adding metadata to global vars. ;; TODO: Add records. - ;; TODO: + ;; TODO: throw, try, catch, finally ;; TODO: ;; TODO: @@ -41,7 +40,10 @@ ;; _ (prn 'ann-syntax ann-syntax) class-data (&compiler/compile "test2" ann-syntax)] (write-file "test2.class" class-data)) + + + ;; ## (_. (_.. System out) (println "this\tis a\nstring")) ;; jar cvf test2.jar *.class test2 && java -cp "test2.jar" test2 ) diff --git a/src/lang/analyser.clj b/src/lang/analyser.clj index 9251b9523..0cab85f66 100644 --- a/src/lang/analyser.clj +++ b/src/lang/analyser.clj @@ -115,17 +115,10 @@ analyse-boolean ::&parser/boolean "java.lang.Boolean" analyse-int ::&parser/int "java.lang.Integer" analyse-float ::&parser/float "java.lang.Float" + analyse-char ::&parser/char "java.lang.Character" analyse-string ::&parser/string "java.lang.String" ) -(defanalyser analyse-boolean - [::&parser/boolean ?boolean] - (return (annotated [::literal ?boolean] [::&type/object "java.lang.Boolean" []]))) - -(defanalyser analyse-string - [::&parser/string ?string] - (return (annotated [::literal ?string] [::&type/object "java.lang.String" []]))) - (defanalyser analyse-variant [::&parser/tagged ?tag ?value] (exec [=value (analyse-form* ?value)] @@ -284,6 +277,7 @@ (try-all-m [analyse-boolean analyse-int analyse-float + analyse-char analyse-string analyse-variant analyse-tuple diff --git a/src/lang/compiler.clj b/src/lang/compiler.clj index e7d1ed582..b29bc38d1 100644 --- a/src/lang/compiler.clj +++ b/src/lang/compiler.clj @@ -73,15 +73,24 @@ (cond (instance? java.lang.Integer ?literal) (doto *writer* (.visitTypeInsn Opcodes/NEW (->class "java.lang.Integer")) - (.visitInsn Opcodes/DUP)(.visitLdcInsn ?literal) + (.visitInsn Opcodes/DUP) + (.visitLdcInsn ?literal) (.visitMethodInsn Opcodes/INVOKESPECIAL (->class "java.lang.Integer") "" "(I)V")) (instance? java.lang.Float ?literal) (doto *writer* (.visitTypeInsn Opcodes/NEW (->class "java.lang.Float")) - (.visitInsn Opcodes/DUP)(.visitLdcInsn ?literal) + (.visitInsn Opcodes/DUP) + (.visitLdcInsn ?literal) (.visitMethodInsn Opcodes/INVOKESPECIAL (->class "java.lang.Float") "" "(F)V")) + (instance? java.lang.Character ?literal) + (doto *writer* + (.visitTypeInsn Opcodes/NEW (->class "java.lang.Character")) + (.visitInsn Opcodes/DUP) + (.visitLdcInsn ?literal) + (.visitMethodInsn Opcodes/INVOKESPECIAL (->class "java.lang.Character") "" "(C)V")) + (instance? java.lang.Boolean ?literal) (if ?literal ;; (.visitLdcInsn *writer* (int 1)) diff --git a/src/lang/lexer.clj b/src/lang/lexer.clj index c88f54d57..93bdf51aa 100644 --- a/src/lang/lexer.clj +++ b/src/lang/lexer.clj @@ -13,12 +13,42 @@ (return* (.substring text (.length match)) match) (fail* (str "Pattern failed: " regex " -- " text))))) +(defn ^:private lex-regex2 [regex] + (fn [text] + (if-let [[match tok1 tok2] (re-find regex text)] + (return* (.substring text (.length match)) [tok1 tok2]) + (fail* (str "Pattern failed: " regex " -- " text))))) + (defn ^:private lex-str [prefix] (fn [text] (if (.startsWith text prefix) (return* (.substring text (.length prefix)) prefix) (fail* (str "String failed: " prefix " -- " text))))) +(defn ^:private escape-char [escaped] + (condp = escaped + "\\t" (return "\t") + "\\b" (return "\b") + "\\n" (return "\n") + "\\r" (return "\r") + "\\f" (return "\f") + "\\\"" (return "\"") + "\\\\" (return "\\") + ;; else + (fail (str "Unknown escape character: " escaped)))) + +(def ^:private lex-string-body + (try-all-m [(exec [[prefix escaped] (lex-regex2 #"(?s)^([^\"\\]*)(\\.)") + ;; :let [_ (prn '[prefix escaped] [prefix escaped])] + unescaped (escape-char escaped) + ;; :let [_ (prn 'unescaped unescaped)] + postfix lex-string-body + ;; :let [_ (prn 'postfix postfix)] + ;; :let [_ (prn 'FULL (str prefix unescaped postfix))] + ] + (return (str prefix unescaped postfix))) + (lex-regex #"(?s)^([^\"\\]*)")])) + ;; [Lexers] (def ^:private lex-white-space (lex-regex #"^(\s+)")) @@ -34,11 +64,22 @@ ^:private lex-int ::int #"^(0|[1-9][0-9]*)" ^:private lex-ident ::ident +ident-re+) -(def lex-string +(def ^:private lex-char + (exec [_ (lex-str "#\"") + token (try-all-m [(exec [escaped (lex-regex #"^(\\.)")] + (escape-char escaped)) + (lex-regex #"^(.)")]) + _ (lex-str "\"")] + (return [::char token]))) + +(def ^:private lex-string (exec [_ (lex-str "\"") - token (lex-regex #"^(.+?(?=\"))") + state &util/get-state + :let [_ (prn 'PRE state)] + token lex-string-body _ (lex-str "\"") - ] + state &util/get-state + :let [_ (prn 'POST state)]] (return [::string token]))) (def ^:private lex-single-line-comment @@ -59,7 +100,7 @@ [_ inner] lex-multi-line-comment ;; :let [_ (prn 'INNER inner)] post (lex-regex #"(?is)^(.+?(?=\)#))") - ;:let [_ (prn 'POST post)] + ;:let [_ (prn 'POST post)] ] (return (str pre "#(" inner ")#" post)))]) ;; :let [_ (prn 'COMMENT comment)] @@ -79,6 +120,7 @@ form (try-all-m [lex-boolean lex-float lex-int + lex-char lex-string lex-ident lex-tag diff --git a/src/lang/parser.clj b/src/lang/parser.clj index e82a7ef75..90db91172 100644 --- a/src/lang/parser.clj +++ b/src/lang/parser.clj @@ -1,5 +1,6 @@ (ns lang.parser - (:require [clojure.core.match :refer [match]] + (:require [clojure.template :refer [do-template]] + [clojure.core.match :refer [match]] (lang [util :as &util :refer [exec return* return fail fail* repeat-m try-m try-all-m map-m apply-m]] @@ -19,17 +20,20 @@ (fail* (str "Unmatched token: " token#)))))) ;; [Parsers] -(defparser ^:private parse-boolean - [::&lexer/boolean ?boolean] - (return [::boolean (Boolean/parseBoolean ?boolean)])) +(do-template [ ] + (defparser + [ ?value] + (return [ ( ?value)])) -(defparser ^:private parse-int - [::&lexer/int ?int] - (return [::int (Integer/parseInt ?int)])) + + ^:private parse-boolean ::&lexer/boolean ::boolean Boolean/parseBoolean + ^:private parse-int ::&lexer/int ::int Integer/parseInt + ^:private parse-float ::&lexer/float ::float Float/parseFloat + ) -(defparser ^:private parse-float - [::&lexer/float ?float] - (return [::float (Float/parseFloat ?float)])) +(defparser ^:private parse-char + [::&lexer/char ?value] + (return [::char (.charAt ?value 0)])) (defn ident->string [ident] (match ident @@ -199,6 +203,7 @@ (try-all-m [parse-boolean parse-int parse-float + parse-char parse-string parse-ident parse-tuple -- cgit v1.2.3