aboutsummaryrefslogtreecommitdiff
path: root/src/lang/lexer.clj
blob: a51330e091c8a815cff4e53907aa3806ea90b334 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
(ns lang.lexer
  (:require [clojure.template :refer [do-template]]
            [clojure.core.match :refer [match]]
            [lang.util :as &util :refer [exec return* return fail fail*
                                         repeat-m try-m try-all-m]]))

(declare lex-form)

;; [Utils]
(defn ^:private lex-regex [regex]
  (fn [text]
    (if-let [[match] (re-find regex text)]
      (return* (.substring text (.length match)) match)
      (fail* (str "Pattern failed: " regex " -- " text)))))

(defn ^:private lex-str [prefix]
  (fn [text]
    (if (.startsWith text prefix)
      (return* (.substring text (.length prefix)) prefix)
      (fail* (str "String failed: " prefix " -- " text)))))

;; [Lexers]
(def ^:private lex-white-space (lex-regex #"^(\s+)"))

(def lex-forms
  (exec [forms (repeat-m lex-form)]
    (return (filter #(match %
                       [::comment _]
                       false
                       _
                       true)
                    forms))))

(def ^:private lex-list
  (exec [_ (lex-str "(")
         members lex-forms
         _ (lex-str ")")]
    (return [::list members])))

(def ^:private lex-tuple
  (exec [_ (lex-str "[")
         members lex-forms
         _ (lex-str "]")]
    (return [::tuple members])))

(def +ident-re+ #"^([a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|'][a-zA-Z0-9\-\+\_\=!@$%^&*<>\.,/\\\|']*)")

(do-template [<name> <tag> <regex>]
  (def <name>
    (exec [token (lex-regex <regex>)]
      (return [<tag> token])))

  ^:private lex-int   ::int    #"^(0|[1-9][0-9]*)"
  ^:private lex-ident ::ident  +ident-re+)

(def ^:private lex-single-line-comment
  (exec [_ (lex-str "##")
         comment (lex-regex #"^([^\n]*)")
         _ (lex-regex #"^(\n?)")
         ;; :let [_ (prn 'comment comment)]
         ]
    (return [::comment comment])))

(def ^:private lex-multi-line-comment
  (exec [_ (lex-str "#(")
         ;; :let [_ (prn 'OPEN)]
         ;; comment (lex-regex #"^(#\(.*\)#)")
         comment (try-all-m [(lex-regex #"^((?!#\().)*?(?=\)#)")
                             (exec [pre (lex-regex #"^(.+?(?=#\())")
                                    ;; :let [_ (prn 'PRE pre)]
                                    [_ inner] lex-multi-line-comment
                                    ;; :let [_ (prn 'INNER inner)]
                                    post (lex-regex #"^(.+?(?=\)#))")
                                    ;:let [_ (prn 'POST post)]
                                    ]
                               (return (str pre "#(" inner ")#" post)))])
         ;; :let [_ (prn 'COMMENT comment)]
         _ (lex-str ")#")
         ;; :let [_ (prn 'CLOSE)]
         ;; :let [_ (prn 'multi-comment comment)]
         ]
    (return [::comment comment])))

;; #"^(.*?!(#\()).*#\)"

;; ;; UP TO #(
;; #"^.+?(?=#\()"

;; ;; UP TO )#
;; #"^.+?(?=\)#)"

(def ^:private lex-tag
  (exec [_ (lex-str "#")
         token (lex-regex +ident-re+)]
    (return [::tag token])))

(def ^:private lex-form
  (exec [_ (try-m lex-white-space)
         form (try-all-m [lex-int
                          lex-ident
                          lex-tag
                          lex-list
                          lex-tuple
                          lex-single-line-comment
                          lex-multi-line-comment])
         _ (try-m lex-white-space)]
    (return form)))

;; [Interface]
(defn lex [text]
  (match (lex-forms text)
    [::&util/ok [?state ?forms]]
    (if (empty? ?state)
      ?forms
      (assert false (str "Unconsumed input: " ?state)))
    
    [::&util/failure ?message]
    (assert false ?message)))