aboutsummaryrefslogtreecommitdiff
path: root/src/lang/lexer.clj
blob: bbb92da95999e957f80db6dbb42ff694b1c3b057 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
(ns lang.lexer
  (:require [clojure.template :refer [do-template]]
            [clojure.core.match :refer [match]]
            [lang.util :as &util :refer [exec return* return fail fail*
                                         repeat-m try-m try-all-m]]))

(declare lex-form)

;; [Utils]
(defn ^:private lex-regex [regex]
  (fn [text]
    (if-let [[match] (re-find regex text)]
      (return* (.substring text (.length match)) match)
      (fail* (str "Pattern failed: " regex " -- " text)))))

(defn ^:private lex-str [prefix]
  (fn [text]
    (if (.startsWith text prefix)
      (return* (.substring text (.length prefix)) prefix)
      (fail* (str "String failed: " prefix " -- " text)))))

;; [Lexers]
(def ^:private lex-white-space (lex-regex #"^(\s+)"))

(def ^:private lex-list
  (exec [_ (lex-str "(")
         members (repeat-m lex-form)
         _ (lex-str ")")]
    (return [::list members])))

(def ^:private lex-tuple
  (exec [_ (lex-str "[")
         members (repeat-m lex-form)
         _ (lex-str "]")]
    (return [::tuple members])))

(def +ident-re+ #"^([a-zA-Z\-\+\_\=!@$%^&*<>\.,/\\\|'][a-zA-Z0-9\-\+\_\=!@$%^&*<>\.,/\\\|']*)")

(do-template [<name> <tag> <regex>]
  (def <name>
    (exec [token (lex-regex <regex>)]
      (return [<tag> token])))

  ^:private lex-int   ::int    #"^(0|[1-9][0-9]*)"
  ^:private lex-ident ::ident  +ident-re+)

(def ^:private lex-tag
  (exec [_ (lex-str "#")
         token (lex-regex +ident-re+)]
    (return [::tag token])))

(def ^:private lex-form
  (exec [_ (try-m lex-white-space)
         form (try-all-m [lex-int
                          lex-ident
                          lex-tag
                          lex-list
                          lex-tuple])
         _ (try-m lex-white-space)]
    (return form)))

;; [Interface]
(defn lex [text]
  (match ((repeat-m lex-form) text)
    [::&util/ok [?state ?forms]]
    (if (empty? ?state)
      ?forms
      (assert false (str "Unconsumed input: " ?state)))
    
    [::&util/failure ?message]
    (assert false ?message)))