aboutsummaryrefslogtreecommitdiff
path: root/lux-bootstrapper/src/lux/lexer.clj
blob: 47c985f215c12ab01eae9f076721776b31e20da4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
(ns lux.lexer
  (:require (clojure [template :refer [do-template]]
                     [string :as string])
            (lux [base :as & :refer [defvariant |do return* return |case]]
                 [reader :as &reader])
            [lux.analyser.module :as &module]))

;; [Tags]
(defvariant
  ("White_Space" 1)
  ("Comment" 1)
  ("Bit" 1)
  ("Nat" 1)
  ("Int" 1)
  ("Rev" 1)
  ("Frac" 1)
  ("Text" 1)
  ("Identifier" 1)
  ("Tag" 1)
  ("Open_Paren" 0)
  ("Close_Paren" 0)
  ("Open_Bracket" 0)
  ("Close_Bracket" 0)
  ("Open_Brace" 0)
  ("Close_Brace" 0)
  )

;; [Utils]
(def lex-text
  (|do [[meta _ _] (&reader/read-text "\"")
        :let [[_ _ _column] meta]
        [_ _ ^String content] (&reader/read-regex #"^([^\"]*)")
        _ (&reader/read-text "\"")]
    (return (&/T [meta ($Text content)]))))

(def +ident-re+
  #"^([^0-9\[\]\{\}\(\)\s\"#.][^\[\]\{\}\(\)\s\"#.]*)")

;; [Lexers]
(def ^:private lex-white-space
  (|do [[meta _ white-space] (&reader/read-regex #"^(\s+|$)")]
    (return (&/T [meta ($White_Space white-space)]))))

(def ^:private lex-comment
  (|do [_ (&reader/read-text "...")
        [meta _ comment] (&reader/read-regex #"^(.*)$")]
    (return (&/T [meta ($Comment comment)]))))

(do-template [<name> <tag> <regex>]
  (def <name>
    (|do [[meta _ token] (&reader/read-regex <regex>)]
      (return (&/T [meta (<tag> token)]))))

  lex-bit $Bit #"^#(0|1)"
  )

(do-template [<name> <tag> <regex>]
  (def <name>
    (|do [[meta _ token] (&reader/read-regex <regex>)]
      (return (&/T [meta (<tag> (string/replace token #"," ""))]))))

  lex-nat  $Nat  #"^[0-9][0-9,]*"
  lex-int  $Int  #"^(-|\+)[0-9][0-9,]*"
  lex-rev  $Rev  #"^\.[0-9][0-9,]*"
  lex-frac $Frac #"^(-|\+)[0-9][0-9,]*\.[0-9][0-9,]*((e|E)(-|\+)[0-9][0-9,]*)?"
  )

(def +same-module-mark+ (str &/+name-separator+ &/+name-separator+))

(def ^:private lex-ident
  (&/try-all-% "[Reader Error]"
               (&/|list (|do [[meta _ token] (&reader/read-regex +ident-re+)
                              [_ _ got-it?] (&reader/read-text? &/+name-separator+)]
                          (|case got-it?
                            (&/$Some _)
                            (|do [[_ _ local-token] (&reader/read-regex +ident-re+)
                                  ? (&module/exists? token)]
                              (if ?
                                (return (&/T [meta (&/T [token local-token])]))
                                (|do [unaliased (&module/dealias token)]
                                  (return (&/T [meta (&/T [unaliased local-token])])))))

                            (&/$None)
                            (return (&/T [meta (&/T ["" token])]))))
                        (|do [[meta _ _] (&reader/read-text +same-module-mark+)
                              [_ _ token] (&reader/read-regex +ident-re+)
                              module-name &/get-module-name]
                          (return (&/T [meta (&/T [module-name token])])))
                        (|do [[meta _ _] (&reader/read-text &/+name-separator+)
                              [_ _ token] (&reader/read-regex +ident-re+)]
                          (return (&/T [meta (&/T [&/prelude token])])))
                        )))

(def ^:private lex-identifier
  (|do [[meta ident] lex-ident]
    (return (&/T [meta ($Identifier ident)]))))

(def ^:private lex-tag
  (|do [[meta _ _] (&reader/read-text "#")
        [_ ident] lex-ident]
    (return (&/T [meta ($Tag ident)]))))

(do-template [<name> <text> <tag>]
  (def <name>
    (|do [[meta _ _] (&reader/read-text <text>)]
      (return (&/T [meta <tag>]))))

  ^:private lex-open-paren    "(" $Open_Paren
  ^:private lex-close-paren   ")" $Close_Paren
  ^:private lex-open-bracket  "[" $Open_Bracket
  ^:private lex-close-bracket "]" $Close_Bracket
  ^:private lex-open-brace    "{" $Open_Brace
  ^:private lex-close-brace   "}" $Close_Brace
  )

(def ^:private lex-delimiter
  (&/try-all% (&/|list lex-open-paren
                       lex-close-paren
                       lex-open-bracket
                       lex-close-bracket
                       lex-open-brace
                       lex-close-brace)))

;; [Exports]
(def lex
  (&/try-all-% "[Reader Error]"
               (&/|list lex-white-space
                        lex-comment
                        lex-bit
                        lex-nat
                        lex-frac
                        lex-rev
                        lex-int
                        lex-text
                        lex-identifier
                        lex-tag
                        lex-delimiter)))