aboutsummaryrefslogtreecommitdiff
path: root/source/luxc/lexer.lux
blob: ed86be68f5b088695d8af1521d0a679507109262 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
(use ./util #as &util #refer [do return fail try-all])

## [Utils]
(def (lex-regex regex)
  ...)

(def (lex-regex2 regex)
  ...)

(def (lex-prefix prefix)
  ...)

(def (escape-char escaped)
  (case escaped
    "\\t"  (return "\t")
    "\\b"  (return "\b")
    "\\n"  (return "\n")
    "\\r"  (return "\r")
    "\\f"  (return "\f")
    "\\\"" (return "\"")
    "\\\\" (return "\\")
    _ (fail (fold concat "" (list "[Lexer Error] Unknown escape character: " escaped)))))

(defrec lex-text-body
  (try-all (list (do [[prefix escaped] (lex-regex2 "(?s)^([^\\\"\\\\]*)(\\\\.)")
                      unescaped (escape-char escaped)
                      postfix lex-text-body]
                   (return (str prefix unescaped postfix)))
                 (lex-regex "(?s)^([^\\\"\\\\]*)^"))))

(def +ident-re+ ...)

## [Lexers]
(def lex-white-space
  (do [white-space (lex-regex #"^(\s+)")]
    (return (#White-Space white-space))))

(def lex-single-line-comment
  (do [_ (lex-prefix "##")
       comment (lex-regex #"^([^\n]*)")
       _ (lex-regex #"^(\n?)")]
    (return (#Comment comment))))

(def lex-multi-line-comment
  (do [_ (lex-prefix "#(")
       comment (try-all (list (lex-regex #"(?is)^((?!#\().)*?(?=\)#)")
                              (do [pre (lex-regex #"(?is)^(.+?(?=#\())")
                                   [_ inner] lex-multi-line-comment
                                   post (lex-regex #"(?is)^(.+?(?=\)#))")]
                                (return (fold concat "" (list pre "#(" inner ")#" post))))))
       _ (lex-prefix ")#")]
    (return (#Comment comment))))

(def lex-comment
  (try-all (list lex-single-line-comment
                 lex-multi-line-comment)))

(do-template [<name> <tag> <regex>]
  (def <name>
    (do [token (lex-regex <regex>)]
      (return (<tag> token))))

  lex-bool  #Bool  #"^(true|false)"
  lex-real  #Real  #"^(0|[1-9][0-9]*)\.[0-9]+"
  lex-int   #Int   #"^(0|[1-9][0-9]*)"
  lex-ident #Ident +ident-re+)

(def lex-char
  (do [_ (lex-prefix "#\"")
       token (try-all (list (do [escaped (lex-regex #"^(\\.)")]
                              (escape-char escaped))
                            (lex-regex #"^(.)")))
       _ (lex-prefix "\"")]
    (return (#Char token))))

(def lex-text
  (do [_ (lex-prefix "\"")
       token lex-text-body
       _ (lex-prefix "\"")]
    (return (#Text token))))

(def lex-tag
  (do [_ (lex-prefix "#")
       token (lex-regex +ident-re+)]
    (return (#Tag token))))

(do-template [<name> <delim> <tag>]
  (def <name>
    (do [_ (lex-prefix <delim>)]
      (return <tag>)))

  lex-open-paren    "(" #Open-Paren
  lex-close-paren   ")" #Close-Paren
  lex-open-bracket  "[" #Open-Bracket
  lex-close-bracket "]" #Close-Bracket
  lex-open-brace    "{" #Open-Brace
  lex-close-brace   "}" #Close-Brace
  )

(def lex-delimiter
  (try-all (list lex-open-paren
                 lex-close-paren
                 lex-open-bracket
                 lex-close-bracket
                 lex-open-brace
                 lex-close-brace)))

;; [Interface]
(def #export lex
  (try-all (list lex-white-space
                 lex-comment
                 lex-bool
                 lex-real
                 lex-int
                 lex-char
                 lex-text
                 lex-ident
                 lex-tag
                 lex-delimiter)))