blob: ed86be68f5b088695d8af1521d0a679507109262 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
(use ./util #as &util #refer [do return fail try-all])
## [Utils]
(def (lex-regex regex)
...)
(def (lex-regex2 regex)
...)
(def (lex-prefix prefix)
...)
(def (escape-char escaped)
(case escaped
"\\t" (return "\t")
"\\b" (return "\b")
"\\n" (return "\n")
"\\r" (return "\r")
"\\f" (return "\f")
"\\\"" (return "\"")
"\\\\" (return "\\")
_ (fail (fold concat "" (list "[Lexer Error] Unknown escape character: " escaped)))))
(defrec lex-text-body
(try-all (list (do [[prefix escaped] (lex-regex2 "(?s)^([^\\\"\\\\]*)(\\\\.)")
unescaped (escape-char escaped)
postfix lex-text-body]
(return (str prefix unescaped postfix)))
(lex-regex "(?s)^([^\\\"\\\\]*)^"))))
(def +ident-re+ ...)
## [Lexers]
(def lex-white-space
(do [white-space (lex-regex #"^(\s+)")]
(return (#White-Space white-space))))
(def lex-single-line-comment
(do [_ (lex-prefix "##")
comment (lex-regex #"^([^\n]*)")
_ (lex-regex #"^(\n?)")]
(return (#Comment comment))))
(def lex-multi-line-comment
(do [_ (lex-prefix "#(")
comment (try-all (list (lex-regex #"(?is)^((?!#\().)*?(?=\)#)")
(do [pre (lex-regex #"(?is)^(.+?(?=#\())")
[_ inner] lex-multi-line-comment
post (lex-regex #"(?is)^(.+?(?=\)#))")]
(return (fold concat "" (list pre "#(" inner ")#" post))))))
_ (lex-prefix ")#")]
(return (#Comment comment))))
(def lex-comment
(try-all (list lex-single-line-comment
lex-multi-line-comment)))
(do-template [<name> <tag> <regex>]
(def <name>
(do [token (lex-regex <regex>)]
(return (<tag> token))))
lex-bool #Bool #"^(true|false)"
lex-real #Real #"^(0|[1-9][0-9]*)\.[0-9]+"
lex-int #Int #"^(0|[1-9][0-9]*)"
lex-ident #Ident +ident-re+)
(def lex-char
(do [_ (lex-prefix "#\"")
token (try-all (list (do [escaped (lex-regex #"^(\\.)")]
(escape-char escaped))
(lex-regex #"^(.)")))
_ (lex-prefix "\"")]
(return (#Char token))))
(def lex-text
(do [_ (lex-prefix "\"")
token lex-text-body
_ (lex-prefix "\"")]
(return (#Text token))))
(def lex-tag
(do [_ (lex-prefix "#")
token (lex-regex +ident-re+)]
(return (#Tag token))))
(do-template [<name> <delim> <tag>]
(def <name>
(do [_ (lex-prefix <delim>)]
(return <tag>)))
lex-open-paren "(" #Open-Paren
lex-close-paren ")" #Close-Paren
lex-open-bracket "[" #Open-Bracket
lex-close-bracket "]" #Close-Bracket
lex-open-brace "{" #Open-Brace
lex-close-brace "}" #Close-Brace
)
(def lex-delimiter
(try-all (list lex-open-paren
lex-close-paren
lex-open-bracket
lex-close-bracket
lex-open-brace
lex-close-brace)))
;; [Interface]
(def #export lex
(try-all (list lex-white-space
lex-comment
lex-bool
lex-real
lex-int
lex-char
lex-text
lex-ident
lex-tag
lex-delimiter)))
|