aboutsummaryrefslogtreecommitdiff
path: root/new-luxc/source/luxc
diff options
context:
space:
mode:
authorEduardo Julian2017-04-14 23:08:41 -0400
committerEduardo Julian2017-04-14 23:08:41 -0400
commit368226ff76ea0a9e29b333b82f287305051593cb (patch)
treeb4bd4e33cb56218f5af457ad62e1f8e6cc0130ff /new-luxc/source/luxc
parentf619a094d6044635176bd2736f8fd0ca1c7372d9 (diff)
- The lexer now takes cursors into account.
Diffstat (limited to '')
-rw-r--r--new-luxc/source/luxc/parser.lux345
1 files changed, 228 insertions, 117 deletions
diff --git a/new-luxc/source/luxc/parser.lux b/new-luxc/source/luxc/parser.lux
index 010911128..10f406bc7 100644
--- a/new-luxc/source/luxc/parser.lux
+++ b/new-luxc/source/luxc/parser.lux
@@ -1,11 +1,15 @@
(;module:
lux
(lux (control monad)
- (data [char]
+ (data [bool]
+ [char]
[text]
[number]
(text ["l" lexer #+ Lexer Monad<Lexer> "l/" Monad<Lexer>]
- format))))
+ format)
+ [product]
+ (coll [list "L/" Functor<List> Fold<List>]
+ ["V" vector]))))
(def: default-cursor
Cursor
@@ -13,55 +17,85 @@
#;line +0
#;column +0})
-(def: space^
- (Lexer Text)
- (l;some' l;space))
+(def: (space^ where)
+ (-> Cursor (Lexer [Text Cursor]))
+ (do Monad<Lexer>
+ [head (l;some' (l;one-of "\t\v \r\f"))]
+ (l;either (l;after (l;one-of "\n")
+ (do @
+ [[tail end] (space^ (|> where
+ (update@ #;line n.inc)
+ (set@ #;column +0)))]
+ (wrap [(format head tail)
+ end])))
+ (wrap [head
+ (|> where
+ (update@ #;column (n.+ (text;size head))))]))))
-(def: single-line-comment^
- (Lexer Text)
+(def: (single-line-comment^ where)
+ (-> Cursor (Lexer [Text Cursor]))
(l;enclosed ["##" "\n"]
- (l;some' (l;none-of "\n"))))
+ (do Monad<Lexer>
+ [comment (l;some' (l;none-of "\n"))]
+ (wrap [comment
+ (|> where
+ (update@ #;line n.inc)
+ (set@ #;column +0))]))))
-(def: multi-line-comment^
+(def: comment-bound^
(Lexer Text)
- (let [bound^ (l;alt (l;text ")#")
- (l;text "#("))]
- (l;rec
- (function [multi-line-comment^]
- (do Monad<Lexer>
- [_ (l;text "#(")
- chunks (l;some (l;either (l;some' (l;not bound^))
- (do @
- [sub multi-line-comment^]
- (wrap (format "#(" sub ")#")))))
- _ (l;text ")#")]
- (wrap (text;join-with "" chunks)))))))
+ ($_ l;either
+ (l;text "\n")
+ (l;text ")#")
+ (l;text "#(")))
-(def: comment^
- (Lexer Text)
- (l;either single-line-comment^
- multi-line-comment^))
+(def: (multi-line-comment^ where)
+ (-> Cursor (Lexer [Text Cursor]))
+ (do Monad<Lexer>
+ [_ (l;text "#(")
+ [comment end] (loop [comment ""
+ where (|> where
+ (update@ #;column (n.+ +2)))]
+ ($_ l;either
+ (do @
+ [_ (l;one-of "\n")]
+ (recur (format comment "\n")
+ (|> where
+ (update@ #;line n.inc)
+ (set@ #;column +0))))
+ (do @
+ [chunk (l;some' (l;not comment-bound^))]
+ (recur (format comment chunk)
+ (|> where
+ (update@ #;column (n.+ (text;size chunk))))))
+ (do @
+ [[sub-comment sub-where] (multi-line-comment^ where)]
+ (wrap [(format comment "#(" sub-comment ")#")
+ sub-where]))))
+ _ (l;text ")#")]
+ (wrap [comment
+ (|> end
+ (update@ #;column (n.+ +2)))])))
-(def: padded^
- (All [a] (-> (Lexer a) (Lexer a)))
- (let [padding^ (l;either space^ comment^)]
- (|>. (l;before padding^)
- (l;after padding^))))
+(def: (comment^ where)
+ (-> Cursor (Lexer [Text Cursor]))
+ (l;either (single-line-comment^ where)
+ (multi-line-comment^ where)))
(def: escaped-char^
- (Lexer Char)
+ (Lexer [Text Char])
(l;after (l;char #"\\")
(do Monad<Lexer>
[code l;any]
(case code
- #"t" (wrap #"\t")
- #"v" (wrap #"\v")
- #"b" (wrap #"\b")
- #"n" (wrap #"\n")
- #"r" (wrap #"\r")
- #"f" (wrap #"\f")
- #"\"" (wrap #"\"")
- #"\\" (wrap #"\\")
+ #"t" (wrap ["\\t" #"\t"])
+ #"v" (wrap ["\\v" #"\v"])
+ #"b" (wrap ["\\b" #"\b"])
+ #"n" (wrap ["\\n" #"\n"])
+ #"r" (wrap ["\\r" #"\r"])
+ #"f" (wrap ["\\f" #"\f"])
+ #"\"" (wrap ["\\\"" #"\""])
+ #"\\" (wrap ["\\\\" #"\\"])
#"u"
(do Monad<Lexer>
@@ -69,7 +103,7 @@
(wrap (case (:: number;Hex@Codec<Text,Nat> decode
(format "+" code))
(#;Right value)
- (char;char value)
+ [(format "\\u" code) (char;char value)]
_
(undefined))))
@@ -78,70 +112,137 @@
(l;fail (format "Invalid escaping syntax: " (%c code)))))))
(def: raw-char^
- (Lexer Char)
- (l;either (l;none-of "\\\"\n")
+ (Lexer [Text Char])
+ (l;either (do Monad<Lexer>
+ [char (l;none-of "\\\"\n")]
+ (wrap [(char;as-text char) char]))
escaped-char^))
-(do-template [<name> <tag> <lexer>]
- [(def: <name>
- (Lexer AST)
+(do-template [<name> <tag> <lexer> <codec>]
+ [(def: (<name> where)
+ (-> Cursor (Lexer [AST Cursor]))
(do Monad<Lexer>
- [value <lexer>]
- (wrap [default-cursor (<tag> value)])))]
+ [chunk <lexer>]
+ (case (:: <codec> decode chunk)
+ (#;Left error)
+ (l;fail error)
+
+ (#;Right value)
+ (wrap [[default-cursor (<tag> value)]
+ (|> where
+ (update@ #;column (n.+ (text;size chunk))))]))))]
[bool^ #;BoolS
- (l;either (l;after (l;text "true") (l/wrap true))
- (l;after (l;text "false") (l/wrap false)))]
+ (l;either (l;text "true") (l;text "false"))
+ bool;Codec<Text,Bool>]
[nat^ #;NatS
- (l;codec number;Codec<Text,Nat>
- (do @
- [sign (l;text "+")
- digits (l;many' l;digit)]
- (wrap (format sign digits))))]
+ (l;seq' (l;text "+") (l;many' l;digit))
+ number;Codec<Text,Nat>]
[int^ #;IntS
- (l;codec number;Codec<Text,Int>
- (do @
- [sign (l;opt (l;text "-"))
- digits (l;many' l;digit)]
- (wrap (format (default "" sign)
- digits))))]
+ (l;seq' (l;default "" (l;text "-"))
+ (l;many' l;digit))
+ number;Codec<Text,Int>]
[real^ #;RealS
- (l;codec number;Codec<Text,Real>
- (do @
- [sign (l;opt (l;text "-"))
- whole (l;many' l;digit)
- _ (l;text ".")
- frac (l;many' l;digit)]
- (wrap (format (default "" sign)
- whole
- "."
- frac))))]
+ ($_ l;seq'
+ (l;default "" (l;text "-"))
+ (l;many' l;digit)
+ (l;text ".")
+ (l;many' l;digit))
+ number;Codec<Text,Real>]
[deg^ #;DegS
- (l;codec number;Codec<Text,Deg>
- (do @
- [_ (l;text ".")
- frac (l;many' l;digit)]
- (wrap (format "." frac))))]
- [char^ #;CharS
- (l;enclosed ["#\"" "\""] raw-char^)]
- [text^ #;TextS
- (l;enclosed ["\"" "\""]
- (l;some' raw-char^))]
+ (l;seq' (l;text ".")
+ (l;many' l;digit))
+ number;Codec<Text,Deg>]
)
-(do-template [<name> <tag> <open> <close> <lexer>]
- [(def: (<name> ast^)
- (-> (Lexer AST) (Lexer AST))
+(def: (char^ where)
+ (-> Cursor (Lexer [AST Cursor]))
+ (do Monad<Lexer>
+ [[chunk value] (l;enclosed ["#\"" "\""]
+ raw-char^)]
+ (wrap [[default-cursor (#;CharS value)]
+ (|> where
+ (update@ #;column (function [column]
+ ($_ n.+
+ +3
+ column
+ (text;size chunk)))))])))
+
+(def: (text^ where)
+ (-> Cursor (Lexer [AST Cursor]))
+ (do Monad<Lexer>
+ [_ (l;text "\"")
+ [columns-read text-read] (loop [columns-read +1
+ text-read ""]
+ ($_ l;either
+ (do @
+ [normal (l;many' (l;none-of "\\\"\n"))]
+ (recur (n.+ columns-read (text;size normal))
+ (format text-read normal)))
+ (do @
+ [_ (l;text "\"")]
+ (wrap [(n.inc columns-read)
+ text-read]))
+ (do @
+ [[chunk char] escaped-char^]
+ (wrap [(n.+ columns-read (text;size chunk))
+ (format text-read (char;as-text char))]))))]
+ (wrap [[default-cursor (#;TextS text-read)]
+ (|> where
+ (update@ #;column (n.+ columns-read)))])))
+
+(do-template [<name> <tag> <open> <close>]
+ [(def: (<name> where ast^)
+ (-> Cursor
+ (-> Cursor (Lexer [AST Cursor]))
+ (Lexer [AST Cursor]))
(do Monad<Lexer>
- [elems (l;enclosed [<open> <close>]
- (l;some <lexer>))]
- (wrap [default-cursor (<tag> elems)])))]
+ [_ (l;text <open>)
+ [elems where'] (loop [elems (: (V;Vector AST)
+ V;empty)
+ where where]
+ (l;either (do @
+ [[elem where'] (ast^ where)]
+ (recur (V;add elem elems)
+ where'))
+ (do @
+ [[_ where'] (l;either (space^ where)
+ (comment^ where))
+ _ (l;text <close>)]
+ (wrap [(V;to-list elems)
+ (|> where'
+ (update@ #;column n.inc))]))))]
+ (wrap [[default-cursor (<tag> elems)]
+ where'])))]
- [form^ #;FormS "(" ")" ast^]
- [tuple^ #;TupleS "[" "]" ast^]
- [record^ #;RecordS "{" "}" (l;seq ast^ ast^)]
+ [form^ #;FormS "(" ")"]
+ [tuple^ #;TupleS "[" "]"]
)
+(def: (record^ where ast^)
+ (-> Cursor
+ (-> Cursor (Lexer [AST Cursor]))
+ (Lexer [AST Cursor]))
+ (do Monad<Lexer>
+ [_ (l;text "{")
+ [elems where'] (loop [elems (: (V;Vector [AST AST])
+ V;empty)
+ where where]
+ (l;either (do @
+ [[key where] (ast^ where)
+ [val where'] (ast^ where)]
+ (recur (V;add [key val] elems)
+ where'))
+ (do @
+ [[_ where'] (l;either (space^ where)
+ (comment^ where))
+ _ (l;text "}")]
+ (wrap [(V;to-list elems)
+ (|> where'
+ (update@ #;column n.inc))]))))]
+ (wrap [[default-cursor (#;RecordS elems)]
+ where'])))
+
(def: ident-part^
(Lexer Text)
(do Monad<Lexer>
@@ -156,49 +257,59 @@
tail))))
(def: ident^
- (Lexer Ident)
+ (Lexer [Ident Nat])
($_ l;either
(do Monad<Lexer>
[_ (l;text ";;")
def-name ident-part^]
(l;fail "Cannot handle ;; syntax for identifiers."))
- (l;seq (l;after (l;text ";")
- (l/wrap "lux"))
- ident-part^)
+ (do Monad<Lexer>
+ [_ (l;text ";")
+ def-name ident-part^]
+ (wrap [["lux" def-name]
+ (n.inc (text;size def-name))]))
(do Monad<Lexer>
[first-part ident-part^]
(l;either (do @
[_ (l;char #";")
second-part ident-part^]
- (wrap [first-part second-part]))
- (wrap ["" first-part])))))
+ (wrap [[first-part second-part]
+ ($_ n.+
+ (text;size first-part)
+ +1
+ (text;size second-part))]))
+ (wrap [["" first-part]
+ (text;size first-part)])))))
(do-template [<name> <tag> <lexer>]
- [(def: <name>
- (Lexer AST)
+ [(def: (<name> where)
+ (-> Cursor (Lexer [AST Cursor]))
(do Monad<Lexer>
- [value <lexer>]
- (wrap [default-cursor (<tag> value)])))]
+ [[value length] <lexer>]
+ (wrap [[default-cursor (<tag> value)]
+ (|> where
+ (update@ #;column (n.+ length)))])))]
[symbol^ #;SymbolS ident^]
[tag^ #;TagS (l;after (l;char #"#") ident^)]
)
-(def: #export ast^
- (Lexer AST)
- (l;rec (function [ast^]
- (padded^
- ($_ l;either
- bool^
- nat^
- real^
- int^
- deg^
- char^
- text^
- symbol^
- tag^
- (form^ ast^)
- (tuple^ ast^)
- (record^ ast^)
- )))))
+(def: #export (ast^ where)
+ (-> Cursor (Lexer [AST Cursor]))
+ (do Monad<Lexer>
+ [[_ where] (l;either (space^ where)
+ (comment^ where))]
+ ($_ l;either
+ (bool^ where)
+ (nat^ where)
+ (real^ where)
+ (int^ where)
+ (deg^ where)
+ (char^ where)
+ (text^ where)
+ (symbol^ where)
+ (tag^ where)
+ (form^ where ast^)
+ (tuple^ where ast^)
+ (record^ where ast^)
+ )))