aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/lux/data/text/lexer.lux
diff options
context:
space:
mode:
authorEduardo Julian2017-04-05 18:05:23 -0400
committerEduardo Julian2017-04-05 18:05:23 -0400
commitadf8aeaa2a52760e294e08618e7aca5fc371fc0f (patch)
tree9247cd321dce5e2bae58d42f8ae08fe27a1a3224 /stdlib/source/lux/data/text/lexer.lux
parent6f87d469fa427dbaaaa13c0ef22626801f3f03e9 (diff)
- Moved lux/lexer and lux/lexer/regex to lux/data/text/lexer and lux/data/text/regex.
- Moved lux/pipe to lux/control/pipe. - Moved the @pre and @post macros to lux/control/contract. - Improved error reporting for lux/type/auto. - Added a test for third-order type-checking for lux/type/auto. - Fixed a bug in the tests for lux/data/coll/vector.
Diffstat (limited to 'stdlib/source/lux/data/text/lexer.lux')
-rw-r--r--stdlib/source/lux/data/text/lexer.lux502
1 files changed, 502 insertions, 0 deletions
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux
new file mode 100644
index 000000000..e28cb0a68
--- /dev/null
+++ b/stdlib/source/lux/data/text/lexer.lux
@@ -0,0 +1,502 @@
+(;module:
+ [lux #- not default]
+ (lux (control functor
+ applicative
+ monad
+ codec)
+ (data [text "Text/" Eq<Text> Monoid<Text>]
+ [number "Int/" Codec<Text,Int>]
+ [product]
+ [char "Char/" Order<Char>]
+ maybe
+ ["E" error #- fail]
+ (coll [list "" Functor<List>]))))
+
+## [Types]
+(type: #export (Lexer a)
+ (-> Text (Error [Text a])))
+
+## [Structures]
+(struct: #export _ (Functor Lexer)
+ (def: (map f fa)
+ (lambda [input]
+ (case (fa input)
+ (#E;Error msg) (#E;Error msg)
+ (#E;Success [input' output]) (#E;Success [input' (f output)])))))
+
+(struct: #export _ (Applicative Lexer)
+ (def: functor Functor<Lexer>)
+
+ (def: (wrap a)
+ (lambda [input]
+ (#E;Success [input a])))
+
+ (def: (apply ff fa)
+ (lambda [input]
+ (case (ff input)
+ (#E;Success [input' f])
+ (case (fa input')
+ (#E;Success [input'' a])
+ (#E;Success [input'' (f a)])
+
+ (#E;Error msg)
+ (#E;Error msg))
+
+ (#E;Error msg)
+ (#E;Error msg)))))
+
+(struct: #export _ (Monad Lexer)
+ (def: applicative Applicative<Lexer>)
+
+ (def: (join mma)
+ (lambda [input]
+ (case (mma input)
+ (#E;Error msg) (#E;Error msg)
+ (#E;Success [input' ma]) (ma input'))))
+ )
+
+## [Values]
+## Runner
+(def: #export (run' input lexer)
+ (All [a] (-> Text (Lexer a) (Error [Text a])))
+ (lexer input))
+
+(def: #export (run input lexer)
+ (All [a] (-> Text (Lexer a) (Error a)))
+ (case (lexer input)
+ (#E;Error msg)
+ (#E;Error msg)
+
+ (#E;Success [input' output])
+ (#E;Success output)
+ ))
+
+## Combinators
+(def: #export (fail message)
+ (All [a] (-> Text (Lexer a)))
+ (lambda [input]
+ (#E;Error message)))
+
+(def: #export any
+ {#;doc "Just returns the next character without applying any logic."}
+ (Lexer Char)
+ (lambda [input]
+ (case [(text;nth +0 input) (text;split +1 input)]
+ [(#;Some output) (#;Some [_ input'])]
+ (#E;Success [input' output])
+
+ _
+ (#E;Error "Can't parse character from empty text."))
+ ))
+
+(def: #export (seq left right)
+ {#;doc "Sequencing combinator."}
+ (All [a b] (-> (Lexer a) (Lexer b) (Lexer [a b])))
+ (do Monad<Lexer>
+ [=left left
+ =right right]
+ (wrap [=left =right])))
+
+(def: #export (alt left right)
+ {#;doc "Heterogeneous alternative combinator."}
+ (All [a b] (-> (Lexer a) (Lexer b) (Lexer (| a b))))
+ (lambda [input]
+ (case (left input)
+ (#E;Error msg)
+ (case (right input)
+ (#E;Error msg)
+ (#E;Error msg)
+
+ (#E;Success [input' output])
+ (#E;Success [input' (+1 output)]))
+
+ (#E;Success [input' output])
+ (#E;Success [input' (+0 output)]))))
+
+(def: #export (not! p)
+ {#;doc "Ensure a lexer fails."}
+ (All [a] (-> (Lexer a) (Lexer Unit)))
+ (lambda [input]
+ (case (p input)
+ (#E;Error msg)
+ (#E;Success [input []])
+
+ _
+ (#E;Error "Expected to fail; yet succeeded."))))
+
+(def: #export (not p)
+ {#;doc "Produce a character if the lexer fails."}
+ (All [a] (-> (Lexer a) (Lexer Char)))
+ (lambda [input]
+ (case (p input)
+ (#E;Error msg)
+ (any input)
+
+ _
+ (#E;Error "Expected to fail; yet succeeded."))))
+
+(def: #export (either left right)
+ {#;doc "Homogeneous alternative combinator."}
+ (All [a] (-> (Lexer a) (Lexer a) (Lexer a)))
+ (lambda [input]
+ (case (left input)
+ (#E;Error msg)
+ (right input)
+
+ output
+ output)))
+
+(def: #export (assert message test)
+ {#;doc "Fails with the given message if the test is false."}
+ (-> Text Bool (Lexer Unit))
+ (lambda [input]
+ (if test
+ (#E;Success [input []])
+ (#E;Error message))))
+
+(def: #export (some p)
+ {#;doc "0-or-more combinator."}
+ (All [a] (-> (Lexer a) (Lexer (List a))))
+ (lambda [input]
+ (case (p input)
+ (#E;Error msg)
+ (#E;Success [input (list)])
+
+ (#E;Success [input' x])
+ (run' input'
+ (do Monad<Lexer>
+ [xs (some p)]
+ (wrap (#;Cons x xs)))))
+ ))
+
+(def: #export (many p)
+ {#;doc "1-or-more combinator."}
+ (All [a] (-> (Lexer a) (Lexer (List a))))
+ (do Monad<Lexer>
+ [x p
+ xs (some p)]
+ (wrap (#;Cons x xs))))
+
+(def: #export (exactly n p)
+ {#;doc "Lex exactly N times."}
+ (All [a] (-> Nat (Lexer a) (Lexer (List a))))
+ (if (n.> +0 n)
+ (do Monad<Lexer>
+ [x p
+ xs (exactly (n.dec n) p)]
+ (wrap (#;Cons x xs)))
+ (:: Monad<Lexer> wrap (list))))
+
+(def: #export (at-most n p)
+ {#;doc "Lex at most N times."}
+ (All [a] (-> Nat (Lexer a) (Lexer (List a))))
+ (if (n.> +0 n)
+ (lambda [input]
+ (case (p input)
+ (#E;Error msg)
+ (#E;Success [input (list)])
+
+ (#E;Success [input' x])
+ (run' input'
+ (do Monad<Lexer>
+ [xs (at-most (n.dec n) p)]
+ (wrap (#;Cons x xs))))
+ ))
+ (:: Monad<Lexer> wrap (list))))
+
+(def: #export (at-least n p)
+ {#;doc "Lex at least N times."}
+ (All [a] (-> Nat (Lexer a) (Lexer (List a))))
+ (do Monad<Lexer>
+ [min-xs (exactly n p)
+ extras (some p)]
+ (wrap (list;concat (list min-xs extras)))))
+
+(def: #export (between from to p)
+ {#;doc "Lex between N and M times."}
+ (All [a] (-> Nat Nat (Lexer a) (Lexer (List a))))
+ (do Monad<Lexer>
+ [min-xs (exactly from p)
+ max-xs (at-most (n.- from to) p)]
+ (wrap (list;concat (list min-xs max-xs)))))
+
+(def: #export (opt p)
+ {#;doc "Optionality combinator."}
+ (All [a] (-> (Lexer a) (Lexer (Maybe a))))
+ (lambda [input]
+ (case (p input)
+ (#E;Error msg)
+ (#E;Success [input #;None])
+
+ (#E;Success [input value])
+ (#E;Success [input (#;Some value)])
+ )))
+
+(def: #export (text test)
+ {#;doc "Lex a text if it matches the given sample."}
+ (-> Text (Lexer Text))
+ (lambda [input]
+ (if (text;starts-with? test input)
+ (case (text;split (text;size test) input)
+ #;None (#E;Error "")
+ (#;Some [_ input']) (#E;Success [input' test]))
+ (#E;Error ($_ Text/append "Invalid match: " test " @ " (:: text;Codec<Text,Text> encode input))))
+ ))
+
+(def: #export (sep-by sep lexer)
+ {#;doc "Apply a lexer multiple times, checking that a separator lexer succeeds between each time."}
+ (All [a b] (-> (Lexer b) (Lexer a) (Lexer (List a))))
+ (do Monad<Lexer>
+ [?x (opt lexer)]
+ (case ?x
+ #;None
+ (wrap #;Nil)
+
+ (#;Some x)
+ (do @
+ [xs' (some (seq sep lexer))]
+ (wrap (#;Cons x (map product;right xs'))))
+ )))
+
+(def: #export end
+ {#;doc "Ensure the lexer's input is empty."}
+ (Lexer Unit)
+ (lambda [input]
+ (case input
+ "" (#E;Success [input []])
+ _ (#E;Error ($_ Text/append "The text input has not been fully consumed @ " (:: text;Codec<Text,Text> encode input)))
+ )))
+
+(def: #export peek
+ {#;doc "Lex the next character (without consuming it from the input)."}
+ (Lexer Char)
+ (lambda [input]
+ (case (text;nth +0 input)
+ (#;Some output)
+ (#E;Success [input output])
+
+ _
+ (#E;Error "Can't peek character from empty text."))
+ ))
+
+(def: #export (char test)
+ {#;doc "Lex a character if it matches the given sample."}
+ (-> Char (Lexer Char))
+ (lambda [input]
+ (case [(text;nth +0 input) (text;split +1 input)]
+ [(#;Some char') (#;Some [_ input'])]
+ (if (Char/= test char')
+ (#E;Success [input' test])
+ (#E;Error ($_ Text/append "Expected " (:: char;Codec<Text,Char> encode test) " @ " (:: text;Codec<Text,Text> encode input))))
+
+ _
+ (#E;Error "Can't parse character from empty text."))
+ ))
+
+(def: #export get-input
+ {#;doc "Get all of the remaining input (without consuming it)."}
+ (Lexer Text)
+ (lambda [input]
+ (#E;Success [input input])))
+
+(def: #export (char-range bottom top)
+ {#;doc "Only lex characters within a range."}
+ (-> Char Char (Lexer Char))
+ (do Monad<Lexer>
+ [input get-input
+ char any
+ _ (assert ($_ Text/append "Character is not within range: " (:: char;Codec<Text,Char> encode bottom) "-" (:: char;Codec<Text,Char> encode top) " @ " (:: text;Codec<Text,Text> encode input))
+ (and (Char/>= bottom char)
+ (Char/<= top char)))]
+ (wrap char)))
+
+(do-template [<name> <bottom> <top> <desc>]
+ [(def: #export <name>
+ {#;doc (#;TextA ($_ Text/append "Only lex " <desc> " characters."))}
+ (Lexer Char)
+ (char-range <bottom> <top>))]
+
+ [upper #"A" #"Z" "uppercase"]
+ [lower #"a" #"z" "lowercase"]
+ [digit #"0" #"9" "decimal"]
+ [oct-digit #"0" #"7" "octal"]
+ )
+
+(def: #export alpha
+ {#;doc "Only lex alphabetic characters."}
+ (Lexer Char)
+ (either lower upper))
+
+(def: #export alpha-num
+ {#;doc "Only lex alphanumeric characters."}
+ (Lexer Char)
+ (either alpha digit))
+
+(def: #export hex-digit
+ {#;doc "Only lex hexadecimal digits."}
+ (Lexer Char)
+ ($_ either
+ digit
+ (char-range #"a" #"f")
+ (char-range #"A" #"F")))
+
+(def: #export (one-of options)
+ {#;doc "Only lex characters that are part of a piece of text."}
+ (-> Text (Lexer Char))
+ (lambda [input]
+ (case (text;split +1 input)
+ (#;Some [init input'])
+ (if (text;contains? init options)
+ (case (text;nth +0 init)
+ (#;Some output)
+ (#E;Success [input' output])
+
+ _
+ (#E;Error ""))
+ (#E;Error ($_ Text/append "Character (" init ") is not one of: " options " @ " (:: text;Codec<Text,Text> encode input))))
+
+ _
+ (#E;Error "Can't parse character from empty text."))))
+
+(def: #export (none-of options)
+ {#;doc "Only lex characters that aren't part of a piece of text."}
+ (-> Text (Lexer Char))
+ (lambda [input]
+ (case (text;split +1 input)
+ (#;Some [init input'])
+ (if (;not (text;contains? init options))
+ (case (text;nth +0 init)
+ (#;Some output)
+ (#E;Success [input' output])
+
+ _
+ (#E;Error ""))
+ (#E;Error ($_ Text/append "Character (" init ") is one of: " options " @ " (:: text;Codec<Text,Text> encode input))))
+
+ _
+ (#E;Error "Can't parse character from empty text."))))
+
+(def: #export (satisfies p)
+ {#;doc "Only lex characters that satisfy a predicate."}
+ (-> (-> Char Bool) (Lexer Char))
+ (lambda [input]
+ (case (: (Maybe [Text Char])
+ (do Monad<Maybe>
+ [[init input'] (text;split +1 input)
+ output (text;nth +0 init)]
+ (wrap [input' output])))
+ (#;Some [input' output])
+ (if (p output)
+ (#E;Success [input' output])
+ (#E;Error ($_ Text/append "Character does not satisfy predicate: " (:: text;Codec<Text,Text> encode input))))
+
+ _
+ (#E;Error "Can't parse character from empty text."))))
+
+(def: #export space
+ {#;doc "Only lex white-space."}
+ (Lexer Char)
+ (satisfies char;space?))
+
+(do-template [<name> <base> <doc>]
+ [(def: #export (<name> p)
+ {#;doc <doc>}
+ (-> (Lexer Char) (Lexer Text))
+ (do Monad<Lexer>
+ [cs (<base> p)]
+ (wrap (text;concat (map char;as-text cs)))))]
+
+ [some' some "Lex some characters as a single continuous text."]
+ [many' many "Lex many characters as a single continuous text."]
+ )
+
+(do-template [<name> <base> <doc>]
+ [(def: #export (<name> n p)
+ {#;doc <doc>}
+ (-> Nat (Lexer Char) (Lexer Text))
+ (do Monad<Lexer>
+ [cs (<base> n p)]
+ (wrap (text;concat (map char;as-text cs)))))]
+
+ [exactly' exactly "Lex exactly N characters."]
+ [at-most' at-most "Lex at most N characters."]
+ [at-least' at-least "Lex at least N characters."]
+ )
+
+(def: #export (between' from to p)
+ {#;doc "Lex between N and M characters."}
+ (-> Nat Nat (Lexer Char) (Lexer Text))
+ (do Monad<Lexer>
+ [cs (between from to p)]
+ (wrap (text;concat (map char;as-text cs)))))
+
+(def: #export end?
+ {#;doc "Ask if the lexer's input is empty."}
+ (Lexer Bool)
+ (lambda [input]
+ (#E;Success [input (text;empty? input)])))
+
+(def: #export (_& left right)
+ (All [a b] (-> (Lexer a) (Lexer b) (Lexer b)))
+ (do Monad<Lexer>
+ [_ left]
+ right))
+
+(def: #export (&_ left right)
+ (All [a b] (-> (Lexer a) (Lexer b) (Lexer a)))
+ (do Monad<Lexer>
+ [output left
+ _ right]
+ (wrap output)))
+
+(def: #export (default value lexer)
+ {#;doc "If the given lexer fails, this lexer will succeed with the provided value."}
+ (All [a] (-> a (Lexer a) (Lexer a)))
+ (lambda [input]
+ (case (lexer input)
+ (#E;Error error)
+ (#E;Success [input value])
+
+ (#E;Success input'+value)
+ (#E;Success input'+value))))
+
+(def: #export (codec codec lexer)
+ {#;doc "Lex a token by means of a codec."}
+ (All [a] (-> (Codec Text a) (Lexer Text) (Lexer a)))
+ (lambda [input]
+ (case (lexer input)
+ (#E;Error error)
+ (#E;Error error)
+
+ (#E;Success [input' to-decode])
+ (case (:: codec decode to-decode)
+ (#E;Error error)
+ (#E;Error error)
+
+ (#E;Success value)
+ (#E;Success [input' value])))))
+
+(def: #export (enclosed [start end] lexer)
+ (All [a] (-> [Text Text] (Lexer a) (Lexer a)))
+ (_& (text start)
+ (&_ lexer
+ (text end))))
+
+(def: #export (rec lexer)
+ (All [a] (-> (-> (Lexer a) (Lexer a))
+ (Lexer a)))
+ (lambda [input]
+ (run' input (lexer (rec lexer)))))
+
+(def: #export (local local-input lexer)
+ {#;doc "Run a lexer with the given input, instead of the real one."}
+ (All [a] (-> Text (Lexer a) (Lexer a)))
+ (lambda [real-input]
+ (case (run' local-input lexer)
+ (#E;Error error)
+ (#E;Error error)
+
+ (#E;Success [unconsumed value])
+ (if (Text/= "" unconsumed)
+ (#E;Success [real-input value])
+ (#E;Error ($_ Text/append "Unconsumed input: " unconsumed))))))