diff options
author | Eduardo Julian | 2017-04-05 18:05:23 -0400 |
---|---|---|
committer | Eduardo Julian | 2017-04-05 18:05:23 -0400 |
commit | adf8aeaa2a52760e294e08618e7aca5fc371fc0f (patch) | |
tree | 9247cd321dce5e2bae58d42f8ae08fe27a1a3224 /stdlib/source/lux/data/text/lexer.lux | |
parent | 6f87d469fa427dbaaaa13c0ef22626801f3f03e9 (diff) |
- Moved lux/lexer and lux/lexer/regex to lux/data/text/lexer and lux/data/text/regex.
- Moved lux/pipe to lux/control/pipe.
- Moved the @pre and @post macros to lux/control/contract.
- Improved error reporting for lux/type/auto.
- Added a test for third-order type-checking for lux/type/auto.
- Fixed a bug in the tests for lux/data/coll/vector.
Diffstat (limited to 'stdlib/source/lux/data/text/lexer.lux')
-rw-r--r-- | stdlib/source/lux/data/text/lexer.lux | 502 |
1 files changed, 502 insertions, 0 deletions
diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux new file mode 100644 index 000000000..e28cb0a68 --- /dev/null +++ b/stdlib/source/lux/data/text/lexer.lux @@ -0,0 +1,502 @@ +(;module: + [lux #- not default] + (lux (control functor + applicative + monad + codec) + (data [text "Text/" Eq<Text> Monoid<Text>] + [number "Int/" Codec<Text,Int>] + [product] + [char "Char/" Order<Char>] + maybe + ["E" error #- fail] + (coll [list "" Functor<List>])))) + +## [Types] +(type: #export (Lexer a) + (-> Text (Error [Text a]))) + +## [Structures] +(struct: #export _ (Functor Lexer) + (def: (map f fa) + (lambda [input] + (case (fa input) + (#E;Error msg) (#E;Error msg) + (#E;Success [input' output]) (#E;Success [input' (f output)]))))) + +(struct: #export _ (Applicative Lexer) + (def: functor Functor<Lexer>) + + (def: (wrap a) + (lambda [input] + (#E;Success [input a]))) + + (def: (apply ff fa) + (lambda [input] + (case (ff input) + (#E;Success [input' f]) + (case (fa input') + (#E;Success [input'' a]) + (#E;Success [input'' (f a)]) + + (#E;Error msg) + (#E;Error msg)) + + (#E;Error msg) + (#E;Error msg))))) + +(struct: #export _ (Monad Lexer) + (def: applicative Applicative<Lexer>) + + (def: (join mma) + (lambda [input] + (case (mma input) + (#E;Error msg) (#E;Error msg) + (#E;Success [input' ma]) (ma input')))) + ) + +## [Values] +## Runner +(def: #export (run' input lexer) + (All [a] (-> Text (Lexer a) (Error [Text a]))) + (lexer input)) + +(def: #export (run input lexer) + (All [a] (-> Text (Lexer a) (Error a))) + (case (lexer input) + (#E;Error msg) + (#E;Error msg) + + (#E;Success [input' output]) + (#E;Success output) + )) + +## Combinators +(def: #export (fail message) + (All [a] (-> Text (Lexer a))) + (lambda [input] + (#E;Error message))) + +(def: #export any + {#;doc "Just returns the next character without applying any logic."} + (Lexer Char) + (lambda [input] + (case [(text;nth +0 input) (text;split +1 input)] + [(#;Some output) (#;Some [_ input'])] + (#E;Success [input' output]) + + _ + (#E;Error "Can't parse character from empty text.")) + )) + +(def: #export (seq left right) + {#;doc "Sequencing combinator."} + (All [a b] (-> (Lexer a) (Lexer b) (Lexer [a b]))) + (do Monad<Lexer> + [=left left + =right right] + (wrap [=left =right]))) + +(def: #export (alt left right) + {#;doc "Heterogeneous alternative combinator."} + (All [a b] (-> (Lexer a) (Lexer b) (Lexer (| a b)))) + (lambda [input] + (case (left input) + (#E;Error msg) + (case (right input) + (#E;Error msg) + (#E;Error msg) + + (#E;Success [input' output]) + (#E;Success [input' (+1 output)])) + + (#E;Success [input' output]) + (#E;Success [input' (+0 output)])))) + +(def: #export (not! p) + {#;doc "Ensure a lexer fails."} + (All [a] (-> (Lexer a) (Lexer Unit))) + (lambda [input] + (case (p input) + (#E;Error msg) + (#E;Success [input []]) + + _ + (#E;Error "Expected to fail; yet succeeded.")))) + +(def: #export (not p) + {#;doc "Produce a character if the lexer fails."} + (All [a] (-> (Lexer a) (Lexer Char))) + (lambda [input] + (case (p input) + (#E;Error msg) + (any input) + + _ + (#E;Error "Expected to fail; yet succeeded.")))) + +(def: #export (either left right) + {#;doc "Homogeneous alternative combinator."} + (All [a] (-> (Lexer a) (Lexer a) (Lexer a))) + (lambda [input] + (case (left input) + (#E;Error msg) + (right input) + + output + output))) + +(def: #export (assert message test) + {#;doc "Fails with the given message if the test is false."} + (-> Text Bool (Lexer Unit)) + (lambda [input] + (if test + (#E;Success [input []]) + (#E;Error message)))) + +(def: #export (some p) + {#;doc "0-or-more combinator."} + (All [a] (-> (Lexer a) (Lexer (List a)))) + (lambda [input] + (case (p input) + (#E;Error msg) + (#E;Success [input (list)]) + + (#E;Success [input' x]) + (run' input' + (do Monad<Lexer> + [xs (some p)] + (wrap (#;Cons x xs))))) + )) + +(def: #export (many p) + {#;doc "1-or-more combinator."} + (All [a] (-> (Lexer a) (Lexer (List a)))) + (do Monad<Lexer> + [x p + xs (some p)] + (wrap (#;Cons x xs)))) + +(def: #export (exactly n p) + {#;doc "Lex exactly N times."} + (All [a] (-> Nat (Lexer a) (Lexer (List a)))) + (if (n.> +0 n) + (do Monad<Lexer> + [x p + xs (exactly (n.dec n) p)] + (wrap (#;Cons x xs))) + (:: Monad<Lexer> wrap (list)))) + +(def: #export (at-most n p) + {#;doc "Lex at most N times."} + (All [a] (-> Nat (Lexer a) (Lexer (List a)))) + (if (n.> +0 n) + (lambda [input] + (case (p input) + (#E;Error msg) + (#E;Success [input (list)]) + + (#E;Success [input' x]) + (run' input' + (do Monad<Lexer> + [xs (at-most (n.dec n) p)] + (wrap (#;Cons x xs)))) + )) + (:: Monad<Lexer> wrap (list)))) + +(def: #export (at-least n p) + {#;doc "Lex at least N times."} + (All [a] (-> Nat (Lexer a) (Lexer (List a)))) + (do Monad<Lexer> + [min-xs (exactly n p) + extras (some p)] + (wrap (list;concat (list min-xs extras))))) + +(def: #export (between from to p) + {#;doc "Lex between N and M times."} + (All [a] (-> Nat Nat (Lexer a) (Lexer (List a)))) + (do Monad<Lexer> + [min-xs (exactly from p) + max-xs (at-most (n.- from to) p)] + (wrap (list;concat (list min-xs max-xs))))) + +(def: #export (opt p) + {#;doc "Optionality combinator."} + (All [a] (-> (Lexer a) (Lexer (Maybe a)))) + (lambda [input] + (case (p input) + (#E;Error msg) + (#E;Success [input #;None]) + + (#E;Success [input value]) + (#E;Success [input (#;Some value)]) + ))) + +(def: #export (text test) + {#;doc "Lex a text if it matches the given sample."} + (-> Text (Lexer Text)) + (lambda [input] + (if (text;starts-with? test input) + (case (text;split (text;size test) input) + #;None (#E;Error "") + (#;Some [_ input']) (#E;Success [input' test])) + (#E;Error ($_ Text/append "Invalid match: " test " @ " (:: text;Codec<Text,Text> encode input)))) + )) + +(def: #export (sep-by sep lexer) + {#;doc "Apply a lexer multiple times, checking that a separator lexer succeeds between each time."} + (All [a b] (-> (Lexer b) (Lexer a) (Lexer (List a)))) + (do Monad<Lexer> + [?x (opt lexer)] + (case ?x + #;None + (wrap #;Nil) + + (#;Some x) + (do @ + [xs' (some (seq sep lexer))] + (wrap (#;Cons x (map product;right xs')))) + ))) + +(def: #export end + {#;doc "Ensure the lexer's input is empty."} + (Lexer Unit) + (lambda [input] + (case input + "" (#E;Success [input []]) + _ (#E;Error ($_ Text/append "The text input has not been fully consumed @ " (:: text;Codec<Text,Text> encode input))) + ))) + +(def: #export peek + {#;doc "Lex the next character (without consuming it from the input)."} + (Lexer Char) + (lambda [input] + (case (text;nth +0 input) + (#;Some output) + (#E;Success [input output]) + + _ + (#E;Error "Can't peek character from empty text.")) + )) + +(def: #export (char test) + {#;doc "Lex a character if it matches the given sample."} + (-> Char (Lexer Char)) + (lambda [input] + (case [(text;nth +0 input) (text;split +1 input)] + [(#;Some char') (#;Some [_ input'])] + (if (Char/= test char') + (#E;Success [input' test]) + (#E;Error ($_ Text/append "Expected " (:: char;Codec<Text,Char> encode test) " @ " (:: text;Codec<Text,Text> encode input)))) + + _ + (#E;Error "Can't parse character from empty text.")) + )) + +(def: #export get-input + {#;doc "Get all of the remaining input (without consuming it)."} + (Lexer Text) + (lambda [input] + (#E;Success [input input]))) + +(def: #export (char-range bottom top) + {#;doc "Only lex characters within a range."} + (-> Char Char (Lexer Char)) + (do Monad<Lexer> + [input get-input + char any + _ (assert ($_ Text/append "Character is not within range: " (:: char;Codec<Text,Char> encode bottom) "-" (:: char;Codec<Text,Char> encode top) " @ " (:: text;Codec<Text,Text> encode input)) + (and (Char/>= bottom char) + (Char/<= top char)))] + (wrap char))) + +(do-template [<name> <bottom> <top> <desc>] + [(def: #export <name> + {#;doc (#;TextA ($_ Text/append "Only lex " <desc> " characters."))} + (Lexer Char) + (char-range <bottom> <top>))] + + [upper #"A" #"Z" "uppercase"] + [lower #"a" #"z" "lowercase"] + [digit #"0" #"9" "decimal"] + [oct-digit #"0" #"7" "octal"] + ) + +(def: #export alpha + {#;doc "Only lex alphabetic characters."} + (Lexer Char) + (either lower upper)) + +(def: #export alpha-num + {#;doc "Only lex alphanumeric characters."} + (Lexer Char) + (either alpha digit)) + +(def: #export hex-digit + {#;doc "Only lex hexadecimal digits."} + (Lexer Char) + ($_ either + digit + (char-range #"a" #"f") + (char-range #"A" #"F"))) + +(def: #export (one-of options) + {#;doc "Only lex characters that are part of a piece of text."} + (-> Text (Lexer Char)) + (lambda [input] + (case (text;split +1 input) + (#;Some [init input']) + (if (text;contains? init options) + (case (text;nth +0 init) + (#;Some output) + (#E;Success [input' output]) + + _ + (#E;Error "")) + (#E;Error ($_ Text/append "Character (" init ") is not one of: " options " @ " (:: text;Codec<Text,Text> encode input)))) + + _ + (#E;Error "Can't parse character from empty text.")))) + +(def: #export (none-of options) + {#;doc "Only lex characters that aren't part of a piece of text."} + (-> Text (Lexer Char)) + (lambda [input] + (case (text;split +1 input) + (#;Some [init input']) + (if (;not (text;contains? init options)) + (case (text;nth +0 init) + (#;Some output) + (#E;Success [input' output]) + + _ + (#E;Error "")) + (#E;Error ($_ Text/append "Character (" init ") is one of: " options " @ " (:: text;Codec<Text,Text> encode input)))) + + _ + (#E;Error "Can't parse character from empty text.")))) + +(def: #export (satisfies p) + {#;doc "Only lex characters that satisfy a predicate."} + (-> (-> Char Bool) (Lexer Char)) + (lambda [input] + (case (: (Maybe [Text Char]) + (do Monad<Maybe> + [[init input'] (text;split +1 input) + output (text;nth +0 init)] + (wrap [input' output]))) + (#;Some [input' output]) + (if (p output) + (#E;Success [input' output]) + (#E;Error ($_ Text/append "Character does not satisfy predicate: " (:: text;Codec<Text,Text> encode input)))) + + _ + (#E;Error "Can't parse character from empty text.")))) + +(def: #export space + {#;doc "Only lex white-space."} + (Lexer Char) + (satisfies char;space?)) + +(do-template [<name> <base> <doc>] + [(def: #export (<name> p) + {#;doc <doc>} + (-> (Lexer Char) (Lexer Text)) + (do Monad<Lexer> + [cs (<base> p)] + (wrap (text;concat (map char;as-text cs)))))] + + [some' some "Lex some characters as a single continuous text."] + [many' many "Lex many characters as a single continuous text."] + ) + +(do-template [<name> <base> <doc>] + [(def: #export (<name> n p) + {#;doc <doc>} + (-> Nat (Lexer Char) (Lexer Text)) + (do Monad<Lexer> + [cs (<base> n p)] + (wrap (text;concat (map char;as-text cs)))))] + + [exactly' exactly "Lex exactly N characters."] + [at-most' at-most "Lex at most N characters."] + [at-least' at-least "Lex at least N characters."] + ) + +(def: #export (between' from to p) + {#;doc "Lex between N and M characters."} + (-> Nat Nat (Lexer Char) (Lexer Text)) + (do Monad<Lexer> + [cs (between from to p)] + (wrap (text;concat (map char;as-text cs))))) + +(def: #export end? + {#;doc "Ask if the lexer's input is empty."} + (Lexer Bool) + (lambda [input] + (#E;Success [input (text;empty? input)]))) + +(def: #export (_& left right) + (All [a b] (-> (Lexer a) (Lexer b) (Lexer b))) + (do Monad<Lexer> + [_ left] + right)) + +(def: #export (&_ left right) + (All [a b] (-> (Lexer a) (Lexer b) (Lexer a))) + (do Monad<Lexer> + [output left + _ right] + (wrap output))) + +(def: #export (default value lexer) + {#;doc "If the given lexer fails, this lexer will succeed with the provided value."} + (All [a] (-> a (Lexer a) (Lexer a))) + (lambda [input] + (case (lexer input) + (#E;Error error) + (#E;Success [input value]) + + (#E;Success input'+value) + (#E;Success input'+value)))) + +(def: #export (codec codec lexer) + {#;doc "Lex a token by means of a codec."} + (All [a] (-> (Codec Text a) (Lexer Text) (Lexer a))) + (lambda [input] + (case (lexer input) + (#E;Error error) + (#E;Error error) + + (#E;Success [input' to-decode]) + (case (:: codec decode to-decode) + (#E;Error error) + (#E;Error error) + + (#E;Success value) + (#E;Success [input' value]))))) + +(def: #export (enclosed [start end] lexer) + (All [a] (-> [Text Text] (Lexer a) (Lexer a))) + (_& (text start) + (&_ lexer + (text end)))) + +(def: #export (rec lexer) + (All [a] (-> (-> (Lexer a) (Lexer a)) + (Lexer a))) + (lambda [input] + (run' input (lexer (rec lexer))))) + +(def: #export (local local-input lexer) + {#;doc "Run a lexer with the given input, instead of the real one."} + (All [a] (-> Text (Lexer a) (Lexer a))) + (lambda [real-input] + (case (run' local-input lexer) + (#E;Error error) + (#E;Error error) + + (#E;Success [unconsumed value]) + (if (Text/= "" unconsumed) + (#E;Success [real-input value]) + (#E;Error ($_ Text/append "Unconsumed input: " unconsumed)))))) |