diff options
Diffstat (limited to 'stdlib/source/library/lux/control/parser/text.lux')
-rw-r--r-- | stdlib/source/library/lux/control/parser/text.lux | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/control/parser/text.lux b/stdlib/source/library/lux/control/parser/text.lux new file mode 100644 index 000000000..cfd1ab891 --- /dev/null +++ b/stdlib/source/library/lux/control/parser/text.lux @@ -0,0 +1,377 @@ +(.module: + [library + [lux (#- or and not) + [abstract + [monad (#+ Monad do)]] + [control + ["." try (#+ Try)] + ["." exception (#+ exception:)]] + [data + ["/" text (#+ Char) ("#\." monoid)] + ["." product] + ["." maybe] + [collection + ["." list ("#\." fold)]]] + [macro + ["." code]] + [math + [number + ["n" nat ("#\." decimal)]]]]] + ["." //]) + +(type: #export Offset Nat) + +(def: start_offset Offset 0) + +(type: #export Parser + (//.Parser [Offset Text])) + +(type: #export Slice + {#basis Offset + #distance Offset}) + +(def: (remaining offset tape) + (-> Offset Text Text) + (|> tape (/.split offset) maybe.assume product.right)) + +(exception: #export (unconsumed_input {offset Offset} {tape Text}) + (exception.report + ["Offset" (n\encode offset)] + ["Input size" (n\encode (/.size tape))] + ["Remaining input" (remaining offset tape)])) + +(exception: #export (expected_to_fail {offset Offset} {tape Text}) + (exception.report + ["Offset" (n\encode offset)] + ["Input" (remaining offset tape)])) + +(exception: #export cannot_parse) +(exception: #export cannot_slice) + +(def: #export (run parser input) + (All [a] (-> (Parser a) Text (Try a))) + (case (parser [start_offset input]) + (#try.Failure msg) + (#try.Failure msg) + + (#try.Success [[end_offset _] output]) + (if (n.= end_offset (/.size input)) + (#try.Success output) + (exception.throw ..unconsumed_input [end_offset input])))) + +(def: #export offset + (Parser Offset) + (function (_ (^@ input [offset tape])) + (#try.Success [input offset]))) + +(def: (with_slices parser) + (-> (Parser (List Slice)) (Parser Slice)) + (do //.monad + [offset ..offset + slices parser] + (wrap (list\fold (function (_ [slice::basis slice::distance] + [total::basis total::distance]) + [total::basis ("lux i64 +" slice::distance total::distance)]) + {#basis offset + #distance 0} + slices)))) + +(def: #export any + {#.doc "Just returns the next character without applying any logic."} + (Parser Text) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (#try.Success [[("lux i64 +" 1 offset) tape] (/.from_code output)]) + + _ + (exception.throw ..cannot_parse [])))) + +(def: #export any! + {#.doc "Just returns the next character without applying any logic."} + (Parser Slice) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some _) + (#try.Success [[("lux i64 +" 1 offset) tape] + {#basis offset + #distance 1}]) + + _ + (exception.throw ..cannot_slice [])))) + +(template [<name> <type> <any>] + [(def: #export (<name> p) + {#.doc "Produce a character if the parser fails."} + (All [a] (-> (Parser a) (Parser <type>))) + (function (_ input) + (case (p input) + (#try.Failure msg) + (<any> input) + + _ + (exception.throw ..expected_to_fail input))))] + + [not Text ..any] + [not! Slice ..any!] + ) + +(exception: #export (cannot_match {reference Text}) + (exception.report + ["Reference" (/.format reference)])) + +(def: #export (this reference) + {#.doc "Lex a text if it matches the given sample."} + (-> Text (Parser Any)) + (function (_ [offset tape]) + (case (/.index_of' reference offset tape) + (#.Some where) + (if (n.= offset where) + (#try.Success [[("lux i64 +" (/.size reference) offset) tape] + []]) + (exception.throw ..cannot_match [reference])) + + _ + (exception.throw ..cannot_match [reference])))) + +(def: #export end! + {#.doc "Ensure the parser's input is empty."} + (Parser Any) + (function (_ (^@ input [offset tape])) + (if (n.= offset (/.size tape)) + (#try.Success [input []]) + (exception.throw ..unconsumed_input input)))) + +(def: #export peek + {#.doc "Lex the next character (without consuming it from the input)."} + (Parser Text) + (function (_ (^@ input [offset tape])) + (case (/.nth offset tape) + (#.Some output) + (#try.Success [input (/.from_code output)]) + + _ + (exception.throw ..cannot_parse [])))) + +(def: #export get_input + {#.doc "Get all of the remaining input (without consuming it)."} + (Parser Text) + (function (_ (^@ input [offset tape])) + (#try.Success [input (remaining offset tape)]))) + +(def: #export (range bottom top) + {#.doc "Only lex characters within a range."} + (-> Nat Nat (Parser Text)) + (do //.monad + [char any + #let [char' (maybe.assume (/.nth 0 char))] + _ (//.assert ($_ /\compose "Character is not within range: " (/.from_code bottom) "-" (/.from_code top)) + (.and (n.>= bottom char') + (n.<= top char')))] + (wrap char))) + +(template [<name> <bottom> <top> <desc>] + [(def: #export <name> + {#.doc (code.text ($_ /\compose "Only lex " <desc> " characters."))} + (Parser Text) + (..range (char <bottom>) (char <top>)))] + + [upper "A" "Z" "uppercase"] + [lower "a" "z" "lowercase"] + [decimal "0" "9" "decimal"] + [octal "0" "7" "octal"] + ) + +(def: #export alpha + {#.doc "Only lex alphabetic characters."} + (Parser Text) + (//.either lower upper)) + +(def: #export alpha_num + {#.doc "Only lex alphanumeric characters."} + (Parser Text) + (//.either alpha decimal)) + +(def: #export hexadecimal + {#.doc "Only lex hexadecimal digits."} + (Parser Text) + ($_ //.either + decimal + (range (char "a") (char "f")) + (range (char "A") (char "F")))) + +(template [<name>] + [(exception: #export (<name> {options Text} {character Char}) + (exception.report + ["Options" (/.format options)] + ["Character" (/.format (/.from_code character))]))] + + [character_should_be] + [character_should_not_be] + ) + +(template [<name> <modifier> <exception> <description_modifier>] + [(def: #export (<name> options) + {#.doc (code.text ($_ /\compose "Only lex characters that are" <description_modifier> " part of a piece of text."))} + (-> Text (Parser Text)) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (let [output' (/.from_code output)] + (if (<modifier> (/.contains? output' options)) + (#try.Success [[("lux i64 +" 1 offset) tape] output']) + (exception.throw <exception> [options output]))) + + _ + (exception.throw ..cannot_parse []))))] + + [one_of |> ..character_should_be ""] + [none_of .not ..character_should_not_be " not"] + ) + +(template [<name> <modifier> <exception> <description_modifier>] + [(def: #export (<name> options) + {#.doc (code.text ($_ /\compose "Only lex characters that are" <description_modifier> " part of a piece of text."))} + (-> Text (Parser Slice)) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (let [output' (/.from_code output)] + (if (<modifier> (/.contains? output' options)) + (#try.Success [[("lux i64 +" 1 offset) tape] + {#basis offset + #distance 1}]) + (exception.throw <exception> [options output]))) + + _ + (exception.throw ..cannot_slice []))))] + + [one_of! |> ..character_should_be ""] + [none_of! .not ..character_should_not_be " not"] + ) + +(exception: #export (character_does_not_satisfy_predicate {character Char}) + (exception.report + ["Character" (/.format (/.from_code character))])) + +(def: #export (satisfies p) + {#.doc "Only lex characters that satisfy a predicate."} + (-> (-> Char Bit) (Parser Text)) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (if (p output) + (#try.Success [[("lux i64 +" 1 offset) tape] (/.from_code output)]) + (exception.throw ..character_does_not_satisfy_predicate [output])) + + _ + (exception.throw ..cannot_parse [])))) + +(def: #export space + {#.doc "Only lex white-space."} + (Parser Text) + (..satisfies /.space?)) + +(def: #export (and left right) + (-> (Parser Text) (Parser Text) (Parser Text)) + (do //.monad + [=left left + =right right] + (wrap ($_ /\compose =left =right)))) + +(def: #export (and! left right) + (-> (Parser Slice) (Parser Slice) (Parser Slice)) + (do //.monad + [[left::basis left::distance] left + [right::basis right::distance] right] + (wrap [left::basis ("lux i64 +" left::distance right::distance)]))) + +(template [<name> <base> <doc_modifier>] + [(def: #export (<name> parser) + {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " characters as a single continuous text."))} + (-> (Parser Text) (Parser Text)) + (|> parser <base> (\ //.monad map /.concat)))] + + [some //.some "some"] + [many //.many "many"] + ) + +(template [<name> <base> <doc_modifier>] + [(def: #export (<name> parser) + {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " characters as a single continuous text."))} + (-> (Parser Slice) (Parser Slice)) + (with_slices (<base> parser)))] + + [some! //.some "some"] + [many! //.many "many"] + ) + +(template [<name> <base> <doc_modifier>] + [(def: #export (<name> amount parser) + {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " N characters."))} + (-> Nat (Parser Text) (Parser Text)) + (|> parser (<base> amount) (\ //.monad map /.concat)))] + + [exactly //.exactly "exactly"] + [at_most //.at_most "at most"] + [at_least //.at_least "at least"] + ) + +(template [<name> <base> <doc_modifier>] + [(def: #export (<name> amount parser) + {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " N characters."))} + (-> Nat (Parser Slice) (Parser Slice)) + (with_slices (<base> amount parser)))] + + [exactly! //.exactly "exactly"] + [at_most! //.at_most "at most"] + [at_least! //.at_least "at least"] + ) + +(def: #export (between from to parser) + {#.doc "Lex between N and M characters."} + (-> Nat Nat (Parser Text) (Parser Text)) + (|> parser (//.between from to) (\ //.monad map /.concat))) + +(def: #export (between! from to parser) + {#.doc "Lex between N and M characters."} + (-> Nat Nat (Parser Slice) (Parser Slice)) + (with_slices (//.between from to parser))) + +(def: #export (enclosed [start end] parser) + (All [a] (-> [Text Text] (Parser a) (Parser a))) + (|> parser + (//.before (this end)) + (//.after (this start)))) + +(def: #export (local local_input parser) + {#.doc "Run a parser with the given input, instead of the real one."} + (All [a] (-> Text (Parser a) (Parser a))) + (function (_ real_input) + (case (..run parser local_input) + (#try.Failure error) + (#try.Failure error) + + (#try.Success value) + (#try.Success [real_input value])))) + +(def: #export (slice parser) + (-> (Parser Slice) (Parser Text)) + (do //.monad + [[basis distance] parser] + (function (_ (^@ input [offset tape])) + (case (/.clip basis distance tape) + (#.Some output) + (#try.Success [input output]) + + #.None + (exception.throw ..cannot_slice []))))) + +(def: #export (embed structured text) + (All [s a] + (-> (Parser a) + (//.Parser s Text) + (//.Parser s a))) + (do //.monad + [raw text] + (//.lift (..run structured raw)))) |