aboutsummaryrefslogtreecommitdiff
path: root/stdlib/source/library/lux/control/parser/text.lux
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/source/library/lux/control/parser/text.lux')
-rw-r--r--stdlib/source/library/lux/control/parser/text.lux377
1 files changed, 377 insertions, 0 deletions
diff --git a/stdlib/source/library/lux/control/parser/text.lux b/stdlib/source/library/lux/control/parser/text.lux
new file mode 100644
index 000000000..cfd1ab891
--- /dev/null
+++ b/stdlib/source/library/lux/control/parser/text.lux
@@ -0,0 +1,377 @@
+(.module:
+ [library
+ [lux (#- or and not)
+ [abstract
+ [monad (#+ Monad do)]]
+ [control
+ ["." try (#+ Try)]
+ ["." exception (#+ exception:)]]
+ [data
+ ["/" text (#+ Char) ("#\." monoid)]
+ ["." product]
+ ["." maybe]
+ [collection
+ ["." list ("#\." fold)]]]
+ [macro
+ ["." code]]
+ [math
+ [number
+ ["n" nat ("#\." decimal)]]]]]
+ ["." //])
+
+(type: #export Offset Nat)
+
+(def: start_offset Offset 0)
+
+(type: #export Parser
+ (//.Parser [Offset Text]))
+
+(type: #export Slice
+ {#basis Offset
+ #distance Offset})
+
+(def: (remaining offset tape)
+ (-> Offset Text Text)
+ (|> tape (/.split offset) maybe.assume product.right))
+
+(exception: #export (unconsumed_input {offset Offset} {tape Text})
+ (exception.report
+ ["Offset" (n\encode offset)]
+ ["Input size" (n\encode (/.size tape))]
+ ["Remaining input" (remaining offset tape)]))
+
+(exception: #export (expected_to_fail {offset Offset} {tape Text})
+ (exception.report
+ ["Offset" (n\encode offset)]
+ ["Input" (remaining offset tape)]))
+
+(exception: #export cannot_parse)
+(exception: #export cannot_slice)
+
+(def: #export (run parser input)
+ (All [a] (-> (Parser a) Text (Try a)))
+ (case (parser [start_offset input])
+ (#try.Failure msg)
+ (#try.Failure msg)
+
+ (#try.Success [[end_offset _] output])
+ (if (n.= end_offset (/.size input))
+ (#try.Success output)
+ (exception.throw ..unconsumed_input [end_offset input]))))
+
+(def: #export offset
+ (Parser Offset)
+ (function (_ (^@ input [offset tape]))
+ (#try.Success [input offset])))
+
+(def: (with_slices parser)
+ (-> (Parser (List Slice)) (Parser Slice))
+ (do //.monad
+ [offset ..offset
+ slices parser]
+ (wrap (list\fold (function (_ [slice::basis slice::distance]
+ [total::basis total::distance])
+ [total::basis ("lux i64 +" slice::distance total::distance)])
+ {#basis offset
+ #distance 0}
+ slices))))
+
+(def: #export any
+ {#.doc "Just returns the next character without applying any logic."}
+ (Parser Text)
+ (function (_ [offset tape])
+ (case (/.nth offset tape)
+ (#.Some output)
+ (#try.Success [[("lux i64 +" 1 offset) tape] (/.from_code output)])
+
+ _
+ (exception.throw ..cannot_parse []))))
+
+(def: #export any!
+ {#.doc "Just returns the next character without applying any logic."}
+ (Parser Slice)
+ (function (_ [offset tape])
+ (case (/.nth offset tape)
+ (#.Some _)
+ (#try.Success [[("lux i64 +" 1 offset) tape]
+ {#basis offset
+ #distance 1}])
+
+ _
+ (exception.throw ..cannot_slice []))))
+
+(template [<name> <type> <any>]
+ [(def: #export (<name> p)
+ {#.doc "Produce a character if the parser fails."}
+ (All [a] (-> (Parser a) (Parser <type>)))
+ (function (_ input)
+ (case (p input)
+ (#try.Failure msg)
+ (<any> input)
+
+ _
+ (exception.throw ..expected_to_fail input))))]
+
+ [not Text ..any]
+ [not! Slice ..any!]
+ )
+
+(exception: #export (cannot_match {reference Text})
+ (exception.report
+ ["Reference" (/.format reference)]))
+
+(def: #export (this reference)
+ {#.doc "Lex a text if it matches the given sample."}
+ (-> Text (Parser Any))
+ (function (_ [offset tape])
+ (case (/.index_of' reference offset tape)
+ (#.Some where)
+ (if (n.= offset where)
+ (#try.Success [[("lux i64 +" (/.size reference) offset) tape]
+ []])
+ (exception.throw ..cannot_match [reference]))
+
+ _
+ (exception.throw ..cannot_match [reference]))))
+
+(def: #export end!
+ {#.doc "Ensure the parser's input is empty."}
+ (Parser Any)
+ (function (_ (^@ input [offset tape]))
+ (if (n.= offset (/.size tape))
+ (#try.Success [input []])
+ (exception.throw ..unconsumed_input input))))
+
+(def: #export peek
+ {#.doc "Lex the next character (without consuming it from the input)."}
+ (Parser Text)
+ (function (_ (^@ input [offset tape]))
+ (case (/.nth offset tape)
+ (#.Some output)
+ (#try.Success [input (/.from_code output)])
+
+ _
+ (exception.throw ..cannot_parse []))))
+
+(def: #export get_input
+ {#.doc "Get all of the remaining input (without consuming it)."}
+ (Parser Text)
+ (function (_ (^@ input [offset tape]))
+ (#try.Success [input (remaining offset tape)])))
+
+(def: #export (range bottom top)
+ {#.doc "Only lex characters within a range."}
+ (-> Nat Nat (Parser Text))
+ (do //.monad
+ [char any
+ #let [char' (maybe.assume (/.nth 0 char))]
+ _ (//.assert ($_ /\compose "Character is not within range: " (/.from_code bottom) "-" (/.from_code top))
+ (.and (n.>= bottom char')
+ (n.<= top char')))]
+ (wrap char)))
+
+(template [<name> <bottom> <top> <desc>]
+ [(def: #export <name>
+ {#.doc (code.text ($_ /\compose "Only lex " <desc> " characters."))}
+ (Parser Text)
+ (..range (char <bottom>) (char <top>)))]
+
+ [upper "A" "Z" "uppercase"]
+ [lower "a" "z" "lowercase"]
+ [decimal "0" "9" "decimal"]
+ [octal "0" "7" "octal"]
+ )
+
+(def: #export alpha
+ {#.doc "Only lex alphabetic characters."}
+ (Parser Text)
+ (//.either lower upper))
+
+(def: #export alpha_num
+ {#.doc "Only lex alphanumeric characters."}
+ (Parser Text)
+ (//.either alpha decimal))
+
+(def: #export hexadecimal
+ {#.doc "Only lex hexadecimal digits."}
+ (Parser Text)
+ ($_ //.either
+ decimal
+ (range (char "a") (char "f"))
+ (range (char "A") (char "F"))))
+
+(template [<name>]
+ [(exception: #export (<name> {options Text} {character Char})
+ (exception.report
+ ["Options" (/.format options)]
+ ["Character" (/.format (/.from_code character))]))]
+
+ [character_should_be]
+ [character_should_not_be]
+ )
+
+(template [<name> <modifier> <exception> <description_modifier>]
+ [(def: #export (<name> options)
+ {#.doc (code.text ($_ /\compose "Only lex characters that are" <description_modifier> " part of a piece of text."))}
+ (-> Text (Parser Text))
+ (function (_ [offset tape])
+ (case (/.nth offset tape)
+ (#.Some output)
+ (let [output' (/.from_code output)]
+ (if (<modifier> (/.contains? output' options))
+ (#try.Success [[("lux i64 +" 1 offset) tape] output'])
+ (exception.throw <exception> [options output])))
+
+ _
+ (exception.throw ..cannot_parse []))))]
+
+ [one_of |> ..character_should_be ""]
+ [none_of .not ..character_should_not_be " not"]
+ )
+
+(template [<name> <modifier> <exception> <description_modifier>]
+ [(def: #export (<name> options)
+ {#.doc (code.text ($_ /\compose "Only lex characters that are" <description_modifier> " part of a piece of text."))}
+ (-> Text (Parser Slice))
+ (function (_ [offset tape])
+ (case (/.nth offset tape)
+ (#.Some output)
+ (let [output' (/.from_code output)]
+ (if (<modifier> (/.contains? output' options))
+ (#try.Success [[("lux i64 +" 1 offset) tape]
+ {#basis offset
+ #distance 1}])
+ (exception.throw <exception> [options output])))
+
+ _
+ (exception.throw ..cannot_slice []))))]
+
+ [one_of! |> ..character_should_be ""]
+ [none_of! .not ..character_should_not_be " not"]
+ )
+
+(exception: #export (character_does_not_satisfy_predicate {character Char})
+ (exception.report
+ ["Character" (/.format (/.from_code character))]))
+
+(def: #export (satisfies p)
+ {#.doc "Only lex characters that satisfy a predicate."}
+ (-> (-> Char Bit) (Parser Text))
+ (function (_ [offset tape])
+ (case (/.nth offset tape)
+ (#.Some output)
+ (if (p output)
+ (#try.Success [[("lux i64 +" 1 offset) tape] (/.from_code output)])
+ (exception.throw ..character_does_not_satisfy_predicate [output]))
+
+ _
+ (exception.throw ..cannot_parse []))))
+
+(def: #export space
+ {#.doc "Only lex white-space."}
+ (Parser Text)
+ (..satisfies /.space?))
+
+(def: #export (and left right)
+ (-> (Parser Text) (Parser Text) (Parser Text))
+ (do //.monad
+ [=left left
+ =right right]
+ (wrap ($_ /\compose =left =right))))
+
+(def: #export (and! left right)
+ (-> (Parser Slice) (Parser Slice) (Parser Slice))
+ (do //.monad
+ [[left::basis left::distance] left
+ [right::basis right::distance] right]
+ (wrap [left::basis ("lux i64 +" left::distance right::distance)])))
+
+(template [<name> <base> <doc_modifier>]
+ [(def: #export (<name> parser)
+ {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " characters as a single continuous text."))}
+ (-> (Parser Text) (Parser Text))
+ (|> parser <base> (\ //.monad map /.concat)))]
+
+ [some //.some "some"]
+ [many //.many "many"]
+ )
+
+(template [<name> <base> <doc_modifier>]
+ [(def: #export (<name> parser)
+ {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " characters as a single continuous text."))}
+ (-> (Parser Slice) (Parser Slice))
+ (with_slices (<base> parser)))]
+
+ [some! //.some "some"]
+ [many! //.many "many"]
+ )
+
+(template [<name> <base> <doc_modifier>]
+ [(def: #export (<name> amount parser)
+ {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " N characters."))}
+ (-> Nat (Parser Text) (Parser Text))
+ (|> parser (<base> amount) (\ //.monad map /.concat)))]
+
+ [exactly //.exactly "exactly"]
+ [at_most //.at_most "at most"]
+ [at_least //.at_least "at least"]
+ )
+
+(template [<name> <base> <doc_modifier>]
+ [(def: #export (<name> amount parser)
+ {#.doc (code.text ($_ /\compose "Lex " <doc_modifier> " N characters."))}
+ (-> Nat (Parser Slice) (Parser Slice))
+ (with_slices (<base> amount parser)))]
+
+ [exactly! //.exactly "exactly"]
+ [at_most! //.at_most "at most"]
+ [at_least! //.at_least "at least"]
+ )
+
+(def: #export (between from to parser)
+ {#.doc "Lex between N and M characters."}
+ (-> Nat Nat (Parser Text) (Parser Text))
+ (|> parser (//.between from to) (\ //.monad map /.concat)))
+
+(def: #export (between! from to parser)
+ {#.doc "Lex between N and M characters."}
+ (-> Nat Nat (Parser Slice) (Parser Slice))
+ (with_slices (//.between from to parser)))
+
+(def: #export (enclosed [start end] parser)
+ (All [a] (-> [Text Text] (Parser a) (Parser a)))
+ (|> parser
+ (//.before (this end))
+ (//.after (this start))))
+
+(def: #export (local local_input parser)
+ {#.doc "Run a parser with the given input, instead of the real one."}
+ (All [a] (-> Text (Parser a) (Parser a)))
+ (function (_ real_input)
+ (case (..run parser local_input)
+ (#try.Failure error)
+ (#try.Failure error)
+
+ (#try.Success value)
+ (#try.Success [real_input value]))))
+
+(def: #export (slice parser)
+ (-> (Parser Slice) (Parser Text))
+ (do //.monad
+ [[basis distance] parser]
+ (function (_ (^@ input [offset tape]))
+ (case (/.clip basis distance tape)
+ (#.Some output)
+ (#try.Success [input output])
+
+ #.None
+ (exception.throw ..cannot_slice [])))))
+
+(def: #export (embed structured text)
+ (All [s a]
+ (-> (Parser a)
+ (//.Parser s Text)
+ (//.Parser s a)))
+ (do //.monad
+ [raw text]
+ (//.lift (..run structured raw))))