(.module: [lux (#- or and not) [control [monad (#+ do Monad)] ["p" parser]] [data ["." text ("text/." Monoid)] ["." product] ["." maybe] ["e" error] [collection ["." list]]] [macro ["." code]]]) (type: Offset Nat) (def: start-offset Offset 0) (type: #export Lexer (p.Parser [Offset Text])) (def: (remaining offset tape) (-> Offset Text Text) (|> tape (text.split offset) maybe.assume product.right)) (def: cannot-lex-error Text "Cannot lex from empty text.") (def: (unconsumed-input-error offset tape) (-> Offset Text Text) ($_ text/compose "Unconsumed input: " (remaining offset tape))) (def: #export (run input lexer) (All [a] (-> Text (Lexer a) (e.Error a))) (case (lexer [start-offset input]) (#e.Error msg) (#e.Error msg) (#e.Success [[end-offset _] output]) (if (n/= end-offset (text.size input)) (#e.Success output) (#e.Error (unconsumed-input-error end-offset input))) )) (def: #export any {#.doc "Just returns the next character without applying any logic."} (Lexer Text) (function (_ [offset tape]) (case (text.nth offset tape) (#.Some output) (#e.Success [[(inc offset) tape] (text.from-code output)]) _ (#e.Error cannot-lex-error)) )) (def: #export (not p) {#.doc "Produce a character if the lexer fails."} (All [a] (-> (Lexer a) (Lexer Text))) (function (_ input) (case (p input) (#e.Error msg) (any input) _ (#e.Error "Expected to fail; yet succeeded.")))) (def: #export (this reference) {#.doc "Lex a text if it matches the given sample."} (-> Text (Lexer Any)) (function (_ [offset tape]) (case (text.index-of' reference offset tape) (#.Some where) (if (n/= offset where) (#e.Success [[(n/+ (text.size reference) offset) tape] []]) (#e.Error ($_ text/compose "Could not match: " (text.encode reference) " @ " (maybe.assume (text.clip' offset tape))))) _ (#e.Error ($_ text/compose "Could not match: " (text.encode reference)))))) (def: #export (this? reference) {#.doc "Lex a text if it matches the given sample."} (-> Text (Lexer Bit)) (function (_ (^@ input [offset tape])) (case (text.index-of' reference offset tape) (^multi (#.Some where) (n/= offset where)) (#e.Success [[(n/+ (text.size reference) offset) tape] #1]) _ (#e.Success [input #0])))) (def: #export end {#.doc "Ensure the lexer's input is empty."} (Lexer Any) (function (_ (^@ input [offset tape])) (if (n/= offset (text.size tape)) (#e.Success [input []]) (#e.Error (unconsumed-input-error offset tape))))) (def: #export end? {#.doc "Ask if the lexer's input is empty."} (Lexer Bit) (function (_ (^@ input [offset tape])) (#e.Success [input (n/= offset (text.size tape))]))) (def: #export peek {#.doc "Lex the next character (without consuming it from the input)."} (Lexer Text) (function (_ (^@ input [offset tape])) (case (text.nth offset tape) (#.Some output) (#e.Success [input (text.from-code output)]) _ (#e.Error cannot-lex-error)) )) (def: #export get-input {#.doc "Get all of the remaining input (without consuming it)."} (Lexer Text) (function (_ (^@ input [offset tape])) (#e.Success [input (remaining offset tape)]))) (def: #export (range bottom top) {#.doc "Only lex characters within a range."} (-> Nat Nat (Lexer Text)) (do p.Monad [char any #let [char' (maybe.assume (text.nth 0 char))] _ (p.assert ($_ text/compose "Character is not within range: " (text.from-code bottom) "-" (text.from-code top)) (.and (n/>= bottom char') (n/<= top char')))] (wrap char))) (do-template [ ] [(def: #export {#.doc (code.text ($_ text/compose "Only lex " " characters."))} (Lexer Text) (range (char ) (char )))] [upper "A" "Z" "uppercase"] [lower "a" "z" "lowercase"] [decimal "0" "9" "decimal"] [octal "0" "7" "octal"] ) (def: #export alpha {#.doc "Only lex alphabetic characters."} (Lexer Text) (p.either lower upper)) (def: #export alpha-num {#.doc "Only lex alphanumeric characters."} (Lexer Text) (p.either alpha decimal)) (def: #export hexadecimal {#.doc "Only lex hexadecimal digits."} (Lexer Text) ($_ p.either decimal (range (char "a") (char "f")) (range (char "A") (char "F")))) (def: #export (one-of options) {#.doc "Only lex characters that are part of a piece of text."} (-> Text (Lexer Text)) (function (_ [offset tape]) (case (text.nth offset tape) (#.Some output) (let [output (text.from-code output)] (if (text.contains? output options) (#e.Success [[(inc offset) tape] output]) (#e.Error ($_ text/compose "Character (" output ") is not one of: " options)))) _ (#e.Error cannot-lex-error)))) (def: #export (none-of options) {#.doc "Only lex characters that are not part of a piece of text."} (-> Text (Lexer Text)) (function (_ [offset tape]) (case (text.nth offset tape) (#.Some output) (let [output (text.from-code output)] (if (.not (text.contains? output options)) (#e.Success [[(inc offset) tape] output]) (#e.Error ($_ text/compose "Character (" output ") is one of: " options)))) _ (#e.Error cannot-lex-error)))) (def: #export (satisfies p) {#.doc "Only lex characters that satisfy a predicate."} (-> (-> Nat Bit) (Lexer Text)) (function (_ [offset tape]) (case (text.nth offset tape) (#.Some output) (if (p output) (#e.Success [[(inc offset) tape] (text.from-code output)]) (#e.Error ($_ text/compose "Character does not satisfy predicate: " (text.from-code output)))) _ (#e.Error cannot-lex-error)))) (def: #export space {#.doc "Only lex white-space."} (Lexer Text) (satisfies text.space?)) (def: #export (and left right) (-> (Lexer Text) (Lexer Text) (Lexer Text)) (do p.Monad [=left left =right right] (wrap ($_ text/compose =left =right)))) (do-template [ ] [(def: #export ( p) {#.doc } (-> (Lexer Text) (Lexer Text)) (|> p (:: p.Monad map text.concat)))] [some p.some "Lex some characters as a single continuous text."] [many p.many "Lex many characters as a single continuous text."] ) (do-template [ ] [(def: #export ( n p) {#.doc } (-> Nat (Lexer Text) (Lexer Text)) (do p.Monad [] (|> p ( n) (:: @ map text.concat))))] [exactly p.exactly "Lex exactly N characters."] [at-most p.at-most "Lex at most N characters."] [at-least p.at-least "Lex at least N characters."] ) (def: #export (between from to p) {#.doc "Lex between N and M characters."} (-> Nat Nat (Lexer Text) (Lexer Text)) (|> p (p.between from to) (:: p.Monad map text.concat))) (def: #export (enclosed [start end] lexer) (All [a] (-> [Text Text] (Lexer a) (Lexer a))) (|> lexer (p.before (this end)) (p.after (this start)))) (def: #export (local local-input lexer) {#.doc "Run a lexer with the given input, instead of the real one."} (All [a] (-> Text (Lexer a) (Lexer a))) (function (_ real-input) (case (run local-input lexer) (#e.Error error) (#e.Error error) (#e.Success value) (#e.Success [real-input value]))))