From 692f9751f36fbfc4a5f1148c7b1fadc03495fa6b Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Fri, 19 Apr 2019 01:13:00 -0400 Subject: Moved the text lexers under "lux/control/parser/". --- stdlib/source/lux/control/parser/text.lux | 359 +++++++++++++++++++++ stdlib/source/lux/data/format/json.lux | 6 +- stdlib/source/lux/data/format/xml.lux | 6 +- stdlib/source/lux/data/text/lexer.lux | 358 -------------------- stdlib/source/lux/data/text/regex.lux | 4 +- stdlib/source/lux/macro/poly/json.lux | 4 +- stdlib/source/lux/math/modular.lux | 6 +- stdlib/source/lux/time/date.lux | 6 +- stdlib/source/lux/time/duration.lux | 6 +- stdlib/source/lux/time/instant.lux | 6 +- stdlib/source/lux/tool/compiler/default/syntax.lux | 5 +- stdlib/source/lux/world/net/http/cookie.lux | 6 +- stdlib/source/lux/world/net/http/query.lux | 6 +- stdlib/source/test/lux/control.lux | 9 + stdlib/source/test/lux/control/parser/text.lux | 171 ++++++++++ stdlib/source/test/lux/data.lux | 2 - stdlib/source/test/lux/data/text/lexer.lux | 171 ---------- stdlib/source/test/lux/data/text/regex.lux | 14 +- .../test/lux/tool/compiler/default/syntax.lux | 6 +- 19 files changed, 581 insertions(+), 570 deletions(-) create mode 100644 stdlib/source/lux/control/parser/text.lux delete mode 100644 stdlib/source/lux/data/text/lexer.lux create mode 100644 stdlib/source/test/lux/control/parser/text.lux delete mode 100644 stdlib/source/test/lux/data/text/lexer.lux (limited to 'stdlib') diff --git a/stdlib/source/lux/control/parser/text.lux b/stdlib/source/lux/control/parser/text.lux new file mode 100644 index 000000000..22f49a572 --- /dev/null +++ b/stdlib/source/lux/control/parser/text.lux @@ -0,0 +1,359 @@ +(.module: + [lux (#- or and not) + [abstract + [monad (#+ Monad do)]] + [control + ["." exception (#+ exception:)]] + [data + ["." product] + ["." maybe] + ["." error (#+ Error)] + ["/" text ("#@." monoid)] + [number + ["." nat ("#@." decimal)]] + [collection + ["." list ("#@." fold)]]] + [macro + ["." code]]] + ["." //]) + +(type: #export Offset Nat) + +(def: start-offset Offset 0) + +(type: #export Lexer + (//.Parser [Offset Text])) + +(type: #export Slice + {#basis Offset + #distance Offset}) + +(def: cannot-lex-error Text "Cannot lex from empty text.") + +(def: (remaining offset tape) + (-> Offset Text Text) + (|> tape (/.split offset) maybe.assume product.right)) + +(exception: #export (unconsumed-input {offset Offset} {tape Text}) + (exception.report + ["Offset" (nat@encode offset)] + ["Input size" (nat@encode (/.size tape))] + ["Remaining input" (remaining offset tape)])) + +(def: #export (run input lexer) + (All [a] (-> Text (Lexer a) (Error a))) + (case (lexer [start-offset input]) + (#error.Failure msg) + (#error.Failure msg) + + (#error.Success [[end-offset _] output]) + (if (n/= end-offset (/.size input)) + (#error.Success output) + (exception.throw unconsumed-input [end-offset input])))) + +(def: #export offset + (Lexer Offset) + (function (_ (^@ input [offset tape])) + (#error.Success [input offset]))) + +(def: (with-slices lexer) + (-> (Lexer (List Slice)) (Lexer Slice)) + (do //.monad + [offset ..offset + slices lexer] + (wrap (list@fold (function (_ [slice::basis slice::distance] + [total::basis total::distance]) + [total::basis ("lux i64 +" slice::distance total::distance)]) + {#basis offset + #distance 0} + slices)))) + +(def: #export any + {#.doc "Just returns the next character without applying any logic."} + (Lexer Text) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (#error.Success [[("lux i64 +" 1 offset) tape] (/.from-code output)]) + + _ + (#error.Failure cannot-lex-error)))) + +(def: #export any! + {#.doc "Just returns the next character without applying any logic."} + (Lexer Slice) + (function (_ [offset tape]) + (#error.Success [[("lux i64 +" 1 offset) tape] + {#basis offset + #distance 1}]))) + +(template [ ] + [(def: #export ( p) + {#.doc "Produce a character if the lexer fails."} + (All [a] (-> (Lexer a) (Lexer ))) + (function (_ input) + (case (p input) + (#error.Failure msg) + ( input) + + _ + (#error.Failure "Expected to fail@ yet succeeded."))))] + + [not Text ..any] + [not! Slice ..any!] + ) + +(def: #export (this reference) + {#.doc "Lex a text if it matches the given sample."} + (-> Text (Lexer Any)) + (function (_ [offset tape]) + (case (/.index-of' reference offset tape) + (#.Some where) + (if (n/= offset where) + (#error.Success [[("lux i64 +" (/.size reference) offset) tape] + []]) + (#error.Failure ($_ /@compose "Could not match: " (/.encode reference) " @ " (maybe.assume (/.clip' offset tape))))) + + _ + (#error.Failure ($_ /@compose "Could not match: " (/.encode reference)))))) + +(def: #export (this? reference) + {#.doc "Lex a text if it matches the given sample."} + (-> Text (Lexer Bit)) + (function (_ (^@ input [offset tape])) + (case (/.index-of' reference offset tape) + (^multi (#.Some where) (n/= offset where)) + (#error.Success [[("lux i64 +" (/.size reference) offset) tape] + #1]) + + _ + (#error.Success [input #0])))) + +(def: #export end + {#.doc "Ensure the lexer's input is empty."} + (Lexer Any) + (function (_ (^@ input [offset tape])) + (if (n/= offset (/.size tape)) + (#error.Success [input []]) + (exception.throw unconsumed-input [offset tape])))) + +(def: #export end? + {#.doc "Ask if the lexer's input is empty."} + (Lexer Bit) + (function (_ (^@ input [offset tape])) + (#error.Success [input (n/= offset (/.size tape))]))) + +(def: #export peek + {#.doc "Lex the next character (without consuming it from the input)."} + (Lexer Text) + (function (_ (^@ input [offset tape])) + (case (/.nth offset tape) + (#.Some output) + (#error.Success [input (/.from-code output)]) + + _ + (#error.Failure cannot-lex-error)))) + +(def: #export get-input + {#.doc "Get all of the remaining input (without consuming it)."} + (Lexer Text) + (function (_ (^@ input [offset tape])) + (#error.Success [input (remaining offset tape)]))) + +(def: #export (range bottom top) + {#.doc "Only lex characters within a range."} + (-> Nat Nat (Lexer Text)) + (do //.monad + [char any + #let [char' (maybe.assume (/.nth 0 char))] + _ (//.assert ($_ /@compose "Character is not within range: " (/.from-code bottom) "-" (/.from-code top)) + (.and (n/>= bottom char') + (n/<= top char')))] + (wrap char))) + +(template [ ] + [(def: #export + {#.doc (code.text ($_ /@compose "Only lex " " characters."))} + (Lexer Text) + (range (char ) (char )))] + + [upper "A" "Z" "uppercase"] + [lower "a" "z" "lowercase"] + [decimal "0" "9" "decimal"] + [octal "0" "7" "octal"] + ) + +(def: #export alpha + {#.doc "Only lex alphabetic characters."} + (Lexer Text) + (//.either lower upper)) + +(def: #export alpha-num + {#.doc "Only lex alphanumeric characters."} + (Lexer Text) + (//.either alpha decimal)) + +(def: #export hexadecimal + {#.doc "Only lex hexadecimal digits."} + (Lexer Text) + ($_ //.either + decimal + (range (char "a") (char "f")) + (range (char "A") (char "F")))) + +(template [ ] + [(def: #export ( options) + {#.doc (code.text ($_ /@compose "Only lex characters that are" " part of a piece of text."))} + (-> Text (Lexer Text)) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (let [output (/.from-code output)] + (if ( (/.contains? output options)) + (#error.Success [[("lux i64 +" 1 offset) tape] output]) + (#error.Failure ($_ /@compose "Character (" output + ") is should " + "be one of: " options)))) + + _ + (#error.Failure cannot-lex-error))))] + + [one-of "" |>] + [none-of " not" .not] + ) + +(template [ ] + [(def: #export ( options) + {#.doc (code.text ($_ /@compose "Only lex characters that are" " part of a piece of text."))} + (-> Text (Lexer Slice)) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (let [output (/.from-code output)] + (if ( (/.contains? output options)) + (#error.Success [[("lux i64 +" 1 offset) tape] + {#basis offset + #distance 1}]) + (#error.Failure ($_ /@compose "Character (" output + ") is should " + "be one of: " options)))) + + _ + (#error.Failure cannot-lex-error))))] + + [one-of! "" |>] + [none-of! " not" .not] + ) + +(def: #export (satisfies p) + {#.doc "Only lex characters that satisfy a predicate."} + (-> (-> Nat Bit) (Lexer Text)) + (function (_ [offset tape]) + (case (/.nth offset tape) + (#.Some output) + (if (p output) + (#error.Success [[("lux i64 +" 1 offset) tape] (/.from-code output)]) + (#error.Failure ($_ /@compose "Character does not satisfy predicate: " (/.from-code output)))) + + _ + (#error.Failure cannot-lex-error)))) + +(def: #export space + {#.doc "Only lex white-space."} + (Lexer Text) + (satisfies /.space?)) + +(def: #export (and left right) + (-> (Lexer Text) (Lexer Text) (Lexer Text)) + (do //.monad + [=left left + =right right] + (wrap ($_ /@compose =left =right)))) + +(def: #export (and! left right) + (-> (Lexer Slice) (Lexer Slice) (Lexer Slice)) + (do //.monad + [[left::basis left::distance] left + [right::basis right::distance] right] + (wrap [left::basis ("lux i64 +" left::distance right::distance)]))) + +(template [ ] + [(def: #export ( lexer) + {#.doc (code.text ($_ /@compose "Lex " " characters as a single continuous text."))} + (-> (Lexer Text) (Lexer Text)) + (|> lexer (:: //.monad map /.concat)))] + + [some //.some "some"] + [many //.many "many"] + ) + +(template [ ] + [(def: #export ( lexer) + {#.doc (code.text ($_ /@compose "Lex " " characters as a single continuous text."))} + (-> (Lexer Slice) (Lexer Slice)) + (with-slices ( lexer)))] + + [some! //.some "some"] + [many! //.many "many"] + ) + +(template [ ] + [(def: #export ( amount lexer) + {#.doc (code.text ($_ /@compose "Lex " " N characters."))} + (-> Nat (Lexer Text) (Lexer Text)) + (|> lexer ( amount) (:: //.monad map /.concat)))] + + [exactly //.exactly "exactly"] + [at-most //.at-most "at most"] + [at-least //.at-least "at least"] + ) + +(template [ ] + [(def: #export ( amount lexer) + {#.doc (code.text ($_ /@compose "Lex " " N characters."))} + (-> Nat (Lexer Slice) (Lexer Slice)) + (with-slices ( amount lexer)))] + + [exactly! //.exactly "exactly"] + [at-most! //.at-most "at most"] + [at-least! //.at-least "at least"] + ) + +(def: #export (between from to lexer) + {#.doc "Lex between N and M characters."} + (-> Nat Nat (Lexer Text) (Lexer Text)) + (|> lexer (//.between from to) (:: //.monad map /.concat))) + +(def: #export (between! from to lexer) + {#.doc "Lex between N and M characters."} + (-> Nat Nat (Lexer Slice) (Lexer Slice)) + (with-slices (//.between from to lexer))) + +(def: #export (enclosed [start end] lexer) + (All [a] (-> [Text Text] (Lexer a) (Lexer a))) + (|> lexer + (//.before (this end)) + (//.after (this start)))) + +(def: #export (local local-input lexer) + {#.doc "Run a lexer with the given input, instead of the real one."} + (All [a] (-> Text (Lexer a) (Lexer a))) + (function (_ real-input) + (case (run local-input lexer) + (#error.Failure error) + (#error.Failure error) + + (#error.Success value) + (#error.Success [real-input value])))) + +(def: #export (slice lexer) + (-> (Lexer Slice) (Lexer Text)) + (do //.monad + [[basis distance] lexer] + (function (_ (^@ input [offset tape])) + (case (/.clip basis ("lux i64 +" basis distance) tape) + (#.Some output) + (#error.Success [input output]) + + #.None + (#error.Failure "Cannot slice."))))) diff --git a/stdlib/source/lux/data/format/json.lux b/stdlib/source/lux/data/format/json.lux index 34fcf4dfb..beb5eca8d 100644 --- a/stdlib/source/lux/data/format/json.lux +++ b/stdlib/source/lux/data/format/json.lux @@ -7,7 +7,8 @@ codec] [control pipe - ["p" parser (#+ Parser) ("#@." monad)] + ["p" parser (#+ Parser) ("#@." monad) + ["l" text]] ["ex" exception (#+ exception:)]] [data ["." bit] @@ -17,8 +18,7 @@ ["." product] [number ["." frac ("#@." decimal)]] - ["." text ("#@." equivalence monoid) - ["l" lexer]] + ["." text ("#@." equivalence monoid)] [collection ["." list ("#@." fold monad)] ["." row (#+ Row row) ("#@." monad)] diff --git a/stdlib/source/lux/data/format/xml.lux b/stdlib/source/lux/data/format/xml.lux index 6cd469e04..29a77c0df 100644 --- a/stdlib/source/lux/data/format/xml.lux +++ b/stdlib/source/lux/data/format/xml.lux @@ -5,7 +5,8 @@ [equivalence (#+ Equivalence)] codec] [control - ["p" parser ("#;." monad)] + ["p" parser ("#;." monad) + ["l" text]] ["ex" exception (#+ exception:)]] [data ["." error (#+ Error)] @@ -13,8 +14,7 @@ ["." name ("#;." equivalence codec)] [number ["." int]] - ["." text ("#;." equivalence monoid) - ["l" lexer]] + ["." text ("#;." equivalence monoid)] [collection ["." list ("#;." monad)] ["d" dictionary]]]]) diff --git a/stdlib/source/lux/data/text/lexer.lux b/stdlib/source/lux/data/text/lexer.lux deleted file mode 100644 index 958011b1c..000000000 --- a/stdlib/source/lux/data/text/lexer.lux +++ /dev/null @@ -1,358 +0,0 @@ -(.module: - [lux (#- or and not) - [abstract - [monad (#+ Monad do)]] - [control - ["p" parser] - ["ex" exception (#+ exception:)]] - [data - ["." product] - ["." maybe] - ["." error (#+ Error)] - [number - ["." nat ("#@." decimal)]] - [collection - ["." list ("#@." fold)]]] - [macro - ["." code]]] - ["." // ("#@." monoid)]) - -(type: #export Offset Nat) - -(def: start-offset Offset 0) - -(type: #export Lexer - (p.Parser [Offset Text])) - -(type: #export Slice - {#basis Offset - #distance Offset}) - -(def: cannot-lex-error Text "Cannot lex from empty text.") - -(def: (remaining offset tape) - (-> Offset Text Text) - (|> tape (//.split offset) maybe.assume product.right)) - -(exception: #export (unconsumed-input {offset Offset} {tape Text}) - (ex.report ["Offset" (nat@encode offset)] - ["Input size" (nat@encode (//.size tape))] - ["Remaining input" (remaining offset tape)])) - -(def: #export (run input lexer) - (All [a] (-> Text (Lexer a) (Error a))) - (case (lexer [start-offset input]) - (#error.Failure msg) - (#error.Failure msg) - - (#error.Success [[end-offset _] output]) - (if (n/= end-offset (//.size input)) - (#error.Success output) - (ex.throw unconsumed-input [end-offset input])))) - -(def: #export offset - (Lexer Offset) - (function (_ (^@ input [offset tape])) - (#error.Success [input offset]))) - -(def: (with-slices lexer) - (-> (Lexer (List Slice)) (Lexer Slice)) - (do p.monad - [offset ..offset - slices lexer] - (wrap (list@fold (function (_ [slice::basis slice::distance] - [total::basis total::distance]) - [total::basis ("lux i64 +" slice::distance total::distance)]) - {#basis offset - #distance 0} - slices)))) - -(def: #export any - {#.doc "Just returns the next character without applying any logic."} - (Lexer Text) - (function (_ [offset tape]) - (case (//.nth offset tape) - (#.Some output) - (#error.Success [[("lux i64 +" 1 offset) tape] (//.from-code output)]) - - _ - (#error.Failure cannot-lex-error)))) - -(def: #export any! - {#.doc "Just returns the next character without applying any logic."} - (Lexer Slice) - (function (_ [offset tape]) - (#error.Success [[("lux i64 +" 1 offset) tape] - {#basis offset - #distance 1}]))) - -(template [ ] - [(def: #export ( p) - {#.doc "Produce a character if the lexer fails."} - (All [a] (-> (Lexer a) (Lexer ))) - (function (_ input) - (case (p input) - (#error.Failure msg) - ( input) - - _ - (#error.Failure "Expected to fail@ yet succeeded."))))] - - [not Text ..any] - [not! Slice ..any!] - ) - -(def: #export (this reference) - {#.doc "Lex a text if it matches the given sample."} - (-> Text (Lexer Any)) - (function (_ [offset tape]) - (case (//.index-of' reference offset tape) - (#.Some where) - (if (n/= offset where) - (#error.Success [[("lux i64 +" (//.size reference) offset) tape] - []]) - (#error.Failure ($_ //@compose "Could not match: " (//.encode reference) " @ " (maybe.assume (//.clip' offset tape))))) - - _ - (#error.Failure ($_ //@compose "Could not match: " (//.encode reference)))))) - -(def: #export (this? reference) - {#.doc "Lex a text if it matches the given sample."} - (-> Text (Lexer Bit)) - (function (_ (^@ input [offset tape])) - (case (//.index-of' reference offset tape) - (^multi (#.Some where) (n/= offset where)) - (#error.Success [[("lux i64 +" (//.size reference) offset) tape] - #1]) - - _ - (#error.Success [input #0])))) - -(def: #export end - {#.doc "Ensure the lexer's input is empty."} - (Lexer Any) - (function (_ (^@ input [offset tape])) - (if (n/= offset (//.size tape)) - (#error.Success [input []]) - (ex.throw unconsumed-input [offset tape])))) - -(def: #export end? - {#.doc "Ask if the lexer's input is empty."} - (Lexer Bit) - (function (_ (^@ input [offset tape])) - (#error.Success [input (n/= offset (//.size tape))]))) - -(def: #export peek - {#.doc "Lex the next character (without consuming it from the input)."} - (Lexer Text) - (function (_ (^@ input [offset tape])) - (case (//.nth offset tape) - (#.Some output) - (#error.Success [input (//.from-code output)]) - - _ - (#error.Failure cannot-lex-error)))) - -(def: #export get-input - {#.doc "Get all of the remaining input (without consuming it)."} - (Lexer Text) - (function (_ (^@ input [offset tape])) - (#error.Success [input (remaining offset tape)]))) - -(def: #export (range bottom top) - {#.doc "Only lex characters within a range."} - (-> Nat Nat (Lexer Text)) - (do p.monad - [char any - #let [char' (maybe.assume (//.nth 0 char))] - _ (p.assert ($_ //@compose "Character is not within range: " (//.from-code bottom) "-" (//.from-code top)) - (.and (n/>= bottom char') - (n/<= top char')))] - (wrap char))) - -(template [ ] - [(def: #export - {#.doc (code.text ($_ //@compose "Only lex " " characters."))} - (Lexer Text) - (range (char ) (char )))] - - [upper "A" "Z" "uppercase"] - [lower "a" "z" "lowercase"] - [decimal "0" "9" "decimal"] - [octal "0" "7" "octal"] - ) - -(def: #export alpha - {#.doc "Only lex alphabetic characters."} - (Lexer Text) - (p.either lower upper)) - -(def: #export alpha-num - {#.doc "Only lex alphanumeric characters."} - (Lexer Text) - (p.either alpha decimal)) - -(def: #export hexadecimal - {#.doc "Only lex hexadecimal digits."} - (Lexer Text) - ($_ p.either - decimal - (range (char "a") (char "f")) - (range (char "A") (char "F")))) - -(template [ ] - [(def: #export ( options) - {#.doc (code.text ($_ //@compose "Only lex characters that are" " part of a piece of text."))} - (-> Text (Lexer Text)) - (function (_ [offset tape]) - (case (//.nth offset tape) - (#.Some output) - (let [output (//.from-code output)] - (if ( (//.contains? output options)) - (#error.Success [[("lux i64 +" 1 offset) tape] output]) - (#error.Failure ($_ //@compose "Character (" output - ") is should " - "be one of: " options)))) - - _ - (#error.Failure cannot-lex-error))))] - - [one-of "" |>] - [none-of " not" .not] - ) - -(template [ ] - [(def: #export ( options) - {#.doc (code.text ($_ //@compose "Only lex characters that are" " part of a piece of text."))} - (-> Text (Lexer Slice)) - (function (_ [offset tape]) - (case (//.nth offset tape) - (#.Some output) - (let [output (//.from-code output)] - (if ( (//.contains? output options)) - (#error.Success [[("lux i64 +" 1 offset) tape] - {#basis offset - #distance 1}]) - (#error.Failure ($_ //@compose "Character (" output - ") is should " - "be one of: " options)))) - - _ - (#error.Failure cannot-lex-error))))] - - [one-of! "" |>] - [none-of! " not" .not] - ) - -(def: #export (satisfies p) - {#.doc "Only lex characters that satisfy a predicate."} - (-> (-> Nat Bit) (Lexer Text)) - (function (_ [offset tape]) - (case (//.nth offset tape) - (#.Some output) - (if (p output) - (#error.Success [[("lux i64 +" 1 offset) tape] (//.from-code output)]) - (#error.Failure ($_ //@compose "Character does not satisfy predicate: " (//.from-code output)))) - - _ - (#error.Failure cannot-lex-error)))) - -(def: #export space - {#.doc "Only lex white-space."} - (Lexer Text) - (satisfies //.space?)) - -(def: #export (and left right) - (-> (Lexer Text) (Lexer Text) (Lexer Text)) - (do p.monad - [=left left - =right right] - (wrap ($_ //@compose =left =right)))) - -(def: #export (and! left right) - (-> (Lexer Slice) (Lexer Slice) (Lexer Slice)) - (do p.monad - [[left::basis left::distance] left - [right::basis right::distance] right] - (wrap [left::basis ("lux i64 +" left::distance right::distance)]))) - -(template [ ] - [(def: #export ( lexer) - {#.doc (code.text ($_ //@compose "Lex " " characters as a single continuous text."))} - (-> (Lexer Text) (Lexer Text)) - (|> lexer (:: p.monad map //.concat)))] - - [some p.some "some"] - [many p.many "many"] - ) - -(template [ ] - [(def: #export ( lexer) - {#.doc (code.text ($_ //@compose "Lex " " characters as a single continuous text."))} - (-> (Lexer Slice) (Lexer Slice)) - (with-slices ( lexer)))] - - [some! p.some "some"] - [many! p.many "many"] - ) - -(template [ ] - [(def: #export ( amount lexer) - {#.doc (code.text ($_ //@compose "Lex " " N characters."))} - (-> Nat (Lexer Text) (Lexer Text)) - (|> lexer ( amount) (:: p.monad map //.concat)))] - - [exactly p.exactly "exactly"] - [at-most p.at-most "at most"] - [at-least p.at-least "at least"] - ) - -(template [ ] - [(def: #export ( amount lexer) - {#.doc (code.text ($_ //@compose "Lex " " N characters."))} - (-> Nat (Lexer Slice) (Lexer Slice)) - (with-slices ( amount lexer)))] - - [exactly! p.exactly "exactly"] - [at-most! p.at-most "at most"] - [at-least! p.at-least "at least"] - ) - -(def: #export (between from to lexer) - {#.doc "Lex between N and M characters."} - (-> Nat Nat (Lexer Text) (Lexer Text)) - (|> lexer (p.between from to) (:: p.monad map //.concat))) - -(def: #export (between! from to lexer) - {#.doc "Lex between N and M characters."} - (-> Nat Nat (Lexer Slice) (Lexer Slice)) - (with-slices (p.between from to lexer))) - -(def: #export (enclosed [start end] lexer) - (All [a] (-> [Text Text] (Lexer a) (Lexer a))) - (|> lexer - (p.before (this end)) - (p.after (this start)))) - -(def: #export (local local-input lexer) - {#.doc "Run a lexer with the given input, instead of the real one."} - (All [a] (-> Text (Lexer a) (Lexer a))) - (function (_ real-input) - (case (run local-input lexer) - (#error.Failure error) - (#error.Failure error) - - (#error.Success value) - (#error.Success [real-input value])))) - -(def: #export (slice lexer) - (-> (Lexer Slice) (Lexer Text)) - (do p.monad - [[basis distance] lexer] - (function (_ (^@ input [offset tape])) - (case (//.clip basis ("lux i64 +" basis distance) tape) - (#.Some output) - (#error.Success [input output]) - - #.None - (#error.Failure "Cannot slice."))))) diff --git a/stdlib/source/lux/data/text/regex.lux b/stdlib/source/lux/data/text/regex.lux index 73b6dab3b..ad648677d 100644 --- a/stdlib/source/lux/data/text/regex.lux +++ b/stdlib/source/lux/data/text/regex.lux @@ -3,7 +3,8 @@ [abstract monad] [control - ["p" parser ("#@." monad)]] + ["p" parser ("#@." monad) + ["l" text]]] [data ["." product] ["." error] @@ -16,7 +17,6 @@ ["." code] ["s" syntax (#+ syntax:)]]] ["." // - ["l" lexer] format]) (def: regex-char^ diff --git a/stdlib/source/lux/macro/poly/json.lux b/stdlib/source/lux/macro/poly/json.lux index e0e122eb1..1253ec328 100644 --- a/stdlib/source/lux/macro/poly/json.lux +++ b/stdlib/source/lux/macro/poly/json.lux @@ -6,7 +6,8 @@ ["." codec]] [control ["p" parser - ["<.>" type]]] + ["<.>" type] + ["l" text]]] [data ["." bit] maybe @@ -18,7 +19,6 @@ ["." nat ("#@." decimal)] ["." frac ("#@." decimal)]] ["." text ("#@." equivalence) - ["l" lexer] format] [format ["/" json (#+ JSON)]] diff --git a/stdlib/source/lux/math/modular.lux b/stdlib/source/lux/math/modular.lux index 24c7b0e72..07fa14c3d 100644 --- a/stdlib/source/lux/math/modular.lux +++ b/stdlib/source/lux/math/modular.lux @@ -5,13 +5,13 @@ [monad (#+ do)]] [control ["ex" exception (#+ exception:)] - ["p" parser]] + ["p" parser + ["l" text (#+ Lexer)]]] [data ["." error (#+ Error)] [number ["." int ("#@." decimal)]] - ["." text ("#@." monoid) - ["l" lexer (#+ Lexer)]]] + ["." text ("#@." monoid)]] [type abstract] [macro diff --git a/stdlib/source/lux/time/date.lux b/stdlib/source/lux/time/date.lux index fa7a40676..383988c14 100644 --- a/stdlib/source/lux/time/date.lux +++ b/stdlib/source/lux/time/date.lux @@ -7,15 +7,15 @@ codec [monad (#+ do)]] [control - ["p" parser ("#@." functor)]] + ["p" parser ("#@." functor) + ["l" text]]] [data ["." error (#+ Error)] ["." maybe] [number ["." nat ("#@." decimal)] ["." int ("#@." decimal)]] - ["." text ("#@." monoid) - ["l" lexer]] + ["." text ("#@." monoid)] [collection ["." row (#+ Row row)]]]] ["." // #_ diff --git a/stdlib/source/lux/time/duration.lux b/stdlib/source/lux/time/duration.lux index d420cfa0d..0bbdd94ea 100644 --- a/stdlib/source/lux/time/duration.lux +++ b/stdlib/source/lux/time/duration.lux @@ -7,13 +7,13 @@ [monoid (#+ Monoid)] [monad (#+ do)]] [control - ["p" parser]] + ["p" parser + ["l" text]]] [data [number ["." nat ("#@." decimal)] ["." int ("#@." decimal number)]] - ["." text ("#@." monoid) - ["l" lexer]] + ["." text ("#@." monoid)] ["e" error]] [type abstract]]) diff --git a/stdlib/source/lux/time/instant.lux b/stdlib/source/lux/time/instant.lux index 51087e4fc..fcde19a3f 100644 --- a/stdlib/source/lux/time/instant.lux +++ b/stdlib/source/lux/time/instant.lux @@ -8,14 +8,14 @@ [monad (#+ do Monad)]] [control [io (#+ IO io)] - ["p" parser]] + ["p" parser + ["l" text]]] [data ["." error (#+ Error)] ["." maybe] [number ["." int ("#@." decimal)]] - ["." text ("#@." monoid) - ["l" lexer]] + ["." text ("#@." monoid)] [collection ["." list ("#@." fold)] ["." row (#+ Row row) ("#@." functor fold)]]] diff --git a/stdlib/source/lux/tool/compiler/default/syntax.lux b/stdlib/source/lux/tool/compiler/default/syntax.lux index 4a3ee89d2..788a492cd 100644 --- a/stdlib/source/lux/tool/compiler/default/syntax.lux +++ b/stdlib/source/lux/tool/compiler/default/syntax.lux @@ -29,7 +29,9 @@ [abstract monad] [control - ["." exception (#+ exception:)]] + ["." exception (#+ exception:)] + [parser + [text (#+ Offset)]]] [data ["." maybe] ["." error (#+ Error)] @@ -39,7 +41,6 @@ ["." rev] ["." frac]] ["." text - [lexer (#+ Offset)] format] [collection ["." list] diff --git a/stdlib/source/lux/world/net/http/cookie.lux b/stdlib/source/lux/world/net/http/cookie.lux index 4806b9714..174231251 100644 --- a/stdlib/source/lux/world/net/http/cookie.lux +++ b/stdlib/source/lux/world/net/http/cookie.lux @@ -2,12 +2,12 @@ [lux #* [control [monad (#+ do)] - ["p" parser ("#;." monad)]] + ["p" parser ("#;." monad) + ["l" text (#+ Lexer)]]] [data ["." error (#+ Error)] [text - format - ["l" lexer (#+ Lexer)]] + format] [format ["." context (#+ Context)]] [collection diff --git a/stdlib/source/lux/world/net/http/query.lux b/stdlib/source/lux/world/net/http/query.lux index 716910c4a..8b0379108 100644 --- a/stdlib/source/lux/world/net/http/query.lux +++ b/stdlib/source/lux/world/net/http/query.lux @@ -3,14 +3,14 @@ [control pipe [monad (#+ do)] - ["p" parser]] + ["p" parser + ["l" text (#+ Lexer)]]] [data ["." error (#+ Error)] [number ["." nat]] ["." text - format - ["l" lexer (#+ Lexer)]] + format] [format ["." context (#+ Context)]] [collection diff --git a/stdlib/source/test/lux/control.lux b/stdlib/source/test/lux/control.lux index 9d95dc969..bacb4cb24 100644 --- a/stdlib/source/test/lux/control.lux +++ b/stdlib/source/test/lux/control.lux @@ -19,6 +19,8 @@ ["#." frp] ["#." actor] ["#." stm]] + ["#." parser #_ + ["#/." text]] [security ["#." privacy] ["#." integrity]] @@ -34,6 +36,12 @@ /stm.test )) +(def: parser + Test + ($_ _.and + /parser/text.test + )) + (def: security Test ($_ _.and @@ -57,5 +65,6 @@ /thread.test /writer.test ..concurrency + ..parser ..security )) diff --git a/stdlib/source/test/lux/control/parser/text.lux b/stdlib/source/test/lux/control/parser/text.lux new file mode 100644 index 000000000..3693b0fd0 --- /dev/null +++ b/stdlib/source/test/lux/control/parser/text.lux @@ -0,0 +1,171 @@ +(.module: + [lux #* + data/text/format + ["_" test (#+ Test)] + [abstract/monad (#+ do)] + [control + pipe + ["p" parser]] + [data + ["." error (#+ Error)] + ["." text ("#@." equivalence)] + [collection + ["." list]]] + [math + ["r" random]]] + {1 + ["." /]}) + +(def: (should-fail input) + (All [a] (-> (Error a) Bit)) + (case input + (#error.Failure _) + true + + _ + false)) + +(def: (should-pass reference sample) + (-> Text (Error Text) Bit) + (|> sample + (:: error.functor map (text@= reference)) + (error.default false))) + +(def: #export test + Test + (<| (_.context (%name (name-of /.Lexer))) + ($_ _.and + (_.test "Can detect the end of the input." + (|> (/.run "" + /.end) + (case> (#.Right _) true _ false))) + (do r.monad + [size (|> r.nat (:: @ map (|>> (n/% 100) (n/max 10)))) + sample (r.unicode size) + non-sample (|> (r.unicode size) + (r.filter (|>> (text@= sample) not)))] + ($_ _.and + (_.test "Won't mistake non-empty text for no more input." + (|> (/.run sample + /.end) + (case> (#.Left _) true _ false))) + (_.test "Can find literal text fragments." + (and (|> (/.run sample + (/.this sample)) + (case> (#.Right []) true _ false)) + (|> (/.run non-sample + (/.this sample)) + (case> (#.Left _) true _ false)))) + )) + ($_ _.and + (_.test "Can lex anything" + (and (should-pass "A" (/.run "A" + /.any)) + (should-fail (/.run "" + /.any)))) + + (_.test "Can lex characters ranges." + (and (should-pass "Y" (/.run "Y" + (/.range (char "X") (char "Z")))) + (should-fail (/.run "M" + (/.range (char "X") (char "Z")))))) + + (_.test "Can lex upper-case and lower-case letters." + (and (should-pass "Y" (/.run "Y" + /.upper)) + (should-fail (/.run "m" + /.upper)) + + (should-pass "y" (/.run "y" + /.lower)) + (should-fail (/.run "M" + /.lower)))) + + (_.test "Can lex numbers." + (and (should-pass "1" (/.run "1" + /.decimal)) + (should-fail (/.run " " + /.decimal)) + + (should-pass "7" (/.run "7" + /.octal)) + (should-fail (/.run "8" + /.octal)) + + (should-pass "1" (/.run "1" + /.hexadecimal)) + (should-pass "a" (/.run "a" + /.hexadecimal)) + (should-pass "A" (/.run "A" + /.hexadecimal)) + (should-fail (/.run " " + /.hexadecimal)) + )) + + (_.test "Can lex alphabetic characters." + (and (should-pass "A" (/.run "A" + /.alpha)) + (should-pass "a" (/.run "a" + /.alpha)) + (should-fail (/.run "1" + /.alpha)))) + + (_.test "Can lex alphanumeric characters." + (and (should-pass "A" (/.run "A" + /.alpha-num)) + (should-pass "a" (/.run "a" + /.alpha-num)) + (should-pass "1" (/.run "1" + /.alpha-num)) + (should-fail (/.run " " + /.alpha-num)))) + + (_.test "Can lex white-space." + (and (should-pass " " (/.run " " + /.space)) + (should-fail (/.run "8" + /.space)))) + ) + ($_ _.and + (_.test "Can combine lexers sequentially." + (and (|> (/.run "YO" + (p.and /.any /.any)) + (case> (#.Right ["Y" "O"]) true + _ false)) + (should-fail (/.run "Y" + (p.and /.any /.any))))) + + (_.test "Can create the opposite of a lexer." + (and (should-pass "a" (/.run "a" + (/.not (p.or /.decimal /.upper)))) + (should-fail (/.run "A" + (/.not (p.or /.decimal /.upper)))))) + + (_.test "Can select from among a set of characters." + (and (should-pass "C" (/.run "C" + (/.one-of "ABC"))) + (should-fail (/.run "D" + (/.one-of "ABC"))))) + + (_.test "Can avoid a set of characters." + (and (should-pass "D" (/.run "D" + (/.none-of "ABC"))) + (should-fail (/.run "C" + (/.none-of "ABC"))))) + + (_.test "Can lex using arbitrary predicates." + (and (should-pass "D" (/.run "D" + (/.satisfies (function (_ c) true)))) + (should-fail (/.run "C" + (/.satisfies (function (_ c) false)))))) + + (_.test "Can apply a lexer multiple times." + (and (should-pass "0123456789ABCDEF" (/.run "0123456789ABCDEF" + (/.many /.hexadecimal))) + (should-fail (/.run "yolo" + (/.many /.hexadecimal))) + + (should-pass "" (/.run "" + (/.some /.hexadecimal))))) + ) + ))) diff --git a/stdlib/source/test/lux/data.lux b/stdlib/source/test/lux/data.lux index 92680f03a..a29358b6b 100644 --- a/stdlib/source/test/lux/data.lux +++ b/stdlib/source/test/lux/data.lux @@ -20,7 +20,6 @@ ["#." ratio] ["#." complex]] ["#." text - ["#/." lexer] ["#/." regex]] [format ["#." json] @@ -42,7 +41,6 @@ (def: text ($_ _.and /text.test - /text/lexer.test /text/regex.test )) diff --git a/stdlib/source/test/lux/data/text/lexer.lux b/stdlib/source/test/lux/data/text/lexer.lux deleted file mode 100644 index 3693b0fd0..000000000 --- a/stdlib/source/test/lux/data/text/lexer.lux +++ /dev/null @@ -1,171 +0,0 @@ -(.module: - [lux #* - data/text/format - ["_" test (#+ Test)] - [abstract/monad (#+ do)] - [control - pipe - ["p" parser]] - [data - ["." error (#+ Error)] - ["." text ("#@." equivalence)] - [collection - ["." list]]] - [math - ["r" random]]] - {1 - ["." /]}) - -(def: (should-fail input) - (All [a] (-> (Error a) Bit)) - (case input - (#error.Failure _) - true - - _ - false)) - -(def: (should-pass reference sample) - (-> Text (Error Text) Bit) - (|> sample - (:: error.functor map (text@= reference)) - (error.default false))) - -(def: #export test - Test - (<| (_.context (%name (name-of /.Lexer))) - ($_ _.and - (_.test "Can detect the end of the input." - (|> (/.run "" - /.end) - (case> (#.Right _) true _ false))) - (do r.monad - [size (|> r.nat (:: @ map (|>> (n/% 100) (n/max 10)))) - sample (r.unicode size) - non-sample (|> (r.unicode size) - (r.filter (|>> (text@= sample) not)))] - ($_ _.and - (_.test "Won't mistake non-empty text for no more input." - (|> (/.run sample - /.end) - (case> (#.Left _) true _ false))) - (_.test "Can find literal text fragments." - (and (|> (/.run sample - (/.this sample)) - (case> (#.Right []) true _ false)) - (|> (/.run non-sample - (/.this sample)) - (case> (#.Left _) true _ false)))) - )) - ($_ _.and - (_.test "Can lex anything" - (and (should-pass "A" (/.run "A" - /.any)) - (should-fail (/.run "" - /.any)))) - - (_.test "Can lex characters ranges." - (and (should-pass "Y" (/.run "Y" - (/.range (char "X") (char "Z")))) - (should-fail (/.run "M" - (/.range (char "X") (char "Z")))))) - - (_.test "Can lex upper-case and lower-case letters." - (and (should-pass "Y" (/.run "Y" - /.upper)) - (should-fail (/.run "m" - /.upper)) - - (should-pass "y" (/.run "y" - /.lower)) - (should-fail (/.run "M" - /.lower)))) - - (_.test "Can lex numbers." - (and (should-pass "1" (/.run "1" - /.decimal)) - (should-fail (/.run " " - /.decimal)) - - (should-pass "7" (/.run "7" - /.octal)) - (should-fail (/.run "8" - /.octal)) - - (should-pass "1" (/.run "1" - /.hexadecimal)) - (should-pass "a" (/.run "a" - /.hexadecimal)) - (should-pass "A" (/.run "A" - /.hexadecimal)) - (should-fail (/.run " " - /.hexadecimal)) - )) - - (_.test "Can lex alphabetic characters." - (and (should-pass "A" (/.run "A" - /.alpha)) - (should-pass "a" (/.run "a" - /.alpha)) - (should-fail (/.run "1" - /.alpha)))) - - (_.test "Can lex alphanumeric characters." - (and (should-pass "A" (/.run "A" - /.alpha-num)) - (should-pass "a" (/.run "a" - /.alpha-num)) - (should-pass "1" (/.run "1" - /.alpha-num)) - (should-fail (/.run " " - /.alpha-num)))) - - (_.test "Can lex white-space." - (and (should-pass " " (/.run " " - /.space)) - (should-fail (/.run "8" - /.space)))) - ) - ($_ _.and - (_.test "Can combine lexers sequentially." - (and (|> (/.run "YO" - (p.and /.any /.any)) - (case> (#.Right ["Y" "O"]) true - _ false)) - (should-fail (/.run "Y" - (p.and /.any /.any))))) - - (_.test "Can create the opposite of a lexer." - (and (should-pass "a" (/.run "a" - (/.not (p.or /.decimal /.upper)))) - (should-fail (/.run "A" - (/.not (p.or /.decimal /.upper)))))) - - (_.test "Can select from among a set of characters." - (and (should-pass "C" (/.run "C" - (/.one-of "ABC"))) - (should-fail (/.run "D" - (/.one-of "ABC"))))) - - (_.test "Can avoid a set of characters." - (and (should-pass "D" (/.run "D" - (/.none-of "ABC"))) - (should-fail (/.run "C" - (/.none-of "ABC"))))) - - (_.test "Can lex using arbitrary predicates." - (and (should-pass "D" (/.run "D" - (/.satisfies (function (_ c) true)))) - (should-fail (/.run "C" - (/.satisfies (function (_ c) false)))))) - - (_.test "Can apply a lexer multiple times." - (and (should-pass "0123456789ABCDEF" (/.run "0123456789ABCDEF" - (/.many /.hexadecimal))) - (should-fail (/.run "yolo" - (/.many /.hexadecimal))) - - (should-pass "" (/.run "" - (/.some /.hexadecimal))))) - ) - ))) diff --git a/stdlib/source/test/lux/data/text/regex.lux b/stdlib/source/test/lux/data/text/regex.lux index a683c446f..eeec23d2f 100644 --- a/stdlib/source/test/lux/data/text/regex.lux +++ b/stdlib/source/test/lux/data/text/regex.lux @@ -5,12 +5,12 @@ [abstract/monad (#+ do)] [control pipe - ["p" parser]] + ["p" parser + ["<.>" text (#+ Lexer)]]] [data [number (#+ hex)] ["." error] - ["." text ("#@." equivalence) - ["." lexer (#+ Lexer)]]] + ["." text ("#@." equivalence)]] [math ["r" random]] ["." macro @@ -20,7 +20,7 @@ (def: (should-pass regex input) (-> (Lexer Text) Text Bit) - (|> (lexer.run input regex) + (|> (.run input regex) (case> (#error.Success parsed) (text@= parsed input) @@ -29,7 +29,7 @@ (def: (text-should-pass test regex input) (-> Text (Lexer Text) Text Bit) - (|> (lexer.run input regex) + (|> (.run input regex) (case> (#error.Success parsed) (text@= test parsed) @@ -38,7 +38,7 @@ (def: (should-fail regex input) (All [a] (-> (Lexer a) Text Bit)) - (|> (lexer.run input regex) + (|> (.run input regex) (case> (#error.Failure _) true @@ -47,7 +47,7 @@ (syntax: (should-check pattern regex input) (macro.with-gensyms [g!message g!_] - (wrap (list (` (|> (lexer.run (~ input) (~ regex)) + (wrap (list (` (|> (.run (~ input) (~ regex)) (case> (^ (#error.Success (~ pattern))) true diff --git a/stdlib/source/test/lux/tool/compiler/default/syntax.lux b/stdlib/source/test/lux/tool/compiler/default/syntax.lux index 9f36c551f..a0005cc64 100644 --- a/stdlib/source/test/lux/tool/compiler/default/syntax.lux +++ b/stdlib/source/test/lux/tool/compiler/default/syntax.lux @@ -6,10 +6,12 @@ ["." name]] ["r" math/random (#+ Random) ("#@." monad)] ["_" test (#+ Test)] + [control + [parser + ["l" text]]] [data ["." error] - ["." text - ["l" lexer]] + ["." text] [collection ["." list] ["." dictionary (#+ Dictionary)]]] -- cgit v1.2.3