aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEduardo Julian2018-08-22 22:13:21 -0400
committerEduardo Julian2018-08-22 22:13:21 -0400
commitd2efa1fd37efa50a460dfa609ddd274d82d082e3 (patch)
treec524d0bbaa258db8a3ce1a5e8f923aa0c9aa136f
parentb60d60ef6c0c70821991991fe716935e73038832 (diff)
Partial implementation of text-escaping.
-rw-r--r--stdlib/source/lux/compiler/default/syntax.lux491
1 files changed, 283 insertions, 208 deletions
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux
index 3b3b3e411..f333917d8 100644
--- a/stdlib/source/lux/compiler/default/syntax.lux
+++ b/stdlib/source/lux/compiler/default/syntax.lux
@@ -50,35 +50,6 @@
(type: #export Syntax
(-> Cursor (Lexer [Cursor Code])))
-(def: (timed' description lexer)
- (All [a]
- (-> Text (Lexer a) (Lexer a)))
- (do p.Monad<Parser>
- [_ (wrap [])
- #let [pre (io.run instant.now)]
- output lexer
- #let [_ (log! (|> instant.now
- io.run
- instant.relative
- (duration.difference (instant.relative pre))
- %duration
- (format "[" description "]: ")))]]
- (wrap output)))
-
-(def: (timed description lexer)
- (-> Text (Lexer [Cursor Code]) (Lexer [Cursor Code]))
- (do p.Monad<Parser>
- [_ (wrap [])
- #let [pre (io.run instant.now)]
- [where output] lexer
- #let [_ (log! (|> instant.now
- io.run
- instant.relative
- (duration.difference (instant.relative pre))
- %duration
- (format (%code output) " [" description "]: ")))]]
- (wrap [where output])))
-
(type: #export Aliases (Dictionary Text Text))
(def: #export no-aliases Aliases (dictionary.new text.Hash<Text>))
@@ -90,10 +61,10 @@
(def: white-space " ")
(def: carriage-return "\r")
(def: new-line "\n")
-(def: new-line^ (l.this new-line))
+## (def: new-line^ (l.this new-line))
(def: text-delimiter "\"")
-(def: text-delimiter^ (l.this text-delimiter))
+## (def: text-delimiter^ (l.this text-delimiter))
(def: open-form "(")
(def: close-form ")")
@@ -112,50 +83,50 @@
(def: single-line-comment-marker (format ..sigil ..sigil))
-## This is the parser for white-space.
-## Whenever a new-line is encountered, the column gets reset to 0, and
-## the line gets incremented.
-## It operates recursively in order to produce the longest continuous
-## chunk of white-space.
-(def: (space^ where)
- (-> Cursor (Lexer Cursor))
- (p.either (do p.Monad<Parser>
- [content (l.many! (l.one-of! white-space))]
- (wrap (update@ #.column (n/+ (get@ #l.distance content)) where)))
- ## New-lines must be handled as a separate case to ensure line
- ## information is handled properly.
- (do p.Monad<Parser>
- [content (l.many! (l.one-of! new-line))]
- (wrap (|> where
- (update@ #.line (n/+ (get@ #l.distance content)))
- (set@ #.column 0))))))
-
-## Single-line comments can start anywhere, but only go up to the
-## next new-line.
-(def: (comment^ where)
- (-> Cursor (Lexer Cursor))
- (do p.Monad<Parser>
- [_ (l.this ..single-line-comment-marker)
- _ (l.some! (l.none-of! new-line))
- _ ..new-line^]
- (wrap (|> where
- (update@ #.line inc)
- (set@ #.column 0)))))
-
-## To simplify parsing, I remove any left-padding that a Code token
-## may have prior to parsing the token itself.
-## Left-padding is assumed to be either white-space or a comment.
-## The cursor gets updated, but the padding gets ignored.
-(def: (left-padding^ where)
- (-> Cursor (Lexer Cursor))
- ($_ p.either
- (do p.Monad<Parser>
- [where (comment^ where)]
- (left-padding^ where))
- (do p.Monad<Parser>
- [where (space^ where)]
- (left-padding^ where))
- (:: p.Monad<Parser> wrap where)))
+## ## This is the parser for white-space.
+## ## Whenever a new-line is encountered, the column gets reset to 0, and
+## ## the line gets incremented.
+## ## It operates recursively in order to produce the longest continuous
+## ## chunk of white-space.
+## (def: (space^ where)
+## (-> Cursor (Lexer Cursor))
+## (p.either (do p.Monad<Parser>
+## [content (l.many! (l.one-of! white-space))]
+## (wrap (update@ #.column (n/+ (get@ #l.distance content)) where)))
+## ## New-lines must be handled as a separate case to ensure line
+## ## information is handled properly.
+## (do p.Monad<Parser>
+## [content (l.many! (l.one-of! new-line))]
+## (wrap (|> where
+## (update@ #.line (n/+ (get@ #l.distance content)))
+## (set@ #.column 0))))))
+
+## ## Single-line comments can start anywhere, but only go up to the
+## ## next new-line.
+## (def: (comment^ where)
+## (-> Cursor (Lexer Cursor))
+## (do p.Monad<Parser>
+## [_ (l.this ..single-line-comment-marker)
+## _ (l.some! (l.none-of! new-line))
+## _ ..new-line^]
+## (wrap (|> where
+## (update@ #.line inc)
+## (set@ #.column 0)))))
+
+## ## To simplify parsing, I remove any left-padding that a Code token
+## ## may have prior to parsing the token itself.
+## ## Left-padding is assumed to be either white-space or a comment.
+## ## The cursor gets updated, but the padding gets ignored.
+## (def: (left-padding^ where)
+## (-> Cursor (Lexer Cursor))
+## ($_ p.either
+## (do p.Monad<Parser>
+## [where (comment^ where)]
+## (left-padding^ where))
+## (do p.Monad<Parser>
+## [where (space^ where)]
+## (left-padding^ where))
+## (:: p.Monad<Parser> wrap where)))
## Escaped character sequences follow the usual syntax of
## back-slash followed by a letter (e.g. \n).
@@ -263,78 +234,78 @@
(wrap [(update@ #.column (n/+ (text.size chunk)) where)
[where (#.Frac value)]]))))
-## This parser looks so complex because text in Lux can be multi-line
-## and there are rules regarding how this is handled.
-(def: #export (text where)
- Syntax
- (do p.Monad<Parser>
- [## Lux text "is delimited by double-quotes", as usual in most
- ## programming languages.
- _ ..text-delimiter^
- ## I must know what column the text body starts at (which is
- ## always 1 column after the left-delimiting quote).
- ## This is important because, when procesing subsequent lines,
- ## they must all start at the same column, being left-padded with
- ## as many spaces as necessary to be column-aligned.
- ## This helps ensure that the formatting on the text in the
- ## source-code matches the formatting of the Text value.
- #let [offset ("lux i64 +" 1 (get@ #.column where))]
- [where' text-read] (: (Lexer [Cursor Text])
- ## I must keep track of how much of the
- ## text body has been read, how far the
- ## cursor has progressed, and whether I'm
- ## processing a subsequent line, or just
- ## processing normal text body.
- (loop [text-read ""
- where (|> where
- (update@ #.column inc))
- must-have-offset? #0]
- (p.either (if must-have-offset?
- ## If I'm at the start of a
- ## new line, I must ensure the
- ## space-offset is at least
- ## as great as the column of
- ## the text's body's column,
- ## to ensure they are aligned.
- (do @
- [_ (p.exactly offset (l.this ..white-space))]
- (recur text-read
- (update@ #.column (n/+ offset) where)
- #0))
- ($_ p.either
- ## Normal text characters.
- (do @
- [normal (l.slice (l.many! (l.none-of! (format ..escape ..text-delimiter ..new-line))))]
- (recur (format text-read normal)
- (update@ #.column (n/+ (text.size normal)) where)
- #0))
- ## Must handle escaped
- ## chars separately.
- (do @
- [[chars-consumed char] escaped-char^]
- (recur (format text-read char)
- (update@ #.column (n/+ chars-consumed) where)
- #0))
- ## The text ends when it
- ## reaches the right-delimiter.
- (do @
- [_ ..text-delimiter^]
- (wrap [(update@ #.column inc where)
- text-read]))))
- ## If a new-line is
- ## encountered, it gets
- ## appended to the value and
- ## the loop is alerted that the
- ## next line must have an offset.
- (do @
- [_ ..new-line^]
- (recur (format text-read new-line)
- (|> where
- (update@ #.line inc)
- (set@ #.column 0))
- #1)))))]
- (wrap [where'
- [where (#.Text text-read)]])))
+## ## This parser looks so complex because text in Lux can be multi-line
+## ## and there are rules regarding how this is handled.
+## (def: #export (text where)
+## Syntax
+## (do p.Monad<Parser>
+## [## Lux text "is delimited by double-quotes", as usual in most
+## ## programming languages.
+## _ ..text-delimiter^
+## ## I must know what column the text body starts at (which is
+## ## always 1 column after the left-delimiting quote).
+## ## This is important because, when procesing subsequent lines,
+## ## they must all start at the same column, being left-padded with
+## ## as many spaces as necessary to be column-aligned.
+## ## This helps ensure that the formatting on the text in the
+## ## source-code matches the formatting of the Text value.
+## #let [offset ("lux i64 +" 1 (get@ #.column where))]
+## [where' text-read] (: (Lexer [Cursor Text])
+## ## I must keep track of how much of the
+## ## text body has been read, how far the
+## ## cursor has progressed, and whether I'm
+## ## processing a subsequent line, or just
+## ## processing normal text body.
+## (loop [text-read ""
+## where (|> where
+## (update@ #.column inc))
+## must-have-offset? #0]
+## (p.either (if must-have-offset?
+## ## If I'm at the start of a
+## ## new line, I must ensure the
+## ## space-offset is at least
+## ## as great as the column of
+## ## the text's body's column,
+## ## to ensure they are aligned.
+## (do @
+## [_ (p.exactly offset (l.this ..white-space))]
+## (recur text-read
+## (update@ #.column (n/+ offset) where)
+## #0))
+## ($_ p.either
+## ## Normal text characters.
+## (do @
+## [normal (l.slice (l.many! (l.none-of! (format ..escape ..text-delimiter ..new-line))))]
+## (recur (format text-read normal)
+## (update@ #.column (n/+ (text.size normal)) where)
+## #0))
+## ## Must handle escaped
+## ## chars separately.
+## (do @
+## [[chars-consumed char] escaped-char^]
+## (recur (format text-read char)
+## (update@ #.column (n/+ chars-consumed) where)
+## #0))
+## ## The text ends when it
+## ## reaches the right-delimiter.
+## (do @
+## [_ ..text-delimiter^]
+## (wrap [(update@ #.column inc where)
+## text-read]))))
+## ## If a new-line is
+## ## encountered, it gets
+## ## appended to the value and
+## ## the loop is alerted that the
+## ## next line must have an offset.
+## (do @
+## [_ ..new-line^]
+## (recur (format text-read new-line)
+## (|> where
+## (update@ #.line inc)
+## (set@ #.column 0))
+## #1)))))]
+## (wrap [where'
+## [where (#.Text text-read)]])))
(def: (composite open close element)
(All [a]
@@ -347,23 +318,20 @@
(do p.Monad<Parser>
[_ open^]
(loop [where (update@ #.column inc where)]
- (p.either (<| (timed' "composite CONS")
- (do @
- [## Must update the cursor as I
- ## go along, to keep things accurate.
- [where' head] (<| (timed' "composite HEAD")
- (element where))]
- (parser/map (product.both id (|>> (#.Cons head)))
- (recur where'))))
- (<| (timed' "composite NIL")
- (do @
- [## Must take into account any
- ## padding present before the
- ## end-delimiter.
- where' (left-padding^ where)
- _ close^]
- (wrap [(update@ #.column inc where')
- #.Nil])))))))))
+ (p.either (do @
+ [## Must update the cursor as I
+ ## go along, to keep things accurate.
+ [where' head] (element where)]
+ (parser/map (product.both id (|>> (#.Cons head)))
+ (recur where')))
+ (do @
+ [## Must take into account any
+ ## padding present before the
+ ## end-delimiter.
+ ## where (left-padding^ where)
+ _ close^]
+ (wrap [(update@ #.column inc where)
+ #.Nil]))))))))
## (do-template [<name> <tag> <open> <close>]
## [(def: (<name> ast where)
@@ -516,34 +484,30 @@
["Line" (%n line)]
["Column" (%n column)]))
+(exception: #export (text-cannot-contain-new-lines {text Text})
+ (ex.report ["Text" (%t text)]))
+
+(exception: #export (invalid-escape-syntax)
+ "")
+
(def: (ast current-module aliases)
(-> Text Aliases Syntax)
(function (ast' where)
(do p.Monad<Parser>
- [where (left-padding^ where)]
+ [## where (left-padding^ where)
+ ]
($_ p.either
- ## (<| (..timed "bit")
- ## (..bit where))
- ## (<| (..timed "nat")
- ## (..nat where))
- (<| (..timed "frac")
- (..frac where))
- (<| (..timed "rev")
- (..rev where))
- (<| (..timed "int")
- (..int where))
- (<| (..timed "text")
- (..text where))
- ## (<| (..timed "identifier")
- ## (..identifier current-module aliases where))
- ## (<| (..timed "tag")
- ## (..tag current-module aliases where))
- ## (<| (..timed "form")
- ## (..form ast' where))
- ## (<| (..timed "tuple")
- ## (..tuple ast' where))
- (<| (..timed "record")
- (..record ast' where))
+ ## (..bit where)
+ ## (..nat where)
+ (..frac where)
+ (..rev where)
+ (..int where)
+ ## (..text where)
+ ## (..identifier current-module aliases where)
+ ## (..tag current-module aliases where)
+ ## (..form ast' where)
+ ## (..tuple ast' where)
+ (..record ast' where)
(do @
[end? l.end?]
(if end?
@@ -552,11 +516,11 @@
))))
(type: Tracker
- {#new-line Offset})
+ {#next-escape Offset})
(def: fresh-tracker
Tracker
- {#new-line 0})
+ {#next-escape 0})
(type: (Simple a)
(-> Tracker Source (Error [Tracker Source a])))
@@ -609,29 +573,142 @@
)
(template: (!clip from to text)
- ## TODO: Optimize away "maybe.assume"
+ ## TODO: Optimize-away "maybe.assume"
(maybe.assume ("lux text clip" text from to)))
-(def: (read-text tracker [where offset source-code])
+(template: (!i/< reference subject)
+ ("lux int <" subject reference))
+
+(do-template [<name> <extension>]
+ [(template: (<name> param subject)
+ (<extension> subject param))]
+
+ [!n/+ "lux i64 +"]
+ [!n/- "lux i64 -"]
+ )
+
+(with-expansions [<finish-text> ($_ "lux text concat" total output (!clip g!post-escape end source-code))]
+ (template: (!find-next-escape diff current-escape end source-code total output)
+ (let [g!post-escape (!n/+ diff current-escape)]
+ (case ("lux text index" source-code (static ..escape) g!post-escape)
+ ## More escaping work needs to be done
+ (#.Some g!next-escape)
+ (if (!i/< (:coerce Int end)
+ (:coerce Int g!next-escape))
+ ## For the current text.
+ (recur end g!next-escape ($_ "lux text concat" total output (!clip g!post-escape g!next-escape source-code)))
+ ## For another text.
+ (#error.Success [g!next-escape (!inc end) <finish-text>]))
+
+ ## No more escaping... ever!
+ _
+ (#error.Success [("lux text size" source-code) (!inc end) <finish-text>])))))
+
+(template: (!guarantee-no-new-lines content body)
+ (case ("lux text index" content (static ..new-line) 0)
+ (#.Some g!_)
+ (ex.throw ..text-cannot-contain-new-lines content)
+
+ g!_
+ body))
+
+(def: (read-escaped-text next-escape end offset source-code)
+ (-> Offset Offset Offset Text (Error [Offset Offset Text]))
+ (with-expansions [<escape-start> (!n/+ 1 next-escape)
+ <escape-end> (!n/+ 5 next-escape)]
+ (loop [end end
+ next-escape next-escape
+ total (!clip offset next-escape source-code)]
+ ## TODO: Optimize-away "maybe.assume"
+ (`` (case (maybe.assume ("lux text char" source-code <escape-start>))
+ (^template [<input> <output>]
+ (^ (char <input>))
+ (!find-next-escape 2 next-escape end source-code total <output>))
+ (["t" "\t"]
+ ["v" "\v"]
+ ["b" "\b"]
+ ["n" (static ..new-line)]
+ ["r" "\r"]
+ ["f" "\f"]
+ [(~~ (static ..escape)) (static ..escape)])
+
+ (^ (char (~~ (static ..text-delimiter))))
+ (case (!find-next-escape 2 next-escape end source-code total (static ..text-delimiter))
+ (#error.Error error)
+ (#error.Error error)
+
+ (#error.Success [next-escape' post-delimiter so-far])
+ (case ("lux text index" source-code (static ..text-delimiter) post-delimiter)
+ (#.Some end')
+ (recur end' next-escape' so-far)
+
+ _
+ (ex.throw invalid-escape-syntax [])))
+
+ ## Handle unicode escapes.
+ (^ (char "u"))
+ (if (!i/< (:coerce Int end)
+ (:coerce Int <escape-end>))
+ (case (:: number.Hex@Codec<Text,Nat> decode (!clip <escape-start> <escape-end> source-code))
+ (#error.Success value)
+ (!find-next-escape 6 next-escape end source-code total (text.from-code value))
+
+ (#error.Error error)
+ (#error.Error error))
+ (ex.throw invalid-escape-syntax []))
+
+ _
+ (ex.throw invalid-escape-syntax []))))))
+
+(def: (read-text next-escape (^@ source [where offset source-code]))
(Simple Code)
- (case ("lux text index" source-code (static ..text-delimiter) offset)
- (#.Some end)
- (#error.Success [tracker
- [(update@ #.column (n/+ ("lux i64 -" end offset)) where)
- (!inc end)
- source-code]
- [where
- (#.Text (!clip offset end source-code))]])
-
- _
- (ex.throw unrecognized-input where)))
+ (if (!i/< (:coerce Int offset)
+ (:coerce Int next-escape))
+ ## Must update next-escape.
+ (case ("lux text index" source-code (static ..escape) offset)
+ ## There is a escape further down the road.
+ (#.Some next-escape')
+ (read-text next-escape' source)
+
+ ## There are no escapes left.
+ _
+ (read-text ("lux text size" source-code) source))
+ (case ("lux text index" source-code (static ..text-delimiter) offset)
+ (#.Some end)
+ (if (!i/< (:coerce Int end)
+ (:coerce Int next-escape))
+ ## Must handle escape
+ (case (read-escaped-text next-escape end offset source-code)
+ (#error.Error error)
+ (#error.Error error)
+
+ (#error.Success [next-escape' offset' content])
+ (<| (!guarantee-no-new-lines content)
+ (#error.Success [next-escape'
+ [(update@ #.column (n/+ (!n/- offset offset')) where)
+ offset'
+ source-code]
+ [where
+ (#.Text content)]])))
+ ## No escape to handle at the moment.
+ (let [content (!clip offset end source-code)]
+ (<| (!guarantee-no-new-lines content)
+ (#error.Success [next-escape
+ [(update@ #.column (n/+ (!n/- offset end)) where)
+ (!inc end)
+ source-code]
+ [where
+ (#.Text content)]]))))
+
+ _
+ (ex.throw unrecognized-input where))))
(def: digit-bottom Nat (!dec (char "0")))
(def: digit-top Nat (!inc (char "9")))
(template: (!digit? char)
- (and ("lux int <" (:coerce Int (static ..digit-bottom)) (:coerce Int char))
- ("lux int <" (:coerce Int char) (:coerce Int (static ..digit-top)))))
+ (and (!i/< (:coerce Int char) (:coerce Int (static ..digit-bottom)))
+ (!i/< (:coerce Int (static ..digit-top)) (:coerce Int char))))
(`` (template: (!digit?+ char)
(or (!digit? char)
@@ -666,7 +743,7 @@
(with-expansions [<output> (case (:: number.Codec<Text,Nat> decode (!clip start end source-code))
(#error.Success output)
(#error.Success [tracker
- [(update@ #.column (n/+ ("lux i64 -" end start)) where)
+ [(update@ #.column (n/+ (!n/- start end)) where)
end
source-code]
[where (#.Nat output)]])
@@ -685,7 +762,7 @@
_
<output>))))
-(with-expansions [<output> (#error.Success [[(update@ #.column (n/+ ("lux i64 -" end start)) where)
+(with-expansions [<output> (#error.Success [[(update@ #.column (n/+ (!n/- start end)) where)
end
source-code]
(!clip start end source-code)])]
@@ -890,5 +967,3 @@
## (#error.Success [[offset' remaining] [where' output]])
## (#error.Success [[where' offset' remaining] output])))
-
-## (yolo)