From b614f2875fb2e98e8867399b7013503f2b1a4e4c Mon Sep 17 00:00:00 2001
From: Eduardo Julian
Date: Sun, 26 Aug 2018 09:12:46 -0400
Subject: Added Frac parsing.

---
 .../default/phase/extension/analysis/common.lux |   2 +-
 stdlib/source/lux/compiler/default/syntax.lux   | 104 ++++++++++++++-------
 2 files changed, 71 insertions(+), 35 deletions(-)

(limited to 'stdlib')

diff --git a/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux b/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux
index c654d9a00..d599af130 100644
--- a/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux
+++ b/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux
@@ -201,7 +201,7 @@
           (bundle.install "concat" (binary Text Text Text))
           (bundle.install "index" (trinary Text Text Nat (type (Maybe Nat))))
           (bundle.install "size" (unary Text Nat))
-          (bundle.install "char" (binary Text Nat (type (Maybe Nat))))
+          (bundle.install "char" (binary Text Nat Nat))
           (bundle.install "clip" (trinary Text Nat Nat Text))
           )))
 
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux
index 8cb41536e..52ac38720 100644
--- a/stdlib/source/lux/compiler/default/syntax.lux
+++ b/stdlib/source/lux/compiler/default/syntax.lux
@@ -40,6 +40,18 @@
     ["." list]
     ["." dictionary (#+ Dictionary)]]]])
 
+## TODO: Optimize how forms, tuples & records are parsed in the end.
+## There is repeated work going on when parsing the white-space before the
+## closing parenthesis/bracket/brace.
+## That repeated work should be avoided.
+
+## TODO: Implement "lux syntax char case!" as a custom extension.
+## That way, it should be possible to obtain the char without wrapping
+## it into a java.lang.Long, thereby improving performance.
+
+## TODO: Make an extension to take advantage of java/lang/String::indexOf
+## to get better performance than the current "lux text index" extension.
+
 (type: Char Nat)
 
 (do-template [ ]
@@ -70,15 +82,6 @@
   [!n/- "lux i64 -"]
   )
 
-## TODO: Optimize how forms, tuples & records are parsed in the end.
-## There is repeated-work going on when parsing the space before the
-## closing parenthesis/bracket/brace.
-## That repeated-work should be avoided.
-
-## TODO: Implement "lux syntax char case!" as a custom extension.
-## That way, it should be possible to obtain the char without wrapping
-## it into a java.lang.Long, thereby improving performance.
-
 (type: #export Syntax
   (-> Cursor (Lexer [Cursor Code])))
 
@@ -107,6 +110,17 @@
 (def: #export positive-sign "+")
 (def: #export negative-sign "-")
 
+(def: #export frac-separator ".") ## The mark at which parse-signed hands a number over to parse-frac.
+
+## The parts of a name are separated by a single mark.
+## E.g. module.short.
+## Only one such mark may be used in a name, since there
+## can only be 2 parts to a name (the module [before the
+## mark], and the short [after the mark]).
+## There are also some extra rules regarding name syntax,
+## encoded in the parser.
+(def: #export name-separator ".")
+
 ## These are very simple parsers that just cut chunks of text in
 ## specific shapes and then use decoders already present in the
 ## standard library to actually produce the values from the literals.
@@ -143,15 +157,6 @@
     (wrap [(update@ #.column (n/+ (text.size chunk)) where)
            [where (#.Frac value)]]))))
 
-## The parts of an name are separated by a single mark.
-## E.g. module.short.
-## Only one such mark may be used in an name, since there
-## can only be 2 parts to an name (the module [before the
-## mark], and the short [after the mark]).
-## There are also some extra rules regarding name syntax,
-## encoded on the parser.
-(def: name-separator ".")
-
 (exception: #export (end-of-file {module Text})
   (ex.report ["Module" (%t module)]))
 
@@ -324,7 +329,7 @@
   (or (!strict-name-char? char)
       (!digit? 
char)))
 
-(template: (!discrete-output )
+(template: (!number-output )
   (case (:: decode (!clip source-code))
     (#error.Success output)
     (#error.Success [[(update@ #.column (n/+ (!n/- )) where)
@@ -335,26 +340,56 @@
       (#error.Error error)
       (#error.Error error)))
 
-(do-template [ ]
-  [(def: ( start [where offset source-code])
-     (-> Offset Parser)
-     (let [source-code//size ("lux text size" source-code)]
-       (loop [end offset]
-         (<| (!with-char+ source-code//size source-code end char (!discrete-output start end ))
-             (if (!digit?+ char)
-               (recur (!inc end))
-               (!discrete-output start end ))))))]
-
-  [parse-int number.Codec #.Int]
-  )
+(def: no-exponent Offset 0) ## Sentinel for parse-frac's loop: no exponent marker has been consumed so far.
+
+(with-expansions [ (as-is (!number-output start end number.Codec #.Int))
+                   (as-is (!number-output start end number.Codec #.Frac))
+                   (ex.throw unrecognized-input [where "Frac" source-code offset])]
+  (def: (parse-frac source-code//size start [where offset source-code]) ## Scans the part of a number that follows the frac-separator.
+    (-> Nat Offset Parser)
+    (loop [end offset
+           exponent ..no-exponent] ## Stays ..no-exponent until an exponent part has been consumed.
+      (<| (!with-char+ source-code//size source-code end char/0 )
+          (cond (!digit?+ char/0)
+                (recur (!inc end) exponent)
+
+                (and (or (!n/= (char "e") char/0)
+                         (!n/= (char "E") char/0))
+                     (is? ..no-exponent exponent)) ## Accept an exponent marker only if none was consumed yet (at most one exponent).
+                (<| (!with-char+ source-code//size source-code (!inc end) char/1 )
+                    (if (or (!n/= (`` (char (~~ (static ..positive-sign)))) char/1)
+                            (!n/= (`` (char (~~ (static ..negative-sign)))) char/1))
+                      (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 )
+                          (if (!digit?+ char/2)
+                            (recur (!n/+ 3 end) char/0) ## Skip marker, sign & first digit; the marker char replaces the sentinel.
+                            ))
+                      ))
+
+                ## else
+                ))))
+
+  (def: (parse-signed start [where offset source-code]) ## Scans the digits of a signed number; a frac-separator delegates to parse-frac.
+    (-> Offset Parser)
+    (let [source-code//size ("lux text size" source-code)]
+      (loop [end offset]
+        (<| (!with-char+ source-code//size source-code end char )
+            (cond (!digit?+ char)
+                  (recur (!inc end))
+
+                  (!n/= (`` (.char (~~ (static ..frac-separator))))
+                        char)
+                  (parse-frac source-code//size start [where (!inc end) source-code])
+
+                  ## else
+                  ))))))
 
 (do-template [ ]
   [(template: ( source-code//size start where offset source-code)
      (loop [g!end offset]
-       (<| (!with-char+ source-code//size source-code g!end g!char (!discrete-output start g!end ))
+       (<| (!with-char+ source-code//size source-code g!end g!char (!number-output start g!end ))
            (if (!digit?+ g!char)
              (recur (!inc g!end))
-             (!discrete-output start g!end )))))]
+             (!number-output start g!end )))))]
 
   [!parse-nat number.Codec #.Nat]
   [!parse-rev number.Codec #.Rev]
@@ -364,7 +399,7 @@
   (let [g!offset/1 (!inc offset)]
     (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end)
         (if (!digit? g!char/1)
-          (parse-int offset [where (!inc/2 offset) source-code])
+          (parse-signed offset [where (!inc/2 offset) source-code])
           (!parse-full-name offset [where (!inc offset) source-code] where #.Identifier)))))
 
 (with-expansions [ (#error.Success [[(update@ #.column (n/+ (!n/- start end)) where)
@@ -526,6 +561,7 @@
                         ## else
                         ))))
 
+            ## Coincidentally (= name-separator frac-separator)
             [(~~ (static ..name-separator))]
             (let [offset/1 (!inc offset/0)]
               (<| (!with-char+ source-code//size source-code offset/1 char/1 )
-- 
cgit v1.2.3