Added Frac parsing.

author: Eduardo Julian 2018-08-26 09:12:46 -0400
committer: Eduardo Julian 2018-08-26 09:12:46 -0400
commit: b614f2875fb2e98e8867399b7013503f2b1a4e4c (patch)
tree: 4223297955b046205c017b58cf31e490b26e8cea /stdlib/source
parent: db4d1dd31ac1860efb73f6326822c192302ecf25 (diff)
2 files changed, 70 insertions, 34 deletions
diff --git a/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux b/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux
index c654d9a00..d599af130 100644
--- a/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux
+++ b/stdlib/source/lux/compiler/default/phase/extension/analysis/common.lux
@@ -201,7 +201,7 @@
           (bundle.install "concat" (binary Text Text Text))
           (bundle.install "index" (trinary Text Text Nat (type (Maybe Nat))))
           (bundle.install "size" (unary Text Nat))
-          (bundle.install "char" (binary Text Nat (type (Maybe Nat))))
+          (bundle.install "char" (binary Text Nat Nat))
           (bundle.install "clip" (trinary Text Nat Nat Text))
           )))
 
diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux
index 8cb41536e..52ac38720 100644
--- a/stdlib/source/lux/compiler/default/syntax.lux
+++ b/stdlib/source/lux/compiler/default/syntax.lux
@@ -40,6 +40,18 @@
      ["." list]
      ["." dictionary (#+ Dictionary)]]]])
 
+## TODO: Optimize how forms, tuples & records are parsed in the end.
+## There is repeated-work going on when parsing the white-space before the
+## closing parenthesis/bracket/brace.
+## That repeated-work should be avoided.
+
+## TODO: Implement "lux syntax char case!" as a custom extension.
+## That way, it should be possible to obtain the char without wrapping
+## it into a java.lang.Long, thereby improving performance.
+
+## TODO: Make an extension to take advantage of java/lang/String::indexOf<int,int>
+## to get better performance than the current "lux text index" extension.
+
 (type: Char Nat)
 
 (do-template [<name> <extension> <diff>]
@@ -70,15 +82,6 @@
   [!n/- "lux i64 -"]
   )
 
-## TODO: Optimize how forms, tuples & records are parsed in the end.
-## There is repeated-work going on when parsing the space before the
-## closing parenthesis/bracket/brace.
-## That repeated-work should be avoided.
-
-## TODO: Implement "lux syntax char case!" as a custom extension.
-## That way, it should be possible to obtain the char without wrapping
-## it into a java.lang.Long, thereby improving performance.
-
 (type: #export Syntax
   (-> Cursor (Lexer [Cursor Code])))
 
@@ -107,6 +110,17 @@
 (def: #export positive-sign "+")
 (def: #export negative-sign "-")
 
+(def: #export frac-separator ".") ## Mark that, when found after the digits of a signed literal, makes parsing continue as a Frac.
+
+## The parts of a name are separated by a single mark.
+## E.g. module.short.
+## Only one such mark may be used in a name, since there
+## can only be 2 parts to a name (the module [before the
+## mark], and the short [after the mark]).
+## There are also some extra rules regarding name syntax,
+## encoded on the parser.
+(def: #export name-separator ".")
+
 ## These are very simple parsers that just cut chunks of text in
 ## specific shapes and then use decoders already present in the
 ## standard library to actually produce the values from the literals.
@@ -143,15 +157,6 @@
       (wrap [(update@ #.column (n/+ (text.size chunk)) where)
              [where (#.Frac value)]]))))
 
-## The parts of an name are separated by a single mark.
-## E.g. module.short.
-## Only one such mark may be used in an name, since there
-## can only be 2 parts to an name (the module [before the
-## mark], and the short [after the mark]).
-## There are also some extra rules regarding name syntax,
-## encoded on the parser.
-(def: name-separator ".")
-
 (exception: #export (end-of-file {module Text})
   (ex.report ["Module" (%t module)]))
 
@@ -324,7 +329,7 @@
   (or (!strict-name-char? char)
       (!digit? char)))
 
-(template: (!discrete-output <start> <end> <codec> <tag>)
+(template: (!number-output <start> <end> <codec> <tag>)
   (case (:: <codec> decode (!clip <start> <end> source-code))
     (#error.Success output)
     (#error.Success [[(update@ #.column (n/+ (!n/- <start> <end>)) where)
@@ -335,26 +340,56 @@
     (#error.Error error)
     (#error.Error error)))
 
-(do-template [<name> <codec> <tag>]
-  [(def: (<name> start [where offset source-code])
-     (-> Offset Parser)
-     (let [source-code//size ("lux text size" source-code)]
-       (loop [end offset]
-         (<| (!with-char+ source-code//size source-code end char (!discrete-output start end <codec> <tag>))
-             (if (!digit?+ char)
-               (recur (!inc end))
-               (!discrete-output start end <codec> <tag>))))))]
+(def: no-exponent Offset 0) ## Sentinel start value for the `exponent` loop variable of parse-frac.
 
-  [parse-int number.Codec<Text,Int> #.Int]
-  )
+(with-expansions [<int-output> (as-is (!number-output start end number.Codec<Text,Int> #.Int))
+                  <frac-output> (as-is (!number-output start end number.Codec<Text,Frac> #.Frac))
+                  <failure> (ex.throw unrecognized-input [where "Frac" source-code offset])]
+  (def: (parse-frac source-code//size start [where offset source-code]) ## Scans the part of a Frac literal that follows the frac-separator.
+    (-> Nat Offset Parser)
+    (loop [end offset
+           exponent ..no-exponent] ## Still ..no-exponent => no exponent marker consumed so far.
+      (<| (!with-char+ source-code//size source-code end char/0 <frac-output>)
+          (cond (!digit?+ char/0)
+                (recur (!inc end) exponent)

+                (and (or (!n/= (char "e") char/0)
+                         (!n/= (char "E") char/0))
+                     (is? ..no-exponent exponent)) ## Accept a marker only while none has been seen (at most one exponent).
+                (<| (!with-char+ source-code//size source-code (!inc end) char/1 <failure>) ## The marker must be followed by a sign...
+                    (if (or (!n/= (`` (char (~~ (static ..positive-sign)))) char/1)
+                            (!n/= (`` (char (~~ (static ..negative-sign)))) char/1))
+                      (<| (!with-char+ source-code//size source-code (!n/+ 2 end) char/2 <failure>) ## ...and then by at least one digit.
+                          (if (!digit?+ char/2)
+                            (recur (!n/+ 3 end) char/0) ## Skip marker, sign & first digit; char/0 now flags the exponent as seen.
+                            <failure>))
+                      <failure>))

+                ## else
+                <frac-output>))))

+  (def: (parse-signed start [where offset source-code]) ## Scans a signed literal: Int by default, Frac once a frac-separator follows the digits.
+    (-> Offset Parser)
+    (let [source-code//size ("lux text size" source-code)]
+      (loop [end offset]
+        (<| (!with-char+ source-code//size source-code end char <int-output>)
+            (cond (!digit?+ char)
+                  (recur (!inc end))

+                  (!n/= (`` (.char (~~ (static ..frac-separator))))
+                        char)
+                  (parse-frac source-code//size start [where (!inc end) source-code]) ## Resume right after the separator.

+                  ## else
+                  <int-output>))))))
 
 (do-template [<name> <codec> <tag>]
   [(template: (<name> source-code//size start where offset source-code)
      (loop [g!end offset]
-       (<| (!with-char+ source-code//size source-code g!end g!char (!discrete-output start g!end <codec> <tag>))
+       (<| (!with-char+ source-code//size source-code g!end g!char (!number-output start g!end <codec> <tag>))
            (if (!digit?+ g!char)
              (recur (!inc g!end))
-             (!discrete-output start g!end <codec> <tag>)))))]
+             (!number-output start g!end <codec> <tag>)))))]
 
   [!parse-nat number.Codec<Text,Nat> #.Nat]
   [!parse-rev number.Codec<Text,Rev> #.Rev]
@@ -364,7 +399,7 @@
   (let [g!offset/1 (!inc offset)]
     (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end)
         (if (!digit? g!char/1)
-          (parse-int offset [where (!inc/2 offset) source-code])
+          (parse-signed offset [where (!inc/2 offset) source-code])
           (!parse-full-name offset [where (!inc offset) source-code] where #.Identifier)))))
 
 (with-expansions [<output> (#error.Success [[(update@ #.column (n/+ (!n/- start end)) where)
@@ -526,6 +561,7 @@
                                    ## else
                                    <failure>))))
 
+                      ## Coincidentally (= name-separator frac-separator)
                       [(~~ (static ..name-separator))]
                       (let [offset/1 (!inc offset/0)]
                         (<| (!with-char+ source-code//size source-code offset/1 char/1 <end>)
author	Eduardo Julian	2018-08-26 09:12:46 -0400
committer	Eduardo Julian	2018-08-26 09:12:46 -0400
commit	b614f2875fb2e98e8867399b7013503f2b1a4e4c (patch)
tree	4223297955b046205c017b58cf31e490b26e8cea /stdlib/source
parent	db4d1dd31ac1860efb73f6326822c192302ecf25 (diff)