From 9b106ea2fc8b55f8081154511b2b59ef821d5991 Mon Sep 17 00:00:00 2001 From: Eduardo Julian Date: Thu, 23 Aug 2018 19:50:42 -0400 Subject: Optimized character extraction by not having to query the length of the source-code every time. --- stdlib/source/lux/compiler/default.lux | 56 +++-- stdlib/source/lux/compiler/default/syntax.lux | 300 +++++++++++++------------- stdlib/source/lux/interpreter.lux | 5 +- 3 files changed, 193 insertions(+), 168 deletions(-) diff --git a/stdlib/source/lux/compiler/default.lux b/stdlib/source/lux/compiler/default.lux index 1744b1143..73b018c95 100644 --- a/stdlib/source/lux/compiler/default.lux +++ b/stdlib/source/lux/compiler/default.lux @@ -5,7 +5,7 @@ ["ex" exception (#+ exception:)]] [data ["." product] - ["." error] + ["." error (#+ Error)] [text ("text/." Hash) format ["." encoding]] @@ -36,10 +36,20 @@ ## [cache/io]) ) -(def: (read current-module aliases) - (-> Text Aliases (analysis.Operation Code)) +(type: Reader + (-> .Source (Error [.Source Code]))) + +(def: (reader current-module aliases) + (-> Text Aliases (analysis.Operation Reader)) + (function (_ [bundle state]) + (let [[cursor offset source-code] (get@ #.source state)] + (#error.Success [[bundle state] + (syntax.parse current-module aliases ("lux text size" source-code))])))) + +(def: (read reader) + (-> Reader (analysis.Operation Code)) (function (_ [bundle compiler]) - (case (syntax.parse current-module aliases (get@ #.source compiler)) + (case (reader (get@ #.source compiler)) (#error.Error error) (#error.Error error) @@ -86,26 +96,30 @@ (|>> module.set-compiled statement.lift-analysis)) - (def: (loop-module-compilation module-name) + (def: (module-compilation-iteration reader) + (-> Reader (All [anchor expression statement] )) + (<| (phase.timed (name-of ..module-compilation-iteration) "ITERATION") + (do phase.Monad + [code (statement.lift-analysis + (do @ + [code (<| (phase.timed (name-of ..module-compilation-iteration) "syntax") + (..read reader)) + #let [[cursor _] code] + _ (analysis.set-cursor cursor)] + (wrap code))) + _ (<| (phase.timed (name-of ..module-compilation-iteration) "PHASE") + (totalS.phase code))] + init.refresh))) + + (def: (module-compilation-loop module-name) (All [anchor expression statement] (-> Text )) - (let [iteration (: (All [anchor expression statement] - ) - (<| (phase.timed (name-of ..loop-module-compilation) "ITERATION") - (do phase.Monad - [code (statement.lift-analysis - (do @ - [code (<| (phase.timed (name-of ..loop-module-compilation) "syntax") - (..read module-name syntax.no-aliases)) - #let [[cursor _] code] - _ (analysis.set-cursor cursor)] - (wrap code))) - _ (<| (phase.timed (name-of ..loop-module-compilation) "PHASE") - (totalS.phase code))] - init.refresh)))] + (do phase.Monad + [reader (statement.lift-analysis + (..reader module-name syntax.no-aliases))] (function (_ state) (loop [state state] - (case (iteration state) + (case (module-compilation-iteration reader state) (#error.Success [state' output]) (recur state') @@ -119,7 +133,7 @@ (-> Text Source )) (do phase.Monad [_ (begin-module-compilation module-name source) - _ (loop-module-compilation module-name)] + _ (module-compilation-loop module-name)] (end-module-compilation module-name))) (def: #export (compile-module platform configuration compiler) diff --git a/stdlib/source/lux/compiler/default/syntax.lux b/stdlib/source/lux/compiler/default/syntax.lux index af7c7ae90..d724a150b 100644 --- a/stdlib/source/lux/compiler/default/syntax.lux +++ b/stdlib/source/lux/compiler/default/syntax.lux @@ -160,36 +160,39 @@ [!n/- "lux i64 -"] ) -(template: (!with-char @source-code @offset @char @else @body) - (if (!i/< (:coerce Int ("lux text size" @source-code)) - ## TODO: Get rid of the above "lux text size" call. - ## The size should be calculated only once and re-used constantly. +(template: (!with-char+ @source-code-size @source-code @offset @char @else @body) + (if (!i/< (:coerce Int @source-code-size) (:coerce Int @offset)) (let [@char ("lux text char" @source-code @offset)] @body) @else)) +(template: (!with-char @source-code @offset @char @else @body) + (!with-char+ ("lux text size" @source-code) @source-code @offset @char @else @body)) + (do-template [ ] [(def: ( parse source) (-> Parser Parser) - (loop [source source - stack (: (List Code) #.Nil)] - (case (parse source) - (#error.Success [source' top]) - (recur source' (#.Cons top stack)) - - (#error.Error error) - (let [[where offset source-code] source] - (<| (!with-char source-code offset char (#error.Error error)) - (`` (case char - (^ (char (~~ (static )))) - (#error.Success [[(update@ #.column inc where) - (!inc offset) - source-code] - [where ( (list.reverse stack))]]) - - _ - (ex.throw unrecognized-input where))))))))] + (let [[_ _ source-code] source + source-code//size ("lux text size" source-code)] + (loop [source source + stack (: (List Code) #.Nil)] + (case (parse source) + (#error.Success [source' top]) + (recur source' (#.Cons top stack)) + + (#error.Error error) + (let [[where offset _] source] + (<| (!with-char+ source-code//size source-code offset char (#error.Error error)) + (`` (case char + (^ (char (~~ (static )))) + (#error.Success [[(update@ #.column inc where) + (!inc offset) + source-code] + [where ( (list.reverse stack))]]) + + _ + (ex.throw unrecognized-input where)))))))))] ## Form and tuple syntax is mostly the same, differing only in the ## delimiters involved. @@ -200,29 +203,31 @@ (def: (parse-record parse source) (-> Parser Parser) - (loop [source source - stack (: (List [Code Code]) #.Nil)] - (case (parse source) - (#error.Success [sourceF field]) - (case (parse sourceF) - (#error.Success [sourceFV value]) - (recur sourceFV (#.Cons [field value] stack)) + (let [[_ _ source-code] source + source-code//size ("lux text size" source-code)] + (loop [source source + stack (: (List [Code Code]) #.Nil)] + (case (parse source) + (#error.Success [sourceF field]) + (case (parse sourceF) + (#error.Success [sourceFV value]) + (recur sourceFV (#.Cons [field value] stack)) + + (#error.Error error) + (let [[where offset _] source] + (<| (!with-char+ source-code//size source-code offset char (#error.Error error)) + (`` (case char + (^ (char (~~ (static ..close-record)))) + (#error.Success [[(update@ #.column inc where) + (!inc offset) + source-code] + [where (#.Record (list.reverse stack))]]) + + _ + (ex.throw unrecognized-input where)))))) (#error.Error error) - (let [[where offset source-code] source] - (<| (!with-char source-code offset char (#error.Error error)) - (`` (case char - (^ (char (~~ (static ..close-record)))) - (#error.Success [[(update@ #.column inc where) - (!inc offset) - source-code] - [where (#.Record (list.reverse stack))]]) - - _ - (ex.throw unrecognized-input where)))))) - - (#error.Error error) - (#error.Error error)))) + (#error.Error error))))) (template: (!guarantee-no-new-lines content body) (case ("lux text index" content (static text.new-line) 0) @@ -298,20 +303,21 @@ (do-template [ ] [(def: ( start [where offset source-code]) (-> Offset Parser) - (loop [end offset] - (<| (!with-char source-code end char (!discrete-output )) - (if (!digit?+ char) - (recur (!inc end)) - (!discrete-output )))))] + (let [source-code//size ("lux text size" source-code)] + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char (!discrete-output )) + (if (!digit?+ char) + (recur (!inc end)) + (!discrete-output ))))))] [parse-nat number.Codec #.Nat] [parse-int number.Codec #.Int] [parse-rev number.Codec #.Rev] ) -(template: (!parse-int offset where source-code @end) +(template: (!parse-int source-code//size offset where source-code @end) (let [g!offset/1 (!inc offset)] - (<| (!with-char source-code g!offset/1 g!char/1 @end) + (<| (!with-char+ source-code//size source-code g!offset/1 g!char/1 @end) (if (!digit? g!char/1) (parse-int offset [where (!inc/2 offset) source-code]) (!parse-full-name offset [where (!inc offset) source-code] where #.Identifier))))) @@ -322,13 +328,14 @@ (!clip start end source-code)])] (def: (parse-name-part start [where offset source-code]) (-> Offset Source (Error [Source Text])) - (loop [end offset] - (<| (!with-char source-code end char ) - (cond (!name-char? char) - (recur (!inc end)) + (let [source-code//size ("lux text size" source-code)] + (loop [end offset] + (<| (!with-char+ source-code//size source-code end char ) + (cond (!name-char? char) + (recur (!inc end)) - ## else - ))))) + ## else + )))))) (template: (!new-line where) (let [[where::file where::line where::column] where] @@ -403,93 +410,96 @@ (#error.Error error) (#error.Error error))) - (def: #export (parse current-module aliases source) - (-> Text Aliases Source (Error [Source Code])) - (let [parse' (parse current-module aliases)] - (loop [[where offset/0 source-code] source] - (<| (!with-char source-code offset/0 char/0 ) - (`` (case char/0 - ## White-space - (^template [ ] - (^ (char )) - (recur [(update@ inc where) - (!inc offset/0) - source-code])) - ([(~~ (static ..space)) #.column] - [(~~ (static text.carriage-return)) #.column]) - - (^ (char (~~ (static text.new-line)))) - (recur [(!new-line where) (!inc offset/0) source-code]) - - ## Form - (^ (char (~~ (static ..open-form)))) - (parse-form parse' ) - - ## Tuple - (^ (char (~~ (static ..open-tuple)))) - (parse-tuple parse' ) - - ## Record - (^ (char (~~ (static ..open-record)))) - (parse-record parse' ) - - ## Text - (^ (char (~~ (static ..text-delimiter)))) - (read-text ) - - ## Special code - (^ (char (~~ (static ..sigil)))) - (let [offset/1 (!inc offset/0)] - (<| (!with-char source-code offset/1 char/1 ) - (case char/1 - (^template [ ] - (^ (char )) - (#error.Success [[(update@ #.column (|>> !inc/2) where) - (!inc offset/1) - source-code] - [where (#.Bit )]])) - (["0" #0] - ["1" #1]) - - ## Single-line comment - (^ (char (~~ (static ..sigil)))) - (case ("lux text index" source-code (static text.new-line) offset/1) - (#.Some end) - (recur [(!new-line where) (!inc end) source-code]) - + (with-expansions [ (as-is (parse current-module aliases source-code//size))] + (def: #export (parse current-module aliases source-code//size) + (-> Text Aliases Nat (-> Source (Error [Source Code]))) + ## The "exec []" is only there to avoid function fusion. + ## This is to preserve the loop as much as possible and keep it tight. + (exec [] + (function (recur [where offset/0 source-code]) + (<| (!with-char+ source-code//size source-code offset/0 char/0 ) + (`` (case char/0 + ## White-space + (^template [ ] + (^ (char )) + (recur [(update@ inc where) + (!inc offset/0) + source-code])) + ([(~~ (static ..space)) #.column] + [(~~ (static text.carriage-return)) #.column]) + + (^ (char (~~ (static text.new-line)))) + (recur [(!new-line where) (!inc offset/0) source-code]) + + ## Form + (^ (char (~~ (static ..open-form)))) + (parse-form ) + + ## Tuple + (^ (char (~~ (static ..open-tuple)))) + (parse-tuple ) + + ## Record + (^ (char (~~ (static ..open-record)))) + (parse-record ) + + ## Text + (^ (char (~~ (static ..text-delimiter)))) + (read-text ) + + ## Special code + (^ (char (~~ (static ..sigil)))) + (let [offset/1 (!inc offset/0)] + (<| (!with-char+ source-code//size source-code offset/1 char/1 ) + (case char/1 + (^template [ ] + (^ (char )) + (#error.Success [[(update@ #.column (|>> !inc/2) where) + (!inc offset/1) + source-code] + [where (#.Bit )]])) + (["0" #0] + ["1" #1]) + + ## Single-line comment + (^ (char (~~ (static ..sigil)))) + (case ("lux text index" source-code (static text.new-line) offset/1) + (#.Some end) + (recur [(!new-line where) (!inc end) source-code]) + + _ + ) + + (^ (char (~~ (static ..name-separator)))) + (!parse-short-name current-module where #.Identifier) + _ - ) - - (^ (char (~~ (static ..name-separator)))) - (!parse-short-name current-module where #.Identifier) - - _ - (cond (!name-char?|head char/1) ## Tag - (!parse-full-name offset/1 where #.Tag) - - ## else - )))) - - (^ (char (~~ (static ..name-separator)))) - (let [offset/1 (!inc offset/0)] - (<| (!with-char source-code offset/1 char/1 ) - (if (!digit? char/1) - (parse-rev offset/0 [where (!inc offset/1) source-code]) - (!parse-short-name current-module where #.Identifier)))) - - (^template [] - (^ (char )) - (!parse-int offset/0 where source-code )) - ([(~~ (static ..positive-sign))] - [(~~ (static ..negative-sign))]) - - _ - (cond (!digit? char/0) ## Natural number - (parse-nat offset/0 ) - - ## Identifier - (!name-char?|head char/0) - (!parse-full-name offset/0 where #.Identifier) - - ## else - )))))))) + (cond (!name-char?|head char/1) ## Tag + (!parse-full-name offset/1 where #.Tag) + + ## else + )))) + + (^ (char (~~ (static ..name-separator)))) + (let [offset/1 (!inc offset/0)] + (<| (!with-char+ source-code//size source-code offset/1 char/1 ) + (if (!digit? char/1) + (parse-rev offset/0 [where (!inc offset/1) source-code]) + (!parse-short-name current-module where #.Identifier)))) + + (^template [] + (^ (char )) + (!parse-int source-code//size offset/0 where source-code )) + ([(~~ (static ..positive-sign))] + [(~~ (static ..negative-sign))]) + + _ + (cond (!digit? char/0) ## Natural number + (parse-nat offset/0 ) + + ## Identifier + (!name-char?|head char/0) + (!parse-full-name offset/0 where #.Identifier) + + ## else + ))))))))) diff --git a/stdlib/source/lux/interpreter.lux b/stdlib/source/lux/interpreter.lux index 41edcb708..e44084bc0 100644 --- a/stdlib/source/lux/interpreter.lux +++ b/stdlib/source/lux/interpreter.lux @@ -44,7 +44,7 @@ Text (format text.new-line "Welcome to the interpreter!" text.new-line - "Type 'exit' to leave." text.new-line + "Type '" ..exit-command "' to leave." text.new-line text.new-line)) (def: farewell-message @@ -164,7 +164,8 @@ (All [anchor expression statement] (-> (Error [ Text]))) (do error.Monad - [[source' input] (syntax.parse ..module syntax.no-aliases (get@ #source context)) + [#let [[_where _offset _code] (get@ #source context)] + [source' input] (syntax.parse ..module syntax.no-aliases (text.size _code) (get@ #source context)) [state' representation] (let [## TODO: Simplify ASAP state (:share [anchor expression statement] { -- cgit v1.2.3