diff options
-rw-r--r-- | src/lux/optimizer.clj | 164 |
1 files changed, 155 insertions, 9 deletions
diff --git a/src/lux/optimizer.clj b/src/lux/optimizer.clj index 933849be3..e7af21664 100644 --- a/src/lux/optimizer.clj +++ b/src/lux/optimizer.clj @@ -9,6 +9,7 @@ ;; [Tags] (defvariant + ;; These tags just have a one-to-one correspondence with Analysis data-structures. ("bool" 1) ("nat" 1) ("int" 1) @@ -25,29 +26,80 @@ ("captured" 3) ("proc" 3) - ;; Purely for optimizations + ;; These other tags represent higher-order constructs that manifest + ;; themselves as patterns in the code. + ;; Lux doesn't formally provide these features, but some macros + ;; expose ways to implement them in terms of the other (primitive) + ;; features. + ;; The optimizer looks for those usage patterns and transforms them + ;; into explicit constructs, which are then subject to specialized optimizations. + + ;; This is a loop, as expected in imperative programming. ("loop" 1) + ;; This is a simple let-expression, as opposed to the more general pattern-matching. ("let" 3) + ;; This is an access to a record's member. It can be multiple level: + ;; e.g. record.l1.l2.l3 + ;; The record-get token stores the path, for simpler compilation. ("record-get" 2) ) -;; For pattern-matching +;; [Utils] + +;; [[Pattern-Matching Traversal Optimization]] + +;; This represents an alternative way to view pattern-matching. +;; The PM that Lux provides has declarative semantics, with the user +;; specifying how his data is shaped, but not how to traverse it. +;; The optimizer's PM is operational in nature, and relies on +;; specifying a path of traversal, with a variety of operations that +;; can be done along the way. +;; The algorithm relies on looking at pattern-matching as traversing a +;; (possibly) branching path, where each step along the path +;; corresponds to a value, the ends of the path are the jumping-off +;; points for the bodies of branches, and branching decisions can be +;; backtracked, if they don't result in a valid jump. (defvariant + ;; Throw away the current data-node (CDN). 
It's useless. ("PopPM" 0) + ;; Store the CDN in a register. ("BindPM" 1) + ;; Compare the CDN with a boolean value. ("BoolPM" 1) + ;; Compare the CDN with a natural value. ("NatPM" 1) + ;; Compare the CDN with an integer value. ("IntPM" 1) + ;; Compare the CDN with a real value. ("RealPM" 1) + ;; Compare the CDN with a character value. ("CharPM" 1) + ;; Compare the CDN with a text value. ("TextPM" 1) + ;; Compare the CDN with a variant value. If valid, proceed to test + ;; the variant's inner value. ("VariantPM" 1) + ;; Access a tuple value at a given index, for further examination. ("TuplePM" 1) + ;; Creates an instance of the backtracking info, as a preparatory + ;; step to exploring one of the branching paths. ("AltPM" 2) + ;; Allows to test the CDN, while keeping a copy of it for more + ;; testing later on. + ;; It's necessary when doing multiple tests on a single value, like + ;; when testing multiple parts of a tuple. ("SeqPM" 2) + ;; This is the jumping-off point for the PM part, where the PM + ;; data-structure is thrown away and the program jumps to the + ;; branch's body. ("ExecPM" 1)) -;; [Utils] +;; This function does a simple transformation from the declarative +;; model of PM of the analyser, to the operational model of PM of the +;; optimizer. +;; You may notice that all branches end in PopPM. +;; The reason is that testing does not immediately imply throwing away +;; the data to be tested, which is why a popping step must immediately follow. (defn ^:private transform-pm* [test] (|case test (&a-case/$NoTestAC) @@ -90,12 +142,22 @@ (&a-case/$TupleTestAC _sub-tests) (|case _sub-tests + ;; An empty tuple corresponds to unit, which can't be tested in + ;; any meaningful way, so it's just popped. (&/$Nil) (&/|list $PopPM) + ;; A tuple of a single element is equivalent to the element + ;; itself, so the element's PM is generated. 
(&/$Cons _only-test (&/$Nil)) (transform-pm* _only-test) + ;; Single tuple PM features the tests of each tuple member + ;; inlined; its operational equivalent is interleaving the + ;; access to each tuple member, followed by the testing of said + ;; member. + ;; That is why each sequence of access+subtesting gets generated + ;; and later they all get concatenated. _ (|let [tuple-size (&/|length _sub-tests)] (&/|++ (&/flat-map (fn [idx+test*] @@ -108,6 +170,15 @@ _sub-tests)) (&/|list $PopPM)))))) +;; It will be common for pattern-matching on a very nested +;; data-structure to require popping all the intermediate +;; data-structures that were visited once it's all done. +;; However, the PM infrastructure employs a single data-stack to keep +;; all data nodes in the trajectory, and that data-stack can just be +;; thrown away entirely, in just one step. +;; Because of that, any ending POPs prior to throwing away the +;; data-stack would be completely useless. +;; This function cleans them all up, to avoid wasteful computation later. (defn ^:private clean-unnecessary-pops [steps] (|case steps (&/$Cons ($PopPM) _steps) @@ -116,11 +187,19 @@ _ steps)) +;; This transforms a single branch of a PM tree into its operational +;; equivalent, while also associating the PM of the branch with the +;; jump to the branch's body. (defn ^:private transform-pm [test body-id] (&/fold (fn [right left] ($SeqPM left right)) ($ExecPM body-id) (clean-unnecessary-pops (&/|reverse (transform-pm* test))))) +;; This function fuses together the paths of the PM traversal, adding +;; branching AltPMs where necessary, and fusing similar paths together +;; as much as possible, when early parts of them coincide. +;; The goal is to minimize rework as much as possible by sharing as +;; much of each path as possible. 
(defn ^:private fuse-pms [pre post] (|case (&/T [pre post]) [($PopPM) ($PopPM)] @@ -193,6 +272,8 @@ ($AltPM pre post) )) +;; This is the top-level function for optimizing PM, which transforms +;; each branch and then fuses them together. (defn ^:private optimize-pm [branches] (|let [;; branches (&/|reverse branches*) bodies (&/|map &/|second branches) @@ -211,6 +292,36 @@ bodies]) ))) +;; [[Function-Folding Optimization]] + +;; The semantics of Lux establish that all functions are of a single +;; argument and the multi-argument functions are actually nested +;; functions being generated and then applied. +;; This, of course, would generate a lot of waste. +;; To avoid it, Lux actually folds function definitions together, +;; thereby creating functions that can be used both +;; one-argument-at-a-time, and also being called with all, or just a +;; partial amount of their arguments. +;; This avoids generating too many artifacts during compilation, since +;; they get "compressed", and it can also lead to faster execution, by +;; enabling optimized function calls later. + +;; Functions and captured variables have "scopes", which tell which +;; function they are, or to which function they belong. +;; During the folding, inner functions disappear, since their bodies +;; are merged into their outer "parent" functions. +;; Their scopes must change accordingly. +(defn ^:private de-scope [old-scope new-scope scope] + "(-> Scope Scope Scope Scope)" + (if (identical? new-scope scope) + old-scope + scope)) + +;; Also, it must be noted that when folding functions, the indexes of +;; the registers have to be changed accordingly. +;; That is what the following "shifting" functions are for. + +;; Shifts the registers for PM operations. (defn ^:private shift-pattern [pattern] (|case pattern ($BindPM _var-id) @@ -226,12 +337,7 @@ pattern )) -(defn ^:private de-scope [old-scope new-scope scope] - "(-> Scope Scope Scope Scope)" - (if (identical? 
new-scope scope) - old-scope - scope)) - +;; Shifts the body of a function after a folding is performed. (defn shift-function-body [old-scope new-scope own-body? body] "(-> Scope Scope Bool Optimized Optimized)" (|let [[meta body-] body] @@ -278,6 +384,7 @@ body) body) + ;; This special "apply" rule is for better handling of recursive calls. ($apply [meta-0 ($var (&/$Local 0))] args) (if own-body? (&/T [meta ($apply (&/T [meta-0 ($var (&/$Local 0))]) @@ -321,6 +428,15 @@ body ))) +;; [[Record-Manipulation Optimizations]] + +;; If a pattern-matching tree with a single branch is found, and that +;; branch corresponds to a tuple PM, and the body corresponds to a +;; local variable, it's likely that the local refers to some member of +;; the tuple that is being extracted. +;; That is the pattern that is to be expected of record read-access, +;; so this function tries to extract the (possibly nested) path +;; necessary, ending in the data-node of the wanted member. (defn ^:private record-read-path [pms member-idx] "(-> (List PM) Idx (List Idx))" (loop [current-idx 0 @@ -353,6 +469,15 @@ (&/|list)) ))) +;; [[Loop Optimizations]] + +;; Lux doesn't offer any looping constructs, relying instead on +;; recursion. +;; Some common usages of recursion can be written more efficiently +;; just using regular loops/iteration. +;; This optimization looks for tail-calls in the function body, +;; rewriting them as jumps to the beginning of the function, while +;; also updating the necessary local variables for the next iteration. (defn ^:private optimize-loop [arity optim] "(-> Int Optimized Optimized)" (|let [[meta optim-] optim] @@ -378,6 +503,12 @@ optim ))) +;; [[Initial Optimization]] + +;; Before any big optimization can be done, the incoming Analysis nodes +;; must be transformed into Optimized nodes, amenable to further transformations. +;; This function does the job, while also detecting (and optimizing) +;; some simple surface patterns it may encounter. 
(let [optimize-closure (fn [optimize closure] (&/|map (fn [capture] (|let [[_name _analysis] capture] @@ -416,19 +547,29 @@ (&a/$case value branches) (|case branches + ;; The pattern for a let-expression is a single branch, + ;; tying the value to a register. (&/$Cons [(&a-case/$StoreTestAC _register) _body] (&/$Nil)) (&/T [meta ($let (pass-0 value) _register (pass-0 _body))]) + ;; The pattern for a record-get is a single branch, with a + ;; tuple pattern and a body corresponding to a + ;; local-variable extracted from the tuple. (&/$Cons [(&a-case/$TupleTestAC _sub-tests) [_ (&a/$var (&/$Local _member-idx))]] (&/$Nil)) (|let [_path (record-read-path _sub-tests _member-idx)] (if (&/|empty? _path) + ;; If the path is empty, that means it was a + ;; false-positive and normal PM optimization should be + ;; done instead. (&/T [meta ($case (pass-0 value) (optimize-pm (&/|map (fn [branch] (|let [[_pattern _body] branch] (&/T [_pattern (pass-0 _body)]))) branches)))]) + ;; Otherwise, we've got ourselves a record-get expression. (&/T [meta ($record-get (pass-0 value) _path)]))) + ;; If no special patterns are found, just do normal PM optimization. _ (&/T [meta ($case (pass-0 value) (optimize-pm (&/|map (fn [branch] @@ -438,9 +579,14 @@ (&a/$lambda scope captured body) (|case (pass-0 body) + ;; If the body of a function is another function, that means + ;; no work was done in-between and both layers can be folded + ;; into one. [_ ($function _arity _scope _captured _body)] (&/T [meta ($function (inc _arity) scope (optimize-closure pass-0 captured) (shift-function-body scope _scope true _body))]) + ;; Otherwise, there's nothing to be done and we've got a + ;; 1-arity function. =body (&/T [meta ($function 1 scope (optimize-closure pass-0 captured) =body)])) |