aboutsummaryrefslogtreecommitdiff
path: root/luxc/src/lux/optimizer.clj
diff options
context:
space:
mode:
Diffstat (limited to 'luxc/src/lux/optimizer.clj')
-rw-r--r--luxc/src/lux/optimizer.clj1202
1 files changed, 1202 insertions, 0 deletions
diff --git a/luxc/src/lux/optimizer.clj b/luxc/src/lux/optimizer.clj
new file mode 100644
index 000000000..5c30dc44f
--- /dev/null
+++ b/luxc/src/lux/optimizer.clj
@@ -0,0 +1,1202 @@
+;; Copyright (c) Eduardo Julian. All rights reserved.
+;; This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+;; If a copy of the MPL was not distributed with this file,
+;; You can obtain one at http://mozilla.org/MPL/2.0/.
+(ns lux.optimizer
+ (:require (lux [base :as & :refer [|let |do return fail return* fail* |case defvariant]])
+ (lux.analyser [base :as &a]
+ [case :as &a-case])))
+
+;; [Tags]
+(defvariant
+ ;; These tags just have a one-to-one correspondence with Analysis data-structures.
+ ("bool" 1)
+ ("nat" 1)
+ ("int" 1)
+ ("frac" 1)
+ ("real" 1)
+ ("char" 1)
+ ("text" 1)
+ ("variant" 3)
+ ("tuple" 1)
+ ("apply" 2)
+ ("case" 2)
+ ("function" 5)
+ ("ann" 2)
+ ("var" 1)
+ ("captured" 3)
+ ("proc" 3)
+
+ ;; These other tags represent higher-order constructs that manifest
+ ;; themselves as patterns in the code.
+ ;; Lux doesn't formally provide these features, but some macros
+ ;; expose ways to implement them in terms of the other (primitive)
+ ;; features.
+ ;; The optimizer looks for those usage patterns and transforms them
+ ;; into explicit constructs, which are then subject to specialized optimizations.
+
+ ;; Loop scope, for doing loop inlining
+ ("loop" 3) ;; {register-offset Int, inits (List Optimized), body Optimized}
+ ;; This is loop iteration, as expected in imperative programming.
+ ("iter" 2) ;; {register-offset Int, vals (List Optimized)}
+ ;; This is a simple let-expression, as opposed to the more general pattern-matching.
+ ("let" 3)
+ ;; This is an access to a record's member. It can be multi-level:
+ ;; e.g. record.l1.l2.l3
+ ;; The record-get token stores the path, for simpler compilation.
+ ("record-get" 2)
+ ;; Regular, run-of-the-mill if expressions.
+ ("if" 3)
+ )
+
+;; [Utils]
+
+;; [[Pattern-Matching Traversal Optimization]]
+
+;; This represents an alternative way to view pattern-matching.
+;; The PM that Lux provides has declarative semantics, with the user
+;; specifying how his data is shaped, but not how to traverse it.
+;; The optimizer's PM is operational in nature, and relies on
+;; specifying a path of traversal, with a variety of operations that
+;; can be done along the way.
+;; The algorithm relies on looking at pattern-matching as traversing a
+;; (possibly) branching path, where each step along the path
+;; corresponds to a value, the ends of the path are the jumping-off
+;; points for the bodies of branches, and branching decisions can be
+;; backtracked, if they don't result in a valid jump.
+(defvariant
+ ;; Throw away the current data-node (CDN). It's useless.
+ ("PopPM" 0)
+ ;; Store the CDN in a register.
+ ("BindPM" 1)
+ ;; Compare the CDN with a boolean value.
+ ("BoolPM" 1)
+ ;; Compare the CDN with a natural value.
+ ("NatPM" 1)
+ ;; Compare the CDN with an integer value.
+ ("IntPM" 1)
+ ;; Compare the CDN with a fractional value.
+ ("FracPM" 1)
+ ;; Compare the CDN with a real value.
+ ("RealPM" 1)
+ ;; Compare the CDN with a character value.
+ ("CharPM" 1)
+ ;; Compare the CDN with a text value.
+ ("TextPM" 1)
+ ;; Compare the CDN with a variant value. If valid, proceed to test
+ ;; the variant's inner value.
+ ("VariantPM" 1)
+ ;; Access a tuple value at a given index, for further examination.
+ ("TuplePM" 1)
+ ;; Creates an instance of the backtracking info, as a preparatory
+ ;; step to exploring one of the branching paths.
+ ("AltPM" 2)
+ ;; Allows to test the CDN, while keeping a copy of it for more
+ ;; tasting later on.
+ ;; If necessary when doing multiple tests on a single value, like
+ ;; when testing multiple parts of a tuple.
+ ("SeqPM" 2)
+ ;; This is the jumping-off point for the PM part, where the PM
+ ;; data-structure is thrown away and the program jumps to the
+ ;; branch's body.
+ ("ExecPM" 1))
+
+(defn de-meta
+ "(-> Optimized Optimized)"
+ [optim]
+ (|let [[meta optim-] optim]
+ (|case optim-
+ ($variant idx is-last? value)
+ ($variant idx is-last? (de-meta value))
+
+ ($tuple elems)
+ ($tuple (&/|map de-meta elems))
+
+ ($case value [_pm _bodies])
+ ($case (de-meta value)
+ (&/T [_pm (&/|map de-meta _bodies)]))
+
+ ($function _register-offset arity scope captured body*)
+ ($function _register-offset
+ arity
+ scope
+ (&/|map (fn [capture]
+ (|let [[_name [_meta ($captured _scope _idx _source)]] capture]
+ (&/T [_name ($captured _scope _idx (de-meta _source))])))
+ captured)
+ (de-meta body*))
+
+ ($ann value-expr type-expr)
+ (de-meta value-expr)
+
+ ($apply func args)
+ ($apply (de-meta func)
+ (&/|map de-meta args))
+
+ ($captured scope idx source)
+ ($captured scope idx (de-meta source))
+
+ ($proc proc-ident args special-args)
+ ($proc proc-ident (&/|map de-meta args) special-args)
+
+ ($loop _register-offset _inits _body)
+ ($loop _register-offset
+ (&/|map de-meta _inits)
+ (de-meta _body))
+
+ ($iter _iter-register-offset args)
+ ($iter _iter-register-offset
+ (&/|map de-meta args))
+
+ ($let _value _register _body)
+ ($let (de-meta _value)
+ _register
+ (de-meta _body))
+
+ ($record-get _value _path)
+ ($record-get (de-meta _value)
+ _path)
+
+ ($if _test _then _else)
+ ($if (de-meta _test)
+ (de-meta _then)
+ (de-meta _else))
+
+ _
+ optim-
+ )))
+
+;; This function does a simple transformation from the declarative
+;; model of PM of the analyser, to the operational model of PM of the
+;; optimizer.
+;; You may notice that all branches end in PopPM.
+;; The reason is that testing does not immediately imply throwing away
+;; the data to be tested, which is why a popping step must immediately follow.
+(defn ^:private transform-pm* [test]
+ (|case test
+ (&a-case/$NoTestAC)
+ (&/|list $PopPM)
+
+ (&a-case/$StoreTestAC _register)
+ (&/|list ($BindPM _register))
+
+ (&a-case/$BoolTestAC _value)
+ (&/|list ($BoolPM _value)
+ $PopPM)
+
+ (&a-case/$NatTestAC _value)
+ (&/|list ($NatPM _value)
+ $PopPM)
+
+ (&a-case/$IntTestAC _value)
+ (&/|list ($IntPM _value)
+ $PopPM)
+
+ (&a-case/$FracTestAC _value)
+ (&/|list ($FracPM _value)
+ $PopPM)
+
+ (&a-case/$RealTestAC _value)
+ (&/|list ($RealPM _value)
+ $PopPM)
+
+ (&a-case/$CharTestAC _value)
+ (&/|list ($CharPM _value)
+ $PopPM)
+
+ (&a-case/$TextTestAC _value)
+ (&/|list ($TextPM _value)
+ $PopPM)
+
+ (&a-case/$VariantTestAC _idx _num-options _sub-test)
+ (&/|++ (&/|list ($VariantPM (if (= _idx (dec _num-options))
+ (&/$Right _idx)
+ (&/$Left _idx))))
+ (&/|++ (transform-pm* _sub-test)
+ (&/|list $PopPM)))
+
+ (&a-case/$TupleTestAC _sub-tests)
+ (|case _sub-tests
+ ;; An empty tuple corresponds to unit, which can't be tested in
+ ;; any meaningful way, so it's just popped.
+ (&/$Nil)
+ (&/|list $PopPM)
+
+ ;; A tuple of a single element is equivalent to the element
+ ;; itself, to the element's PM is generated.
+ (&/$Cons _only-test (&/$Nil))
+ (transform-pm* _only-test)
+
+ ;; Single tuple PM features the tests of each tuple member
+ ;; inlined, it's operational equivalent is interleaving the
+ ;; access to each tuple member, followed by the testing of said
+ ;; member.
+ ;; That is way each sequence of access+subtesting gets generated
+ ;; and later they all get concatenated.
+ _
+ (|let [tuple-size (&/|length _sub-tests)]
+ (&/|++ (&/flat-map (fn [idx+test*]
+ (|let [[idx test*] idx+test*]
+ (&/$Cons ($TuplePM (if (< idx (dec tuple-size))
+ (&/$Left idx)
+ (&/$Right idx)))
+ (transform-pm* test*))))
+ (&/zip2 (&/|range tuple-size)
+ _sub-tests))
+ (&/|list $PopPM))))))
+
+;; It will be common for pattern-matching on a very nested
+;; data-structure to require popping all the intermediate
+;; data-structures that were visited once it's all done.
+;; However, the PM infrastructure employs a single data-stack to keep
+;; all data nodes in the trajectory, and that data-stack can just be
+;; thrown again entirely, in just one step.
+;; Because of that, any ending POPs prior to throwing away the
+;; data-stack would be completely useless.
+;; This function cleans them all up, to avoid wasteful computation later.
+(defn ^:private clean-unnecessary-pops [steps]
+ (|case steps
+ (&/$Cons ($PopPM) _steps)
+ (clean-unnecessary-pops _steps)
+
+ _
+ steps))
+
+;; This transforms a single branch of a PM tree into it's operational
+;; equivalent, while also associating the PM of the branch with the
+;; jump to the branch's body.
+(defn ^:private transform-pm [test body-id]
+ (&/fold (fn [right left] ($SeqPM left right))
+ ($ExecPM body-id)
+ (clean-unnecessary-pops (&/|reverse (transform-pm* test)))))
+
+(defn ^:private pattern->text [pattern]
+ (|case pattern
+ ($PopPM)
+ "$PopPM"
+
+ ($BindPM _id)
+ (str "($BindPM " _id ")")
+
+ ($BoolPM _value)
+ (str "($BoolPM " (pr-str _value) ")")
+
+ ($NatPM _value)
+ (str "($NatPM " (pr-str _value) ")")
+
+ ($IntPM _value)
+ (str "($IntPM " (pr-str _value) ")")
+
+ ($FracPM _value)
+ (str "($FracPM " (pr-str _value) ")")
+
+ ($RealPM _value)
+ (str "($RealPM " (pr-str _value) ")")
+
+ ($CharPM _value)
+ (str "($CharPM " (pr-str _value) ")")
+
+ ($TextPM _value)
+ (str "($TextPM " (pr-str _value) ")")
+
+ ($TuplePM (&/$Left _idx))
+ (str "($TuplePM L" _idx ")")
+
+ ($TuplePM (&/$Right _idx))
+ (str "($TuplePM R" _idx ")")
+
+ ($VariantPM (&/$Left _idx))
+ (str "($VariantPM L" _idx ")")
+
+ ($VariantPM (&/$Right _idx))
+ (str "($VariantPM R" _idx ")")
+
+ ($SeqPM _left _right)
+ (str "($SeqPM " (pattern->text _left) " " (pattern->text _right) ")")
+
+ ($ExecPM _idx)
+ (str "($ExecPM " _idx ")")
+
+ ;; $AltPM is not considered because it's not supposed to be
+ ;; present anywhere at this point in time.
+ ))
+
+;; This function fuses together the paths of the PM traversal, adding
+;; branching AltPMs where necessary, and fusing similar paths together
+;; as much as possible, when early parts of them coincide.
+;; The goal is to minimize rework as much as possible by sharing as
+;; much of each path as possible.
+(defn ^:private fuse-pms [pre post]
+ (|case (&/T [pre post])
+ [($PopPM) ($PopPM)]
+ $PopPM
+
+ [($BindPM _pre-var-id) ($BindPM _post-var-id)]
+ (if (= _pre-var-id _post-var-id)
+ ($BindPM _pre-var-id)
+ ($AltPM pre post))
+
+ [($BoolPM _pre-value) ($BoolPM _post-value)]
+ (if (= _pre-value _post-value)
+ ($BoolPM _pre-value)
+ ($AltPM pre post))
+
+ [($NatPM _pre-value) ($NatPM _post-value)]
+ (if (= _pre-value _post-value)
+ ($NatPM _pre-value)
+ ($AltPM pre post))
+
+ [($IntPM _pre-value) ($IntPM _post-value)]
+ (if (= _pre-value _post-value)
+ ($IntPM _pre-value)
+ ($AltPM pre post))
+
+ [($FracPM _pre-value) ($FracPM _post-value)]
+ (if (= _pre-value _post-value)
+ ($FracPM _pre-value)
+ ($AltPM pre post))
+
+ [($RealPM _pre-value) ($RealPM _post-value)]
+ (if (= _pre-value _post-value)
+ ($RealPM _pre-value)
+ ($AltPM pre post))
+
+ [($CharPM _pre-value) ($CharPM _post-value)]
+ (if (= _pre-value _post-value)
+ ($CharPM _pre-value)
+ ($AltPM pre post))
+
+ [($TextPM _pre-value) ($TextPM _post-value)]
+ (if (= _pre-value _post-value)
+ ($TextPM _pre-value)
+ ($AltPM pre post))
+
+ [($TuplePM (&/$Left _pre-idx)) ($TuplePM (&/$Left _post-idx))]
+ (if (= _pre-idx _post-idx)
+ ($TuplePM (&/$Left _pre-idx))
+ ($AltPM pre post))
+
+ [($TuplePM (&/$Right _pre-idx)) ($TuplePM (&/$Right _post-idx))]
+ (if (= _pre-idx _post-idx)
+ ($TuplePM (&/$Right _pre-idx))
+ ($AltPM pre post))
+
+ [($VariantPM (&/$Left _pre-idx)) ($VariantPM (&/$Left _post-idx))]
+ (if (= _pre-idx _post-idx)
+ ($VariantPM (&/$Left _pre-idx))
+ ($AltPM pre post))
+
+ [($VariantPM (&/$Right _pre-idx)) ($VariantPM (&/$Right _post-idx))]
+ (if (= _pre-idx _post-idx)
+ ($VariantPM (&/$Right _pre-idx))
+ ($AltPM pre post))
+
+ [($SeqPM _pre-pre _pre-post) ($SeqPM _post-pre _post-post)]
+ (|case (fuse-pms _pre-pre _post-pre)
+ ($AltPM _ _)
+ ($AltPM pre post)
+
+ fused-pre
+ ($SeqPM fused-pre (fuse-pms _pre-post _post-post)))
+
+ _
+ ($AltPM pre post)
+ ))
+
+(defn ^:private pattern-vars [pattern]
+ (|case pattern
+ ($BindPM _id)
+ (&/|list (&/T [_id false]))
+
+ ($SeqPM _left _right)
+ (&/|++ (pattern-vars _left) (pattern-vars _right))
+
+ _
+ (&/|list)
+
+ ;; $AltPM is not considered because it's not supposed to be
+ ;; present anywhere at this point in time.
+ ))
+
+(defn ^:private find-unused-vars [var-table body]
+ (|let [[meta body-] body]
+ (|case body-
+ ($var (&/$Local _idx))
+ (&/|update _idx (fn [_] true) var-table)
+
+ ($captured _scope _c-idx [_ ($var (&/$Local _idx))])
+ (&/|update _idx (fn [_] true) var-table)
+
+ ($variant _idx _is-last? _value)
+ (find-unused-vars var-table _value)
+
+ ($tuple _elems)
+ (&/fold find-unused-vars var-table _elems)
+
+ ($ann _value-expr _type-expr)
+ (find-unused-vars var-table _value-expr)
+
+ ($apply _func _args)
+ (&/fold find-unused-vars
+ (find-unused-vars var-table _func)
+ _args)
+
+ ($proc _proc-ident _args _special-args)
+ (&/fold find-unused-vars var-table _args)
+
+ ($loop _register-offset _inits _body)
+ (&/|++ (&/fold find-unused-vars var-table _inits)
+ (find-unused-vars var-table _body))
+
+ ($iter _ _args)
+ (&/fold find-unused-vars var-table _args)
+
+ ($let _value _register _body)
+ (-> var-table
+ (find-unused-vars _value)
+ (find-unused-vars _body))
+
+ ($record-get _value _path)
+ (find-unused-vars var-table _value)
+
+ ($if _test _then _else)
+ (-> var-table
+ (find-unused-vars _test)
+ (find-unused-vars _then)
+ (find-unused-vars _else))
+
+ ($case _value [_pm _bodies])
+ (&/fold find-unused-vars
+ (find-unused-vars var-table _value)
+ _bodies)
+
+ ($function _ _ _ _captured _)
+ (->> _captured
+ (&/|map &/|second)
+ (&/fold find-unused-vars var-table))
+
+ _
+ var-table
+ )))
+
+(defn ^:private clean-unused-pattern-registers [var-table pattern]
+ (|case pattern
+ ($BindPM _idx)
+ (|let [_new-idx (&/|get _idx var-table)]
+ (cond (= _idx _new-idx)
+ pattern
+
+ (>= _new-idx 0)
+ ($BindPM _new-idx)
+
+ :else
+ $PopPM))
+
+ ($SeqPM _left _right)
+ ($SeqPM (clean-unused-pattern-registers var-table _left)
+ (clean-unused-pattern-registers var-table _right))
+
+ _
+ pattern
+
+ ;; $AltPM is not considered because it's not supposed to be
+ ;; present anywhere at this point in time.
+ ))
+
+;; This function assumes that the var-table has an ascending index
+;; order.
+;; For example: (2 3 4 5 6 7 8), instead of (8 7 6 5 4 3 2)
+(defn ^:private adjust-register-indexes* [offset var-table]
+ (|case var-table
+ (&/$Nil)
+ (&/|list)
+
+ (&/$Cons [_idx _used?] _tail)
+ (if _used?
+ (&/$Cons (&/T [_idx (- _idx offset)])
+ (adjust-register-indexes* offset _tail))
+ (&/$Cons (&/T [_idx -1])
+ (adjust-register-indexes* (inc offset) _tail))
+ )))
+
+(defn ^:private adjust-register-indexes [var-table]
+ (adjust-register-indexes* 0 var-table))
+
+(defn ^:private clean-unused-body-registers [var-table body]
+ (|let [[meta body-] body]
+ (|case body-
+ ($var (&/$Local _idx))
+ (|let [new-idx (or (&/|get _idx var-table)
+ _idx)]
+ (&/T [meta ($var (&/$Local new-idx))]))
+
+ ($captured _scope _c-idx [_sub-meta ($var (&/$Local _idx))])
+ (|let [new-idx (or (&/|get _idx var-table)
+ _idx)]
+ (&/T [meta ($captured _scope _c-idx (&/T [_sub-meta ($var (&/$Local new-idx))]))]))
+
+ ($variant _idx _is-last? _value)
+ (&/T [meta ($variant _idx _is-last? (clean-unused-body-registers var-table _value))])
+
+ ($tuple _elems)
+ (&/T [meta ($tuple (&/|map (partial clean-unused-body-registers var-table)
+ _elems))])
+
+ ($ann _value-expr _type-expr)
+ (&/T [meta ($ann (clean-unused-body-registers var-table _value-expr) _type-expr)])
+
+ ($apply _func _args)
+ (&/T [meta ($apply (clean-unused-body-registers var-table _func)
+ (&/|map (partial clean-unused-body-registers var-table)
+ _args))])
+
+ ($proc _proc-ident _args _special-args)
+ (&/T [meta ($proc _proc-ident
+ (&/|map (partial clean-unused-body-registers var-table)
+ _args)
+ _special-args)])
+
+ ($loop _register-offset _inits _body)
+ (&/T [meta ($loop _register-offset
+ (&/|map (partial clean-unused-body-registers var-table)
+ _inits)
+ (clean-unused-body-registers var-table _body))])
+
+ ($iter _iter-register-offset _args)
+ (&/T [meta ($iter _iter-register-offset
+ (&/|map (partial clean-unused-body-registers var-table)
+ _args))])
+
+ ($let _value _register _body)
+ (&/T [meta ($let (clean-unused-body-registers var-table _value)
+ _register
+ (clean-unused-body-registers var-table _body))])
+
+ ($record-get _value _path)
+ (&/T [meta ($record-get (clean-unused-body-registers var-table _value)
+ _path)])
+
+ ($if _test _then _else)
+ (&/T [meta ($if (clean-unused-body-registers var-table _test)
+ (clean-unused-body-registers var-table _then)
+ (clean-unused-body-registers var-table _else))])
+
+ ($case _value [_pm _bodies])
+ (&/T [meta ($case (clean-unused-body-registers var-table _value)
+ (&/T [_pm
+ (&/|map (partial clean-unused-body-registers var-table)
+ _bodies)]))])
+
+ ($function _register-offset _arity _scope _captured _body)
+ (&/T [meta ($function _register-offset
+ _arity
+ _scope
+ (&/|map (fn [capture]
+ (|let [[_name __var] capture]
+ (&/T [_name (clean-unused-body-registers var-table __var)])))
+ _captured)
+ _body)])
+
+ _
+ body
+ )))
+
+(defn ^:private simplify-pattern [pattern]
+ (|case pattern
+ ($SeqPM ($TuplePM _idx) ($SeqPM ($PopPM) pattern*))
+ (simplify-pattern pattern*)
+
+ ($SeqPM ($TuplePM _idx) _right)
+ (|case (simplify-pattern _right)
+ ($SeqPM ($PopPM) pattern*)
+ pattern*
+
+ _right*
+ ($SeqPM ($TuplePM _idx) _right*))
+
+ ($SeqPM _left _right)
+ ($SeqPM _left (simplify-pattern _right))
+
+ _
+ pattern))
+
+(defn ^:private optimize-register-use [pattern body]
+ (|let [p-vars (pattern-vars pattern)
+ p-vars* (find-unused-vars p-vars body)
+ adjusted-vars (adjust-register-indexes p-vars*)
+ clean-pattern (clean-unused-pattern-registers adjusted-vars pattern)
+ simple-pattern (simplify-pattern clean-pattern)
+ clean-body (clean-unused-body-registers adjusted-vars body)]
+ (&/T [simple-pattern clean-body])))
+
+;; This is the top-level function for optimizing PM, which transforms
+;; each branch and then fuses them together.
+(defn ^:private optimize-pm [branches]
+ (|let [;; branches (&/|reverse branches*)
+ pms+bodies (&/map2 (fn [branch _body-id]
+ (|let [[_pattern _body] branch]
+ (optimize-register-use (transform-pm _pattern _body-id)
+ _body)))
+ branches
+ (&/|range (&/|length branches)))
+ pms (&/|map &/|first pms+bodies)
+ bodies (&/|map &/|second pms+bodies)]
+ (|case (&/|reverse pms)
+ (&/$Nil)
+ (assert false)
+
+ (&/$Cons _head-pm _tail-pms)
+ (&/T [(&/fold fuse-pms _head-pm _tail-pms)
+ bodies])
+ )))
+
+;; [[Function-Folding Optimization]]
+
+;; The semantics of Lux establish that all functions are of a single
+;; argument and the multi-argument functions are actually nested
+;; functions being generated and then applied.
+;; This, of course, would generate a lot of waste.
+;; To avoid it, Lux actually folds function definitions together,
+;; thereby creating functions that can be used both
+;; one-argument-at-a-time, and also being called with all, or just a
+;; partial amount of their arguments.
+;; This avoids generating too many artifacts during compilation, since
+;; they get "compressed", and it can also lead to faster execution, by
+;; enabling optimized function calls later.
+
+;; Functions and captured variables have "scopes", which tell which
+;; function they are, or to which function they belong.
+;; During the folding, inner functions dissapear, since their bodies
+;; are merged into their outer "parent" functions.
+;; Their scopes must change accordingy.
+(defn ^:private de-scope
+ "(-> Scope Scope Scope Scope)"
+ [old-scope new-scope scope]
+ (if (identical? new-scope scope)
+ old-scope
+ scope))
+
+;; Also, it must be noted that when folding functions, the indexes of
+;; the registers have to be changed accodingly.
+;; That is what the following "shifting" functions are for.
+
+;; Shifts the registers for PM operations.
+(defn ^:private shift-pattern [pattern]
+ (|case pattern
+ ($BindPM _var-id)
+ ($BindPM (inc _var-id))
+
+ ($SeqPM _left-pm _right-pm)
+ ($SeqPM (shift-pattern _left-pm) (shift-pattern _right-pm))
+
+ ($AltPM _left-pm _right-pm)
+ ($AltPM (shift-pattern _left-pm) (shift-pattern _right-pm))
+
+ _
+ pattern
+ ))
+
+;; Shifts the body of a function after a folding is performed.
+(defn shift-function-body
+ "(-> Scope Scope Bool Optimized Optimized)"
+ [old-scope new-scope own-body? body]
+ (|let [[meta body-] body]
+ (|case body-
+ ($variant idx is-last? value)
+ (&/T [meta ($variant idx is-last? (shift-function-body old-scope new-scope own-body? value))])
+
+ ($tuple elems)
+ (&/T [meta ($tuple (&/|map (partial shift-function-body old-scope new-scope own-body?) elems))])
+
+ ($case value [_pm _bodies])
+ (&/T [meta ($case (shift-function-body old-scope new-scope own-body? value)
+ (&/T [(if own-body?
+ (shift-pattern _pm)
+ _pm)
+ (&/|map (partial shift-function-body old-scope new-scope own-body?) _bodies)]))])
+
+ ($function _register-offset arity scope captured body*)
+ (|let [scope* (de-scope old-scope new-scope scope)]
+ (&/T [meta ($function _register-offset
+ arity
+ scope*
+ (&/|map (fn [capture]
+ (|let [[_name [_meta ($captured _scope _idx _source)]] capture]
+ (&/T [_name (&/T [_meta ($captured scope* _idx (shift-function-body old-scope new-scope own-body? _source))])])))
+ captured)
+ (shift-function-body old-scope new-scope false body*))]))
+
+ ($ann value-expr type-expr)
+ (&/T [meta ($ann (shift-function-body old-scope new-scope own-body? value-expr)
+ type-expr)])
+
+ ($var var-kind)
+ (if own-body?
+ (|case var-kind
+ (&/$Local 0)
+ (&/T [meta ($apply body
+ (&/|list [meta ($var (&/$Local 1))]))])
+
+ (&/$Local idx)
+ (&/T [meta ($var (&/$Local (inc idx)))])
+
+ (&/$Global ?module ?name)
+ body)
+ body)
+
+ ;; This special "apply" rule is for handling recursive calls better.
+ ($apply [meta-0 ($var (&/$Local 0))] args)
+ (if own-body?
+ (&/T [meta ($apply (&/T [meta-0 ($var (&/$Local 0))])
+ (&/$Cons (&/T [meta-0 ($var (&/$Local 1))])
+ (&/|map (partial shift-function-body old-scope new-scope own-body?) args)))])
+ (&/T [meta ($apply (&/T [meta-0 ($var (&/$Local 0))])
+ (&/|map (partial shift-function-body old-scope new-scope own-body?) args))]))
+
+ ($apply func args)
+ (&/T [meta ($apply (shift-function-body old-scope new-scope own-body? func)
+ (&/|map (partial shift-function-body old-scope new-scope own-body?) args))])
+
+ ($captured scope idx source)
+ (if own-body?
+ source
+ (|case scope
+ (&/$Cons _ (&/$Cons _ (&/$Nil)))
+ source
+
+ _
+ (&/T [meta ($captured (de-scope old-scope new-scope scope) idx (shift-function-body old-scope new-scope own-body? source))])))
+
+ ($proc proc-ident args special-args)
+ (&/T [meta ($proc proc-ident (&/|map (partial shift-function-body old-scope new-scope own-body?) args) special-args)])
+
+ ($loop _register-offset _inits _body)
+ (&/T [meta ($loop (if own-body?
+ (inc _register-offset)
+ _register-offset)
+ (&/|map (partial shift-function-body old-scope new-scope own-body?)
+ _inits)
+ (shift-function-body old-scope new-scope own-body? _body))])
+
+ ($iter _iter-register-offset args)
+ (&/T [meta ($iter (if own-body?
+ (inc _iter-register-offset)
+ _iter-register-offset)
+ (&/|map (partial shift-function-body old-scope new-scope own-body?) args))])
+
+ ($let _value _register _body)
+ (&/T [meta ($let (shift-function-body old-scope new-scope own-body? _value)
+ (if own-body?
+ (inc _register)
+ _register)
+ (shift-function-body old-scope new-scope own-body? _body))])
+
+ ($record-get _value _path)
+ (&/T [meta ($record-get (shift-function-body old-scope new-scope own-body? _value)
+ _path)])
+
+ ($if _test _then _else)
+ (&/T [meta ($if (shift-function-body old-scope new-scope own-body? _test)
+ (shift-function-body old-scope new-scope own-body? _then)
+ (shift-function-body old-scope new-scope own-body? _else))])
+
+ _
+ body
+ )))
+
+;; [[Record-Manipulation Optimizations]]
+
+;; If a pattern-matching tree with a single branch is found, and that
+;; branch corresponds to a tuple PM, and the body corresponds to a
+;; local variable, it's likely that the local refers to some member of
+;; the tuple that is being extracted.
+;; That is the pattern that is to be expected of record read-access,
+;; so this function tries to extract the (possibly nested) path
+;; necessary, ending in the data-node of the wanted member.
+(defn ^:private record-read-path
+ "(-> (List PM) Idx (List Idx))"
+ [pms member-idx]
+ (loop [current-idx 0
+ pms pms]
+ (|case pms
+ (&/$Nil)
+ &/$None
+
+ (&/$Cons _pm _pms)
+ (|case _pm
+ (&a-case/$NoTestAC)
+ (recur (inc current-idx)
+ _pms)
+
+ (&a-case/$StoreTestAC _register)
+ (if (= member-idx _register)
+ (&/|list (&/T [current-idx (&/|empty? _pms)]))
+ (recur (inc current-idx)
+ _pms))
+
+ (&a-case/$TupleTestAC _sub-tests)
+ (let [sub-path (record-read-path _sub-tests member-idx)]
+ (if (not (&/|empty? sub-path))
+ (&/$Cons (&/T [current-idx (&/|empty? _pms)]) sub-path)
+ (recur (inc current-idx)
+ _pms)
+ ))
+
+ _
+ (&/|list))
+ )))
+
+;; [[Loop Optimizations]]
+
+;; Lux doesn't offer any looping constructs, relying instead on
+;; recursion.
+;; Some common usages of recursion can be written more efficiently
+;; just using regular loops/iteration.
+;; This optimization looks for tail-calls in the function body,
+;; rewriting them as jumps to the beginning of the function, while
+;; they also updated the necessary local variables for the next iteration.
+(defn ^:private optimize-iter
+ "(-> Int Optimized Optimized)"
+ [arity optim]
+ (|let [[meta optim-] optim]
+ (|case optim-
+ ($apply [meta-0 ($var (&/$Local 0))] _args)
+ (if (= arity (&/|length _args))
+ (&/T [meta ($iter 1 _args)])
+ optim)
+
+ ($case _value [_pattern _bodies])
+ (&/T [meta ($case _value
+ (&/T [_pattern
+ (&/|map (partial optimize-iter arity)
+ _bodies)]))])
+
+ ($let _value _register _body)
+ (&/T [meta ($let _value _register (optimize-iter arity _body))])
+
+ ($if _test _then _else)
+ (&/T [meta ($if _test
+ (optimize-iter arity _then)
+ (optimize-iter arity _else))])
+
+ ($ann _value-expr _type-expr)
+ (&/T [meta ($ann (optimize-iter arity _value-expr) _type-expr)])
+
+ _
+ optim
+ )))
+
+(defn ^:private contains-self-reference?
+ "(-> Optimized Bool)"
+ [body]
+ (|let [[meta body-] body
+ stepwise-test (fn [base arg] (or base (contains-self-reference? arg)))]
+ (|case body-
+ ($variant idx is-last? value)
+ (contains-self-reference? value)
+
+ ($tuple elems)
+ (&/fold stepwise-test false elems)
+
+ ($case value [_pm _bodies])
+ (or (contains-self-reference? value)
+ (&/fold stepwise-test false _bodies))
+
+ ($function _ _ _ captured _)
+ (->> captured
+ (&/|map (fn [capture]
+ (|let [[_name [_meta ($captured _scope _idx _source)]] capture]
+ _source)))
+ (&/fold stepwise-test false))
+
+ ($ann value-expr type-expr)
+ (contains-self-reference? value-expr)
+
+ ($var (&/$Local 0))
+ true
+
+ ($apply func args)
+ (or (contains-self-reference? func)
+ (&/fold stepwise-test false args))
+
+ ($proc proc-ident args special-args)
+ (&/fold stepwise-test false args)
+
+ ($loop _register-offset _inits _body)
+ (or (&/fold stepwise-test false _inits)
+ (contains-self-reference? _body))
+
+ ($iter _ args)
+ (&/fold stepwise-test false args)
+
+ ($let _value _register _body)
+ (or (contains-self-reference? _value)
+ (contains-self-reference? _body))
+
+ ($record-get _value _path)
+ (contains-self-reference? _value)
+
+ ($if _test _then _else)
+ (or (contains-self-reference? _test)
+ (contains-self-reference? _then)
+ (contains-self-reference? _else))
+
+ _
+ false
+ )))
+
+(defn ^:private pm-loop-transform [register-offset direct? pattern]
+ (|case pattern
+ ($BindPM _var-id)
+ ($BindPM (+ register-offset (if direct?
+ (- _var-id 2)
+ (- _var-id 1))))
+
+ ($SeqPM _left-pm _right-pm)
+ ($SeqPM (pm-loop-transform register-offset direct? _left-pm)
+ (pm-loop-transform register-offset direct? _right-pm))
+
+ ($AltPM _left-pm _right-pm)
+ ($AltPM (pm-loop-transform register-offset direct? _left-pm)
+ (pm-loop-transform register-offset direct? _right-pm))
+
+ _
+ pattern
+ ))
+
+;; This function must be run STRICTLY before shift-function body, as
+;; the transformation assumes that SFB will be invoke after it.
+(defn ^:private loop-transform [register-offset direct? body]
+ (|let [adjust-direct (fn [register]
+ ;; The register must be decreased once, since
+ ;; it will be re-increased in
+ ;; shift-function-body.
+ ;; The decrease is meant to keep things stable.
+ (if direct?
+ ;; And, if this adjustment is done
+ ;; directly during a loop-transform (and
+ ;; not indirectly if transforming an inner
+ ;; loop), then it must be decreased again
+ ;; because the 0/self var will no longer
+ ;; exist in the loop's context.
+ (- register 2)
+ (- register 1)))
+ [meta body-] body]
+ (|case body-
+ ($variant idx is-last? value)
+ (&/T [meta ($variant idx is-last? (loop-transform register-offset direct? value))])
+
+ ($tuple elems)
+ (&/T [meta ($tuple (&/|map (partial loop-transform register-offset direct?) elems))])
+
+ ($case value [_pm _bodies])
+ (&/T [meta ($case (loop-transform register-offset direct? value)
+ (&/T [(pm-loop-transform register-offset direct? _pm)
+ (&/|map (partial loop-transform register-offset direct?)
+ _bodies)]))])
+
+ ;; Functions are ignored because they'll be handled properly at shift-function-body
+
+ ($ann value-expr type-expr)
+ (&/T [meta ($ann (loop-transform register-offset direct? value-expr)
+ type-expr)])
+
+ ($var (&/$Local idx))
+ ;; The index must be decreased once, because the var index is
+ ;; 1-based (since 0 is reserved for self-reference).
+ ;; Then it must be decreased again, since it will be increased
+ ;; in the shift-function-body call.
+ ;; Then, I add the offset to ensure the var points to the right register.
+ (&/T [meta ($var (&/$Local (-> (adjust-direct idx)
+ (+ register-offset))))])
+
+ ($apply func args)
+ (&/T [meta ($apply (loop-transform register-offset direct? func)
+ (&/|map (partial loop-transform register-offset direct?) args))])
+
+ ;; Captured-vars are ignored because they'll be handled properly at shift-function-body
+
+ ($proc proc-ident args special-args)
+ (&/T [meta ($proc proc-ident (&/|map (partial loop-transform register-offset direct?) args) special-args)])
+
+ ($loop _register-offset _inits _body)
+ (&/T [meta ($loop (+ register-offset (adjust-direct _register-offset))
+ (&/|map (partial loop-transform register-offset direct?) _inits)
+ (loop-transform register-offset direct? _body))])
+
+ ($iter _iter-register-offset args)
+ (&/T [meta ($iter (+ register-offset (adjust-direct _iter-register-offset))
+ (&/|map (partial loop-transform register-offset direct?) args))])
+
+ ($let _value _register _body)
+ (&/T [meta ($let (loop-transform register-offset direct? _value)
+ (+ register-offset (adjust-direct _register))
+ (loop-transform register-offset direct? _body))])
+
+ ($record-get _value _path)
+ (&/T [meta ($record-get (loop-transform register-offset direct? _value)
+ _path)])
+
+ ($if _test _then _else)
+ (&/T [meta ($if (loop-transform register-offset direct? _test)
+ (loop-transform register-offset direct? _then)
+ (loop-transform register-offset direct? _else))])
+
+ _
+ body
+ )))
+
+(defn ^:private inline-loop [meta register-offset scope captured args body]
+ (->> body
+ (loop-transform register-offset true)
+ (shift-function-body scope (&/|tail scope) true)
+ ($loop register-offset args)
+ (list meta)
+ (&/T)))
+
+;; [[Initial Optimization]]
+
+;; Before any big optimization can be done, the incoming Analysis nodes
+;; must be transformed into Optimized nodes, amenable to further transformations.
+;; This function does the job, while also detecting (and optimizing)
+;; some simple surface patterns it may encounter.
+(let [optimize-closure (fn [optimize closure]
+ (&/|map (fn [capture]
+ (|let [[_name _analysis] capture]
+ (&/T [_name (optimize _analysis)])))
+ closure))]
+ (defn ^:private pass-0
+ "(-> Bool Analysis Optimized)"
+ [top-level-func? analysis]
+ (|let [[meta analysis-] analysis]
+ (|case analysis-
+ (&a/$bool value)
+ (&/T [meta ($bool value)])
+
+ (&a/$nat value)
+ (&/T [meta ($nat value)])
+
+ (&a/$int value)
+ (&/T [meta ($int value)])
+
+ (&a/$frac value)
+ (&/T [meta ($frac value)])
+
+ (&a/$real value)
+ (&/T [meta ($real value)])
+
+ (&a/$char value)
+ (&/T [meta ($char value)])
+
+ (&a/$text value)
+ (&/T [meta ($text value)])
+
+ (&a/$variant idx is-last? value)
+ (&/T [meta ($variant idx is-last? (pass-0 top-level-func? value))])
+
+ (&a/$tuple elems)
+ (&/T [meta ($tuple (&/|map (partial pass-0 top-level-func?) elems))])
+
+ (&a/$apply func args)
+ (|let [=func (pass-0 top-level-func? func)
+ =args (&/|map (partial pass-0 top-level-func?) args)]
+ (|case =func
+ [_ ($ann [_ ($function _register-offset _arity _scope _captured _body)]
+ _)]
+ (if (and (= _arity (&/|length =args))
+ (not (contains-self-reference? _body)))
+ (inline-loop meta _register-offset _scope _captured =args _body)
+ (&/T [meta ($apply =func =args)]))
+
+ _
+ (&/T [meta ($apply =func =args)])))
+
+ (&a/$case value branches)
+ (let [normal-case-optim (fn []
+ (&/T [meta ($case (pass-0 top-level-func? value)
+ (optimize-pm (&/|map (fn [branch]
+ (|let [[_pattern _body] branch]
+ (&/T [_pattern (pass-0 top-level-func? _body)])))
+ branches)))]))]
+ (|case branches
+ ;; The pattern for a let-expression is a single branch,
+ ;; tying the value to a register.
+ (&/$Cons [(&a-case/$StoreTestAC _register) _body] (&/$Nil))
+ (&/T [meta ($let (pass-0 top-level-func? value) _register (pass-0 top-level-func? _body))])
+
+ (&/$Cons [(&a-case/$BoolTestAC false) _else]
+ (&/$Cons [(&a-case/$BoolTestAC true) _then]
+ (&/$Nil)))
+ (&/T [meta ($if (pass-0 top-level-func? value) (pass-0 top-level-func? _then) (pass-0 top-level-func? _else))])
+
+ ;; The pattern for a record-get is a single branch, with a
+ ;; tuple pattern and a body corresponding to a
+ ;; local-variable extracted from the tuple.
+ (&/$Cons [(&a-case/$TupleTestAC _sub-tests) [_ (&a/$var (&/$Local _member-idx))]] (&/$Nil))
+ (|let [_path (record-read-path _sub-tests _member-idx)]
+ (if (&/|empty? _path)
+ ;; If the path is empty, that means it was a
+ ;; false-positive and normal PM optimization should be
+ ;; done instead.
+ (normal-case-optim)
+ ;; Otherwise, we've got ourselves a record-get expression.
+ (&/T [meta ($record-get (pass-0 top-level-func? value) _path)])))
+
+ ;; If no special patterns are found, just do normal PM optimization.
+ _
+ (normal-case-optim)))
+
+ (&a/$lambda _register-offset scope captured body)
+ (|let [inner-func? (|case body
+ [_ (&a/$lambda _ _ _ _)]
+ true
+
+ _
+ false)]
+ (|case (pass-0 (not inner-func?) body)
+ ;; If the body of a function is another function, that means
+ ;; no work was done in-between and both layers can be folded
+ ;; into one.
+ [_ ($function _ _arity _scope _captured _body)]
+ (|let [new-arity (inc _arity)
+ collapsed-body (shift-function-body scope _scope true _body)]
+ (&/T [meta ($function _register-offset
+ new-arity
+ scope
+ (optimize-closure (partial pass-0 top-level-func?) captured)
+ (if top-level-func?
+ (optimize-iter new-arity collapsed-body)
+ collapsed-body))]))
+
+ ;; Otherwise, they're nothing to be done and we've got a
+ ;; 1-arity function.
+ =body
+ (&/T [meta ($function _register-offset
+ 1 scope
+ (optimize-closure (partial pass-0 top-level-func?) captured)
+ (if top-level-func?
+ (optimize-iter 1 =body)
+ =body))])))
+
+ (&a/$ann value-expr type-expr)
+ (&/T [meta ($ann (pass-0 top-level-func? value-expr) type-expr)])
+
+ (&a/$var var-kind)
+ (&/T [meta ($var var-kind)])
+
+ (&a/$captured scope idx source)
+ (&/T [meta ($captured scope idx (pass-0 top-level-func? source))])
+
+ (&a/$proc proc-ident args special-args)
+ (&/T [meta ($proc proc-ident (&/|map (partial pass-0 top-level-func?) args) special-args)])
+
+ _
+ (assert false (prn-str 'pass-0 top-level-func? (&/adt->text analysis)))
+ ))))
+
+;; [Exports]
+(defn optimize
+ "(-> Analysis Optimized)"
+ [analysis]
+ (->> analysis
+ (pass-0 true)))