1 files changed, 155 insertions, 9 deletions
diff --git a/src/lux/optimizer.clj b/src/lux/optimizer.clj
index 933849be3..e7af21664 100644
--- a/src/lux/optimizer.clj
+++ b/src/lux/optimizer.clj
@@ -9,6 +9,7 @@
 
 ;; [Tags]
 (defvariant
+  ;; These tags just have a one-to-one correspondence with Analysis data-structures.
   ("bool" 1)
   ("nat" 1)
   ("int" 1)
@@ -25,29 +26,80 @@
   ("captured" 3)
   ("proc" 3)
 
-  ;; Purely for optimizations
+  ;; These other tags represent higher-order constructs that manifest
+  ;; themselves as patterns in the code.
+  ;; Lux doesn't formally provide these features, but some macros
+  ;; expose ways to implement them in terms of the other (primitive)
+  ;; features.
+  ;; The optimizer looks for those usage patterns and transforms them
+  ;; into explicit constructs, which are then subject to specialized optimizations.
+
+  ;; This is a loop, as expected in imperative programming.
   ("loop" 1)
+  ;; This is a simple let-expression, as opposed to the more general pattern-matching.
   ("let" 3)
+  ;; This is an access to a record's member. It can be multiple level:
+  ;; e.g. record.l1.l2.l3
+  ;; The record-get token stores the path, for simpler compilation.
   ("record-get" 2)
   )
 
-;; For pattern-matching
+;; [Utils]
+
+;; [[Pattern-Matching Traversal Optimization]]
+
+;; This represents an alternative way to view pattern-matching.
+;; The PM that Lux provides has declarative semantics, with the user
+;; specifying how his data is shaped, but not how to traverse it.
+;; The optimizer's PM is operational in nature, and relies on
+;; specifying a path of traversal, with a variety of operations that
+;; can be done along the way.
+;; The algorithm relies on looking at pattern-matching as traversing a
+;; (possibly) branching path, where each step along the path
+;; corresponds to a value, the ends of the path are the jumping-off
+;; points for the bodies of branches, and branching decisions can be
+;; backtracked, if they don't result in a valid jump.
 (defvariant
+  ;; Throw away the current data-node (CDN). It's useless.
   ("PopPM" 0)
+  ;; Store the CDN in a register.
   ("BindPM" 1)
+  ;; Compare the CDN with a boolean value.
   ("BoolPM" 1)
+  ;; Compare the CDN with a natural value.
   ("NatPM" 1)
+  ;; Compare the CDN with an integer value.
   ("IntPM" 1)
+  ;; Compare the CDN with a real value.
   ("RealPM" 1)
+  ;; Compare the CDN with a character value.
   ("CharPM" 1)
+  ;; Compare the CDN with a text value.
   ("TextPM" 1)
+  ;; Compare the CDN with a variant value. If valid, proceed to test
+  ;; the variant's inner value.
   ("VariantPM" 1)
+  ;; Access a tuple value at a given index, for further examination.
   ("TuplePM" 1)
+  ;; Creates an instance of the backtracking info, as a preparatory
+  ;; step to exploring one of the branching paths.
   ("AltPM" 2)
+  ;; Allows to test the CDN, while keeping a copy of it for more
+  ;; tasting later on.
+  ;; If necessary when doing multiple tests on a single value, like
+  ;; when testing multiple parts of a tuple.
   ("SeqPM" 2)
+  ;; This is the jumping-off point for the PM part, where the PM
+  ;; data-structure is thrown away and the program jumps to the
+  ;; branch's body.
   ("ExecPM" 1))
 
-;; [Utils]
+;; This function does a simple transformation from the declarative
+;; model of PM of the analyser, to the operational model of PM of the
+;; optimizer.
+;; You may notice that all branches end in PopPM.
+;; The reason is that testing does not immediately imply throwing away
+;; the data to be tested, which is why a popping step must immediately follow.
 (defn ^:private transform-pm* [test]
   (|case test
     (&a-case/$NoTestAC)
@@ -90,12 +142,22 @@
 
     (&a-case/$TupleTestAC _sub-tests)
     (|case _sub-tests
+      ;; An empty tuple corresponds to unit, which can't be tested in
+      ;; any meaningful way, so it's just popped.
       (&/$Nil)
       (&/|list $PopPM)
 
+      ;; A tuple of a single element is equivalent to the element
+      ;; itself, to the element's PM is generated.
       (&/$Cons _only-test (&/$Nil))
       (transform-pm* _only-test)
 
+      ;; Single tuple PM features the tests of each tuple member
+      ;; inlined, it's operational equivalent is interleaving the
+      ;; access to each tuple member, followed by the testing of said
+      ;; member.
+      ;; That is way each sequence of access+subtesting gets generated
+      ;; and later they all get concatenated.
       _
       (|let [tuple-size (&/|length _sub-tests)]
         (&/|++ (&/flat-map (fn [idx+test*]
@@ -108,6 +170,15 @@
                                    _sub-tests))
                (&/|list $PopPM))))))
 
+;; It will be common for pattern-matching on a very nested
+;; data-structure to require popping all the intermediate
+;; data-structures that were visited once it's all done.
+;; However, the PM infrastructure employs a single data-stack to keep
+;; all data nodes in the trajectory, and that data-stack can just be
+;; thrown again entirely, in just one step.
+;; Because of that, any ending POPs prior to throwing away the
+;; data-stack would be completely useless.
+;; This function cleans them all up, to avoid wasteful computation later.
 (defn ^:private clean-unnecessary-pops [steps]
   (|case steps
     (&/$Cons ($PopPM) _steps)
@@ -116,11 +187,19 @@
     _
     steps))
 
+;; This transforms a single branch of a PM tree into it's operational
+;; equivalent, while also associating the PM of the branch with the
+;; jump to the branch's body.
 (defn ^:private transform-pm [test body-id]
   (&/fold (fn [right left] ($SeqPM left right))
           ($ExecPM body-id)
           (clean-unnecessary-pops (&/|reverse (transform-pm* test)))))
 
+;; This function fuses together the paths of the PM traversal, adding
+;; branching AltPMs where necessary, and fusing similar paths together
+;; as much as possible, when early parts of them coincide.
+;; The goal is to minimize rework as much as possible by sharing as
+;; much of each path as possible.
 (defn ^:private fuse-pms [pre post]
   (|case (&/T [pre post])
     [($PopPM) ($PopPM)]
@@ -193,6 +272,8 @@
     ($AltPM pre post)
     ))
 
+;; This is the top-level function for optimizing PM, which transforms
+;; each branch and then fuses them together.
 (defn ^:private optimize-pm [branches]
   (|let [;; branches (&/|reverse branches*)
          bodies (&/|map &/|second branches)
@@ -211,6 +292,36 @@
             bodies])
       )))
 
+;; [[Function-Folding Optimization]]
+
+;; The semantics of Lux establish that all functions are of a single
+;; argument and the multi-argument functions are actually nested
+;; functions being generated and then applied.
+;; This, of course, would generate a lot of waste.
+;; To avoid it, Lux actually folds function definitions together,
+;; thereby creating functions that can be used both
+;; one-argument-at-a-time, and also being called with all, or just a
+;; partial amount of their arguments.
+;; This avoids generating too many artifacts during compilation, since
+;; they get "compressed", and it can also lead to faster execution, by
+;; enabling optimized function calls later.
+
+;; Functions and captured variables have "scopes", which tell which
+;; function they are, or to which function they belong.
+;; During the folding, inner functions dissapear, since their bodies
+;; are merged into their outer "parent" functions.
+;; Their scopes must change accordingy.
+(defn ^:private de-scope [old-scope new-scope scope]
+  "(-> Scope Scope Scope Scope)"
+  (if (identical? new-scope scope)
+    old-scope
+    scope))
+
+;; Also, it must be noted that when folding functions, the indexes of
+;; the registers have to be changed accodingly.
+;; That is what the following "shifting" functions are for.
+
+;; Shifts the registers for PM operations.
 (defn ^:private shift-pattern [pattern]
   (|case pattern
     ($BindPM _var-id)
@@ -226,12 +337,7 @@
     pattern
     ))
 
-(defn ^:private de-scope [old-scope new-scope scope]
-  "(-> Scope Scope Scope Scope)"
-  (if (identical? new-scope scope)
-    old-scope
-    scope))
-
+;; Shifts the body of a function after a folding is performed.
 (defn shift-function-body [old-scope new-scope own-body? body]
   "(-> Scope Scope Bool Optimized Optimized)"
   (|let [[meta body-] body]
@@ -278,6 +384,7 @@
           body)
         body)
 
+      ;; This special "apply" rule is for handling better recursive calls.
       ($apply [meta-0 ($var (&/$Local 0))] args)
       (if own-body?
         (&/T [meta ($apply (&/T [meta-0 ($var (&/$Local 0))])
@@ -321,6 +428,15 @@
       body
       )))
 
+;; [[Record-Manipulation Optimizations]]
+
+;; If a pattern-matching tree with a single branch is found, and that
+;; branch corresponds to a tuple PM, and the body corresponds to a
+;; local variable, it's likely that the local refers to some member of
+;; the tuple that is being extracted.
+;; That is the pattern that is to be expected of record read-access,
+;; so this function tries to extract the (possibly nested) path
+;; necessary, ending in the data-node of the wanted member.
 (defn ^:private record-read-path [pms member-idx]
   "(-> (List PM) Idx (List Idx))"
   (loop [current-idx 0
@@ -353,6 +469,15 @@
         (&/|list))
       )))
 
+;; [[Loop Optimizations]]
+
+;; Lux doesn't offer any looping constructs, relying instead on
+;; recursion.
+;; Some common usages of recursion can be written more efficiently
+;; just using regular loops/iteration.
+;; This optimization looks for tail-calls in the function body,
+;; rewriting them as jumps to the beginning of the function, while
+;; they also updated the necessary local variables for the next iteration.
 (defn ^:private optimize-loop [arity optim]
   "(-> Int Optimized Optimized)"
   (|let [[meta optim-] optim]
@@ -378,6 +503,12 @@
       optim
       )))
 
+;; [[Initial Optimization]]
+
+;; Before any big optimization can be done, the incoming Analysis nodes
+;; must be transformed into Optimized nodes, amenable to further transformations.
+;; This function does the job, while also detecting (and optimizing)
+;; some simple surface patterns it may encounter.
 (let [optimize-closure (fn [optimize closure]
                          (&/|map (fn [capture]
                                    (|let [[_name _analysis] capture]
@@ -416,19 +547,29 @@
         
         (&a/$case value branches)
         (|case branches
+          ;; The pattern for a let-expression is a single branch,
+          ;; tying the value to a register.
           (&/$Cons [(&a-case/$StoreTestAC _register) _body] (&/$Nil))
           (&/T [meta ($let (pass-0 value) _register (pass-0 _body))])
 
+          ;; The pattern for a record-get is a single branch, with a
+          ;; tuple pattern and a body corresponding to a
+          ;; local-variable extracted from the tuple.
           (&/$Cons [(&a-case/$TupleTestAC _sub-tests) [_ (&a/$var (&/$Local _member-idx))]] (&/$Nil))
           (|let [_path (record-read-path _sub-tests _member-idx)]
             (if (&/|empty? _path)
+              ;; If the path is empty, that means it was a
+              ;; false-positive and normal PM optimization should be
+              ;; done instead.
               (&/T [meta ($case (pass-0 value)
                                 (optimize-pm (&/|map (fn [branch]
                                                        (|let [[_pattern _body] branch]
                                                          (&/T [_pattern (pass-0 _body)])))
                                                      branches)))])
+              ;; Otherwise, we've got ourselves a record-get expression.
               (&/T [meta ($record-get (pass-0 value) _path)])))
 
+          ;; If no special patterns are found, just do normal PM optimization.
           _
           (&/T [meta ($case (pass-0 value)
                             (optimize-pm (&/|map (fn [branch]
@@ -438,9 +579,14 @@
         
         (&a/$lambda scope captured body)
         (|case (pass-0 body)
+          ;; If the body of a function is another function, that means
+          ;; no work was done in-between and both layers can be folded
+          ;; into one.
           [_ ($function _arity _scope _captured _body)]
           (&/T [meta ($function (inc _arity) scope (optimize-closure pass-0 captured) (shift-function-body scope _scope true _body))])
 
+          ;; Otherwise, they're nothing to be done and we've got a
+          ;; 1-arity function.
           =body
           (&/T [meta ($function 1 scope (optimize-closure pass-0 captured) =body)]))