diff options
Diffstat (limited to 'compiler/Pure.ml')
-rw-r--r-- | compiler/Pure.ml | 581 |
1 files changed, 581 insertions, 0 deletions
diff --git a/compiler/Pure.ml b/compiler/Pure.ml new file mode 100644 index 00000000..77265f75 --- /dev/null +++ b/compiler/Pure.ml @@ -0,0 +1,581 @@ +open Identifiers +open Names +module T = Types +module V = Values +module E = Expressions +module A = LlbcAst +module TypeDeclId = T.TypeDeclId +module TypeVarId = T.TypeVarId +module RegionGroupId = T.RegionGroupId +module VariantId = T.VariantId +module FieldId = T.FieldId +module SymbolicValueId = V.SymbolicValueId +module FunDeclId = A.FunDeclId +module GlobalDeclId = A.GlobalDeclId + +(** We give an identifier to every phase of the synthesis (forward, backward + for group of regions 0, etc.) *) +module SynthPhaseId = IdGen () + +(** Pay attention to the fact that we also define a {!Values.VarId} module in Values *) +module VarId = IdGen () + +type integer_type = T.integer_type [@@deriving show, ord] + +(** The assumed types for the pure AST. + + In comparison with LLBC: + - we removed [Box] (because it is translated as the identity: [Box T == T]) + - we added: + - [Result]: the type used in the error monad. This allows us to have a + unified treatment of expressions (especially when we have to unfold the + monadic binds) + - [State]: the type of the state, when using state-error monads. Note that + this state is opaque to Aeneas (the user can define it, or leave it as + assumed) + *) +type assumed_ty = State | Result | Vec | Option [@@deriving show, ord] + +(* TODO: we should never directly manipulate [Return] and [Fail], but rather + * the monadic functions [return] and [fail] (makes treatment of error and + * state-error monads more uniform) *) +let result_return_id = VariantId.of_int 0 +let result_fail_id = VariantId.of_int 1 +let option_some_id = T.option_some_id +let option_none_id = T.option_none_id + +type type_id = AdtId of TypeDeclId.id | Tuple | Assumed of assumed_ty +[@@deriving show, ord] + +(** Ancestor for iter visitor for [ty] *) +class ['self] iter_ty_base = + object (_self : 'self) + inherit [_] VisitorsRuntime.iter + method visit_id : 'env -> TypeVarId.id -> unit = fun _ _ -> () + method visit_type_id : 'env -> type_id -> unit = fun _ _ -> () + method visit_integer_type : 'env -> integer_type -> unit = fun _ _ -> () + end + +(** Ancestor for map visitor for [ty] *) +class ['self] map_ty_base = + object (_self : 'self) + inherit [_] VisitorsRuntime.map + method visit_id : 'env -> TypeVarId.id -> TypeVarId.id = fun _ id -> id + method visit_type_id : 'env -> type_id -> type_id = fun _ id -> id + + method visit_integer_type : 'env -> integer_type -> integer_type = + fun _ ity -> ity + end + +type ty = + | Adt of type_id * ty list + (** {!Adt} encodes ADTs and tuples and assumed types. + + TODO: what about the ended regions? (ADTs may be parameterized + with several region variables. When giving back an ADT value, we may + be able to only give back part of the ADT. We need a way to encode + such "partial" ADTs. + *) + | TypeVar of TypeVarId.id + | Bool + | Char + | Integer of integer_type + | Str + | Array of ty (* TODO: this should be an assumed type?... *) + | Slice of ty (* TODO: this should be an assumed type?... *) + | Arrow of ty * ty +[@@deriving + show, + visitors + { + name = "iter_ty"; + variety = "iter"; + ancestors = [ "iter_ty_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + concrete = true; + polymorphic = false; + }, + visitors + { + name = "map_ty"; + variety = "map"; + ancestors = [ "map_ty_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + concrete = true; + polymorphic = false; + }] + +type field = { field_name : string option; field_ty : ty } [@@deriving show] +type variant = { variant_name : string; fields : field list } [@@deriving show] + +type type_decl_kind = Struct of field list | Enum of variant list | Opaque +[@@deriving show] + +type type_var = T.type_var [@@deriving show] + +type type_decl = { + def_id : TypeDeclId.id; + name : name; + type_params : type_var list; + kind : type_decl_kind; +} +[@@deriving show] + +type scalar_value = V.scalar_value [@@deriving show] +type constant_value = V.constant_value [@@deriving show] + +(** Because we introduce a lot of temporary variables, the list of variables + is not fixed: we thus must carry all its information with the variable + itself. + *) +type var = { + id : VarId.id; + basename : string option; + (** The "basename" is used to generate a meaningful name for the variable + (by potentially adding an index to uniquely identify it). + *) + ty : ty; +} +[@@deriving show] + +(* TODO: we might want to redefine field_proj_kind here, to prevent field accesses + * on enumerations. + * Also: tuples... + * Rmk: projections are actually only used as meta-data. + * *) +type mprojection_elem = { pkind : E.field_proj_kind; field_id : FieldId.id } +[@@deriving show] + +type mprojection = mprojection_elem list [@@deriving show] + +(** "Meta" place. + + Meta-data retrieved from the symbolic execution, which gives provenance + information about the values. We use this to generate names for the variables + we introduce. + *) +type mplace = { + var_id : V.VarId.id; + name : string option; + projection : mprojection; +} +[@@deriving show] + +type variant_id = VariantId.id [@@deriving show] + +(** Ancestor for [iter_pat_var_or_dummy] visitor *) +class ['self] iter_value_base = + object (_self : 'self) + inherit [_] VisitorsRuntime.iter + method visit_constant_value : 'env -> constant_value -> unit = fun _ _ -> () + method visit_var : 'env -> var -> unit = fun _ _ -> () + method visit_mplace : 'env -> mplace -> unit = fun _ _ -> () + method visit_ty : 'env -> ty -> unit = fun _ _ -> () + method visit_variant_id : 'env -> variant_id -> unit = fun _ _ -> () + end + +(** Ancestor for [map_typed_rvalue] visitor *) +class ['self] map_value_base = + object (_self : 'self) + inherit [_] VisitorsRuntime.map + + method visit_constant_value : 'env -> constant_value -> constant_value = + fun _ x -> x + + method visit_var : 'env -> var -> var = fun _ x -> x + method visit_mplace : 'env -> mplace -> mplace = fun _ x -> x + method visit_ty : 'env -> ty -> ty = fun _ x -> x + method visit_variant_id : 'env -> variant_id -> variant_id = fun _ x -> x + end + +(** Ancestor for [reduce_typed_rvalue] visitor *) +class virtual ['self] reduce_value_base = + object (self : 'self) + inherit [_] VisitorsRuntime.reduce + + method visit_constant_value : 'env -> constant_value -> 'a = + fun _ _ -> self#zero + + method visit_var : 'env -> var -> 'a = fun _ _ -> self#zero + method visit_mplace : 'env -> mplace -> 'a = fun _ _ -> self#zero + method visit_ty : 'env -> ty -> 'a = fun _ _ -> self#zero + method visit_variant_id : 'env -> variant_id -> 'a = fun _ _ -> self#zero + end + +(** Ancestor for [mapreduce_typed_rvalue] visitor *) +class virtual ['self] mapreduce_value_base = + object (self : 'self) + inherit [_] VisitorsRuntime.mapreduce + + method visit_constant_value : 'env -> constant_value -> constant_value * 'a + = + fun _ x -> (x, self#zero) + + method visit_var : 'env -> var -> var * 'a = fun _ x -> (x, self#zero) + + method visit_mplace : 'env -> mplace -> mplace * 'a = + fun _ x -> (x, self#zero) + + method visit_ty : 'env -> ty -> ty * 'a = fun _ x -> (x, self#zero) + + method visit_variant_id : 'env -> variant_id -> variant_id * 'a = + fun _ x -> (x, self#zero) + end + +(** A pattern (which appears on the left of assignments, in matches, etc.). *) +type pattern = + | PatConcrete of constant_value + (** {!PatConcrete} is necessary because we merge the switches over integer + values and the matches over enumerations *) + | PatVar of var * mplace option + | PatDummy (** Ignored value: [_]. *) + | PatAdt of adt_pattern + +and adt_pattern = { + variant_id : variant_id option; + field_values : typed_pattern list; +} + +and typed_pattern = { value : pattern; ty : ty } +[@@deriving + show, + visitors + { + name = "iter_typed_pattern"; + variety = "iter"; + ancestors = [ "iter_value_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + concrete = true; + polymorphic = false; + }, + visitors + { + name = "map_typed_pattern"; + variety = "map"; + ancestors = [ "map_value_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + concrete = true; + polymorphic = false; + }, + visitors + { + name = "reduce_typed_pattern"; + variety = "reduce"; + ancestors = [ "reduce_value_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + polymorphic = false; + }, + visitors + { + name = "mapreduce_typed_pattern"; + variety = "mapreduce"; + ancestors = [ "mapreduce_value_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + polymorphic = false; + }] + +type unop = Not | Neg of integer_type | Cast of integer_type * integer_type +[@@deriving show, ord] + +type fun_id = + | Regular of A.fun_id * T.RegionGroupId.id option + (** Backward id: [Some] if the function is a backward function, [None] + if it is a forward function. + + TODO: we need to redefine A.fun_id here, to add [fail] and + [return] (important to get a unified treatment of the state-error + monad). For now, when using the state-error monad: extraction + works only if we unfold all the monadic let-bindings, and we + then replace the content of the occurrences of [Return] to also + return the state (which is really super ugly). + *) + | Unop of unop + | Binop of E.binop * integer_type +[@@deriving show, ord] + +(** An identifier for an ADT constructor *) +type adt_cons_id = { adt_id : type_id; variant_id : variant_id option } +[@@deriving show] + +(** Projection - For now we don't support projection of tuple fields + (because not all the backends have syntax for this). + *) +type projection = { adt_id : type_id; field_id : FieldId.id } [@@deriving show] + +type qualif_id = + | Func of fun_id + | Global of GlobalDeclId.id + | AdtCons of adt_cons_id (** A function or ADT constructor identifier *) + | Proj of projection (** Field projector *) +[@@deriving show] + +(** An instantiated qualified. + + Note that for now we have a clear separation between types and expressions, + which explains why we have the [type_params] field: a function or ADT + constructor is always fully instantiated. + *) +type qualif = { id : qualif_id; type_args : ty list } [@@deriving show] + +type var_id = VarId.id [@@deriving show] + +(** Ancestor for [iter_expression] visitor *) +class ['self] iter_expression_base = + object (_self : 'self) + inherit [_] iter_typed_pattern + method visit_integer_type : 'env -> integer_type -> unit = fun _ _ -> () + method visit_var_id : 'env -> var_id -> unit = fun _ _ -> () + method visit_qualif : 'env -> qualif -> unit = fun _ _ -> () + end + +(** Ancestor for [map_expression] visitor *) +class ['self] map_expression_base = + object (_self : 'self) + inherit [_] map_typed_pattern + + method visit_integer_type : 'env -> integer_type -> integer_type = + fun _ x -> x + + method visit_var_id : 'env -> var_id -> var_id = fun _ x -> x + method visit_qualif : 'env -> qualif -> qualif = fun _ x -> x + end + +(** Ancestor for [reduce_expression] visitor *) +class virtual ['self] reduce_expression_base = + object (self : 'self) + inherit [_] reduce_typed_pattern + + method visit_integer_type : 'env -> integer_type -> 'a = + fun _ _ -> self#zero + + method visit_var_id : 'env -> var_id -> 'a = fun _ _ -> self#zero + method visit_qualif : 'env -> qualif -> 'a = fun _ _ -> self#zero + end + +(** Ancestor for [mapreduce_expression] visitor *) +class virtual ['self] mapreduce_expression_base = + object (self : 'self) + inherit [_] mapreduce_typed_pattern + + method visit_integer_type : 'env -> integer_type -> integer_type * 'a = + fun _ x -> (x, self#zero) + + method visit_var_id : 'env -> var_id -> var_id * 'a = + fun _ x -> (x, self#zero) + + method visit_qualif : 'env -> qualif -> qualif * 'a = + fun _ x -> (x, self#zero) + end + +(** **Rk.:** here, {!expression} is not at all equivalent to the expressions + used in LLBC. They are lambda-calculus expressions, and are thus actually + more general than the LLBC statements, in a sense. + *) +type expression = + | Var of var_id (** a variable *) + | Const of constant_value + | App of texpression * texpression + (** Application of a function to an argument. + + The function calls are still quite structured. + Change that?... We might want to have a "normal" lambda calculus + app (with head and argument): this would allow us to replace some + field accesses with calls to projectors over fields (when there + are clashes of field names, some provers like F* get pretty bad...) + *) + | Abs of typed_pattern * texpression (** Lambda abstraction: [fun x -> e] *) + | Qualif of qualif (** A top-level qualifier *) + | Let of bool * typed_pattern * texpression * texpression + (** Let binding. + + TODO: the boolean should be replaced by an enum: sometimes we use + the error-monad, sometimes we use the state-error monad (and we + do this an a per-function basis! For instance, arithmetic functions + are always in the error monad). + + The boolean controls whether the let is monadic or not. + For instance, in F*: + - non-monadic: [let x = ... in ...] + - monadic: [x <-- ...; ...] + + Note that we are quite general for the left-value on purpose; this + is used in several situations: + + 1. When deconstructing a tuple: + {[ + let (x, y) = p in ... + ]} + (not all languages have syntax like [p.0], [p.1]... and it is more + readable anyway). + + 2. When expanding an enumeration with one variant. + In this case, {!Let} has to be understood as: + {[ + let Cons x tl = ls in + ... + ]} + + Note that later, depending on the language we extract to, we can + eventually update it to something like this (for F*, for instance): + {[ + let x = Cons?.v ls in + let tl = Cons?.tl ls in + ... + ]} + *) + | Switch of texpression * switch_body + | Meta of (meta[@opaque]) * texpression (** Meta-information *) + +and switch_body = If of texpression * texpression | Match of match_branch list +and match_branch = { pat : typed_pattern; branch : texpression } +and texpression = { e : expression; ty : ty } + +(** Meta-value (converted to an expression). It is important that the content + is opaque. + + TODO: is it possible to mark the whole mvalue type as opaque? + *) +and mvalue = (texpression[@opaque]) + +and meta = + | Assignment of mplace * mvalue * mplace option + (** Meta-information stored in the AST. + + The first mplace stores the destination. + The mvalue stores the value which is put in the destination + The second (optional) mplace stores the origin. + *) + | MPlace of mplace (** Meta-information about the origin of a value *) +[@@deriving + show, + visitors + { + name = "iter_expression"; + variety = "iter"; + ancestors = [ "iter_expression_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + concrete = true; + }, + visitors + { + name = "map_expression"; + variety = "map"; + ancestors = [ "map_expression_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + concrete = true; + }, + visitors + { + name = "reduce_expression"; + variety = "reduce"; + ancestors = [ "reduce_expression_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + }, + visitors + { + name = "mapreduce_expression"; + variety = "mapreduce"; + ancestors = [ "mapreduce_expression_base" ]; + nude = true (* Don't inherit {!VisitorsRuntime.iter} *); + }] + +(** Information about the "effect" of a function *) +type fun_effect_info = { + input_state : bool; (** [true] if the function takes a state as input *) + output_state : bool; + (** [true] if the function outputs a state (it then lives + in a state monad) *) + can_fail : bool; (** [true] if the return type is a [result] *) +} + +(** Meta information about a function signature *) +type fun_sig_info = { + num_fwd_inputs : int; + (** The number of input types for forward computation *) + num_back_inputs : int option; + (** The number of additional inputs for the backward computation (if pertinent) *) + effect_info : fun_effect_info; +} + +(** A function signature. + + We have the following cases: + - forward function: + [in_ty0 -> ... -> in_tyn -> out_ty] (* pure function *) + `in_ty0 -> ... -> in_tyn -> result out_ty` (* error-monad *) + `in_ty0 -> ... -> in_tyn -> state -> result (state & out_ty)` (* state-error *) + - backward function: + `in_ty0 -> ... -> in_tyn -> back_in0 -> ... back_inm -> (back_out0 & ... & back_outp)` (* pure function *) + `in_ty0 -> ... -> in_tyn -> back_in0 -> ... back_inm -> + result (back_out0 & ... & back_outp)` (* error-monad *) + `in_ty0 -> ... -> in_tyn -> state -> back_in0 -> ... back_inm -> + result (back_out0 & ... & back_outp)` (* state-error *) + + Note that a backward function never returns (i.e., updates) a state: only + forward functions do so. Also, the state input parameter is *betwee* + the forward inputs and the backward inputs. + + The function's type should be given by `mk_arrows sig.inputs sig.output`. + We provide additional meta-information: + - we divide between forward inputs and backward inputs (i.e., inputs specific + to the forward functions, and additional inputs necessary if the signature is + for a backward function) + - we have booleans to give us the fact that the function takes a state as + input, or can fail, etc. without having to inspect the signature + - etc. + *) +type fun_sig = { + type_params : type_var list; + inputs : ty list; + output : ty; + doutputs : ty list; + (** The "decomposed" list of outputs. + + In case of a forward function, the list has length = 1, for the + type of the returned value. + + In case of backward function, the list contains all the types of + all the given back values (there is at most one type per forward + input argument). + + Ex.: + {[ + fn choose<'a, T>(b : bool, x : &'a mut T, y : &'a mut T) -> &'a mut T; + ]} + Decomposed outputs: + - forward function: [T] + - backward function: [T; T] (for "x" and "y") + + *) + info : fun_sig_info; (** Additional information *) +} + +(** An instantiated function signature. See {!fun_sig} *) +type inst_fun_sig = { + inputs : ty list; + output : ty; + doutputs : ty list; + info : fun_sig_info; +} + +type fun_body = { + inputs : var list; + inputs_lvs : typed_pattern list; + (** The inputs seen as patterns. Allows to make transformations, for example + to replace unused variables by [_] *) + body : texpression; +} + +type fun_decl = { + def_id : FunDeclId.id; + back_id : T.RegionGroupId.id option; + basename : fun_name; + (** The "base" name of the function. + + The base name is the original name of the Rust function. We add suffixes + (to identify the forward/backward functions) later. + *) + signature : fun_sig; + is_global_decl_body : bool; + body : fun_body option; +} |