(* opam update ; opam switch 4.04.2+ber-multicore *) (* Preliminaries: the partially-static string monoid from the second staging notebook http://ocamllabs.io/iocamljs/staging2-2018.html *) module type MONOID = sig type t val unit : t val (<*>) : t -> t -> t end type sta = Sta and dyn = Dyn and eseq = E: _ seq -> eseq and _ seq = Empty : _ seq | ConsS : string * dyn seq -> sta seq | ConsD : string code * _ seq -> dyn seq let consS : string -> eseq -> eseq = fun x xs -> match xs with | E Empty -> E (ConsS (x, Empty)) | E (ConsS (y, ys)) -> E (ConsS (x ^ y, ys)) | E (ConsD (_,_) as r) -> E (ConsS (x, r)) let consD : string code -> eseq -> eseq = fun x (E xs) -> E (ConsD (x, xs)) let rec (<*>) : eseq -> eseq -> eseq = fun (E l) r -> match l with | Empty -> r | ConsS (h, t) -> consS h (E t <*> r) | ConsD (h, t) -> consD h (E t <*> r) module PS_string = struct type ps = eseq module N = struct type t = ps let unit = Empty let (<*>) = (<*>) end let dyn d = consD d (E Empty) let sta s = consS s (E Empty) module Eva(O:MONOID) = struct let rec eva : (string code -> O.t) -> (string -> O.t) -> ps -> O.t = fun f g e -> match e with E Empty -> O.unit | E (ConsS (h, Empty)) -> g h | E (ConsD (h, Empty)) -> f h | E (ConsS (h, t)) -> O.(g h <*> eva f g (E t)) | E (ConsD (h, t)) -> O.(f h <*> eva f g (E t)) end end module Code_string_monoid = struct type t = string code let unit = .<"">. let (<*>) l r = .< .~l ^ .~r >. end let cd = let module E = PS_string.Eva(Code_string_monoid) in E.eva (fun d -> d) (fun s -> ..) (* Preliminaries: format strings *) type (_,_) fmt = | Int : (int -> 'a, 'a) fmt | Lit : string -> ('a, 'a) fmt | Bool : (bool -> 'a, 'a) fmt | Cat : ('a,'b) fmt * ('b,'c) fmt -> ('a,'c) fmt let (%) x y = Cat (x, y) (** Preliminaries: some auxiliary functions for parsing *) type 'a reader = string -> 'a * string let is_digit c = String.contains "0123456789" c let read_int : int reader = fun s -> let i = ref 0 in let slen = String.length s in while !i < slen && is_digit s.[!i] do incr i; done; match !i with 0 -> failwith "read_int" | n -> let int = String.sub s 0 !i in let rest = String.sub s !i (slen - !i) in (int_of_string int, rest) let read_exact : string -> unit reader = fun e s -> let slen = String.length s and elen = String.length e in if slen >= elen && e = String.sub s 0 elen then ((), String.sub s elen (slen - elen)) else failwith "read_exact" let read_bool : bool reader = fun s -> let slen = String.length s in if slen >= 5 && String.sub s 0 5 = "false" then (false, String.sub s 5 (slen - 5)) else if slen >= 4 && String.sub s 0 4 = "true" then (true, String.sub s 4 (slen - 4)) else failwith "read_bool" (* Metaprogramming and effects Much of the programming we've seen throughout the course has been compositional. Fold-based programs, of the sort we saw in the lambda calculus lectures are compositional. Functions defined with folds are defined by composing the results of processing sub-values. For example, here is an expression that calculates the length of a list 'lst' using a fold: fold_right (fun h len -> 1 + len) lst 0 Most of the work is done by the first argument of fold, which is a function that receives two values: the head h of each cons cell (which is ignored), and the *result of processing the tail*, 'len'. Since there is no opportunity for the function to examine the structure of the list, it must treat each cons cell uniformly. The 'trie' modules in the second exercise are compositional in a similar sense. Functors such as Trie_sum module Trie_sum (A: TRIE) (B: TRIE) = ... have no opportunity to examine the structure of the modules A and B passed as arguments. Instead, Trie_sum can use the components of A and B to construct new types and values in a uniform way. Simple generative metaprogramming is also compositional in this sense. The code generated by an expression is always a *composition* of the code generated by subexpressions, since MetaOCaml provides no means of inspecting generated code. In the following example f .. ↝ .< ... e ... >. the code built by the call to 'f' may contain 'e' as a literally-included subexpression but since, 'f' has no way of scrutinising .. to determine its structure, the structure of the generated code cannot vary depending on the structure of 'e'. These features --- generativity & compositionality --- support various useful safety guarantees in the language: for example, there is no way to violate variable scoping in generated code. However, they can also make it difficult to generate optimal code. One consequence of compositionality is that the structure of generated programs follows the structure of the generating code. More precisely, the structure of generated code tends to match the evaluation structure of the generating program, since code is generated by evaluating the generating program, leaving quoted subexpressions unevaluated. This replication of structure makes it easy to reason about generated code. For example, we can always be confident that code will not be arbitrarily reordered, changing its meaning. In the following example, we know that all the code generated by 'f' will always appear before all the code generated by 'g': .< (.~(f ()) ; .~(g ()) >. However, sometimes it is desirable to reorder code. For example, we can sometimes generate more efficient code if we can float expressions up to a higher point in the program, outside of loops and function bodies. Consider the following code template: *) let make_loop g = .< for i = 0 to 10 do print_endline .~(g ..) done >. (* If the code generated by 'g' includes an expression that does not depend on 'i' then it may be convenient to move the expression outside the loop: *) let loop1 = make_loop (fun _ -> .< "a" ^ "b" >.) (* Compositionality means that loop1 generates the following code: for i = 0 to 10 do print_endline ("a" ^ "b") done However it would be preferable to generate the more efficient code that performs the catenation a single time, outside the loop: let v = "a" ^ "b" in for i = 0 to 10 do print_endline v done However, if the code generated by 'g' does depend on 'i', then it is essential that it is not moved outside the loop: *) let loop2 = make_loop (fun i -> .< string_of_int .~i >.) (* This time it is not possible to float the generated expression outside the loop; we expect the following code: for i = 0 to 10 do print_endline (string_of_int i) done *) (* The examples above show that it is sometimes desirable to make decisions about where to insert code at a different point in the program than the place where the code is generated. (For example, ideally, 'make_loop' should not need to deal with moving code generated by 'g' outside the loop.) This sort of "code motion" transformation can be supported conveniently using algebraic effects. Below we will see various implementations of code-moving transformations implemented using effects. The most common form of code motion is 'let-insertion' --- i.e. inserting 'let' bindings high up in a generated program. In MetaOCaml the following interface supports let insertion. First, the GenLet effect makes it possible to pass pieces of code between a call to 'perform' and a handler: *) effect GenLet : 'a code -> 'a code (* The let_locus function is a handler for GenLet; it marks a place in the generated code where a let binding may later be inserted: *) let let_locus : (unit -> 'a code) -> 'a code = fun f -> match f () with | x -> x | effect (GenLet e) k -> .< let x = .~e in .~(continue k .< x >. )>. (* The genlet function communicates with let_locus to insert a binding, and returns the freshly bound variable: *) let genlet v = perform (GenLet v) (* Now we can use 'genlet' and 'let_locus' to move code outside of the template in 'make_loop': *) let loop1 = let_locus @@ fun () -> make_loop (fun _ -> genlet .< "a" ^ "b" >.) (* Here is the generated code: let x = "a" ^ "b" in for i = 0 to 10 do print_endline x done *) (* A second effect, 'if-insertion' supports the generation of branches high up in a program. With if-insertion a program can do something that initially appears to be impossible --- statically examine the value of a dynamic variable or expression. Here is an effect, 'Split', that can be used to define if-insertion: *) effect Split : bool code -> bool let split b = perform (Split b) (* The type of Split gives a clue to its behaviour: when performing a Split effect, the caller provides a dynamic 'bool code' value; in return it receives a static 'bool' value! Here is a handler for 'Split', 'if_locus': *) let if_locus f = match f () with | x -> x | effect (Split b) k -> .< if .~b then .~(continue k true) else .~(continue (Obj.clone_continuation k) false) >. ;; (* This time the case for the effect inserts an 'if' expression and runs the continuation twice, once in each branch. (By default, continuations can only be run once; we must call clone_continuation if we want to run a continuation a second time.) In the first branch we know that the condition is true, and we are therefore justified in passing the static value 'true' back to the caller of 'split'. Similarly, in the second branch we can pass a static 'false'. *) (* Here is a simple example: *) let fn = .< fun b -> .~(if_locus @@ fun () -> let s = string_of_bool (split ..) in ..) >.;; (* Although the value of 'b' is not known during code generation, calling 'split' inserts an 'if' into the generated code and returns a static 'true' or 'false' that can be passed to string_of_bool. Here is the code generated for 'fn: fun b -> if b then "true" else "false" Observe that the work of turning the bool into a string has been performed during code generation. *) (* Improving printf with 'let' & 'if' insertion We can use let-insertion and if-insertion to improve the output of the staged printf function (http://ocamllabs.io/iocamljs/staging2-2018.html). In particular, extending the example above, we can avoid run-time conversion of booleans to strings, instead performing the conversion at code generation time. Furthermore, the results of converting static booleans to strings are also static, and can be coalesced with other static values. Here is an improved staged printf function that uses let- and if-insertion together with partially-static data to generate efficient code *) let rec printk4 : type a r. (a, r) fmt -> (PS_string.ps -> r code) -> a code = let open PS_string in fun fmt k -> match fmt with | Int -> genlet .< fun i -> .~(k (dyn ..)) >. | Bool -> genlet .< fun b -> .~(if_locus (fun () -> k (sta (string_of_bool (split ..))))) >. | Lit s -> k (sta s) | Cat (l, r) -> printk4 l (fun x -> printk4 r (fun y -> k (x <*> y))) (* The top-level sprintf function inserts a handler with let_locus to mark the place where let bindings can be inserted *) let sprintf4 fmt = let_locus @@ fun () -> printk4 fmt cd ;; (* Now the example call to sprintf generates code with *no* catenations: *) let printf_example = sprintf4 (Bool % Lit "," % Lit " " % Bool);; (* Here is the generated code: let f b = if b then "(false,true)" else "(false,false)" in let g b = if b then "(true,true)" else "(true,false)" in let h b = if b then g else f in h *) (* This approach can be used to implement a wide variety of code-moving transformations. We consider one final example here: a 'pair-splitting' variant of let-insertion that converts a dynamic expression of pair type into a static pair of dynamic expressions. Here is the effect, Genlet2. Observe that the the argument is a single code value of pair type, while the return value is a pair of values of code type: *) effect Genlet2 : ('a * 'b) code -> ('a code * 'b code) let genlet2 p = perform (Genlet2 p) (* And here is the handler for Genlet2 that, like let_locus, inserts a let binding. The difference here is that the left-hand side of the let binding matches the two components of the pair; the two bound variables are passed individually to the continuation: *) let let2_locus f = match f () with | x -> x | effect (Genlet2 p) k -> .< let (x,y) = .~p in .~(continue k (..,..))>. ;; (* We'll use genlet2 to implement a staged version of scanf (the string-parsing counterpart to the string-printing function printf) that generates efficient code. First, here is an unstaged implementation of the 'sscanf' function, which reads from a string as directed by a format specification. We can reuse the 'fmt' type used for printf. The 'sscanfk' function does most of the work: it takes a format specification 'fmt' a string 's' and a continuation function 'k', and returns a pair of the result and the remainder of the string: *) let rec sscanfk : type a r. (a, r) fmt -> string -> a -> r * string = fun fmt s k -> match fmt with | Int -> let i, s = read_int s in (k i, s) | Lit l -> let (), s = read_exact l s in (k, s) | Bool -> let i, s = read_bool s in (k i, s) | Cat (l, r) -> let k1, s = sscanfk l s k in let k2, s = sscanfk r s k1 in (k2, s) (* The top-level function sscanf simply discards the second part of the return value of sscanfk: *) let sscanf : 'a 'b. ('a, 'b) fmt -> string -> 'a -> 'b = fun f s k -> fst (sscanfk f s k) ;; (* Here is an example of 'sscanf' in action, using a format specification that reads a boolean, a two-character literal string and a second boolean. The second argument is the string to parse. The final argument is a function that receives both boolean results and constructs the result of the whole call: *) let sscanf_example = sscanf (Bool % Lit ", " % Bool) "true, false" (fun x y -> (x,y)) ;; (* Here is the result of staging sscanf using the idealized staging process. As with printf, format strings are treated as static; everything else is dynamic: *) let rec sscanfk2 : type a r. (a, r) fmt -> string code -> a code -> (r * string) code = fun fmt s k -> match fmt with | Int -> .< let i, s = read_int .~s in (.~k i, s) >. | Lit l -> .< let (), s = read_exact l .~s in (.~k, s) >. | Bool -> .< let i, s = read_bool .~s in (.~k i, s) >. | Cat (l, r) -> .< let k1, s = .~(sscanfk2 l s k) in let k2, s = .~(sscanfk2 r .. ..) in (k2, s) >. let sscanf2 : 'a 'b. ('a, 'b) fmt -> (string -> 'a -> 'b) code = fun f -> .< fun s k -> fst .~(sscanfk2 f .. ..) >. ;; (* Here is a call to the staged function sscanf2, which accepts a format specification and generates code to parse according to the specification: *) let sscanf2_example = sscanf2 (Bool % Lit ", " % Bool) (* The code generated by sscanf2 is less than optimal: Here is the generated code for the example above: fun s1 k1 -> fst (let (k2,s2) = let (k3,s3) = let (i1,s4) = read_bool s1 in (k1 i1, s4) in let (k4,s5) = let ((),s5) = read_exact ", " s3 in (k3, s5) in (k4, s5) in let (k5,s6) = let (i2,s7) = read_bool s2 in (k2 i2, s7) in (k5, s6)) The cause of inefficiency is the large number of "administrative" let bindings, which serve no useful purpose in the output; they are simply an artifact of the structure of the generating program. We can remove these administrative bindings using 'genlet2'. Here is an improved staged sscanf function using 'genlet2'. Observe that the let bindings themselves are no longer quoted, and that the two components of the pair are passed around as separate code values (as shown in the return type of sscanfk3): *) let rec sscanfk3 : type a r. (a, r) fmt -> string code -> a code -> r code * string code = fun fmt s k -> match fmt with | Int -> let i, s = genlet2 .. in (.<.~k .~i>., s) | Lit l -> let _, s = genlet2 .. in (k, s) | Bool -> let i, s = genlet2 .. in (.<.~k .~i>., s) | Cat (l, r) -> let k1, s = sscanfk3 l s k in let k3, s = sscanfk3 r s k1 in (k3, s) (* As with sprintf4, there is a single top-level handler let2_locus inserted at the entry point, sscanf3: *) let sscanf3 : 'a 'b. ('a, 'b) fmt -> (string -> 'a -> 'b) code = fun f -> .< fun s k -> .~(let2_locus @@ fun () -> fst (sscanfk3 f .. ..)) >. ;; (* Here is the example call to sscanf3: *) let sscanf3_example = sscanf3 (Bool % Lit ", " % Bool) (* And here is the code generated for the example call. It is significantly simpler (and probably more efficient) than the code generated by sscanf2, since it contains no administrative bindings: fun s k -> let (x1 ,s1) = read_bool s let (x2 ,s2) = read_exact "," s1 in let (x3 ,s3) = read_bool s2 k x 2 *)