(* opam update ; opam switch 4.04.2+ber-multicore *)

(* Preliminaries: the partially-static string monoid from the
   second staging notebook
   http://ocamllabs.io/iocamljs/staging2-2018.html
*)
module type MONOID = sig
  type t
  val unit : t
  val (<*>) : t -> t -> t
end

type sta = Sta and dyn = Dyn and eseq = E: _ seq -> eseq
and _ seq =
    Empty : _ seq
  | ConsS : string      * dyn seq -> sta seq
  | ConsD : string code *  _  seq -> dyn seq

let consS : string -> eseq -> eseq =
  fun x xs -> match xs with
    | E Empty -> E (ConsS (x, Empty))
    | E (ConsS (y, ys)) -> E (ConsS (x ^ y, ys))
    | E (ConsD (_,_) as r) -> E (ConsS (x, r))

let consD : string code -> eseq -> eseq =
  fun x (E xs) -> E (ConsD (x, xs))

let rec (<*>) : eseq -> eseq -> eseq =
  fun (E l) r -> match l with
    | Empty -> r
    | ConsS (h, t) -> consS h (E t <*> r)
    | ConsD (h, t) -> consD h (E t <*> r)

module PS_string =
struct
  type ps = eseq
  module N = struct
    type t = ps
    let unit = Empty
    let (<*>) = (<*>)
  end
   let dyn d = consD d (E Empty)
   let sta s = consS s (E Empty)
   module Eva(O:MONOID) = struct
     let rec eva : (string code -> O.t) -> (string -> O.t) -> ps -> O.t
       = fun f g e  -> match e with
           E Empty -> O.unit
         | E (ConsS (h, Empty)) -> g h
         | E (ConsD (h, Empty)) -> f h
         | E (ConsS (h, t)) -> O.(g h <*> eva f g (E t))
         | E (ConsD (h, t)) -> O.(f h <*> eva f g (E t))
   end
end
module Code_string_monoid = struct
  type t = string code
  let unit = .<"">.
  let (<*>) l r = .< .~l ^ .~r >.
end

let cd = let module E = PS_string.Eva(Code_string_monoid) in 
  E.eva (fun d -> d) (fun s -> .<s>.)

(* Preliminaries: format strings *)
type (_,_) fmt =
  | Int : (int -> 'a, 'a) fmt
  | Lit : string -> ('a, 'a) fmt
  | Bool : (bool -> 'a, 'a) fmt
  | Cat : ('a,'b) fmt * ('b,'c) fmt -> ('a,'c) fmt
let (%) x y = Cat (x, y)

(** Preliminaries: some auxiliary functions for parsing *)
type 'a reader = string -> 'a * string

let is_digit c = String.contains "0123456789" c

let read_int : int reader =
  fun s ->
    let i = ref 0 in
    let slen = String.length s in
    while !i < slen && is_digit s.[!i] do incr i; done;
    match !i with
      0 -> failwith "read_int"
    | n ->
       let int = String.sub s 0 !i in
       let rest = String.sub s !i (slen - !i) in
       (int_of_string int, rest)

let read_exact : string -> unit reader =
  fun e s ->
    let slen = String.length s
    and elen = String.length e in
    if slen >= elen && e = String.sub s 0 elen then
      ((), String.sub s elen (slen - elen))
    else failwith "read_exact" 

let read_bool : bool reader =
  fun s ->
    let slen = String.length s in
    if slen >= 5 && String.sub s 0 5 = "false"
    then (false, String.sub s 5 (slen - 5))
    else if slen >= 4 && String.sub s 0 4 = "true"
    then (true, String.sub s 4 (slen - 4))
    else failwith "read_bool"

(* Metaprogramming and effects

   Much of the programming we've seen throughout the course has been
   compositional.

   Fold-based programs, of the sort we saw in the lambda calculus
   lectures are compositional.  Functions defined with folds are
   defined by composing the results of processing sub-values.  For
   example, here is an expression that calculates the length of a list
   'lst' using a fold:

      fold_right (fun h len -> 1 + len) lst 0

   Most of the work is done by the first argument of fold, which is a
   function that receives two values: the head h of each cons cell
   (which is ignored), and the *result of processing the tail*, 'len'.
   Since there is no opportunity for the function to examine the
   structure of the list, it must treat each cons cell uniformly.

   The 'trie' modules in the second exercise are compositional in a
   similar sense.  Functors such as Trie_sum

       module Trie_sum (A: TRIE) (B: TRIE) = ...

   have no opportunity to examine the structure of the modules A and B
   passed as arguments.  Instead, Trie_sum can use the components of A
   and B to construct new types and values in a uniform way.

   Simple generative metaprogramming is also compositional in this
   sense.

   The code generated by an expression is always a *composition* of
   the code generated by subexpressions, since MetaOCaml provides no
   means of inspecting generated code.  In the following example

          f .<e>. ↝  .< ... e ... >.

   the code built by the call to 'f' may contain 'e' as a
   literally-included subexpression but since, 'f' has no way of
   scrutinising .<e>. to determine its structure, the structure of the
   generated code cannot vary depending on the structure of 'e'.

   These features --- generativity & compositionality --- support
   various useful safety guarantees in the language: for example,
   there is no way to violate variable scoping in generated code.
   However, they can also make it difficult to generate optimal code.

   One consequence of compositionality is that the structure of
   generated programs follows the structure of the generating code.
   More precisely, the structure of generated code tends to match the
   evaluation structure of the generating program, since code is
   generated by evaluating the generating program, leaving quoted
   subexpressions unevaluated.

   This replication of structure makes it easy to reason about
   generated code.  For example, we can always be confident that code
   will not be arbitrarily reordered, changing its meaning.  In the
   following example, we know that all the code generated by 'f' will
   always appear before all the code generated by 'g':

        .< (.~(f ()) ; .~(g ()) >.

   However, sometimes it is desirable to reorder code.  For example,
   we can sometimes generate more efficient code if we can float
   expressions up to a higher point in the program, outside of loops
   and function bodies.  Consider the following code template:
*)

let make_loop g =
  .< for i = 0 to 10 do print_endline .~(g .<i>.) done >.

(* If the code generated by 'g' includes an expression that does not
   depend on 'i' then it may be convenient to move the expression
   outside the loop:
*)
let loop1 = make_loop (fun _ -> .< "a" ^ "b" >.)
(* Compositionality means that loop1 generates the following code:

     for i = 0 to 10 do print_endline ("a" ^ "b") done

   However it would be preferable to generate the more efficient code that
   performs the catenation a single time, outside the loop:

     let v = "a" ^ "b" in
     for i = 0 to 10 do print_endline v done

   However, if the code generated by 'g' does depend on 'i', then
   it is essential that it is not moved outside the loop:
*)
let loop2 = make_loop (fun i -> .< string_of_int .~i >.)

(* This time it is not possible to float the generated expression
   outside the loop; we expect the following code:

     for i = 0 to 10 do print_endline (string_of_int i) done
*)


(* The examples above show that it is sometimes desirable to make
   decisions about where to insert code at a different point in the
   program than the place where the code is generated.  (For example,
   ideally, 'make_loop' should not need to deal with moving code 
   generated by 'g' outside the loop.)

   This sort of "code motion" transformation can be supported
   conveniently using algebraic effects.  Below we will see various
   implementations of code-moving transformations implemented using
   effects.

   The most common form of code motion is 'let-insertion' --- i.e.
   inserting 'let' bindings high up in a generated program.  In
   MetaOCaml the following interface supports let insertion.  First,
   the GenLet effect makes it possible to pass pieces of code between
   a call to 'perform' and a handler: 
*)
effect GenLet : 'a code -> 'a code
(*
  The let_locus function is a handler for GenLet; it marks a place in the
  generated code where a let binding may later be inserted:
*)
let let_locus : (unit -> 'a code) -> 'a code =
  fun f -> match f () with
  | x -> x
  | effect (GenLet e) k ->
    .< let x = .~e in .~(continue k  .< x >. )>.

(*
   The genlet function communicates with let_locus to insert
   a binding, and returns the freshly bound variable:
*)
let genlet v = perform (GenLet v)

(* Now we can use 'genlet' and 'let_locus' to move 
   code outside of the template in 'make_loop':
*)
let loop1 = let_locus @@ fun () ->
            make_loop (fun _ -> genlet .< "a" ^ "b" >.)
(* 
   Here is the generated code:

     let x = "a" ^ "b"  in
     for i = 0 to 10 do print_endline x done
*)

(*

   A second effect, 'if-insertion' supports the generation of branches
   high up in a program.  With if-insertion a program can do something
   that initially appears to be impossible --- statically examine the
   value of a dynamic variable or expression.

   Here is an effect, 'Split', that can be used to define if-insertion:
*)
effect Split : bool code -> bool
let split b = perform (Split b)
(*
   The type of Split gives a clue to its behaviour: when performing a Split
   effect, the caller provides a dynamic 'bool code' value; in return it
   receives a static 'bool' value!

   Here is a handler for 'Split', 'if_locus':
*)
let if_locus f = match f () with
  | x -> x
  | effect (Split b) k ->
    .< if .~b then .~(continue k true)
       else .~(continue (Obj.clone_continuation k) false) >.
;;
(* 
   This time the case for the effect inserts an 'if' expression and runs 
   the continuation twice, once in each branch.  (By default, continuations
   can only be run once; we must call clone_continuation if we want to run
   a continuation a second time.)  In the first branch we know that
   the condition is true, and we are therefore justified in passing the static
   value 'true' back to the caller of 'split'.  Similarly, in the second
   branch we can pass a static 'false'.
*)
(*
   Here is a simple example:
*)
let fn = .< fun b -> .~(if_locus @@ fun () -> 
                        let s = string_of_bool (split .<b>.)
                        in .<s>.) >.;;
(* Although the value of 'b' is not known during code generation,
   calling 'split' inserts an 'if' into the generated code
   and returns a static 'true' or 'false' that can be passed to
   string_of_bool.  Here is the code generated for 'fn:

       fun b -> if b then "true"
                     else "false"

   Observe that the work of turning the bool into a string has been
   performed during code generation.
*)

(*
  Improving printf with 'let' & 'if' insertion

  We can use let-insertion and if-insertion to improve the output of
  the staged printf function
  (http://ocamllabs.io/iocamljs/staging2-2018.html).

  In particular, extending the example above, we can avoid run-time
  conversion of booleans to strings, instead performing the conversion
  at code generation time.  Furthermore, the results of converting
  static booleans to strings are also static, and can be coalesced
  with other static values.

  Here is an improved staged printf function that uses let- and
  if-insertion together with partially-static data to generate
  efficient code
*)

let rec printk4
  : type a r. (a, r) fmt -> (PS_string.ps -> r code) -> a code =
  let open PS_string in
  fun fmt k -> match fmt with
  | Int -> genlet
           .< fun i -> .~(k (dyn .<string_of_int i>.)) >.
  | Bool -> genlet
            .< fun b -> 
             .~(if_locus (fun  () ->
                k (sta (string_of_bool (split .<b>.))))) >.
  | Lit s -> k (sta s)
  | Cat (l, r) -> printk4 l (fun x ->
                  printk4 r (fun y ->
                  k (x <*> y)))

(* The top-level sprintf function inserts a handler with let_locus to
   mark the place where let bindings can be inserted *)
let sprintf4 fmt = let_locus @@ fun () -> printk4 fmt cd ;;

(* Now the example call to sprintf generates code with *no* catenations: *)
let printf_example = sprintf4 (Bool % Lit "," % Lit " " % Bool);;
(*
  Here is the generated code:

  let f b = if b then "(false,true)"
            else "(false,false)" in
  let g b = if b then "(true,true)"
            else "(true,false)" in
  let h b = if b then g else f in h 
*)

(* This approach can be used to implement a wide variety of
   code-moving transformations.  We consider one final example here: a
   'pair-splitting' variant of let-insertion that converts a dynamic
   expression of pair type into a static pair of dynamic expressions.

   Here is the effect, Genlet2.  Observe that the the argument is a
   single code value of pair type, while the return value is a pair
   of values of code type:
*)
effect Genlet2 : ('a * 'b) code -> ('a code * 'b code)
let genlet2 p = perform (Genlet2 p)

(* And here is the handler for Genlet2 that, like let_locus, inserts a
   let binding. The difference here is that the left-hand side of the
   let binding matches the two components of the pair; the two bound
   variables are passed individually to the continuation:
*)
  let let2_locus f =
    match f () with 
    | x -> x
    | effect (Genlet2 p) k ->
      .< let (x,y) = .~p in .~(continue k (.<x>.,.<y>.))>.
;;
(* We'll use genlet2 to implement a staged version of scanf (the
   string-parsing counterpart to the string-printing function printf)
   that generates efficient code.

   First, here is an unstaged implementation of the 'sscanf' function,
   which reads from a string as directed by a format specification.
   We can reuse the 'fmt' type used for printf.  The 'sscanfk'
   function does most of the work: it takes a format specification
   'fmt' a string 's' and a continuation function 'k', and returns
   a pair of the result and the remainder of the string:
*)
let rec sscanfk
  : type a r. (a, r) fmt -> string -> a -> r * string =
  fun fmt s k -> match fmt with
    | Int ->        let i, s = read_int s in
                    (k i, s)
    | Lit l ->      let (), s = read_exact l s in
                    (k, s)
    | Bool ->       let i, s = read_bool s in
                    (k i, s)
    | Cat (l, r) -> let k1, s = sscanfk l s k in
                    let k2, s = sscanfk r s k1 in
                    (k2, s)

(* The top-level function sscanf simply discards the second part
   of the return value of sscanfk: *)
let sscanf : 'a 'b. ('a, 'b) fmt -> string -> 'a -> 'b =
  fun f s k -> fst (sscanfk f s k)
;;

(* Here is an example of 'sscanf' in action, using a format
   specification that reads a boolean, a two-character literal string
   and a second boolean.  The second argument is the string to parse.
   The final argument is a function that receives both boolean results
   and constructs the result of the whole call:
*)
let sscanf_example =
  sscanf (Bool % Lit ", " % Bool) "true, false" (fun x y -> (x,y))
;;

(* Here is the result of staging sscanf using the idealized staging
   process.

   As with printf, format strings are treated as static; everything
   else is dynamic:
*)
 let rec sscanfk2
   : type a r. (a, r) fmt -> string code -> a code
       -> (r * string) code =
   fun fmt s k -> match fmt with
     | Int ->     .< let i, s = read_int .~s in
                     (.~k i, s) >.
     | Lit l ->   .< let (), s = read_exact l .~s in
                     (.~k, s) >.
     | Bool ->    .< let i, s = read_bool .~s in
                     (.~k i, s) >.
     | Cat (l, r) -> .< let k1, s = .~(sscanfk2 l s k) in
                        let k2, s = .~(sscanfk2 r .<s>. .<k1>.)
                        in (k2, s) >.
 let sscanf2 : 'a 'b. ('a, 'b) fmt -> (string -> 'a -> 'b) code =
   fun f -> .< fun s k -> fst .~(sscanfk2 f .<s>. .<k>.) >.
;;
(*
  Here is a call to the staged function sscanf2, which accepts
  a format specification and generates code to parse according
  to the specification:
*)
let sscanf2_example = sscanf2 (Bool % Lit ", " % Bool)
(*
  The code generated by sscanf2 is less than optimal:
  Here is the generated code for the example above:

     fun s1 k1  ->
     fst (let (k2,s2) =
           let (k3,s3) =
            let (i1,s4) = read_bool s1 in (k1 i1, s4) in
           let (k4,s5) =
            let ((),s5) = read_exact ", " s3 in (k3, s5) in
           (k4, s5) in
          let (k5,s6) =
           let (i2,s7) = read_bool s2 in (k2 i2, s7) in
          (k5, s6))

  The cause of inefficiency is the large number of "administrative"
  let bindings, which serve no useful purpose in the output; they
  are simply an artifact of the structure of the generating program.

  We can remove these administrative bindings using 'genlet2'.

  Here is an improved staged sscanf function using 'genlet2'.
  Observe that the let bindings themselves are no longer quoted,
  and that the two components of the pair are passed around
  as separate code values (as shown in the return type of sscanfk3):
*)
let rec sscanfk3
  : type a r. (a, r) fmt -> string code -> a code -> r code * string code =
  fun fmt s k -> match fmt with
    | Int ->        let i, s = genlet2 .<read_int .~s>. in
                    (.<.~k .~i>., s)
    | Lit l ->      let _, s = genlet2 .<read_exact l .~s>. in
                    (k, s)
    | Bool ->       let i, s = genlet2 .<read_bool .~s>. in
                    (.<.~k .~i>., s)
    | Cat (l, r) -> let k1, s = sscanfk3 l s k in
                    let k3, s = sscanfk3 r s k1 in
                    (k3, s)

(* As with sprintf4, there is a single top-level handler let2_locus
   inserted at the entry point, sscanf3: *)
let sscanf3 : 'a 'b. ('a, 'b) fmt -> (string -> 'a -> 'b) code =
  fun f -> .< fun s k -> .~(let2_locus @@ fun () -> 
                            fst (sscanfk3 f .<s>. .<k>.)) >.
;;
(* Here is the example call to sscanf3: *)
let sscanf3_example = sscanf3 (Bool % Lit ", " % Bool)
(* And here is the code generated for the example call.  It is
   significantly simpler (and probably more efficient) than the code
   generated by sscanf2, since it contains no administrative bindings:

    fun s k ->
     let (x1 ,s1) = read_bool s
     let (x2 ,s2) = read_exact "," s1 in
     let (x3 ,s3) = read_bool s2
       k x 2
*)