Theory Congproc_Ex

(*  Title:      HOL/Imperative_HOL/ex/Congproc_Ex.thy
    Author:     Norbert Schirmer, Apple, 2024
*)

section ‹Examples for congruence procedures (congprocs)›

theory Congproc_Ex
  imports "../Imperative_HOL"
begin

text ‹The simplifier works bottom up, which means that when invoked on a (compound) term it first
descends into the subterms to normalise those and then works its way up to the head of the term
trying to apply rewrite rules for the current redex (reducible expression) it encounters on the
way up. Descending into the term can be influenced by congruence rules. Before descending into the
subterms the simplifier checks for a congruence rule for the head of the term. If it finds one
it behaves according to that rule, otherwise the simplifier descends into each subterm subsequently.

While rewrite rules can be complemented with simplification procedures (simprocs) to get even
more programmatic control, congruence rules can be complemented with congruence
procedures (congprocs):

 Congprocs share the same ML signature as simprocs and provide a similar interface in
  Isabelle/ML as well as Isabelle/Isar:
  @{ML_type "morphism -> Proof.context -> thm option"}
 Congprocs are triggered by associated term patterns (just like simprocs) not just the head constant
  (which is the case for congruence rules). Like simprocs, congprocs are managed in a term net.
 Congprocs have precedence over congruence rules (unlike simprocs)
 In case the term net selects multiple candidates,
  the one with the more specific term pattern is tried first. A pattern
  p1› is considered more specific than p2› if p2› matches p1› but not vice versa.
 To avoid surprises the theorems returned by a congproc should follow the structure of
  ordinary congruence rule. Either the conclusion should return an equation where the head of the
  left hand side and right hand side coincide, or the right hand side is already in normal form.
  Otherwise, simplification might skip some relevant subterms or do repeated simplification of
  some subterms. Some fine points are illustrated by the following examples.
›


subsection ‹Congproc examples with if-then-else›

ML fun assert expected eq = if (expected aconvc (Thm.rhs_of eq)) then eq else
  raise error ("unexpected: " ^ @{make_string} eq)

fun assert_equiv expected eq =
  if Pattern.equiv @{theory} (Thm.term_of expected, Thm.term_of (Thm.rhs_of eq)) then eq else
  raise error ("unexpected: " ^ @{make_string} eq)
text ‹The standard setup uses @{thm if_weak_cong}. Only if the condition simplifies to
 termTrue or termFalse the branches are simplified.›
experiment fixes a::nat
begin
ML_val @{cterm "if a < 2 then a < 2 else ¬ a < 2"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "if a < 2 then a < 2 else ¬ a < 2"}
end

text ‹A congproc that supplies the 'strong' rule @{thm if_cong}
simproc_setup passive congproc if_cong (if x then a else b) = (K (K (K (SOME @{thm if_cong [cong_format]}))))

experiment
begin
text ‹The congproc takes precedence over the cong rules›
declare [[simproc add: if_cong, simp_trace = false]]
ML_val @{cterm "if ((a::nat) < 2) then a < 2 else ¬ ((a::nat) < 2)"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm True}
end

text ‹When we replace the congruence rule with a congproc that provides the same
rule we would expect that the result is the same.›

simproc_setup passive congproc if_weak_cong_bad (if x then a else b) = (K (K (K (SOME @{thm if_weak_cong [cong_format]}))))

experiment
begin

ML_val @{cterm "if True then (1::nat) + 2 else 2 + 3"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "Suc (Suc (Suc 0))"}


declare if_weak_cong [cong del]
declare [[simproc add: if_weak_cong_bad, simp_trace]]

ML_val @{cterm "if True then (1::nat) + 2 else 2 + 3"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "(1::nat) + 2"}
text ‹We do not get the same result. The then-branch is selected but not simplified.
As the simplifier works bottom up it can usually assume that the subterms are already in
'simp normal form'. So the simplifier avoids to revisit the then-branch when it applies
@{thm if_True}. However, the weak congruence rule
@{thm if_weak_cong} only simplifies the condition and neither branch.
As the simplifier analyses congruence rules this rule is classified as weak. Whenever a
redex is simplified (for which a weak congruence rule is active) the simplifier deviates from its
 default behaviour and rewrites the result. However, the simplifier does not analyse the
congproc. To achieve the same result we can explicitly specify it as ‹weak›.›
end

simproc_setup passive weak_congproc if_weak_cong (if x then a else b) = (K (K (K (SOME @{thm if_weak_cong [cong_format]}))))

experiment
begin
declare if_weak_cong [cong del]
declare [[simproc add: if_weak_cong, simp_trace]]
ML_val @{cterm "if True then (1::nat) + 2 else 2 + 3"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "Suc (Suc (Suc 0))"}
end

text ‹Now some more ambitious congproc that combines the effect of @{thm if_weak_cong} and @{thm if_cong}.
It first simplifies the condition and depending on the result decides to either simplify only
one of the branches (in case the condition evaluates to termTrue or termFalse, or otherwise
it simplifies both branches.
›

lemma if_True_weak_cong:
  "P = True  x = x'  (if P then x else y) = (if True then x' else y)"
  by simp

lemma if_False_weak_cong:
  "P = False  y = y'  (if P then x else y) = (if False then x else y')"
  by simp

text ‹Note that we do not specify the congproc as ‹weak› as every relevant subterm is
simplified.›
simproc_setup passive congproc if_cong_canonical (if x then a else b) = let
    val if_True_weak_cong = @{thm if_True_weak_cong [cong_format]}
    val if_False_weak_cong = @{thm if_False_weak_cong [cong_format]}
    val if_cong = @{thm if_cong [cong_format]}
  in
    (K (fn ctxt => fn ct =>
      let
         val (_, [P, x, y]) = Drule.strip_comb ct
         val P_eq = Simplifier.asm_full_rewrite ctxt P
         val rhs = Thm.dest_equals_rhs (Thm.cprop_of P_eq)
         val rule = (case Thm.term_of rhs of
                       @{term True}  => if_True_weak_cong
                     | @{term False} => if_False_weak_cong
                     | _ => if_cong)
      in
        SOME (rule OF [P_eq])
      end))
   end

experiment
begin
declare if_weak_cong [cong del]
declare [[simproc add: if_cong_canonical, simp_trace]]
ML_val @{cterm "if True then (1::nat) + 2 else 2 + 3"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "Suc (Suc (Suc 0))"}
end

experiment
begin
declare if_weak_cong [cong del]
declare [[simproc add: if_cong_canonical, simp_trace]]
text ‹Canonical congruence behaviour:
 First condition is simplified to True
 Congruence rule is selected and then "then-branch" is simplified but "else-branch" is left untouched
 Congruence step is finished and now rewriting with @{thm if_True} is done.
Note that there is no attempt to revisit the result, as congproc is not weak.›
ML_val @{cterm "if ((2::nat) < 3) then 22 + 2 else 21 + 1"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "24"}
end

experiment fixes a ::nat
begin
text ‹The weak congruence rule shows no effect.›
ML_val @{cterm "if a < b then a < b  True else ¬ a < b  True"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "if a < b then a < b  True else ¬ a < b  True"}

text ‹The congproc simplifies the term.›
declare if_weak_cong [cong del]
declare [[simproc add: if_cong_canonical, simp_trace]]
ML_val @{cterm "if a < b then a < b  True else ¬ a < b  True"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "True"}
end

text ‹Beware of congprocs that implement non-standard congruence rules, like:›

lemma if_True_cong: "P = True  (if P then x else y) = x"
  by simp

lemma if_False_cong: "P = False  (if P then x else y) = y"
  by simp

simproc_setup passive congproc if_cong_bad (if x then a else b) = let
    val if_True_cong = @{thm if_True_cong [cong_format]}
    val if_False_cong = @{thm if_False_cong [cong_format]}
    val if_cong = @{thm if_cong [cong_format]}
  in
    (K (fn ctxt => fn ct =>
      let
         val (_, [P, x, y]) = Drule.strip_comb ct
         val P_eq = Simplifier.asm_full_rewrite ctxt P
         val rhs = Thm.dest_equals_rhs (Thm.cprop_of P_eq)
         val rule = (case Thm.term_of rhs of
                       @{term True}  => if_True_cong
                     | @{term False} => if_False_cong
                     | _ => if_cong )
      in
        SOME (rule OF [P_eq])
      end))
   end

experiment
begin
declare if_weak_cong [cong del]
declare [[simproc add: if_cong_bad, simp_trace]]

ML_val @{cterm "if ((2::nat) < 3) then 22 + 2 else 21 + 1"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "24"}
text ‹The result is the same as with the canonical congproc. But when inspecting the simp_trace›
we can  observe some odd congruence behaviour:
 First condition is simplified to True
 Non-standard congruence rule @{thm if_True_cong} is selected which does
  not have the same head on the right hand side and simply gives back the "then-branch"
 Incidently simplification continues on the then-branch as there are simplification rules for
  the redex @{term "22 + 2"}. So we were lucky.

The following example with a nested if-then-else illustrates what can go wrong.
›

ML_val @{cterm "if ((2::nat) < 3) then (if ((3::nat) < 2) then 20 + 1 else 20 + 2) else 20 + 3"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "if (3::nat) < 2 then 20 + 1 else 20 + 2"}

text ‹For the a nested if-then-else we get stuck as there is no simplification rule
triggering for the inner if-then-else once it is at the toplevel. Note that it does not
help to specify the congproc as ‹weak›. The last step of the simplifier was the application
of the congruence rule. No rewrite rule is triggered for the resulting redex so the simplifier
does not revisit the term. Note that congruence rules (and congprocs) are applied only when the
simplifier walks down the term (top-down), simplification rules (and simprocs) on the other hand
are only applied when the simplifier walks up the term (bottom-up). As the simplifier is on its
way up there is no reason to try a congruence rule on the resulting redex.
It only tries to apply simplification rules.

So congprocs should better behave canonically like ordinary congruence rules and
 preserve the head of the redex:
›
end

experiment
begin
declare if_weak_cong [cong del]
declare [[simproc add: if_cong, simp_trace]]

ML_val @{cterm "if ((2::nat) < 3) then (if ((3::nat) < 2) then 20 + 1 else 20 + 2) else 20 + 3"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "22"}
end

text ‹Alternatively one can supply a non standard rule if the congproc takes care of the normalisation
of the relevant subterms itself.›

lemma if_True_diy_cong: "P = True  x = x'  (if P then x else y) = x'"
  by simp

lemma if_False_diy_cong: "P = False  y = y'  (if P then x else y) = y'"
  by simp

simproc_setup passive congproc if_cong_diy (if x then a else b) = let
    val if_True_diy_cong = @{thm if_True_diy_cong [cong_format]}
    val if_False_diy_cong = @{thm if_False_diy_cong [cong_format]}
    val if_cong = @{thm if_cong [cong_format]}
  in
    (K (fn ctxt => fn ct =>
      let
         val (_, [P, x, y]) = Drule.strip_comb ct
         val P_eq = Simplifier.asm_full_rewrite ctxt P
         val rhs = Thm.dest_equals_rhs (Thm.cprop_of P_eq)
         val (rule, ts)  = (case Thm.term_of rhs of
                       @{term True}  => (if_True_diy_cong, [x])
                     | @{term False} => (if_False_diy_cong, [y])
                     | _ => (if_cong, []) )
         val eqs = map (Simplifier.asm_full_rewrite ctxt) ts ― ‹explicitly simplify the subterms›
      in
        SOME (rule OF (P_eq::eqs))
      end))
   end

experiment
begin
declare if_weak_cong [cong del]
declare [[simproc add: if_cong_diy, simp_trace]]

ML_val @{cterm "if ((2::nat) < 3) then (if ((3::nat) < 2) then 20 + 1 else 20 + 2) else 20 + 3"}
  |> Simplifier.asm_full_rewrite @{context}
  |> assert @{cterm "22"}
end


subsection ‹Sketches for more meaningful congprocs›

text ‹One motivation for congprocs is the simplification of monadic terms which occur in the
context of the verification of imperative programs. We use Imperative_HOL as an example here.
In typical monadic programs we encounter lots of monadic binds and
guards aka assertions. Typical assertions protect against arithmetic overflows, dangling pointers
or might encode type information for some pointers. In particular when those assertions are
mechanically generated, e.g. by refinement proofs, there tends to be a lot of redundancy in
the assertions that are sprinkled all over the place in the program. Removing those redundant
guards by simplification can be utilised by congprocs.

›

text ‹
A first attempt for a congruence rule to propagate an assertion through a bind is the following:
We can assume the predicate when simplifying the 'body' termf:
›
lemma assert_bind_cong':
  "(P x = P' x)  (P x  f = f')  ((assert P x)  f) = ((assert P' x)  f')"
  by (auto simp add: assert_def bind_def simp add: execute_raise split: option.splits)

text ‹Unfortunately this is not a plain congruence rule that the simplifier can work with.
The problem is that congruence rules only work on the head constant of the left hand side of
 the equation in the conclusion. This is constbind. But the rule is too specific as it only works
for binds where the first monadic action is an constassert. Fortunately congprocs offer
that flexibility. Like simprocs they can be triggered by patterns not only the head constant.

A slightly more abstract version, generalises the parameter termx for simplification of the body
termf. This also illustrates the introduction of bound variables that are passed along through
the constbind.
›

lemma assert_bind_cong:
  "(P x = P' x)  (x. P x  f x = f' x)  ((assert P x)  f) = ((assert P' x)  f')"
  by (auto simp add: assert_def bind_def simp add: execute_raise execute_return split: option.splits)

text ‹Another typical use case is that a monadic action returns a tuple which is then propagated
through the binds. The tuple is naturally stated in 'eta expanded' form like termλ(x,y). f x y such that the
body can directly refer to the bound variables termx and termz and not via constfst and
 constsnd.›

lemma assert_bind_cong2':
  "(P a b = P' a b)  (P a b  f a b = f' a b) 
  ((assert (λ(x,y). P x y) (a,b))  (λ(x,y). f x y)) = ((assert (λ(x,y). P' x y) (a,b))  (λ(x,y). f' x y))"
  apply (auto simp add: assert_def bind_def simp add: execute_raise execute_return
      split: option.splits)
  done

lemma assert_bind_cong2:
  "(P a b = P' a b)  (a b. P a b  f a b = f' a b) 
  ((assert (λ(x,y). P x y) (a,b))  (λ(x,y). f x y)) = ((assert (λ(x,y). P' x y) (a,b))  (λ(x,y). f' x y))"
  apply (auto simp add: assert_def bind_def simp add: execute_raise execute_return
      split: option.splits)
  done

lemma assert_True_cond[simp]: "P x  ((assert P x)  f) = f x"
  by (auto simp add: assert_def bind_def
      simp add: execute_return execute_raise split: option.splits)

simproc_setup passive congproc assert_bind_cong ((assert P x)  f) = K (K (K (SOME @{thm assert_bind_cong [cong_format]})))

simproc_setup passive congproc assert_bind_cong2 ((assert P x)  f) = K (K (K (SOME @{thm assert_bind_cong2 [cong_format]})))

experiment
begin
declare [[simproc add: assert_bind_cong]]
text ‹The second assert is removed as expected.›
ML_val @{cterm "do {x <- assert P x; y <- assert P x; f y}"}
  |> (Simplifier.asm_full_rewrite @{context})
  |> assert_equiv @{cterm "assert P x  f"}
end

experiment fixes a::nat
begin
declare [[simproc add: assert_bind_cong]]
text ‹Does not work as expected due to issues with binding of the tuples›
ML_val @{cterm "do {(a, b) <- assert (λ(x,y). x < y) (a,b); (k,i) <- assert (λ(x,y). x < y)  (a, b); return (k < i)}"}
  |> (Simplifier.asm_full_rewrite @{context})
  |> assert_equiv @{cterm "assert (λc. a < b) (a, b) 
      (λx. case x of (a, b)  assert (λc. a < b) (a, b)  (λx. case x of (k, i)  return (k < i)))"}
end

experiment fixes a::nat
begin
declare [[simproc add: assert_bind_cong2]]
text ‹Works as expected. The second assert is removed and the condition is propagated to the final
 constreturn
ML_val @{cterm "do {(a, b) <- assert (λ(x,y). x < y) (a,b); (k,i) <- assert (λ(x,y). x < y)  (a, b); return (k < i)}"}
  |> (Simplifier.asm_full_rewrite @{context})
  |> assert_equiv @{cterm "assert (λ(x, y). x < y) (a, b)  (λ(x, y). return True)"}
end

text ‹To properly handle tuples in general we cold of course refine our congproc to
analyse the arity of the constbind and then derive a variant of @{thm assert_bind_cong2} with the
corresponding arity, 3, 4, 5... We leave this as a exercise for the reader.

N.B. For the problem of tuple-splitting there sure are other solutions, e.g. normalising the
program with @{thm case_prod_conv} or @{thm case_prod_unfold}. The drawback is that this usually
diminishes the readability of the monadic expression. Moreover, from a performance perspective it
is usually better to split a rule like @{thm assert_bind_cong2}, which is abstract and of a fixed
known small size, compared to normalisation of an unknown user defined  monadic expression which might
be quite sizeable.
›


subsection ‹Customizing the context in congruence rules and congprocs›

text ‹
When the simplifier works on a term it manages its context in the simpset. In
particular when 'going under' an abstraction λx. ...› it introduces a fresh free variable termx,
substitutes it in the body and continues. Also when going under an implication termP  C it
assumes termP, extracts simplification rules from termP which it adds to the simpset and
simplifies the conclusion termC. This pattern is what we typically encounter in congruence rules
like @{thm assert_bind_cong2} where we have a precondition like
 terma b. P a b  f a b = f' a b. This advises the simplifier to fix terma and termb,
assume termP a b, extract simplification rules from that, and continue to simplify termf a b.

With congprocs we can go beyond this default behaviour of the simplifier as we are not restricted
to the format of congruence rules. In the end we have to deliver an equation but are free how we
derive it. A common building block of such more refined congprocs is that we
not only want to add termP a b to the simpset but want to enhance some other application specific
data with that premise, e.g. add it to a collection of named theorems or come up with some derived facts
that we want to offer some other tool (like another simproc, or solver).
The simpset already offers the possiblity to customise @{ML Simplifier.mksimps} which is a
function of type @{ML_type "Proof.context -> thm -> thm list"}. This function is used to derive equations
from a premise like termP a b when it is added by the simplifier. We have extended that
function to type @{ML_type "thm -> Proof.context -> thm list * Proof.context"} to give the user the
control to do additional modifications to the context:
@{ML Simplifier.get_mksimps_context}, @{ML Simplifier.set_mksimps_context}
The following contrived example illustrates the potential usage:
›

definition EXTRACT :: "bool  bool" where "EXTRACT P = P"
definition UNPROTECT :: "bool  bool" where "UNPROTECT P = P"

lemma EXTRACT_TRUE_UNPROTECT_D: "EXTRACT P  True  (UNPROTECT P  True)"
  by (simp add: EXTRACT_def UNPROTECT_def)

named_theorems my_theorems

text ‹We modify @{ML Simplifier.mksimps} to derive a theorem about termUNPROTECT P from
 termEXTRACT P and add it to the named theorems @{thm my_theorems}.›

setup let
  fun my_mksimps old_mksimps thm ctxt =
    let
      val (thms, ctxt') = old_mksimps thm ctxt
      val thms' = map_filter (try (fn thm => @{thm EXTRACT_TRUE_UNPROTECT_D} OF [thm])) thms
      val _ = tracing ("adding: " ^ @{make_string} thms' ^ " to my_theorems")
      val ctxt'' = ctxt' |> Context.proof_map (fold (Named_Theorems.add_thm @{named_theorems my_theorems}) thms')
    in
      (thms, ctxt'' )
    end
in
  Context.theory_map (fn context =>
    let val old_mksimps = Simplifier.get_mksimps_context (Context.proof_of context)
    in context |> Simplifier.map_ss (Simplifier.set_mksimps_context (my_mksimps old_mksimps)) end)
end

text ‹We provide a simproc that matches on termUNPROTECT P and tries to solve it
with rules in named theorems @{thm my_theorems}.›
simproc_setup UNPROTECT (UNPROTECT P) = fn _ => fn ctxt => fn ct =>
  let
    val thms = Named_Theorems.get ctxt @{named_theorems my_theorems}
    val _ = tracing ("my_theorems: " ^ @{make_string} thms)
    val eq = Simplifier.rewrite (ctxt addsimps thms) ct
  in if Thm.is_reflexive eq then NONE else SOME eq end

lemma "EXTRACT P  UNPROTECT P"
  supply [[simp_trace]]
  apply (simp)
  done

text ‹Illustrate the antiquotation.›
ML val conproc1 = simproc_setuppassive weak_congproc if_cong1  ("if x then a else b") =
  (K (K (K (SOME @{thm if_cong [cong_format]}))))

end