Skip to content

Commit d1812c3

Browse files
committed
Less aggressive CSE across calls to known runtime functions
Keeping all equations across these calls is semantically sound, but can result in over-utilization of callee-save registers or stack locations. Mitigate this risk by keeping only [Builtin] equations. A more refined cost criterion can be considered later.
1 parent 47e65c1 commit d1812c3

File tree

2 files changed

+55
-20
lines changed

2 files changed

+55
-20
lines changed

backend/CSE.v

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,25 @@ Definition kill_loads_after_store
319319
let p := aaddressing app addr args in
320320
kill_equations (filter_after_store n p (size_chunk chunk)) n.
321321

322+
(** [kill_cheap_computations n] removes all equations corresponding to
323+
``cheap'' computations, i.e. computations that are not worth
324+
factoring across a call to a runtime library function.
325+
(Such a factoring has its own costs, since the result must be kept
326+
in a callee-save register or a stack location.)
327+
As a rough approximation of costs, we say that all [Op] and [Load]
328+
computations are cheap, and all [Builtin] computations are
329+
expensive. More precise criteria are possible. *)
330+
331+
Definition filter_cheap (r: rhs) : bool :=
332+
match r with
333+
| Op _ _ => true
334+
| Load _ _ _ _ => true
335+
| Builtin _ _ => false
336+
end.
337+
338+
Definition kill_cheap_computations (n: numbering) : numbering :=
339+
kill_equations filter_cheap n.
340+
322341
(** [add_store_result n chunk addr rargs rsrc] updates the numbering [n]
323342
to reflect the knowledge gained after executing an instruction
324343
[Istore chunk addr rargs rsrc]. An equation [vsrc >= Load chunk addr vargs]
@@ -469,17 +488,19 @@ Module Solver := BBlock_solver(Numbering).
469488

470489
(** The transfer function for the dataflow analysis returns the numbering
471490
``after'' execution of the instruction at [pc], as a function of the
472-
numbering ``before''. For [Iop] and [Iload] instructions, we add
473-
equations or reuse existing value numbers as described for
474-
[add_op] and [add_load]. For [Istore] instructions, we forget
475-
equations involving memory loads at possibly overlapping locations,
476-
then add an equation for loads from the same location stored to.
477-
For [Icall] instructions, we could simply associate a fresh, unconstrained by equations value number
478-
to the result register. However, it is often undesirable to eliminate
479-
common subexpressions across a function call (there is a risk of
480-
increasing too much the register pressure across the call), so we
481-
just forget all equations and start afresh with an empty numbering.
482-
Finally, for instructions that modify neither registers nor
491+
numbering ``before''.
492+
- For [Iop] and [Iload] instructions, we add equations or reuse
493+
existing value numbers as described for [add_op] and [add_load].
494+
- For [Istore] instructions, we forget equations involving memory
495+
loads at possibly overlapping locations, then add an equation for
496+
loads from the same location stored to.
497+
- For [Icall] instructions, we could simply associate a fresh,
498+
unconstrained by equations value number to the result register.
499+
However, it is often undesirable to eliminate common subexpressions
500+
across a function call (there is a risk of increasing too much the
501+
register pressure across the call), so we just forget all equations
502+
and start afresh with an empty numbering.
503+
- Finally, for instructions that modify neither registers nor
483504
the memory, we keep the numbering unchanged.
484505
485506
For builtin invocations [Ibuiltin], we have four strategies:
@@ -492,7 +513,12 @@ Module Solver := BBlock_solver(Numbering).
492513
- Keep all equations, taking advantage of the fact that neither memory
493514
nor registers are modified. This is appropriate for annotations and
494515
volatile loads.
495-
- Keep all equations and add a new equation. This is for known builtin functions.
516+
- Keep all equations and add a new equation. This is appropriate for
517+
builtin functions with known semantics.
518+
519+
[Icall] instructions that call runtime library functions with known
520+
semantics can be analyzed like a builtin invocation,
521+
by adding a new [Builtin] equation.
496522
*)
497523

498524
Definition transfer (f: function) (dm: defmap) (approx: PMap.t VA.t) (pc: node) (before: numbering) :=
@@ -513,7 +539,9 @@ Definition transfer (f: function) (dm: defmap) (approx: PMap.t VA.t) (pc: node)
513539
| Icall sig ros args res s =>
514540
match is_known_runtime_function dm ros with
515541
| None => empty_numbering
516-
| Some bf => add_builtin before (BR res) bf (map (@BA _) args)
542+
| Some bf =>
543+
let n := kill_cheap_computations before in
544+
add_builtin n (BR res) bf (map (@BA _) args)
517545
end
518546
| Itailcall sig ros args =>
519547
empty_numbering

backend/CSEproof.v

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,14 @@ Proof.
628628
- econstructor; eauto using builtin_args_depends_on_memory_correct.
629629
Qed.
630630

631+
Lemma kill_cheap_computations_hold:
632+
forall valu ge sp rs m n,
633+
numbering_holds valu ge sp rs m n ->
634+
numbering_holds valu ge sp rs m (kill_cheap_computations n).
635+
Proof.
636+
intros. eapply kill_equations_hold; eauto.
637+
Qed.
638+
631639
Lemma store_normalized_range_sound:
632640
forall bc chunk v,
633641
vmatch bc v (store_normalized_range chunk) ->
@@ -1030,7 +1038,6 @@ Qed.
10301038

10311039
End REDUCELD.
10321040

1033-
10341041
(** The numberings associated to each instruction by the static analysis
10351042
are inductively satisfiable, in the following sense: the numbering
10361043
at the function entry point is satisfiable, and for any RTL execution
@@ -1242,15 +1249,15 @@ Lemma transf_step_correct:
12421249
Proof.
12431250
induction 1; intros; inv MS; try (TransfInstr; intro C).
12441251

1245-
(* Inop *)
1246-
- left; econstructor; split.
1252+
- (* Inop *)
1253+
left; econstructor; split.
12471254
eapply exec_Inop; eauto.
12481255
econstructor; eauto.
12491256
eapply analysis_correct_1; eauto. simpl; auto.
12501257
unfold transfer; rewrite H; auto.
12511258

1252-
(* Iop *)
1253-
- destruct (is_trivial_op op) eqn:TRIV.
1259+
- (* Iop *)
1260+
destruct (is_trivial_op op) eqn:TRIV.
12541261
+ (* unchanged *)
12551262
exploit eval_operation_lessdef. eapply regs_lessdef_regs; eauto. eauto. eauto.
12561263
intros [v' [A B]].
@@ -1388,7 +1395,7 @@ Proof.
13881395
apply EV. intros. econstructor; eauto.
13891396
eapply analysis_correct_1; eauto. simpl; auto.
13901397
unfold transfer; rewrite H, IK.
1391-
eapply add_builtin_holds with (res := BR res); eauto using kill_all_loads_hold, eval_builtin_args_trivial.
1398+
eapply add_builtin_holds with (res := BR res); eauto using kill_cheap_computations_hold, eval_builtin_args_trivial.
13921399
replace v0 with v by congruence.
13931400
apply set_reg_lessdef; auto.
13941401
apply Val.lessdef_trans with (rs#r); auto.
@@ -1407,7 +1414,7 @@ Proof.
14071414
econstructor; eauto.
14081415
eapply analysis_correct_1; eauto. simpl; auto.
14091416
unfold transfer; rewrite H, IK.
1410-
eapply add_builtin_holds with (res := BR res); eauto using kill_all_loads_hold, eval_builtin_args_trivial.
1417+
eapply add_builtin_holds with (res := BR res); eauto using kill_cheap_computations_hold, eval_builtin_args_trivial.
14111418
apply set_reg_lessdef; auto.
14121419
** (* the builtin function fails *)
14131420
right. econstructor; exists 1%nat; split.

0 commit comments

Comments
 (0)