Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Incremental TD3: reluctant function destabilization #369

Merged
merged 8 commits into from
Oct 11, 2021
33 changes: 20 additions & 13 deletions src/incremental/compareAST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ open Cil

type changed_global = {
old: global;
current: global
current: global;
unchangedHeader: bool
}

type change_info = {
Expand Down Expand Up @@ -230,13 +231,19 @@ and eq_block ((a, af): Cil.block * fundec) ((b, bf): Cil.block * fundec) =
a.battrs = b.battrs && List.for_all (fun (x,y) -> eq_stmt (x, af) (y, bf)) (List.combine a.bstmts b.bstmts)

let eqF (a: Cil.fundec) (b: Cil.fundec) =
try
eq_varinfo a.svar b.svar &&
List.for_all (fun (x, y) -> eq_varinfo x y) (List.combine a.sformals b.sformals) &&
List.for_all (fun (x, y) -> eq_varinfo x y) (List.combine a.slocals b.slocals) &&
eq_block (a.sbody, a) (b.sbody, b)
with Invalid_argument _ -> (* One of the combines failed because the lists have differend length *)
false
let unchangedHeader =
try
eq_varinfo a.svar b.svar &&
List.for_all (fun (x, y) -> eq_varinfo x y) (List.combine a.sformals b.sformals)
with Invalid_argument _ -> false in
let identical =
try
unchangedHeader &&
List.for_all (fun (x, y) -> eq_varinfo x y) (List.combine a.slocals b.slocals) &&
eq_block (a.sbody, a) (b.sbody, b)
with Invalid_argument _ -> (* The combine failed because the lists have differend length *)
false in
identical, unchangedHeader

let rec eq_init (a: init) (b: init) = match a, b with
| SingleInit e1, SingleInit e2 -> eq_exp e1 e2
Expand All @@ -250,9 +257,9 @@ let eq_initinfo (a: initinfo) (b: initinfo) = match a.init, b.init with

let eq_glob (a: global) (b: global) = match a, b with
| GFun (f,_), GFun (g,_) -> eqF f g
| GVar (x, init_x, _), GVar (y, init_y, _) -> eq_varinfo x y (* ignore the init_info - a changed init of a global will lead to a different start state *)
| GVarDecl (x, _), GVarDecl (y, _) -> eq_varinfo x y
| _ -> print_endline @@ "Not comparable: " ^ (Pretty.sprint ~width:100 (Cil.d_global () a)) ^ " and " ^ (Pretty.sprint ~width:100 (Cil.d_global () a)); false
| GVar (x, init_x, _), GVar (y, init_y, _) -> (eq_varinfo x y, false) (* ignore the init_info - a changed init of a global will lead to a different start state *)
| GVarDecl (x, _), GVarDecl (y, _) -> (eq_varinfo x y, false)
| _ -> print_endline @@ "Not comparable: " ^ (Pretty.sprint ~width:100 (Cil.d_global () a)) ^ " and " ^ (Pretty.sprint ~width:100 (Cil.d_global () a)); (false, false)

(* Returns a list of changed functions *)
let compareCilFiles (oldAST: Cil.file) (newAST: Cil.file) =
Expand All @@ -270,10 +277,10 @@ let compareCilFiles (oldAST: Cil.file) (newAST: Cil.file) =
(try
let old_global = GlobalMap.find ident map in
(* Do a (recursive) equal comparision ignoring location information *)
let identical = eq_glob old_global global in
let identical, unchangedHeader = eq_glob old_global global in
if identical
then changes.unchanged <- global :: changes.unchanged
else changes.changed <- {current = global; old = old_global} :: changes.changed
else changes.changed <- {current = global; old = old_global; unchangedHeader = unchangedHeader} :: changes.changed
with Not_found -> ())
with NoGlobalIdentifier _ -> () (* Global was no variable or function, it does not belong into the map *)
in
Expand Down
8 changes: 5 additions & 3 deletions src/incremental/updateCil.ml
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,18 @@ let update_ids (old_file: file) (ids: max_ids) (new_file: file) (map: (global_id
| _ -> ()
with Failure m -> ()
in
let reset_changed_fun (f: fundec) (old_f: fundec) =
let reset_changed_fun (f: fundec) (old_f: fundec) unchangedHeader =
f.svar.vid <- old_f.svar.vid;
update_vid_max f.svar.vid;
List.iter (fun l -> l.vid <- make_vid ()) f.slocals;
List.iter (fun f -> f.vid <- make_vid ()) f.sformals;
if unchangedHeader then
List.iter (fun (f,old_f) -> f.vid <- old_f.vid; update_vid_max f.vid) (List.combine f.sformals old_f.sformals)
else List.iter (fun f -> f.vid <- make_vid ()) f.sformals;
List.iter (fun s -> s.sid <- make_sid ()) f.sallstmts;
in
let reset_changed_globals (changed: changed_global) =
match (changed.current, changed.old) with
| GFun (nw, _), GFun (old, _) -> reset_changed_fun nw old
| GFun (nw, _), GFun (old, _) -> reset_changed_fun nw old changed.unchangedHeader
| _ -> ()
in
let update_fun (f: fundec) =
Expand Down
49 changes: 38 additions & 11 deletions src/solvers/td3.ml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ module WP =
* if exp.earlyglobs: the contexts will be the same since they don't contain the global, but the start state will be different!
*)
print_endline "Destabilizing start functions if their start state changed...";
(* record whether any function's start state changed. In that case do not use reluctant destabilization *)
let any_changed_start_state = ref false in
Comment on lines +249 to +250
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If earlyglobs is turned on and a global initializer changes, the start state of a start function changes but its context stays the same. To still trigger the reanalysis for the start function, a check is implemented at the beginning of the incremental preprocessing in the td3 solver to detect this and explicitly destabilize the start function. If this happens and the start function is already fully destabilized there is no need to apply reluctant destabilization on the changed functions afterwards.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If earlyglobs is turned on and a global initializer changes, the start state of a start function changes but its context stays the same.

If earlyglobs is on, the globals are never in the local states even, so changing a global initializer doesn't change the start state of the start function (nor its context). The change to the global initializer should just be destabilizing the global and thus everywhere its read.
Or do you mean when earlyglobs is off, because then globals are initially in the start state?

If this happens and the start function is already fully destabilized there is no need to apply reluctant destabilization on the changed functions afterwards.

The start state of a function changing is a function-specific property though. If one function's start state changes, but another's doesn't, then reluctant destabilization could still be applied to the latter. Changing one shouldn't prevent reluctant destabilization of others, but just the changed one.

Also, destabilizing a function entry that was already destabilized shouldn't really be doing anything: the first destabilize clears its infl, so the second one doesn't do anything anyway.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If earlyglobs is on, the globals are never in the local states even, so changing a global initializer doesn't change the start state of the start function (nor its context).

There seems to be some inconsistent behavior now. If you run the following minimalistic example with the incremental config (i.e. privatization off) the function main has a changed start state (because of the changed global initializer) and is therefor destabilized during the preparation for the incremental run. The global g is not a member of the list st.

int g = 0; // change to g = 2;

int main() {
  return 0;
}

This is the behavior of the incremental analysis as I know it and as it is described in the comments of the changed start state handling. I managed however to reconstruct what you described by turning privatization and the (probably not incremental compatible) mutex analysis on. Do you know if there was any recent change to the privatization that could affect this? Why are globals part of st with privatization turned on?

The start state of a function changing is a function-specific property though. If one function's start state changes, but another's doesn't, then reluctant destabilization could still be applied to the latter.

With the behavior I described above, a changed global initializer affects the start state of every start function so all of them are destabilized anyway. If it weren't like that, it would be nicer to make a distinction between which function's start state changed. The problem I see is, that one can't relate an arbitrary changed function to the start function it is (indirectly) being called from. So it would be difficult (at least efficiently) to tell whether the changed function needs to be solved separately as part of reluctant destabilization.

Also, destabilizing a function entry that was already destabilized shouldn't really be doing anything: the first destabilize clears its infl, so the second one doesn't do anything anyway.

The point is not to avoid the destabilization, but to avoid the function specific solve on a possibly outdated context. If the start function has been destabilized completely anyway, solving it with the top-down solver directly avoids unnecessary evaluations.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you run the following minimalistic example with the incremental config (i.e. privatization off) the function main has a changed start state (because of the changed global initializer) and is therefor destabilized during the preparation for the incremental run. The global g is not a member of the list st.

I've never been a fan of earlyglobs, so I'm not 100% sure what its expected behavior should be. But my understanding has been that it should from the beginning (global initialization) make global program variables into global constraint system variables, so g should be in st. Not being there and instead remaining in the entry state of the function sounds like an earlyglobs bug to me.

the (probably not incremental compatible) mutex analysis

I'm not sure why the mutex analysis should be incompatible with incremental because incremental analysis on the level of constraint systems shouldn't care whatsoever about the content of the constraint system.
The only problem currently (before #363) is that protecting locksets in global constraint variables cannot improve, but that should be all.

Do you know if there was any recent change to the privatization that could affect this?

No, but it could be a bug remaining from the time all the traces article privatization implementations were added. earlyglobs caused a bunch of annoying issues there, but those got fixed to the extent that the regression tests pass. Although there might still be some bug that wasn't caught by the tests then.

Why are globals part of st with privatization turned on?

Without earlyglobs, globals are kept in the local state just like all other variables. This allows them to be analyzed flow-sensitively. As long as the program is in single-threaded mode, they will be there.
What privatization does is that when multi-threaded mode is entered, they are removed from the local state and instead tracked flow-insensitively in the global invariant.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to have such a set_start? Couldn't it just call side to set those, which wouldn't overwrite but would accumulate soundly?

The start values are a weird thing altogether: in #370 I realized I have to use them to construct transformed right-hand sides, which also join those desired start values. Because all they really are, is right-hand sides with a constant value.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why the mutex analysis should be incompatible with incremental because incremental analysis on the level of constraint systems shouldn't care whatsoever about the content of the constraint system.

Sorry, I mixed this up. I was thinking about the ids of threads that seem to be based on the location of the function to be executed. So this would be a problem for the consistency of ids across different versions in an incremental run.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there anything actionable left here? We need a long-term fix for these sorts of program harnessing type hacks, which is #178, but is there some direct fix needed to ensure earlyglobs publish is more robust? To me that seems out of scope for this pull request, or does this need fixing here and now?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After the discussions and decision on Friday, no, there's no need to address these things here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I think I can merge this PR. For fixing the incremental analysis to handle globals in the start variables properly, I will open a new issue because it does not really relate to reluctant destabilization but rather to the incremental analysis in general.

(* ignore @@ Pretty.printf "st: %d, data.st: %d\n" (List.length st) (List.length data.st); *)
List.iter (fun (v,d) ->
match GU.assoc_eq v data.st S.Var.equal with
Expand All @@ -255,9 +257,10 @@ module WP =
()
else (
ignore @@ Pretty.printf "Function %a has changed start state: %a\n" S.Var.pretty_trace v S.Dom.pretty_diff (d, d');
any_changed_start_state := true;
destabilize v
)
| None -> ignore @@ Pretty.printf "New start function %a not found in old list!\n" S.Var.pretty_trace v
| None -> any_changed_start_state := true; ignore @@ Pretty.printf "New start function %a not found in old list!\n" S.Var.pretty_trace v
) st;

print_endline "Destabilizing changed functions...";
Expand All @@ -269,26 +272,34 @@ module WP =
let obsolete_funs = filter_map (fun c -> match c.old with GFun (f,l) -> Some f | _ -> None) S.increment.changes.changed in
let removed_funs = filter_map (fun g -> match g with GFun (f,l) -> Some f | _ -> None) S.increment.changes.removed in
(* TODO: don't use string-based nodes, make obsolete of type Node.t BatSet.t *)
let obsolete = Set.union (Set.of_list (List.map (fun a -> Node.show_id (Function a)) obsolete_funs))
(Set.of_list (List.map (fun a -> Node.show_id (FunctionEntry a)) obsolete_funs)) in
let obsolete_ret = Set.of_list (List.map (fun f -> Node.show_id (Function f)) obsolete_funs) in
let obsolete_entry = Set.of_list (List.map (fun f -> Node.show_id (FunctionEntry f)) obsolete_funs) in

List.iter (fun a -> print_endline ("Obsolete function: " ^ a.svar.vname)) obsolete_funs;

(* Actually destabilize all nodes contained in changed functions. TODO only destabilize fun_... nodes *)
HM.iter (fun k v -> if Set.mem (S.Var.var_id k) obsolete then destabilize k) stable; (* TODO: don't use string-based nodes *)
let old_ret = Hashtbl.create 103 in
if not !any_changed_start_state && GobConfig.get_bool "incremental.reluctant" then (
(* save entries of changed functions in rho for the comparison whether the result has changed after a function specific solve *)
HM.iter (fun k v -> if Set.mem (S.Var.var_id k) obsolete_ret then ( (* TODO: don't use string-based nodes *)
let old_rho = HM.find rho k in
let old_infl = HM.find_default infl k VS.empty in
Hashtbl.replace old_ret k (old_rho, old_infl))) rho;
) else (
HM.iter (fun k _ -> if Set.mem (S.Var.var_id k) obsolete_entry then destabilize k) stable
);

(* We remove all unknowns for program points in changed or removed functions from rho, stable, infl and wpoint *)
(* TODO: don't use string-based nodes, make marked_for_deletion of type unit (Hashtbl.Make (Node)).t *)
let add_nodes_of_fun (functions: fundec list) (nodes)=
let add_nodes_of_fun (functions: fundec list) (nodes) withEntry =
let add_stmts (f: fundec) =
List.iter (fun s -> Hashtbl.replace nodes (Node.show_id (Statement s)) ()) (f.sallstmts)
in
List.iter (fun f -> Hashtbl.replace nodes (Node.show_id (FunctionEntry f)) (); Hashtbl.replace nodes (Node.show_id (Function f)) (); add_stmts f; Hashtbl.replace nodes (string_of_int (CfgTools.get_pseudo_return_id f)) ()) functions;
List.iter (fun f -> if withEntry then Hashtbl.replace nodes (Node.show_id (FunctionEntry f)) (); Hashtbl.replace nodes (Node.show_id (Function f)) (); add_stmts f; Hashtbl.replace nodes (string_of_int (CfgTools.get_pseudo_return_id f)) ()) functions;
in

let marked_for_deletion = Hashtbl.create 103 in
add_nodes_of_fun obsolete_funs marked_for_deletion;
add_nodes_of_fun removed_funs marked_for_deletion;
add_nodes_of_fun obsolete_funs marked_for_deletion (!any_changed_start_state || not (GobConfig.get_bool "incremental.reluctant"));
add_nodes_of_fun removed_funs marked_for_deletion true;

print_endline "Removing data for changed and removed functions...";
let delete_marked s = HM.iter (fun k v -> if Hashtbl.mem marked_for_deletion (S.Var.var_id k) then HM.remove s k ) s in (* TODO: don't use string-based nodes *)
Expand All @@ -297,10 +308,26 @@ module WP =
delete_marked wpoint;
delete_marked stable;

print_data data "Data after clean-up"

print_data data "Data after clean-up";

List.iter set_start st;

if not !any_changed_start_state && GobConfig.get_bool "incremental.reluctant" then (
(* solve on the return node of changed functions. Only destabilize influenced nodes outside the function if the analysis result changed *)
print_endline "Separately solving changed functions...";
Hashtbl.iter (fun x (old_rho, old_infl) -> ignore @@ Pretty.printf "test for %a\n" Node.pretty_trace (S.Var.node x);
solve x Widen;
if not (S.Dom.leq (HM.find rho x) old_rho) then (
HM.replace infl x old_infl;
destabilize x; HM.replace stable x ())
) old_ret;

print_endline "Final solve..."
)
);

List.iter set_start st;
if not (GobConfig.get_bool "incremental.load") then List.iter set_start st;
List.iter init vs;
(* If we have multiple start variables vs, we might solve v1, then while solving v2 we side some global which v1 depends on with a new value. Then v1 is no longer stable and we have to solve it again. *)
let i = ref 0 in
Expand Down
1 change: 1 addition & 0 deletions src/util/defaults.ml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ let _ = ()
; reg Incremental "incremental.save" "false" "Store incremental analysis results."
; reg Incremental "incremental.stable" "true" "Reuse the stable set and selectively destabilize it (recommended)."
; reg Incremental "incremental.wpoint" "false" "Reuse the wpoint set (not recommended). Reusing the wpoint will combine existing results at previous widening points."
; reg Incremental "incremental.reluctant" "true" "Destabilize nodes in changed functions reluctantly"

(* {4 category [Semantics]} *)
let _ = ()
Expand Down