From fb471a43aeda50c9b184b81d72762138e693a8ae Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Tue, 22 Feb 2011 13:05:16 +0000 Subject: [PATCH 1/8] Add helper functions to provide additional upgrade checks. Signed-off-by: Mike McClurg --- ocaml/xapi/helpers.ml | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 3bc8f15d36e..213b94a88d6 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -252,10 +252,15 @@ let compare_int_lists : int list -> int list -> int = let first_non_zero is = List.fold_left (fun a b -> if (a<>0) then a else b) 0 is in first_non_zero (List.map2 compare a b) +let version_string_of : __context:Context.t -> API.ref_host -> string = + fun ~__context host -> + List.assoc Xapi_globs._product_version + (Db.Host.get_software_version ~__context ~self:host) + let version_of : __context:Context.t -> API.ref_host -> int list = fun ~__context host -> - let vs = List.assoc Xapi_globs._product_version (Db.Host.get_software_version ~__context ~self:host) in - List.map int_of_string (String.split '.' vs) + let vs = version_string_of ~__context host + in List.map int_of_string (String.split '.' vs) (* Compares host versions, analogous to Pervasives.compare. *) let compare_host_product_versions : __context:Context.t -> API.ref_host -> API.ref_host -> int = @@ -269,6 +274,19 @@ let max_version_in_pool : __context:Context.t -> int list = and versions = List.map (version_of ~__context) (Db.Host.get_all ~__context) in List.fold_left max_version [] versions +let rec string_of_int_list : int list -> string = function + [] -> "" + | (x::xs) -> + if xs == [] + then string_of_int x + else string_of_int x ^ "." ^ string_of_int_list xs + +let host_has_highest_version_in_pool : __context:Context.t -> host:API.ref_host -> bool = + fun ~__context ~host -> + let host_version = version_of ~__context host + and max_version = max_version_in_pool ~__context in + (compare_int_lists host_version max_version) >= 0 + (* Assertion functions which raise an exception if certain invariants are broken during an upgrade. 
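   These build on the version helpers above: each host's product version is
   read from its software_version map as a dot-separated string (e.g.
   "5.6.100"), version_of parses it into an int list such as [5; 6; 100],
   and compare_int_lists orders such lists lexicographically, so that

     compare_int_lists [5; 6; 100] [5; 6; 99] = 1

   and host_has_highest_version_in_pool holds exactly when the host's
   version list compares >= the maximum taken over Db.Host.get_all.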
*) let assert_rolling_upgrade_not_in_progress : __context:Context.t -> unit = @@ -278,12 +296,11 @@ let assert_rolling_upgrade_not_in_progress : __context:Context.t -> unit = let assert_host_has_highest_version_in_pool : __context:Context.t -> host:API.ref_host -> unit = fun ~__context ~host -> - let host_version = version_of ~__context host - and max_version = max_version_in_pool ~__context in - if (compare_int_lists host_version max_version) < 0 then + if not (host_has_highest_version_in_pool ~__context ~host:host) then raise (Api_errors.Server_error (Api_errors.not_supported_during_upgrade, [])) -let assert_host_versions_not_decreasing : __context:Context.t -> host_from:API.ref_host -> host_to:API.ref_host -> unit = +let assert_host_versions_not_decreasing : + __context:Context.t -> host_from:API.ref_host -> host_to:API.ref_host -> unit = fun ~__context ~host_from ~host_to -> let from_version = version_of ~__context host_from and to_version = version_of ~__context host_to in From 325adc8905470f57dba5eebd186579afadc0f085 Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Tue, 22 Feb 2011 13:05:46 +0000 Subject: [PATCH 2/8] Fix EOL whitespace and some blank lines in helpers.ml Signed-off-by: Mike McClurg --- ocaml/xapi/helpers.ml | 172 +++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 87 deletions(-) diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 213b94a88d6..229e088eefb 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -43,13 +43,13 @@ let choose_network_name_for_pif device = let rpc_fun : (Http.request -> Unix.file_descr -> Xml.xml -> Xml.xml) option ref = ref None let get_rpc () = - match !rpc_fun with + match !rpc_fun with None -> failwith "No rpc set!" | Some f -> f (* Given a device-name and a VLAN, figure out what the dom0 device name is that corresponds to this: *) let get_dom0_network_device_name dev vlan = - if vlan = -1L then dev else Printf.sprintf "%s.%Ld" dev vlan + if vlan = -1L then dev else Printf.sprintf "%s.%Ld" dev vlan (* !! FIXME - trap proper MISSINGREFERENCE exception when this has been defined *) (* !! FIXME(2) - this code could be shared with the CLI? *) @@ -62,7 +62,7 @@ let get_management_ip_addr () = let addrs = Netdev.Addr.get (Xapi_inventory.lookup Xapi_inventory._management_interface) in let (addr,netmask) = List.hd addrs in Some (Unix.string_of_inet_addr addr) - with e -> None + with e -> None let get_localhost_uuid () = Xapi_inventory.lookup Xapi_inventory._installation_uuid @@ -71,22 +71,20 @@ let get_localhost ~__context : API.ref_host = let uuid = get_localhost_uuid () in Db.Host.get_by_uuid ~__context ~uuid -let get_localhost_ref = Db.Host.get_by_uuid ~uuid:(get_localhost_uuid ()) - -let make_rpc ~__context xml = +let make_rpc ~__context xml = let subtask_of = Ref.string_of (Context.get_task_id __context) in if Pool_role.is_master () then (* Master goes via domain socket *) (* !!! FIXME - maybe could make this go direct !!! 
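       (presumably: have the master call the API implementation in-process
       rather than looping back over the Unix domain socket)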
*) - Xmlrpcclient.do_xml_rpc_unix ~subtask_of ~version:"1.0" + Xmlrpcclient.do_xml_rpc_unix ~subtask_of ~version:"1.0" ~filename:Xapi_globs.unix_domain_socket ~path:"/" xml else (* Slave has to go back to master via network *) - Xmlrpcclient.do_secure_xml_rpc + Xmlrpcclient.do_secure_xml_rpc ~subtask_of ~use_stunnel_cache:true ~version:"1.1" ~host:(Pool_role.get_master_address ()) - ~port:!Xapi_globs.https_port ~path:"/" xml + ~port:!Xapi_globs.https_port ~path:"/" xml (* No auth needed over unix domain socket *) (** Log into pool master using the client code, call a function @@ -103,34 +101,34 @@ let call_api_functions ~__context f = let do_master_login () = let session = Client.Client.Session.slave_login rpc (get_localhost ~__context) !Xapi_globs.pool_secret in require_explicit_logout := true; - session + session in let session_id = try - if Pool_role.is_master() then + if Pool_role.is_master() then begin let session_id = Context.get_session_id __context in - if Db.Session.get_pool ~__context ~self:session_id + if Db.Session.get_pool ~__context ~self:session_id then session_id else do_master_login () - end + end else let session_id = Context.get_session_id __context in (* read any attr to test if session is still valid *) ignore (Db.Session.get_pool ~__context ~self:session_id) ; session_id - with _ -> + with _ -> do_master_login () in (* let () = debug "login done" in *) - finally - (fun () -> f rpc session_id) + finally + (fun () -> f rpc session_id) (fun () -> (* debug "remote client call finished; logging out"; *) - if !require_explicit_logout + if !require_explicit_logout then Client.Client.Session.logout rpc session_id) -let call_emergency_mode_functions hostname f = +let call_emergency_mode_functions hostname f = let rpc xml = Xmlrpcclient.do_secure_xml_rpc ~version:"1.0" ~host:hostname ~port:!Xapi_globs.https_port ~path:"/" xml in let session_id = Client.Client.Session.slave_local_login rpc !Xapi_globs.pool_secret in @@ -206,7 +204,7 @@ type direct_pv_boot_t = { kernel: string; kernel_args: string; ramdisk: string o (** An 'indirect' PV boot (one that defers to a bootloader) has the following options: *) -type indirect_pv_boot_t = +type indirect_pv_boot_t = { bootloader: string; (** bootloader to use (eg "pygrub") *) extra_args: string; (** extra commandline arguments to pass bootloader for the kernel *) legacy_args: string; (** "legacy" args to cope with Zurich/Geneva guests *) @@ -215,7 +213,7 @@ type indirect_pv_boot_t = } (** A type which represents the boot method a guest is configured to use *) -type boot_method = +type boot_method = | HVM of hvm_boot_t | DirectPV of direct_pv_boot_t | IndirectPV of indirect_pv_boot_t @@ -228,7 +226,7 @@ let string_of_boot_method = function Printf.sprintf "Direct PV boot with kernel = %s; args = %s; ramdisk = %s" x.kernel x.kernel_args (string_of_option x.ramdisk) | IndirectPV x -> - Printf.sprintf "Indirect PV boot via bootloader %s; extra_args = %s; legacy_args = %s; bootloader_args = %s; VDIs = [ %s ]" + Printf.sprintf "Indirect PV boot via bootloader %s; extra_args = %s; legacy_args = %s; bootloader_args = %s; VDIs = [ %s ]" x.bootloader x.extra_args x.legacy_args x.pv_bootloader_args (String.concat "; " (List.map Ref.string_of x.vdis)) @@ -317,20 +315,20 @@ let get_boot_record_of_record ~__context ~string:lbr ~uuid:current_vm_uuid = end with (* xapi import/upgrade fallback: if sexpr parsing fails, try parsing using legacy xmlrpc format*) - Api_errors.Server_error (code,_) when code=Api_errors.field_type_error -> + 
Api_errors.Server_error (code,_) when code=Api_errors.field_type_error -> begin API.From.vM_t "ret_val" (Xml.parse_string lbr) end - with e -> + with e -> warn "Warning: exception '%s' parsing last booted record - returning current record instead" (ExnHelper.string_of_exn e); Db.VM.get_record ~__context ~self:(Db.VM.get_by_uuid ~__context ~uuid:current_vm_uuid) -let get_boot_record ~__context ~self = - let r = Db.VM.get_record_internal ~__context ~self in +let get_boot_record ~__context ~self = + let r = Db.VM.get_record_internal ~__context ~self in let lbr = get_boot_record_of_record ~__context ~string:r.Db_actions.vM_last_booted_record ~uuid:r.Db_actions.vM_uuid in (* CA-31903: we now use an unhealthy mix of fields from the boot_records and the live VM. In particular the VM is currently using dynamic_min and max from the live VM -- not the boot-time settings. *) - { lbr with + { lbr with API.vM_memory_target = 0L; API.vM_memory_dynamic_min = r.Db_actions.vM_memory_dynamic_min; API.vM_memory_dynamic_max = r.Db_actions.vM_memory_dynamic_max; @@ -339,10 +337,10 @@ let get_boot_record ~__context ~self = let set_boot_record ~__context ~self newbootrec = (* blank last_booted_record field in newbootrec, so we don't just keep encapsulating - old last_boot_records in new snapshots! *) + old last_boot_records in new snapshots! *) let newbootrec = {newbootrec with API.vM_last_booted_record=""; API.vM_bios_strings=[]} in - if rolling_upgrade_in_progress ~__context then - begin + if rolling_upgrade_in_progress ~__context then + begin (* during a rolling upgrade, there might be slaves in the pool who have not yet been upgraded to understand sexprs, so let's still talk using the legacy xmlrpc format. @@ -353,7 +351,7 @@ let set_boot_record ~__context ~self newbootrec = else begin (* if it's not a rolling upgrade, then we know everyone - else in the pool will understand s-expressions. + else in the pool will understand s-expressions. *) let sexpr = Xmlrpc_sexpr.xmlrpc_to_sexpr_str (API.To.vM_t newbootrec) in Db.VM.set_last_booted_record ~__context ~self ~value:sexpr @@ -361,7 +359,7 @@ let set_boot_record ~__context ~self newbootrec = () (** Inspect the current configuration of a VM and return a boot_method type *) -let boot_method_of_vm ~__context ~vm = +let boot_method_of_vm ~__context ~vm = if vm.API.vM_HVM_boot_policy <> "" then begin (* hvm_boot describes the HVM boot order. How? as a qemu-dm -boot param? *) let timeoffset = try List.assoc "timeoffset" vm.API.vM_platform with _ -> "0" in @@ -375,20 +373,20 @@ let boot_method_of_vm ~__context ~vm = DirectPV { kernel = kern; kernel_args = args; ramdisk = ramdisk } end else begin (* Extract the default kernel from the boot disk via bootloader *) - (* NB We allow multiple bootable VDIs, in which case the - bootloader gets to choose. Note that a VM may have no + (* NB We allow multiple bootable VDIs, in which case the + bootloader gets to choose. 
Note that a VM may have no bootable VDIs; this might happen for example if the bootloader intends to PXE boot *) - let bootable = List.filter - (fun self -> Db.VBD.get_bootable ~__context ~self) + let bootable = List.filter + (fun self -> Db.VBD.get_bootable ~__context ~self) vm.API.vM_VBDs in let non_empty = List.filter (fun self -> not (Db.VBD.get_empty ~__context ~self)) bootable in - let boot_vdis = - List.map + let boot_vdis = + List.map (fun self -> Db.VBD.get_VDI ~__context ~self) non_empty in - IndirectPV + IndirectPV { bootloader = vm.API.vM_PV_bootloader; extra_args = vm.API.vM_PV_args; legacy_args = vm.API.vM_PV_legacy_args; @@ -415,7 +413,7 @@ let has_booted_hvm_of_record ~__context r = && let boot_record = get_boot_record_of_record ~__context ~string:r.Db_actions.vM_last_booted_record ~uuid:r.Db_actions.vM_uuid in boot_record.API.vM_HVM_boot_policy <> "" - + let device_protocol_of_string domarch = match Domain.domarch_of_string domarch with | Domain.Arch_HVM | Domain.Arch_native -> Device_common.Protocol_Native @@ -424,19 +422,19 @@ let device_protocol_of_string domarch = let is_running ~__context ~self = Db.VM.get_domid ~__context ~self <> -1L -let devid_of_vif ~__context ~self = +let devid_of_vif ~__context ~self = int_of_string (Db.VIF.get_device ~__context ~self) exception Device_has_no_VIF -let vif_of_devid ~__context ~vm devid = +let vif_of_devid ~__context ~vm devid = let vifs = Db.VM.get_VIFs ~__context ~self:vm in let devs = List.map (fun self -> devid_of_vif ~__context ~self) vifs in let table = List.combine devs vifs in let has_vif = List.mem_assoc devid table in if not(has_vif) then raise Device_has_no_VIF - else List.assoc devid table + else List.assoc devid table (** Return the domid on the *local host* associated with a specific VM. Note that if this is called without the VM lock then the result is undefined: the @@ -475,8 +473,8 @@ let is_sr_shared ~__context ~self = List.length (Db.SR.get_PBDs ~__context ~self let get_shared_srs ~__context = let srs = Db.SR.get_all ~__context in List.filter (fun self -> is_sr_shared ~__context ~self) srs - -let get_pool ~__context = List.hd (Db.Pool.get_all ~__context) + +let get_pool ~__context = List.hd (Db.Pool.get_all ~__context) let get_main_ip_address ~__context = try Pool_role.get_master_address () with _ -> "127.0.0.1" @@ -490,17 +488,17 @@ let is_pool_master ~__context ~host = (** Indicates whether ballooning is enabled for the given virtual machine. 
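    Currently hard-coded to [true] for every VM.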
*) let ballooning_enabled_for_vm ~__context vm_record = true -let get_vm_metrics ~__context ~self = +let get_vm_metrics ~__context ~self = let metrics = Db.VM.get_metrics ~__context ~self in if metrics = Ref.null then failwith "Could not locate VM_metrics object for VM: internal error" else metrics -let get_vbd_metrics ~__context ~self = +let get_vbd_metrics ~__context ~self = let metrics = Db.VBD.get_metrics ~__context ~self in if metrics = Ref.null then failwith "Could not locate VBD_metrics object for VBD: internal error" else metrics -let get_vif_metrics ~__context ~self = +let get_vif_metrics ~__context ~self = let metrics = Db.VIF.get_metrics ~__context ~self in if metrics = Ref.null then failwith "Could not locate VIF_metrics object for VIF: internal error" @@ -527,11 +525,11 @@ let get_pool_secret () = end (* Checks if an SR exists, returning an SR ref option (None if it is missing) *) -let check_sr_exists ~__context ~self = +let check_sr_exists ~__context ~self = try ignore(Db.SR.get_uuid ~__context ~self); Some self with _ -> None (* Returns an SR suitable for suspending this VM *) -let choose_suspend_sr ~__context ~vm = +let choose_suspend_sr ~__context ~vm = (* If the Pool.suspend_image_SR exists, use that. Otherwise try the Host.suspend_image_SR *) let pool = get_pool ~__context in let pool_sr = Db.Pool.get_suspend_image_SR ~__context ~self:pool in @@ -541,11 +539,11 @@ let choose_suspend_sr ~__context ~vm = match check_sr_exists ~__context ~self:pool_sr, check_sr_exists ~__context ~self:host_sr with | Some x, _ -> x | _, Some x -> x - | None, None -> + | None, None -> raise (Api_errors.Server_error (Api_errors.vm_no_suspend_sr, [Ref.string_of vm])) (* Returns an SR suitable for receiving crashdumps of this VM *) -let choose_crashdump_sr ~__context ~vm = +let choose_crashdump_sr ~__context ~vm = (* If the Pool.crashdump_SR exists, use that. Otherwise try the Host.crashdump_SR *) let pool = get_pool ~__context in let pool_sr = Db.Pool.get_crash_dump_SR ~__context ~self:pool in @@ -554,7 +552,7 @@ let choose_crashdump_sr ~__context ~vm = match check_sr_exists ~__context ~self:pool_sr, check_sr_exists ~__context ~self:host_sr with | Some x, _ -> x | _, Some x -> x - | None, None -> + | None, None -> raise (Api_errors.Server_error (Api_errors.vm_no_crashdump_sr, [Ref.string_of vm])) (* return the operations filtered for cancels functions *) @@ -562,7 +560,7 @@ let cancel_tasks ~__context ~ops ~all_tasks_in_db (* all tasks in database *) ~t let cancel_splitset_taskid set1 taskids = let su1 = ref [] and c = ref false in let into (e, _) l = List.mem e l in - (* If it's a task we want to explicitly cancel or a task which doesn't exist in the + (* If it's a task we want to explicitly cancel or a task which doesn't exist in the database at all then we should cancel it. *) List.iter (fun s1 -> if into s1 taskids || not(List.mem (Ref.of_string (fst s1)) all_tasks_in_db) then c := true else su1 := s1 :: !su1) set1; !su1, !c @@ -571,15 +569,15 @@ let cancel_tasks ~__context ~ops ~all_tasks_in_db (* all tasks in database *) ~t if got_common then set unique_ops -(** Returns true if the media is removable. +(** Returns true if the media is removable. Currently this just means "CD" but might change in future? 
*) let is_removable ~__context ~vbd = Db.VBD.get_type ~__context ~self:vbd = `CD (** Returns true if this SR is the XenSource Tools SR *) -let is_tools_sr ~__context ~sr = +let is_tools_sr ~__context ~sr = let other_config = Db.SR.get_other_config ~__context ~self:sr in (* Miami GA *) - List.mem_assoc Xapi_globs.tools_sr_tag other_config + List.mem_assoc Xapi_globs.tools_sr_tag other_config (* Miami beta2 and earlier: *) || (List.mem_assoc Xapi_globs.xensource_internal other_config) @@ -590,28 +588,28 @@ let is_valid_MAC mac = List.length l = 6 && (List.fold_left (fun acc s -> acc && String.length s = 2 && validchar s.[0] && validchar s.[1]) true l) (** Returns true if the supplied IP address looks like one of mine *) -let this_is_my_address address = +let this_is_my_address address = let inet_addrs = Netdev.Addr.get (Xapi_inventory.lookup Xapi_inventory._management_interface) in let addresses = List.map Unix.string_of_inet_addr (List.map fst inet_addrs) in List.mem address addresses (** Returns the list of hosts thought to be live *) -let get_live_hosts ~__context = +let get_live_hosts ~__context = let hosts = Db.Host.get_all ~__context in List.filter (fun self -> let metrics = Db.Host.get_metrics ~__context ~self in try Db.Host_metrics.get_live ~__context ~self:metrics with _ -> false) hosts (** Return the first IPv4 address we find for a hostname *) -let gethostbyname host = +let gethostbyname host = let throw_resolve_error() = failwith (Printf.sprintf "Couldn't resolve hostname: %s" host) in let he = try Unix.gethostbyname host with _ -> throw_resolve_error() in if Array.length he.Unix.h_addr_list = 0 then throw_resolve_error(); - Unix.string_of_inet_addr he.Unix.h_addr_list.(0) + Unix.string_of_inet_addr he.Unix.h_addr_list.(0) (** Indicate whether VM.clone should be allowed on suspended VMs *) -let clone_suspended_vm_enabled ~__context = +let clone_suspended_vm_enabled ~__context = try let pool = get_pool ~__context in let other_config = Db.Pool.get_other_config ~__context ~self:pool in @@ -634,17 +632,17 @@ let find_secondary_partition () = try let other_partition,_ = Forkhelpers.execute_command_get_output "/opt/xensource/libexec/find-partition" ["-p"; "alternate"] in (* Sanity check: does it exist? 
*) - let () = + let () = if not (Sys.file_exists other_partition) then raise (File_doesnt_exist other_partition) in other_partition with e -> debug "Cannot find secondary system image partition: %s" (Printexc.to_string e); - raise (Api_errors.Server_error(Api_errors.cannot_find_oem_backup_partition, + raise (Api_errors.Server_error(Api_errors.cannot_find_oem_backup_partition, [Printexc.to_string e])) -let call_script ?(log_successful_output=true) script args = +let call_script ?(log_successful_output=true) script args = try Unix.access script [ Unix.X_OK ]; (* Use the same $PATH as xapi *) @@ -652,7 +650,7 @@ let call_script ?(log_successful_output=true) script args = let output, _ = Forkhelpers.execute_command_get_output ~env script args in if log_successful_output then debug "%s %s succeeded [ output = '%s' ]" script (String.concat " " args) output; output - with + with | Unix.Unix_error _ as e -> debug "Assuming script %s doesn't exist: caught %s" script (ExnHelper.string_of_exn e); raise e @@ -661,12 +659,12 @@ let call_script ?(log_successful_output=true) script args = raise e (* Repeatedly bisect a range to find the maximum value for which the monotonic function returns true *) -let rec bisect f lower upper = +let rec bisect f lower upper = let ( /* ) = Int64.div and ( -* ) = Int64.sub and ( +* ) = Int64.add in assert (lower <= upper); - if upper -* lower < 2L + if upper -* lower < 2L then (if f upper then upper else lower) - else + else (* there must be a distinct midpoint integer *) let mid = (upper +* lower) /* 2L in assert ((lower < mid) && (mid < upper)); @@ -675,21 +673,21 @@ let rec bisect f lower upper = else bisect f lower mid (* All non best-effort VMs with always_run set should be kept running at all costs *) -let vm_should_always_run always_run restart_priority = +let vm_should_always_run always_run restart_priority = always_run && (restart_priority <> Constants.ha_restart_best_effort) (* Returns true if the specified VM is "protected" (non best-effort) by xHA *) -let is_xha_protected ~__context ~self = +let is_xha_protected ~__context ~self = vm_should_always_run (Db.VM.get_ha_always_run ~__context ~self) (Db.VM.get_ha_restart_priority ~__context ~self) let is_xha_protected_r record = vm_should_always_run record.API.vM_ha_always_run record.API.vM_ha_restart_priority open Listext -let subset a b = List.fold_left (fun acc x -> acc && (List.mem x b)) true a +let subset a b = List.fold_left (fun acc x -> acc && (List.mem x b)) true a (* Only returns true if the SR is marked as shared, all hosts have PBDs and all PBDs are currently_attached. 
Is used to prevent a non-shared disk being added to a protected VM *) -let is_sr_properly_shared ~__context ~self = +let is_sr_properly_shared ~__context ~self = let shared = Db.SR.get_shared ~__context ~self in if not shared then begin false @@ -709,9 +707,9 @@ let get_pif_underneath_vlan ~__context vlan_pif_ref = let vlan_rec = Db.PIF.get_VLAN_master_of ~__context ~self:vlan_pif_ref in Db.VLAN.get_tagged_PIF ~__context ~self:vlan_rec -(* Only returns true if the network is shared properly; it must either be fully virtual (no PIFs) or +(* Only returns true if the network is shared properly; it must either be fully virtual (no PIFs) or every host must have a currently_attached PIF *) -let is_network_properly_shared ~__context ~self = +let is_network_properly_shared ~__context ~self = let pifs = Db.Network.get_PIFs ~__context ~self in let plugged_pifs = List.filter (fun pif -> Db.PIF.get_currently_attached ~__context ~self:pif) pifs in let plugged_hosts = List.setify (List.map (fun pif -> Db.PIF.get_host ~__context ~self:pif) plugged_pifs) in @@ -723,7 +721,7 @@ let is_network_properly_shared ~__context ~self = (* || pifs = [] *) (* It's NOT ok to be fully virtual: see CA-20703. Change this in sync with assert_can_boot_here *) || not missing_pifs -let vm_assert_agile ~__context ~self = +let vm_assert_agile ~__context ~self = (* All referenced VDIs should be in shared SRs *) List.iter (fun vbd -> if not(Db.VBD.get_empty ~__context ~self:vbd) then begin @@ -741,7 +739,7 @@ let vm_assert_agile ~__context ~self = (Db.VM.get_VIFs ~__context ~self) (* Select an item from a list with a probability proportional to the items weight / total weight of all items *) -let weighted_random_choice weighted_items (* list of (item, integer) weight *) = +let weighted_random_choice weighted_items (* list of (item, integer) weight *) = let total_weight, acc' = List.fold_left (fun (total, acc) (x, weight) -> (total + weight), (x, total + weight) :: acc) (0, []) weighted_items in let cumulative = List.rev acc' in @@ -760,7 +758,7 @@ let loadavg () = (* Toggled by an explicit Host.disable call to prevent a master restart making us bounce back *) let user_requested_host_disable = ref false -let consider_enabling_host_nolock ~__context = +let consider_enabling_host_nolock ~__context = debug "Helpers.consider_enabling_host_nolock called"; (* If HA is enabled only consider marking the host as enabled if all the storage plugs in successfully. Disabled hosts are excluded from the HA planning calculations. 
Otherwise a host may boot, @@ -770,7 +768,7 @@ let consider_enabling_host_nolock ~__context = let localhost = get_localhost ~__context in let pbds = Db.Host.get_PBDs ~__context ~self:localhost in Xapi_local_pbd_state.resynchronise ~__context ~pbds; - let all_pbds_ok = List.fold_left (&&) true (List.map (fun self -> Db.PBD.get_currently_attached ~__context ~self) pbds) in + let all_pbds_ok = List.fold_left (&&) true (List.map (fun self -> Db.PBD.get_currently_attached ~__context ~self) pbds) in if not !user_requested_host_disable && (not ha_enabled || all_pbds_ok) then begin (* If we were in the middle of a shutdown or reboot with HA enabled but somehow we failed @@ -802,7 +800,7 @@ let consider_enabling_host_nolock ~__context = end (** Attempt to minimise the number of times we call consider_enabling_host_nolock *) -let consider_enabling_host = +let consider_enabling_host = At_least_once_more.make "consider_enabling_host" (fun () -> Server_helpers.exec_with_new_task "consider_enabling_host" @@ -811,13 +809,13 @@ let consider_enabling_host = let consider_enabling_host_request ~__context = At_least_once_more.again consider_enabling_host -let consider_enabling_host ~__context = +let consider_enabling_host ~__context = debug "Helpers.consider_enabling_host called"; consider_enabling_host_request ~__context -let local_storage_exists () = +let local_storage_exists () = (try ignore(Unix.stat (Xapi_globs.xapi_blob_location)); true - with _ -> false) + with _ -> false) let touch_file fname = try @@ -828,7 +826,7 @@ let touch_file fname = with | e -> (warn "Unable to touch ready file '%s': %s" fname (Printexc.to_string e)) -let vm_to_string __context vm = +let vm_to_string __context vm = let str = Ref.string_of vm in if not (Db.is_valid_ref __context vm) @@ -849,7 +847,7 @@ let vm_string_to_assoc vm_string = | SExpr.Node l -> List.map assoc_of_node l | _ -> raise (Api_errors.Server_error(Api_errors.invalid_value ,["Invalid vm_string"])) -let i_am_srmaster ~__context ~sr = +let i_am_srmaster ~__context ~sr = (* Assuming there is a plugged in PBD on this host then we are an 'srmaster' IFF: we are a pool master and this is a shared SR OR this is a non-shared SR *) @@ -870,7 +868,7 @@ let copy_snapshot_metadata rpc session_id ?lookup_table ~src_record ~dst_ref = ~transportable_snapshot_id:src_record.API.vM_transportable_snapshot_id (** Remove all entries in this table except the valid_keys *) -let remove_other_keys table valid_keys = +let remove_other_keys table valid_keys = let keys = Hashtbl.fold (fun k v acc -> k :: acc) table [] in List.iter (fun k -> if not (List.mem k valid_keys) then Hashtbl.remove table k) keys @@ -885,8 +883,8 @@ let update_vswitch_controller ~__context ~host = (Printexc.to_string e) (Db.Host.get_name_label ~__context ~self:host) -let set_vm_uncooperative ~__context ~self ~value = - let current_value = +let set_vm_uncooperative ~__context ~self ~value = + let current_value = let oc = Db.VM.get_other_config ~__context ~self in List.mem_assoc "uncooperative" oc && (bool_of_string (List.assoc "uncooperative" oc)) in if value <> current_value then begin From 0dd151b784fcd6987d75cdfe40ee507d78048cfa Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Tue, 22 Feb 2011 14:46:38 +0000 Subject: [PATCH 3/8] Add rolling pool upgrade assertions to start_on and resume_on at the message_forwarding layer. 
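During a rolling pool upgrade the pool temporarily contains hosts running
different product versions, so VM.start_on and VM.resume_on must refuse
hosts that are not yet at the pool's maximum version. Both call sites gain
the same guard before any forwarding is done; a minimal sketch of the
pattern (the wrapper name below is illustrative only):

    let assert_vm_placement_ok ~__context ~host =
      (* Only restrict placement while an upgrade is in progress. *)
      if Helpers.rolling_upgrade_in_progress ~__context then
        (* Raises NOT_SUPPORTED_DURING_UPGRADE if the host is below the
           maximum product version in the pool; see PATCH 1/8. *)
        Helpers.assert_host_has_highest_version_in_pool ~__context ~host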
Signed-off-by: Mike McClurg --- ocaml/xapi/message_forwarding.ml | 54 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 4b76819f3d9..3b860900711 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -1071,6 +1071,8 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct let start_on ~__context ~vm ~host ~start_paused ~force = info "VM.start_on: VM = '%s'; host '%s'" (vm_uuid ~__context vm) (host_uuid ~__context host); + if Helpers.rolling_upgrade_in_progress ~__context then + Helpers.assert_host_has_highest_version_in_pool ~__context ~host:host ; let local_fn = Local.VM.start_on ~vm ~host ~start_paused ~force in with_vm_operation ~__context ~self:vm ~doc:"VM.start_on" ~op:`start_on (fun () -> @@ -1361,31 +1363,33 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct ~priority:1L ~cls:`VM ~obj_uuid:uuid ~body:message_body) with _ -> ()); Monitor_rrds.push_rrd __context (Db.VM.get_uuid ~__context ~self:vm) - let resume_on ~__context ~vm ~host ~start_paused ~force = - info "VM.resume_on: VM = '%s'; host = '%s'" (vm_uuid ~__context vm) (host_uuid ~__context host); - let local_fn = Local.VM.resume_on ~vm ~host ~start_paused ~force in - with_vm_operation ~__context ~self:vm ~doc:"VM.resume_on" ~op:`resume_on - (fun () -> - with_vbds_marked ~__context ~vm ~doc:"VM.resume_on" ~op:`attach - (fun vbds -> - let snapshot = Helpers.get_boot_record ~__context ~self:vm in - reserve_memory_for_vm ~__context ~vm ~host ~snapshot ~host_op:`vm_resume - (fun () -> - do_op_on ~local_fn ~__context ~host - (fun session_id rpc -> Client.VM.resume_on rpc session_id vm host start_paused force)))); - update_vbd_operations ~__context ~vm; - update_vif_operations ~__context ~vm; - let uuid = Db.VM.get_uuid ~__context ~self:vm in - let message_body = - Printf.sprintf "VM '%s' resumed on host: %s (uuid: %s)" - (Db.VM.get_name_label ~__context ~self:vm) - (Db.Host.get_name_label ~__context ~self:host) - (Db.Host.get_uuid ~__context ~self:host) - in - (try ignore(Xapi_message.create ~__context ~name:Api_messages.vm_resumed - ~priority:1L ~cls:`VM ~obj_uuid:uuid ~body:message_body) with _ -> ()); - Monitor_rrds.push_rrd __context (Db.VM.get_uuid ~__context ~self:vm) - + let resume_on ~__context ~vm ~host ~start_paused ~force = + info "VM.resume_on: VM = '%s'; host = '%s'" (vm_uuid ~__context vm) (host_uuid ~__context host); + if Helpers.rolling_upgrade_in_progress ~__context then + Helpers.assert_host_has_highest_version_in_pool ~__context ~host:host ; + let local_fn = Local.VM.resume_on ~vm ~host ~start_paused ~force in + with_vm_operation ~__context ~self:vm ~doc:"VM.resume_on" ~op:`resume_on + (fun () -> + with_vbds_marked ~__context ~vm ~doc:"VM.resume_on" ~op:`attach + (fun vbds -> + let snapshot = Helpers.get_boot_record ~__context ~self:vm in + reserve_memory_for_vm ~__context ~vm ~host ~snapshot ~host_op:`vm_resume + (fun () -> + do_op_on ~local_fn ~__context ~host + (fun session_id rpc -> Client.VM.resume_on rpc session_id vm host start_paused force)))); + update_vbd_operations ~__context ~vm; + update_vif_operations ~__context ~vm; + let uuid = Db.VM.get_uuid ~__context ~self:vm in + let message_body = + Printf.sprintf "VM '%s' resumed on host: %s (uuid: %s)" + (Db.VM.get_name_label ~__context ~self:vm) + (Db.Host.get_name_label ~__context ~self:host) + (Db.Host.get_uuid ~__context ~self:host) + in + (try 
ignore(Xapi_message.create ~__context ~name:Api_messages.vm_resumed + ~priority:1L ~cls:`VM ~obj_uuid:uuid ~body:message_body) with _ -> ()); + Monitor_rrds.push_rrd __context (Db.VM.get_uuid ~__context ~self:vm) + let pool_migrate ~__context ~vm ~host ~options = info "VM.pool_migrate: VM = '%s'; host = '%s'" (vm_uuid ~__context vm) (host_uuid ~__context host); let local_fn = Local.VM.pool_migrate ~vm ~host ~options in From 2c4b348a8c0b025e30149b43eabf886c7ee9c4a9 Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Tue, 22 Feb 2011 14:53:04 +0000 Subject: [PATCH 4/8] Call the proper Helpers.get_localhost function. Signed-off-by: Mike McClurg --- ocaml/xapi/xapi_vm.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 2bdeae18540..e0f7fc6cc89 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -215,7 +215,7 @@ let unpause ~__context ~vm = let start ~__context ~vm ~start_paused:paused ~force = if Helpers.rolling_upgrade_in_progress ~__context then Helpers.assert_host_has_highest_version_in_pool ~__context - ~host:(Helpers.get_localhost_ref ~__context) ; + ~host:(Helpers.get_localhost ~__context) ; License_check.with_vm_license_check ~__context vm (fun () -> Local_work_queue.wait_in_line Local_work_queue.normal_vm_queue (Printf.sprintf "VM.start %s" (Context.string_of_task __context)) @@ -761,10 +761,10 @@ let suspend ~__context ~vm = ) () ) -let resume ~__context ~vm ~start_paused ~force = +let resume ~__context ~vm ~start_paused ~force = if Helpers.rolling_upgrade_in_progress ~__context then Helpers.assert_host_has_highest_version_in_pool ~__context - ~host:(Helpers.get_localhost_ref ~__context) ; + ~host:(Helpers.get_localhost ~__context) ; Local_work_queue.wait_in_line Local_work_queue.long_running_queue (Printf.sprintf "VM.resume %s" (Context.string_of_task __context)) (fun () -> From b0258add96288d9a37c4311ddf129dbc903db906 Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Tue, 22 Feb 2011 14:55:48 +0000 Subject: [PATCH 5/8] Fix whitespace. Signed-off-by: Mike McClurg --- ocaml/xapi/xapi_vm.ml | 206 +++++++++++++++++++++--------------------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index e0f7fc6cc89..6dbb18881c4 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -33,7 +33,7 @@ exception InvalidOperation of string let assert_operation_valid = Xapi_vm_lifecycle.assert_operation_valid -let update_allowed_operations ~__context ~self = +let update_allowed_operations ~__context ~self = Helpers.log_exn_continue "updating allowed operations of VBDs/VIFs/VDIs in VM.update_allowed_operations" (fun () -> List.iter @@ -49,13 +49,13 @@ let update_allowed_operations ~__context ~self = (Db.VM.get_VIFs ~__context ~self) ) (); Xapi_vm_lifecycle.update_allowed_operations ~__context ~self - -let assert_can_boot_here ~__context ~self ~host = + +let assert_can_boot_here ~__context ~self ~host = let snapshot = Db.VM.get_record ~__context ~self in assert_can_boot_here ~__context ~self ~host ~snapshot - -let retrieve_wlb_recommendations ~__context ~vm = + +let retrieve_wlb_recommendations ~__context ~vm = let snapshot = Db.VM.get_record ~__context ~self:vm in retrieve_wlb_recommendations ~__context ~vm ~snapshot @@ -66,24 +66,24 @@ let immediate_complete ~__context = Helpers.progress ~__context (0.0 -. 
1.0) (* API *) -let set_actions_after_shutdown ~__context ~self ~value = +let set_actions_after_shutdown ~__context ~self ~value = Db.VM.set_actions_after_shutdown ~__context ~self ~value -let set_actions_after_reboot ~__context ~self ~value = +let set_actions_after_reboot ~__context ~self ~value = Db.VM.set_actions_after_reboot ~__context ~self ~value -let set_actions_after_crash ~__context ~self ~value = +let set_actions_after_crash ~__context ~self ~value = set_actions_after_crash ~__context ~self ~value -let set_is_a_template ~__context ~self ~value = +let set_is_a_template ~__context ~self ~value = set_is_a_template ~__context ~self ~value -let validate_restart_priority include_empty_string x = +let validate_restart_priority include_empty_string x = if not(List.mem x (Constants.ha_valid_restart_priorities @ (if include_empty_string then [ "" ] else []))) then raise (Api_errors.Server_error(Api_errors.invalid_value, [ "ha_restart_priority"; x ])) -let set_ha_always_run ~__context ~self ~value = +let set_ha_always_run ~__context ~self ~value = let current = Db.VM.get_ha_always_run ~__context ~self in let prio = Db.VM.get_ha_restart_priority ~__context ~self in debug "set_ha_always_run current=%b value=%b" current value; - if not current && value then begin + if not current && value then begin if prio <> Constants.ha_restart_best_effort then Xapi_ha_vm_failover.assert_new_vm_preserves_ha_plan ~__context self; validate_restart_priority false prio @@ -105,29 +105,29 @@ let assert_ha_always_run_is_true ~__context ~vm = let assert_ha_always_run_is_false ~__context ~vm = set_ha_always_run ~__context ~self:vm ~value:false -let set_ha_restart_priority ~__context ~self ~value = +let set_ha_restart_priority ~__context ~self ~value = let ha_always_run = Db.VM.get_ha_always_run ~__context ~self in validate_restart_priority (not ha_always_run) value; let current = Db.VM.get_ha_restart_priority ~__context ~self in if true && ha_always_run - && current = Constants.ha_restart_best_effort + && current = Constants.ha_restart_best_effort && value <> Constants.ha_restart_best_effort then begin Xapi_ha_vm_failover.assert_new_vm_preserves_ha_plan ~__context self; let pool = Helpers.get_pool ~__context in if Db.Pool.get_ha_enabled ~__context ~self:pool then let (_: bool) = Xapi_ha_vm_failover.update_pool_status ~__context in () end; - - if current <> value + + if current <> value then Db.VM.set_ha_restart_priority ~__context ~self ~value let compute_memory_overhead = compute_memory_overhead open Xapi_vm_memory_constraints -let set_memory_static_range ~__context ~self ~min ~max = +let set_memory_static_range ~__context ~self ~min ~max = (* Called on the master only when the VM is offline *) if Db.VM.get_power_state ~__context ~self <> `Halted then failwith "assertion_failed: set_memory_static_range should only be \ @@ -173,23 +173,23 @@ let set_memory_limits ~__context ~self update_memory_overhead ~__context ~vm:self (* CA-12940: sanity check to make sure this never happens again *) -let assert_power_state_is ~__context ~vm ~expected = +let assert_power_state_is ~__context ~vm ~expected = let actual = Db.VM.get_power_state ~__context ~self:vm in - if actual <> expected - then raise (Api_errors.Server_error(Api_errors.vm_bad_power_state, - [ Ref.string_of vm; - Record_util.power_to_string expected; + if actual <> expected + then raise (Api_errors.Server_error(Api_errors.vm_bad_power_state, + [ Ref.string_of vm; + Record_util.power_to_string expected; Record_util.power_to_string actual ])) (* If HA is enabled on 
the Pool and the VM is marked as always_run then block the action *) -let assert_not_ha_protected ~__context ~vm = +let assert_not_ha_protected ~__context ~vm = let pool = Helpers.get_pool ~__context in let always_run = Db.VM.get_ha_always_run ~__context ~self:vm in let priority = Db.VM.get_ha_restart_priority ~__context ~self:vm in if Db.Pool.get_ha_enabled ~__context ~self:pool && (Helpers.vm_should_always_run always_run priority) then raise (Api_errors.Server_error(Api_errors.vm_is_protected, [ Ref.string_of vm ])) -let pause_already_locked ~__context ~vm = +let pause_already_locked ~__context ~vm = let domid = Helpers.domid_of_vm ~__context ~self:vm in with_xc (fun xc -> Domain.pause ~xc domid); @@ -247,7 +247,7 @@ let start ~__context ~vm ~start_paused:paused ~force = ~progress_cb:(TaskHelper.set_progress ~__context) ~pcidevs:None ~__context ~vm ~snapshot; delete_guest_metrics ~__context ~self:vm; - let localhost = Helpers.get_localhost ~__context in + let localhost = Helpers.get_localhost ~__context in Helpers.call_api_functions ~__context (fun rpc session_id -> Client.VM.atomic_set_resident_on rpc session_id vm localhost); @@ -269,13 +269,13 @@ let start ~__context ~vm ~start_paused:paused ~force = (** For VM.start_on and VM.resume_on the message forwarding layer should only forward here if 'host' = localhost *) -let assert_host_is_localhost ~__context ~host = +let assert_host_is_localhost ~__context ~host = let localhost = Helpers.get_localhost ~__context in if host <> localhost then let msg = "Error in message forwarding layer: host parameter was not localhost" in raise (Api_errors.Server_error (Api_errors.internal_error, [ msg ])) -let start_on ~__context ~vm ~host ~start_paused ~force = +let start_on ~__context ~vm ~host ~start_paused ~force = (* If we modify this to support start_on other-than-localhost, insert a precheck to insure that we're starting on an appropriately versioned host during an upgrade, as per @@ -284,7 +284,7 @@ let start_on ~__context ~vm ~host ~start_paused ~force = start ~__context ~vm ~start_paused ~force module TwoPhase = struct - (* Reboots and shutdowns come in two phases: + (* Reboots and shutdowns come in two phases: in_guest: where the domain is asked to shutdown quietly in_dom0: where the domain is blown away and, in the case of reboot, recreated. We wish to serialise only the dom0 part of these operations. @@ -317,7 +317,7 @@ module TwoPhase = struct running)) (** Called before a regular synchronous reboot/shutdown to simulate parallel in-guest shutdowns *) - let simulate_internal_shutdown domid = + let simulate_internal_shutdown domid = Helpers.log_exn_continue (Printf.sprintf "simulate_internal_shutdown domid=%d" domid) (fun () -> match Xapi_fist.simulate_internal_shutdown () with @@ -350,7 +350,7 @@ module Reboot = struct TwoPhase.simulate_internal_shutdown domid; debug "%s Reboot.in_guest domid=%d clean=%b" api_call_name domid clean; - (* NB a parallel internal halt may leave the domid as -1. If so then there's no work for us + (* NB a parallel internal halt may leave the domid as -1. If so then there's no work for us to do here. *) if domid <> -1 then begin if clean then begin @@ -373,7 +373,7 @@ module Reboot = struct (with_xs (fun xs -> xs.Xs.write (Hotplug.get_private_path domid ^ "/" ^ Xapi_globs.artificial_reboot_delay) "0")); (* The domain might be killed by the event thread. Again, this is ok. 
*) Helpers.log_exn_continue (Printf.sprintf "Xc.domain_shutdown domid=%d Xc.Reboot" domid) - (fun () -> + (fun () -> with_xc (fun xc -> Xc.domain_shutdown xc domid Xc.Reboot) ) () end @@ -394,12 +394,12 @@ module Reboot = struct the current and new values in the current_snapshot. Just in case someone raced with us and bumped the static_max *again* we cap it to the reserved value. *) - let new_mem = - if new_snapshot.API.vM_memory_static_max > current_snapshot.API.vM_memory_static_max + let new_mem = + if new_snapshot.API.vM_memory_static_max > current_snapshot.API.vM_memory_static_max then current_snapshot.API.vM_memory_static_max (* reserved value *) else new_snapshot.API.vM_memory_static_max (* new value is smaller *) in let new_snapshot = { new_snapshot with API.vM_memory_static_max = new_mem } in - + (* Before we destroy the old domain we check which PCI devices were plugged in *) let pcidevs = with_xc_and_xs (fun xc xs -> Device.PCI.list xc xs domid) in debug "Listed PCI devices: [ %s ]" (String.concat ", " (List.map (fun (x, dev) -> string_of_int x ^ "/" ^ (Device.PCI.to_string dev)) pcidevs)); @@ -409,22 +409,22 @@ module Reboot = struct (* CA-13585: prevent glitch where power-state goes to Halted in the middle of a reboot. If an error causes us to leave this function then the event thread should resynchronise the VM record properly. *) - + (* Make sure the monitoring stuff doesn't send back the RRD to the master if we're rebooting *) let uuid = current_snapshot.API.vM_uuid in Mutex.execute Monitor.lock (fun () -> Monitor.rebooting_vms := Rrd_shared.StringSet.add uuid !Monitor.rebooting_vms); - + Xapi_hooks.vm_pre_destroy ~__context ~reason:(if clean then Xapi_hooks.reason__clean_reboot else Xapi_hooks.reason__hard_reboot) ~vm; debug "Destroying domain..."; with_xc_and_xs (fun xc xs -> Vmops.destroy ~__context ~xc ~xs ~self:vm ~clear_currently_attached:false domid `Running); - + (* At this point the domain has been destroyed but the VM is still marked as Running. If any error occurs then we must remember to clean everything up... *) - + (* Set the new boot record *) debug "Setting boot record"; Helpers.set_boot_record ~__context ~self:vm new_snapshot; - + debug "%s phase 2/3: starting new domain" api_call_name; begin try @@ -437,16 +437,16 @@ module Reboot = struct Db.VM.set_power_state ~__context ~self:vm ~value:`Halted; raise e end; - + Mutex.execute Monitor.lock (fun () -> Monitor.rebooting_vms := Rrd_shared.StringSet.remove uuid !Monitor.rebooting_vms); - + (* NB domid will be fresh *) let domid = Helpers.domid_of_vm ~__context ~self:vm in - + try delete_guest_metrics ~__context ~self:vm; debug "%s phase 3/3: unpausing new domain (domid %d)" api_call_name domid; - with_xc_and_xs (fun xc xs -> + with_xc_and_xs (fun xc xs -> Domain.unpause ~xc domid; ); Db.VM.set_resident_on ~__context ~self:vm ~value:localhost; @@ -455,19 +455,19 @@ module Reboot = struct with exn -> error "Caught exception during %s: %s" api_call_name (ExnHelper.string_of_exn exn); with_xc_and_xs (fun xc xs -> Vmops.destroy ~__context ~xc ~xs ~self:vm domid `Halted); - raise exn + raise exn ) (** In the synchronous API call paths, acquire the VM lock and see if the VM hasn't rebooted yet. If necessary we reboot it here. 
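   (If the domain is found running again once the lock is held, a parallel
   reboot has already completed and no further action is taken.)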
*) - let in_dom0_already_queued args = - Locking_helpers.with_lock args.TwoPhase.vm - (fun _ _ -> + let in_dom0_already_queued args = + Locking_helpers.with_lock args.TwoPhase.vm + (fun _ _ -> if TwoPhase.is_vm_running args then debug "VM %s has already rebooted: taking no action" (Ref.string_of args.TwoPhase.vm) else in_dom0_already_locked args) () - (** In the synchronouse API call paths, wait in the domU_internal_shutdown_queue and then attempt + (** In the synchronouse API call paths, wait in the domU_internal_shutdown_queue and then attempt to reboot the VM. NB this is the same queue used by the event thread. *) let in_dom0 args = Local_work_queue.wait_in_line Local_work_queue.domU_internal_shutdown_queue @@ -488,12 +488,12 @@ module Shutdown = struct TwoPhase.simulate_internal_shutdown domid; debug "%s Shutdown.in_guest domid=%d clean=%b" api_call_name domid clean; - (* NB a parallel internal halt may leave the domid as -1. If so then there's no work for us + (* NB a parallel internal halt may leave the domid as -1. If so then there's no work for us to do here. *) if domid <> -1 then begin if clean then begin debug "%s: phase 1/2: waiting for the domain to shutdown" api_call_name; - + match with_xal (fun xal -> Vmops.clean_shutdown_with_reason ~xal ~at:(TaskHelper.set_progress ~__context) ~__context ~self:vm domid Domain.Halt) with @@ -514,7 +514,7 @@ module Shutdown = struct debug "%s phase 0/3: no shutdown request required since this is a hard_shutdown" api_call_name; (* The domain might be killed by the event thread. Again, this is ok. *) Helpers.log_exn_continue (Printf.sprintf "Xc.domain_shutdown domid=%d Xc.Halt" domid) - (fun () -> + (fun () -> debug "Xc.domain_shutdown domid=%d Halt" domid; with_xc (fun xc -> Xc.domain_shutdown xc domid Xc.Halt) ) () @@ -529,13 +529,13 @@ module Shutdown = struct let domid = Helpers.domid_of_vm ~__context ~self:vm in debug "%s Shutdown.in_dom0_already_locked domid=%d" api_call_name domid; if domid <> -1 then begin - with_xc_and_xs + with_xc_and_xs (fun xc xs -> let di = Xc.domain_getinfo xc domid in (* If someone rebooted it while we dropped the lock: *) if Xal.is_running di then raise (Api_errors.Server_error(Api_errors.other_operation_in_progress, [ "VM"; Ref.string_of vm ])); - + (* Invoke pre_destroy hook *) Xapi_hooks.vm_pre_destroy ~__context ~reason:(if clean then Xapi_hooks.reason__clean_shutdown else Xapi_hooks.reason__hard_shutdown) ~vm; debug "%s: phase 2/2: destroying old domain (domid %d)" api_call_name domid; @@ -550,7 +550,7 @@ module Shutdown = struct if Db.VM.get_power_state ~__context ~self:vm = `Suspended then begin debug "hard_shutdown: destroying any suspend VDI"; - + let vdi = Db.VM.get_suspend_VDI ~__context ~self:vm in if vdi <> Ref.null (* avoid spurious but scary messages *) then Helpers.log_exn_continue @@ -564,14 +564,14 @@ module Shutdown = struct (** In the synchronous API call paths, acquire the lock, check if the VM's domain has shutdown (if not error out) and continue with the shutdown *) - let in_dom0_already_queued args = - Locking_helpers.with_lock args.TwoPhase.vm - (fun _ _ -> + let in_dom0_already_queued args = + Locking_helpers.with_lock args.TwoPhase.vm + (fun _ _ -> if TwoPhase.is_vm_running args then raise (Api_errors.Server_error(Api_errors.other_operation_in_progress, [ "VM"; Ref.string_of args.TwoPhase.vm ])) else in_dom0_already_locked args) () - (** In the synchronouse API call paths, wait in the domU_internal_shutdown_queue and then attempt + (** In the synchronouse API call paths, wait in the 
domU_internal_shutdown_queue and then attempt to reboot the VM. NB this is the same queue used by the event thread. *) let in_dom0 args = Local_work_queue.wait_in_line Local_work_queue.domU_internal_shutdown_queue @@ -587,16 +587,16 @@ let of_action = function | `destroy -> Shutdown.actions (** If our operation conflicts with another parallel operation (i.e. we ask for shutdown - but guest admin asks for reboot) then we raise an OTHER_OPERATION_IN_PROGRESS exception + but guest admin asks for reboot) then we raise an OTHER_OPERATION_IN_PROGRESS exception and retry the whole procedure. *) let retry_on_conflict (x: TwoPhase.args) (y: TwoPhase.t) = - let rec retry n = - try + let rec retry n = + try y.TwoPhase.in_guest x; if Xapi_fist.disable_sync_lifecycle_path () then warn "FIST: disable_sync_lifecycle_path: deferring to the event thread" else y.TwoPhase.in_dom0 x - with + with | Api_errors.Server_error(code, _) as e when code = Api_errors.other_operation_in_progress -> let aborting = n < 1 in debug "Conflict when executing %s: %s" x.TwoPhase.api_call_name (if aborting then "aborting" else "retrying"); @@ -604,10 +604,10 @@ let retry_on_conflict (x: TwoPhase.args) (y: TwoPhase.t) = Thread.delay 5.; retry (n - 1) in retry 10 - + (** CA-11132: Record information about the shutdown in odd other-config keys for Egenera *) -let record_shutdown_details ~__context ~vm reason initiator action = +let record_shutdown_details ~__context ~vm reason initiator action = let replace_other_config_key ~__context ~vm k v = begin try Db.VM.remove_from_other_config ~__context ~self:vm ~key:k @@ -622,7 +622,7 @@ let record_shutdown_details ~__context ~vm reason initiator action = replace_other_config_key ~__context ~vm "last_shutdown_action" action'; replace_other_config_key ~__context ~vm "last_shutdown_time" (Date.to_string (Date.of_float (Unix.gettimeofday()))); info "VM %s shutdown initiated %sly; actions_after[%s] = %s" vm' initiator reason' action' - + (** VM.hard_reboot entrypoint *) let hard_reboot ~__context ~vm = let action = Db.VM.get_actions_after_reboot ~__context ~self:vm in @@ -675,7 +675,7 @@ let power_state_reset ~__context ~vm = error "VM.power_state_reset vm=%s blocked because VM is a control domain" (Ref.string_of vm); raise (Api_errors.Server_error(Api_errors.cannot_reset_control_domain, [ Ref.string_of vm ])); end; - (* Perform sanity checks if VM is Running or Paused since we don't want to + (* Perform sanity checks if VM is Running or Paused since we don't want to lose track of running domains. *) let power_state = Db.VM.get_power_state ~__context ~self:vm in if power_state = `Running || power_state = `Paused then begin @@ -694,7 +694,7 @@ let power_state_reset ~__context ~vm = if domid = -1L then None else (try Some (with_xc (fun xc -> Xc.domain_getinfo xc (Int64.to_int domid))) with e -> - debug "VM.power_state_reset vm=%s caught %s: assuming domain doesn't exist" + debug "VM.power_state_reset vm=%s caught %s: assuming domain doesn't exist" (Ref.string_of vm) (ExnHelper.string_of_exn e); None) end else None in @@ -712,17 +712,17 @@ let power_state_reset ~__context ~vm = (* No domain found so this is ok *) () end else begin - (* If resident on another host, check if that host is alive: if so + (* If resident on another host, check if that host is alive: if so then refuse to perform the reset, since we have delegated state management to this host and we trust it -- this call is intended for coping with - host failures and backup restores, not for working around agent bugs. 
+ host failures and backup restores, not for working around agent bugs. If the host agent software is malfunctioning, then it should be restarted (via Host.restart_agent or 'service xapi restart') *) debug "VM.power_state_reset vm=%s resident_on<>localhost; checking liveness of remote host" (Ref.string_of vm); if Xapi_host.is_host_alive ~__context ~host:resident then begin error "VM.power_state_reset vm=%s resident_on=%s; host is alive so refusing to reset power-state" (Ref.string_of vm) (Ref.string_of resident); - raise (Api_errors.Server_error(Api_errors.host_is_live, [ Ref.string_of resident ])) + raise (Api_errors.Server_error(Api_errors.host_is_live, [ Ref.string_of resident ])) end end end; @@ -748,7 +748,7 @@ let suspend ~__context ~vm = (* Call the memory image creating 90%, *) (* the device un-hotplug the final 10% *) Vmops.suspend ~__context ~xc ~xs ~vm ~live:false - ~progress_cb:(fun x -> + ~progress_cb:(fun x -> TaskHelper.set_progress ~__context (x *. 0.9) ); @@ -894,7 +894,7 @@ let create ~__context ~protection_policy ~is_snapshot_from_vmpp -let destroy ~__context ~self = +let destroy ~__context ~self = let parent = Db.VM.get_parent ~__context ~self in (* rebase the children *) @@ -909,7 +909,7 @@ let destroy ~__context ~self = lock on a specific pool host and is used to manage contention between API threads and the event monitoring thread on live VMs. Since clone does not deal with live VMs we ommit lock_vm. *) -let clone ~__context ~vm ~new_name = +let clone ~__context ~vm ~new_name = TaskHelper.set_cancellable ~__context; (* !!! Note - please do not be tempted to put this on the "long_running_queue", even though it may be long running.. XenRT relies on fast clones being parallelizable wrt other long-running ops such as @@ -920,7 +920,7 @@ let clone ~__context ~vm ~new_name = let new_vm = Xapi_vm_clone.clone Xapi_vm_clone.Disk_op_clone ~__context ~vm ~new_name in if Db.VM.get_is_a_snapshot ~__context ~self:vm && Db.VM.get_power_state ~__context ~self:new_vm <> `Halted then hard_shutdown ~__context ~vm:new_vm; - new_vm + new_vm (* We do call wait_in_line for snapshot and snapshot_with_quiesce because the locks are taken at *) (* the VBD level (with pause/unpause mechanism *) @@ -939,28 +939,28 @@ let snapshot_with_quiesce ~__context ~vm ~new_name = (* revert too is still valid. *) let revert ~__context ~snapshot = let vm = Db.VM.get_snapshot_of ~__context ~self:snapshot in - let vm = - if Db.is_valid_ref __context vm + let vm = + if Db.is_valid_ref __context vm then vm else Xapi_vm_snapshot.create_vm_from_snapshot ~__context ~snapshot in Xapi_vm_snapshot.revert ~__context ~snapshot ~vm - + (* As the checkpoint operation modify the domain state, we take the vm_lock to do not let the event *) (* thread mess around with that. *) let checkpoint ~__context ~vm ~new_name = if not (Pool_features.is_enabled ~__context Features.Checkpoint) then raise (Api_errors.Server_error(Api_errors.license_restriction, [])) else begin - Local_work_queue.wait_in_line Local_work_queue.long_running_queue + Local_work_queue.wait_in_line Local_work_queue.long_running_queue (Printf.sprintf "VM.checkpoint %s" (Context.string_of_task __context)) (fun () -> TaskHelper.set_cancellable ~__context; - Locking_helpers.with_lock vm + Locking_helpers.with_lock vm (fun token () -> Xapi_vm_snapshot.checkpoint ~__context ~vm ~new_name) () ) end - + let copy ~__context ~vm ~new_name ~sr = (* See if the supplied SR is suitable: it must exist and be a non-ISO SR *) (* First the existence check. 
It's not an error to not exist at all. *) @@ -968,7 +968,7 @@ let copy ~__context ~vm ~new_name ~sr = maybe (fun sr -> debug "Copying disks to SR: %s" (Db.SR.get_uuid ~__context ~self:sr)) sr; (* Second the non-iso check. It is an error to be an iso SR *) maybe (fun sr -> if Db.SR.get_content_type ~__context ~self:sr = "iso" - then raise (Api_errors.Server_error(Api_errors.operation_not_allowed, + then raise (Api_errors.Server_error(Api_errors.operation_not_allowed, [ "Cannot copy a VM's disks to an ISO SR" ]))) sr; Local_work_queue.wait_in_line Local_work_queue.long_running_queue (Printf.sprintf "VM.copy %s" (Context.string_of_task __context)) @@ -979,7 +979,7 @@ let copy ~__context ~vm ~new_name ~sr = new_vm ) -let provision ~__context ~vm = +let provision ~__context ~vm = Local_work_queue.wait_in_line Local_work_queue.long_running_queue (Printf.sprintf "VM.provision %s" (Context.string_of_task __context)) (fun () -> @@ -994,20 +994,20 @@ let provision ~__context ~vm = debug "install: phase 1/3: creating VBDs and VDIs"; let script, vbds = Xapi_templates.pre_install rpc session_id vm in (* If an error occurs after this then delete the created VDIs, VBDs... *) - begin + begin try debug "install: phase 2/3: running optional script (in domain 0)"; let dom0 = Helpers.get_domain_zero __context in - Xapi_templates_install.post_install_script rpc session_id __context dom0 vm (script, vbds); + Xapi_templates_install.post_install_script rpc session_id __context dom0 vm (script, vbds); debug "install: phase 3/3: removing install information from VM"; Xapi_templates.post_install rpc session_id vm; debug "finished install"; with e -> (* On error delete the VBDs and their associated VDIs *) let vdis = List.map (fun self -> Client.VBD.get_VDI rpc session_id self) vbds in - List.iter (Helpers.log_exn_continue "deleting auto-provisioned VBD" - (fun self -> Client.VBD.destroy rpc session_id self)) vbds; - List.iter (Helpers.log_exn_continue "deleting auto-provisioned VDI" + List.iter (Helpers.log_exn_continue "deleting auto-provisioned VBD" + (fun self -> Client.VBD.destroy rpc session_id self)) vbds; + List.iter (Helpers.log_exn_continue "deleting auto-provisioned VDI" (fun self -> Client.VDI.destroy rpc session_id self)) vdis; raise e end @@ -1085,7 +1085,7 @@ let wait_memory_target_live ~__context ~self = Locking_helpers.with_lock self (f wait_memory_target_live ~__context ~self () ) () -let get_cooperative ~__context ~self = +let get_cooperative ~__context ~self = (* If the VM is not supposed to be capable of ballooning then return true *) let vm_r = Db.VM.get_record ~__context ~self in if not(Helpers.ballooning_enabled_for_vm ~__context vm_r) then begin @@ -1115,8 +1115,8 @@ let send_trigger ~__context ~vm ~trigger = Locking_helpers.with_lock vm (fun tok send_trigger ~__context ~vm ~trigger ) () -let get_boot_record ~__context ~self = Locking_helpers.with_lock self (fun token () -> - Helpers.get_boot_record ~__context ~self +let get_boot_record ~__context ~self = Locking_helpers.with_lock self (fun token () -> + Helpers.get_boot_record ~__context ~self ) () let get_data_sources ~__context ~self = Monitor_rrds.query_possible_vm_dss (Db.VM.get_uuid ~__context ~self) @@ -1144,27 +1144,27 @@ let reserve_vbds_and_vifs ~__context ~vm f = let vifs = Db.VM.get_VIFs ~__context ~self:vm in let vifs = List.filter (fun self -> try Db.VIF.get_currently_attached ~__context ~self with _ -> false) vifs in - let set_all value = + let set_all value = List.iter (fun self -> try Db.VBD.set_reserved ~__context ~self 
~value with _ -> ()) vbds; List.iter (fun self -> try Db.VIF.set_reserved ~__context ~self ~value with _ -> ()) vifs in set_all true; finally f (fun () -> set_all false) (* Undocumented Rio message, deprecated in favour of standard VM.clone *) -let csvm ~__context ~vm = - Xapi_vm_clone.clone ~__context Xapi_vm_clone.Disk_op_clone ~vm +let csvm ~__context ~vm = + Xapi_vm_clone.clone ~__context Xapi_vm_clone.Disk_op_clone ~vm ~new_name:(Db.VM.get_name_label ~__context ~self:vm ^ "-cloned-suspended") (* XXX: NOT IN RIO *) -(** Return the largest possible static-max setting which will fit in a given amount of +(** Return the largest possible static-max setting which will fit in a given amount of free physical memory. If 'approximate' is true then we return a more conservative value which allows for the number of vCPUs to be changed (for example). NB function is related to Vmops.check_enough_memory. *) -let maximise_memory ~__context ~self ~total ~approximate = +let maximise_memory ~__context ~self ~total ~approximate = (* If being conservative then round the vcpus up to 64 and assume HVM (worst case) *) - let vcpus = + let vcpus = if approximate then 64 else Int64.to_int (Db.VM.get_VCPUs_max ~__context ~self) in @@ -1175,7 +1175,7 @@ let maximise_memory ~__context ~self ~total ~approximate = let shadow_multiplier = Db.VM.get_HVM_shadow_multiplier ~__context ~self in (* Need to find the maximum input value to this function so that it still evaluates to true *) - let will_fit static_max = + let will_fit static_max = let mem_kib = static_max /* 1024L in debug "Checking: mem_kib=%Ld" mem_kib; debug " total =%Ld" total; @@ -1194,16 +1194,16 @@ let create_new_blob ~__context ~vm ~name ~mime_type = let blob = Xapi_blob.create ~__context ~mime_type in Db.VM.add_to_blobs ~__context ~self:vm ~key:name ~value:blob; blob - + let s3_suspend ~__context ~vm = (* XXX: TODO: monitor the guest's response; track the s3 state *) - Locking_helpers.with_lock vm (fun _ () -> + Locking_helpers.with_lock vm (fun _ () -> let domid = Helpers.domid_of_vm ~__context ~self:vm in with_xs (fun xs -> Domain.shutdown ~xs domid Domain.S3Suspend)) () - + let s3_resume ~__context ~vm = (* XXX: TODO: monitor the guest's response; track the s3 state *) - Locking_helpers.with_lock vm (fun _ () -> + Locking_helpers.with_lock vm (fun _ () -> let domid = Helpers.domid_of_vm ~__context ~self:vm in with_xc (fun xc -> Domain.send_s3resume ~xc domid)) () @@ -1220,8 +1220,8 @@ let copy_bios_strings ~__context ~vm ~host = (* also set the affinity field to push the VM to start on this host *) Db.VM.set_affinity ~__context ~self:vm ~value:host end - -let set_protection_policy ~__context ~self ~value = + +let set_protection_policy ~__context ~self ~value = if Db.VM.get_is_control_domain ~__context ~self then ( (* do not assign vmpps to the dom0 vm of any host in the pool *) raise (Api_errors.Server_error(Api_errors.invalid_value, [Ref.string_of value])) From 95a93cf055b534b59cfc61dbe68de97e05e15a4c Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Tue, 22 Feb 2011 14:57:07 +0000 Subject: [PATCH 6/8] Call appropriate Helpers.get_localhost function.
Signed-off-by: Mike McClurg --- ocaml/xapi/xapi_vm_migrate.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index 5516db79f63..8d8779461b4 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -712,7 +712,7 @@ let pool_migrate ~__context ~vm ~host ~options = (* Migration is only allowed to a host of equal or greater versions. *) if Helpers.rolling_upgrade_in_progress ~__context then Helpers.assert_host_versions_not_decreasing ~__context - ~host_from:(Helpers.get_localhost_ref ~__context) + ~host_from:(Helpers.get_localhost ~__context) ~host_to:host ; Local_work_queue.wait_in_line Local_work_queue.long_running_queue (Printf.sprintf "VM.pool_migrate %s" (Context.string_of_task __context)) From 4996e4f0f46a8fd55d21effedde287adcc542c67 Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Tue, 22 Feb 2011 14:58:12 +0000 Subject: [PATCH 7/8] Remove old-versioned hosts from the list of candidate hosts to receive a newly started/resumed vm. Signed-off-by: Mike McClurg --- ocaml/xapi/xapi_vm_helpers.ml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index b1b4904d559..195cb976320 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -486,7 +486,12 @@ let vm_can_run_on_host __context vm snapshot host = let host_can_run_vm () = assert_can_boot_here_no_memcheck ~__context ~self:vm ~host ~snapshot; true in - try host_enabled () && host_live () && host_can_run_vm () + let host_has_good_version () = + if Helpers.rolling_upgrade_in_progress ~__context + then Helpers.host_has_highest_version_in_pool ~__context ~host:host + else true in + try host_enabled () && host_live () && + host_can_run_vm () && host_has_good_version () with _ -> false (** Selects a single host from the set of all hosts on which the given [vm] From 6f33b865b62c9f6502370b7f74d790234cfd8cc5 Mon Sep 17 00:00:00 2001 From: Mike McClurg Date: Wed, 23 Feb 2011 11:49:29 +0000 Subject: [PATCH 8/8] Ignore WLB during rolling pool upgrade. Signed-off-by: Mike McClurg Acked-by: Jonathan Knowles --- ocaml/xapi/xapi_vm_helpers.ml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 195cb976320..7a030a329cd 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -515,11 +515,13 @@ let choose_host_uses_wlb ~__context = (** Given a virtual machine, returns a host it can boot on, giving *) (** priority to an affinity host if one is present. WARNING: called *) (** while holding the global lock from the message forwarding layer.
*) -let choose_host_for_vm ~__context ~vm ~snapshot = - if choose_host_uses_wlb ~__context then +let choose_host_for_vm ~__context ~vm ~snapshot = + if (choose_host_uses_wlb ~__context) + && not (Helpers.rolling_upgrade_in_progress ~__context) + then try let rec filter_and_convert recs = - match recs with + match recs with | (h, recom) :: tl -> begin debug "\n%s\n" (String.concat ";" recom); @@ -533,7 +535,7 @@ let choose_host_for_vm ~__context ~vm ~snapshot = end | [] -> [] in - begin + begin let all_hosts = (List.sort (fun (h, s, r) (h', s', r') -> @@ -572,7 +574,7 @@ let choose_host_for_vm ~__context ~vm ~snapshot = choose_host_for_vm_no_wlb ~__context ~vm ~snapshot end with - | Api_errors.Server_error(error_type, error_detail) -> + | Api_errors.Server_error(error_type, error_detail) -> debug "Encountered error when using wlb for choosing host \ \"%s: %s\". Using original algorithm" error_type @@ -593,7 +595,7 @@ let choose_host_for_vm ~__context ~vm ~snapshot = with _ -> () end; choose_host_for_vm_no_wlb ~__context ~vm ~snapshot - | Failure "float_of_string" -> + | Failure "float_of_string" -> debug "Star ratings from wlb could not be parsed to floats. \ Using original algorithm"; choose_host_for_vm_no_wlb ~__context ~vm ~snapshot
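
The maximise_memory hunk above searches for the largest static-max value for which will_fit still evaluates to true. Since a larger static-max can only demand more memory, will_fit is monotone: true up to some threshold and false beyond it, so the maximum can be located by binary search. The following is a minimal, self-contained OCaml sketch of that idea; the name largest_fitting and the search strategy are illustrative assumptions, not xapi's actual implementation.

(* Find the largest value in [lo, hi] satisfying a monotone predicate.
   Assumes will_fit lo holds; illustrative only, not xapi code. *)
let rec largest_fitting ~will_fit (lo : int64) (hi : int64) : int64 =
  if lo >= hi then lo
  else
    (* round up so the search always makes progress *)
    let mid = Int64.div (Int64.add (Int64.add lo hi) 1L) 2L in
    if will_fit mid
    then largest_fitting ~will_fit mid hi               (* mid still fits: go higher *)
    else largest_fitting ~will_fit lo (Int64.sub mid 1L) (* too big: go lower *)

let () =
  (* pretend static-max values above 3 GiB no longer fit *)
  let threshold = 3_221_225_472L in
  let will_fit x = x <= threshold in
  assert (largest_fitting ~will_fit 0L 4_294_967_296L = threshold);
  print_endline "largest fitting static-max found"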
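
PATCH 7/8 narrows the candidate hosts for a newly started or resumed VM: during a rolling upgrade, only hosts already at the pool's highest product version qualify. Here is a minimal, self-contained model of that filter; the host record type and helper names are illustrative stand-ins for the real xapi structures.

(* Illustrative model, not xapi code. Dotted version lists compare
   lexicographically, which OCaml's structural compare provides. *)
type host = { name : string; version : int list }

let version_compare : int list -> int list -> int = compare

let max_version_in_pool (pool : host list) : int list =
  List.fold_left
    (fun acc h -> if version_compare h.version acc > 0 then h.version else acc)
    [] pool

(* Outside an upgrade every host qualifies; during one, only hosts at
   the pool's highest version may receive the VM. *)
let host_has_good_version ~rolling_upgrade pool h =
  not rolling_upgrade
  || version_compare h.version (max_version_in_pool pool) >= 0

let () =
  let pool =
    [ { name = "host-a"; version = [5; 6; 100] };
      { name = "host-b"; version = [6; 0; 0] } ] in
  let candidates =
    List.filter (host_has_good_version ~rolling_upgrade:true pool) pool in
  (* prints only host-b: host-a is below the pool's highest version *)
  List.iter (fun h -> Printf.printf "start/resume candidate: %s\n" h.name) candidates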
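
PATCH 8/8 adds a second condition to the WLB guard in choose_host_for_vm: recommendations are consulted only when WLB is enabled and no rolling upgrade is in progress; otherwise placement falls back to choose_host_for_vm_no_wlb. The decision reduces to a boolean guard, sketched below with hypothetical names.

(* Illustrative model of the placement decision, not xapi code. *)
type placement = Wlb_recommendation | Plain_chooser

let placement_strategy ~wlb_enabled ~rolling_upgrade =
  if wlb_enabled && not rolling_upgrade
  then Wlb_recommendation
  else Plain_chooser

let () =
  (* during an upgrade, WLB is ignored even when enabled *)
  assert (placement_strategy ~wlb_enabled:true ~rolling_upgrade:true = Plain_chooser);
  assert (placement_strategy ~wlb_enabled:true ~rolling_upgrade:false = Wlb_recommendation);
  assert (placement_strategy ~wlb_enabled:false ~rolling_upgrade:false = Plain_chooser);
  print_endline "wlb is bypassed during rolling upgrade"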