From 58e9def69a7ba8d7db045ccfff52001c79313c8d Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 20 Jun 2025 03:29:43 +0000 Subject: [PATCH 1/3] CA-408552: 1/3 Improve bootstrom performance by save db ops events_from_xenopsd thread is critical as it sync up VM status in case of bootstorm, this thread is flood as lots of events comes from xenopsd waiting for process. During processing of the events, VM/VDI/VBD update_allowed_operations will be called to refresh the allowed operations. However, for each ops (start/suspend,etc) for the same object(VM), the object info is always the same no matter what the ops is. Thus, it is not necessary to query the object information over and over again. Disclosure is used to resovle the issue. Query once and the disclosure will just remember the query result. The performance test for starting 500 VM on 4 hosts improve around 10% performance for both XS8 and XS9 This commit just introduce disclosure and caller call the disclosure instead of the original function Signed-off-by: Lin Liu --- ocaml/xapi/xapi_vdi.ml | 685 ++++++++++++++++---------------- ocaml/xapi/xapi_vm_lifecycle.ml | 7 +- 2 files changed, 347 insertions(+), 345 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 15dff1df4d..9691f3831c 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -63,7 +63,7 @@ let check_sm_feature_error (op : API.vdi_operations) sm_features sr = specified, it should contain at least all the VBD records from the database that are linked to this VDI. *) let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) - ?vbd_records ha_enabled record _ref' op = + ?vbd_records ha_enabled record _ref' = let ( let* ) = Result.bind in let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vDI_current_operations in @@ -83,360 +83,370 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) 5. HA prevents you from deleting statefiles or metadata volumes 6. During rolling pool upgrade, only operations known by older releases are allowed *) - let* () = - if - Helpers.rolling_upgrade_in_progress ~__context - && not - (Xapi_globs.Vdi_operations_set.mem op - Xapi_globs.rpu_allowed_vdi_operations - ) - then - Error (Api_errors.not_supported_during_upgrade, []) - else - Ok () - in - let* () = - (* Don't fail with other_operation_in_progress if VDI mirroring is in - progress and destroy is called as part of VDI mirroring *) - let is_vdi_mirroring_in_progress = - op = `destroy && List.exists (fun (_, op) -> op = `mirror) current_ops + fun op -> + let* () = + if + Helpers.rolling_upgrade_in_progress ~__context + && not + (Xapi_globs.Vdi_operations_set.mem op + Xapi_globs.rpu_allowed_vdi_operations + ) + then + Error (Api_errors.not_supported_during_upgrade, []) + else + Ok () in - if - List.exists (fun (_, op) -> op <> `copy) current_ops - && not is_vdi_mirroring_in_progress - then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - Ok () - in - (* check to see whether it's a local cd drive *) - let sr = record.Db_actions.vDI_SR in - let sr_type = Db.SR.get_type ~__context ~self:sr in - let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in - (* Check to see if any PBDs are attached *) - let open Xapi_database.Db_filter_types in - let pbds_attached = - match pbd_records with - | [] -> - Db.PBD.get_records_where ~__context - ~expr: - (And - ( Eq (Field "SR", Literal (Ref.string_of sr)) - , Eq (Field "currently_attached", Literal "true") - ) + let* () = + (* Don't fail with other_operation_in_progress if VDI mirroring is in + progress and destroy is called as part of VDI mirroring *) + let is_vdi_mirroring_in_progress = + op = `destroy && List.exists (fun (_, op) -> op = `mirror) current_ops + in + if + List.exists (fun (_, op) -> op <> `copy) current_ops + && not is_vdi_mirroring_in_progress + then + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) + else + Ok () + in + (* check to see whether it's a local cd drive *) + let sr = record.Db_actions.vDI_SR in + let sr_type = Db.SR.get_type ~__context ~self:sr in + let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in + (* Check to see if any PBDs are attached *) + let open Xapi_database.Db_filter_types in + let pbds_attached = + match pbd_records with + | [] -> + Db.PBD.get_records_where ~__context + ~expr: + (And + ( Eq (Field "SR", Literal (Ref.string_of sr)) + , Eq (Field "currently_attached", Literal "true") + ) + ) + | _ -> + List.filter + (fun (_, pbd_record) -> + pbd_record.API.pBD_SR = sr + && pbd_record.API.pBD_currently_attached ) - | _ -> - List.filter - (fun (_, pbd_record) -> - pbd_record.API.pBD_SR = sr && pbd_record.API.pBD_currently_attached - ) - pbd_records - in - let* () = - if pbds_attached = [] && op = `resize then - Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) - else - Ok () - in - - (* check to see whether VBDs exist which are using this VDI *) - - (* Only a 'live' operation can be performed if there are active (even RO) devices *) - let my_active_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Or - ( Eq (Field "currently_attached", Literal "true") - , Eq (Field "reserved", Literal "true") - ) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records - in - let my_active_rw_vbd_records = - List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records - in - (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) - let my_has_current_operation_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Not (Eq (Field "current_operations", Literal "()")) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records - in - (* If the VBD is currently_attached then some operations can still be - performed ie: VDI.clone (if the VM is suspended we have to have the - 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; - 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked - to a VM, but the implementation first waits for the VDI's VBDs to be - unplugged and destroyed, and the checks are performed there. - *) - let operation_can_be_performed_live = - match op with - | `snapshot - | `resize_online - | `blocked - | `clone - | `mirror - | `enable_cbt - | `disable_cbt - | `data_destroy -> - true - | _ -> - false - in - let operation_can_be_performed_with_ro_attach = - operation_can_be_performed_live - || match op with `copy -> true | _ -> false - in - (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) - let blocked_by_attach = - let blocked_by_attach = - if operation_can_be_performed_live then - false - else if operation_can_be_performed_with_ro_attach then - my_active_rw_vbd_records <> [] + pbd_records + in + let* () = + if pbds_attached = [] && op = `resize then + Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) else - my_active_vbd_records <> [] + Ok () in - let allow_attached_vbds = - (* We use Valid_ref_list.list to ignore exceptions due to invalid - references that could propagate to the message forwarding layer, which - calls this function to check for errors - these exceptions would - prevent the actual XenAPI function from being run. Checks called from - the message forwarding layer should not fail with an exception. *) - let true_for_all_active_vbds f = - Valid_ref_list.for_all f my_active_vbd_records - in + + (* check to see whether VBDs exist which are using this VDI *) + + (* Only a 'live' operation can be performed if there are active (even RO) devices *) + let my_active_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Or + ( Eq (Field "currently_attached", Literal "true") + , Eq (Field "reserved", Literal "true") + ) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) + ) + records + in + let my_active_rw_vbd_records = + List.filter + (fun vbd -> vbd.Db_actions.vBD_mode = `RW) + my_active_vbd_records + in + (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) + let my_has_current_operation_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Not (Eq (Field "current_operations", Literal "()")) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] + ) + records + in + (* If the VBD is currently_attached then some operations can still be + performed ie: VDI.clone (if the VM is suspended we have to have the + 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; + 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked + to a VM, but the implementation first waits for the VDI's VBDs to be + unplugged and destroyed, and the checks are performed there. + *) + let operation_can_be_performed_live = match op with - | `list_changed_blocks -> - let vbd_connected_to_vm_snapshot vbd = - let vm = vbd.Db_actions.vBD_VM in - Db.is_valid_ref __context vm - && Db.VM.get_is_a_snapshot ~__context ~self:vm - in - (* We allow list_changed_blocks on VDIs attached to snapshot VMs, - because VM.checkpoint may set the currently_attached fields of the - snapshot's VBDs to true, and this would block list_changed_blocks. *) - true_for_all_active_vbds vbd_connected_to_vm_snapshot + | `snapshot + | `resize_online + | `blocked + | `clone + | `mirror + | `enable_cbt + | `disable_cbt + | `data_destroy -> + true | _ -> false in - blocked_by_attach && not allow_attached_vbds - in - let* () = - if blocked_by_attach then - Error - (Api_errors.vdi_in_use, [_ref; Record_util.vdi_operations_to_string op]) - else if - (* data_destroy first waits for all the VBDs to disappear in its - implementation, so it is harmless to allow it when any of the VDI's - VBDs have operations in progress. This ensures that we avoid the retry - mechanism of message forwarding and only use the event loop. *) - my_has_current_operation_vbd_records <> [] && op <> `data_destroy - then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - Ok () - in - let sm_features = - Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type - in - let* () = check_sm_feature_error op sm_features sr in - let allowed_for_cbt_metadata_vdi = - match op with - | `clone - | `copy - | `disable_cbt - | `enable_cbt - | `mirror - | `resize - | `resize_online - | `snapshot - | `set_on_boot -> - false - | `blocked - | `data_destroy - | `destroy - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `update -> - true - in - let* () = - if - (not allowed_for_cbt_metadata_vdi) - && record.Db_actions.vDI_type = `cbt_metadata - then - Error - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string `cbt_metadata] - ) - else - Ok () - in - let allowed_when_cbt_enabled = - match op with - | `mirror | `set_on_boot -> - false - | `blocked - | `clone - | `copy - | `data_destroy - | `destroy - | `disable_cbt - | `enable_cbt - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `resize - | `resize_online - | `snapshot - | `update -> - true - in - let* () = - if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.vdi_cbt_enabled, [_ref]) - else - Ok () - in - let check_destroy () = - if sr_type = "udev" then - Error (Api_errors.vdi_is_a_physical_device, [_ref]) - else if is_tools_sr then - Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Error (Api_errors.vdi_has_rrds, [_ref]) - else if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then - Error (Api_errors.ha_is_enabled, []) - else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - && Xapi_pool_helpers.ha_enable_in_progress ~__context - then - Error (Api_errors.ha_enable_in_progress, []) - else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - && Xapi_pool_helpers.ha_disable_in_progress ~__context - then - Error (Api_errors.ha_disable_in_progress, []) - else - Ok () - in - match op with - | `forget -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + let operation_can_be_performed_with_ro_attach = + operation_can_be_performed_live + || match op with `copy -> true | _ -> false + in + (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) + let blocked_by_attach = + let blocked_by_attach = + if operation_can_be_performed_live then + false + else if operation_can_be_performed_with_ro_attach then + my_active_rw_vbd_records <> [] + else + my_active_vbd_records <> [] + in + let allow_attached_vbds = + (* We use Valid_ref_list.list to ignore exceptions due to invalid + references that could propagate to the message forwarding layer, which + calls this function to check for errors - these exceptions would + prevent the actual XenAPI function from being run. Checks called from + the message forwarding layer should not fail with an exception. *) + let true_for_all_active_vbds f = + Valid_ref_list.for_all f my_active_vbd_records + in + match op with + | `list_changed_blocks -> + let vbd_connected_to_vm_snapshot vbd = + let vm = vbd.Db_actions.vBD_VM in + Db.is_valid_ref __context vm + && Db.VM.get_is_a_snapshot ~__context ~self:vm + in + (* We allow list_changed_blocks on VDIs attached to snapshot VMs, + because VM.checkpoint may set the currently_attached fields of the + snapshot's VBDs to true, and this would block list_changed_blocks. *) + true_for_all_active_vbds vbd_connected_to_vm_snapshot + | _ -> + false + in + blocked_by_attach && not allow_attached_vbds + in + let* () = + if blocked_by_attach then + Error + ( Api_errors.vdi_in_use + , [_ref; Record_util.vdi_operations_to_string op] + ) + else if + (* data_destroy first waits for all the VBDs to disappear in its + implementation, so it is harmless to allow it when any of the VDI's + VBDs have operations in progress. This ensures that we avoid the retry + mechanism of message forwarding and only use the event loop. *) + my_has_current_operation_vbd_records <> [] && op <> `data_destroy then - Error (Api_errors.ha_is_enabled, []) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Error (Api_errors.vdi_has_rrds, [_ref]) + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) else Ok () - | `destroy -> - check_destroy () - | `data_destroy -> - if not record.Db_actions.vDI_is_a_snapshot then + in + let sm_features = + Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type + in + let* () = check_sm_feature_error op sm_features sr in + let allowed_for_cbt_metadata_vdi = + match op with + | `clone + | `copy + | `disable_cbt + | `enable_cbt + | `mirror + | `resize + | `resize_online + | `snapshot + | `set_on_boot -> + false + | `blocked + | `data_destroy + | `destroy + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `update -> + true + in + let* () = + if + (not allowed_for_cbt_metadata_vdi) + && record.Db_actions.vDI_type = `cbt_metadata + then Error - (Api_errors.operation_not_allowed, ["VDI is not a snapshot: " ^ _ref]) - else if not record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.vdi_no_cbt_metadata, [_ref]) + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string `cbt_metadata] + ) else - check_destroy () - | `resize -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + Ok () + in + let allowed_when_cbt_enabled = + match op with + | `mirror | `set_on_boot -> + false + | `blocked + | `clone + | `copy + | `data_destroy + | `destroy + | `disable_cbt + | `enable_cbt + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `resize + | `resize_online + | `snapshot + | `update -> + true + in + let* () = + if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.ha_is_enabled, []) + Error (Api_errors.vdi_cbt_enabled, [_ref]) else Ok () - | `resize_online -> - if + in + let check_destroy () = + if sr_type = "udev" then + Error (Api_errors.vdi_is_a_physical_device, [_ref]) + else if is_tools_sr then + Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else if ha_enabled && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then Error (Api_errors.ha_is_enabled, []) + else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + && Xapi_pool_helpers.ha_enable_in_progress ~__context + then + Error (Api_errors.ha_enable_in_progress, []) + else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + && Xapi_pool_helpers.ha_disable_in_progress ~__context + then + Error (Api_errors.ha_disable_in_progress, []) else Ok () - | `snapshot when record.Db_actions.vDI_sharable -> - Error (Api_errors.vdi_is_sharable, [_ref]) - | (`snapshot | `copy) when reset_on_boot -> - Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) - | `snapshot -> - if List.exists (fun (_, op) -> op = `copy) current_ops then - Error - ( Api_errors.operation_not_allowed - , ["Snapshot operation not allowed during copy."] - ) - else - Ok () - | `copy -> - if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then - Error - ( Api_errors.operation_not_allowed - , [ - "VDI containing HA statefile or redo log cannot be copied (check \ - the VDI's allowed operations)." - ] - ) - else - Ok () - | `enable_cbt | `disable_cbt -> - if record.Db_actions.vDI_is_a_snapshot then - Error (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) - else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then - Error - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] - ) - else if reset_on_boot then + in + match op with + | `forget -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else + Ok () + | `destroy -> + check_destroy () + | `data_destroy -> + if not record.Db_actions.vDI_is_a_snapshot then + Error + ( Api_errors.operation_not_allowed + , ["VDI is not a snapshot: " ^ _ref] + ) + else if not record.Db_actions.vDI_cbt_enabled then + Error (Api_errors.vdi_no_cbt_metadata, [_ref]) + else + check_destroy () + | `resize -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `resize_online -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `snapshot when record.Db_actions.vDI_sharable -> + Error (Api_errors.vdi_is_sharable, [_ref]) + | (`snapshot | `copy) when reset_on_boot -> Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) - else + | `snapshot -> + if List.exists (fun (_, op) -> op = `copy) current_ops then + Error + ( Api_errors.operation_not_allowed + , ["Snapshot operation not allowed during copy."] + ) + else + Ok () + | `copy -> + if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then + Error + ( Api_errors.operation_not_allowed + , [ + "VDI containing HA statefile or redo log cannot be copied \ + (check the VDI's allowed operations)." + ] + ) + else + Ok () + | `enable_cbt | `disable_cbt -> + if record.Db_actions.vDI_is_a_snapshot then + Error + (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) + else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then + Error + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] + ) + else if reset_on_boot then + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + else + Ok () + | `mirror + | `clone + | `generate_config + | `force_unlock + | `set_on_boot + | `list_changed_blocks + | `blocked + | `update -> Ok () - | `mirror - | `clone - | `generate_config - | `force_unlock - | `set_on_boot - | `list_changed_blocks - | `blocked - | `update -> - Ok () let assert_operation_valid ~__context ~self ~(op : API.vdi_operations) = let pool = Helpers.get_pool ~__context in @@ -486,16 +496,11 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records v in let allowed = - let check x = - match - check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records - ha_enabled all self x - with - | Ok () -> - true - | _ -> - false + let check' = + check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records + ha_enabled all self in + let check x = match check' x with Ok () -> true | _ -> false in all_ops |> Xapi_globs.Vdi_operations_set.filter check in let allowed = diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index fc281c70de..6e3a3955fc 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -777,12 +777,9 @@ let allowable_ops = List.filter (fun op -> not (List.mem op ignored_ops)) API.vm_operations__all let update_allowed_operations ~__context ~self = + let check' = check_operation_error ~__context ~ref:self in let check accu op = - match check_operation_error ~__context ~ref:self ~op ~strict:true with - | None -> - op :: accu - | Some _err -> - accu + match check' ~op ~strict:true with None -> op :: accu | Some _err -> accu in let allowed = List.fold_left check [] allowable_ops in (* FIXME: need to be able to deal with rolling-upgrade for orlando as well *) From e8440b3a01720f09baf04eec5b8482c8e3e93ff8 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 20 Jun 2025 05:47:09 +0000 Subject: [PATCH 2/3] CA-408552: 2/3 Improve bootstrom performance by save db ops Define db ops into variables and keep them inside returned function for code review Signed-off-by: Lin Liu --- ocaml/xapi/xapi_vdi.ml | 40 ++++++++++++++++----------------- ocaml/xapi/xapi_vm_lifecycle.ml | 32 +++++++++++++++++--------- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 9691f3831c..1090ae01f1 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -85,8 +85,11 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) *) fun op -> let* () = - if + let rolling_upgrade_in_progress = Helpers.rolling_upgrade_in_progress ~__context + in + if + rolling_upgrade_in_progress && not (Xapi_globs.Vdi_operations_set.mem op Xapi_globs.rpu_allowed_vdi_operations @@ -338,25 +341,31 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in + let vdi_is_ha_state_or_redolog = + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + in let check_destroy () = + let ha_enable_in_progress = + Xapi_pool_helpers.ha_enable_in_progress ~__context + in + let ha_disable_in_progress = + Xapi_pool_helpers.ha_disable_in_progress ~__context + in if sr_type = "udev" then Error (Api_errors.vdi_is_a_physical_device, [_ref]) else if is_tools_sr then Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) else if List.mem record.Db_actions.vDI_type [`rrd] then Error (Api_errors.vdi_has_rrds, [_ref]) - else if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + else if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + vdi_is_ha_state_or_redolog && Xapi_pool_helpers.ha_enable_in_progress ~__context then Error (Api_errors.ha_enable_in_progress, []) else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + vdi_is_ha_state_or_redolog && Xapi_pool_helpers.ha_disable_in_progress ~__context then Error (Api_errors.ha_disable_in_progress, []) @@ -365,10 +374,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) in match op with | `forget -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else if List.mem record.Db_actions.vDI_type [`rrd] then Error (Api_errors.vdi_has_rrds, [_ref]) @@ -387,18 +393,12 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else check_destroy () | `resize -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else Ok () | `resize_online -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then + if ha_enabled && vdi_is_ha_state_or_redolog then Error (Api_errors.ha_is_enabled, []) else Ok () @@ -415,7 +415,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () | `copy -> - if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then + if vdi_is_ha_state_or_redolog then Error ( Api_errors.operation_not_allowed , [ diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 6e3a3955fc..7e95933d8d 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -528,10 +528,10 @@ let check_operation_error ~__context ~ref = in let current_error = let metrics = Db.VM.get_metrics ~__context ~self:ref in + let is_nested_virt = nested_virt ~__context ref metrics in check current_error (fun () -> match op with - | `changing_dynamic_range - when nested_virt ~__context ref metrics && strict -> + | `changing_dynamic_range when is_nested_virt && strict -> Some (Api_errors.vm_is_using_nested_virt, [ref_str]) | _ -> None @@ -542,13 +542,11 @@ let check_operation_error ~__context ~ref = (* make use of the Helpers.ballooning_enabled_for_vm function. *) let current_error = check current_error (fun () -> - let vm_ref () = + let is_domain_zero = Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid + |> Helpers.is_domain_zero ~__context in - if - (op = `changing_VCPUs || op = `destroy) - && Helpers.is_domain_zero ~__context (vm_ref ()) - then + if (op = `changing_VCPUs || op = `destroy) && is_domain_zero then Some ( Api_errors.operation_not_allowed , ["This operation is not allowed on dom0"] @@ -668,8 +666,11 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being in an appliance. *) let current_error = + let is_appliance_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_appliance + in check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_appliance then + if is_appliance_valid then check_appliance ~vmr ~op ~ref_str else None @@ -677,8 +678,11 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a protection policy. *) let current_error = + let is_protection_policy_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy + in check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy then + if is_protection_policy_valid then check_protection_policy ~vmr ~op ~ref_str else None @@ -686,8 +690,11 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a snapshot schedule. *) let current_error = + let is_snapshort_schedule_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule + in check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule then + if is_snapshort_schedule_valid then check_snapshot_schedule ~vmr ~ref_str op else None @@ -709,9 +716,12 @@ let check_operation_error ~__context ~ref = ) in let current_error = + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context + in check current_error (fun () -> if - Helpers.rolling_upgrade_in_progress ~__context + rolling_upgrade_in_progress && not (List.mem op Xapi_globs.rpu_allowed_vm_operations) then Some (Api_errors.not_supported_during_upgrade, []) From e1a57fa5ef4b97adab32bcf1a24d9af61bccc3c4 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 20 Jun 2025 06:35:20 +0000 Subject: [PATCH 3/3] CA-408552: 3/3 Improve bootstrom performance by save db ops Move ops unrelated db operation outside of returned function Signed-off-by: Lin Liu --- ocaml/xapi/xapi_vdi.ml | 176 +++++++++++++++----------------- ocaml/xapi/xapi_vm_lifecycle.ml | 76 +++++++------- 2 files changed, 123 insertions(+), 129 deletions(-) diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 1090ae01f1..0f9904d72f 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -68,6 +68,84 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vDI_current_operations in let reset_on_boot = record.Db_actions.vDI_on_boot = `reset in + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context + in + (* check to see whether it's a local cd drive *) + let sr = record.Db_actions.vDI_SR in + let sr_type = Db.SR.get_type ~__context ~self:sr in + let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in + (* Check to see if any PBDs are attached *) + let open Xapi_database.Db_filter_types in + let pbds_attached = + match pbd_records with + | [] -> + Db.PBD.get_records_where ~__context + ~expr: + (And + ( Eq (Field "SR", Literal (Ref.string_of sr)) + , Eq (Field "currently_attached", Literal "true") + ) + ) + | _ -> + List.filter + (fun (_, pbd_record) -> + pbd_record.API.pBD_SR = sr && pbd_record.API.pBD_currently_attached + ) + pbd_records + in + + (* Only a 'live' operation can be performed if there are active (even RO) devices *) + let my_active_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Or + ( Eq (Field "currently_attached", Literal "true") + , Eq (Field "reserved", Literal "true") + ) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) + ) + records + in + let my_active_rw_vbd_records = + List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records + in + (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) + let my_has_current_operation_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Not (Eq (Field "current_operations", Literal "()")) + ) + ) + ) + | Some records -> + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] + ) + records + in + (* Policy: 1. any current_operation besides copy implies exclusivity; fail everything else; except vdi mirroring is in current operations and destroy is performed @@ -83,11 +161,15 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) 5. HA prevents you from deleting statefiles or metadata volumes 6. During rolling pool upgrade, only operations known by older releases are allowed *) + let sm_features = + Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type + in + let vdi_is_ha_state_or_redolog = + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + in + fun op -> let* () = - let rolling_upgrade_in_progress = - Helpers.rolling_upgrade_in_progress ~__context - in if rolling_upgrade_in_progress && not @@ -113,30 +195,6 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in - (* check to see whether it's a local cd drive *) - let sr = record.Db_actions.vDI_SR in - let sr_type = Db.SR.get_type ~__context ~self:sr in - let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in - (* Check to see if any PBDs are attached *) - let open Xapi_database.Db_filter_types in - let pbds_attached = - match pbd_records with - | [] -> - Db.PBD.get_records_where ~__context - ~expr: - (And - ( Eq (Field "SR", Literal (Ref.string_of sr)) - , Eq (Field "currently_attached", Literal "true") - ) - ) - | _ -> - List.filter - (fun (_, pbd_record) -> - pbd_record.API.pBD_SR = sr - && pbd_record.API.pBD_currently_attached - ) - pbd_records - in let* () = if pbds_attached = [] && op = `resize then Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) @@ -146,58 +204,6 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) (* check to see whether VBDs exist which are using this VDI *) - (* Only a 'live' operation can be performed if there are active (even RO) devices *) - let my_active_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Or - ( Eq (Field "currently_attached", Literal "true") - , Eq (Field "reserved", Literal "true") - ) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records - in - let my_active_rw_vbd_records = - List.filter - (fun vbd -> vbd.Db_actions.vBD_mode = `RW) - my_active_vbd_records - in - (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) - let my_has_current_operation_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Not (Eq (Field "current_operations", Literal "()")) - ) - ) - ) - | Some records -> - List.filter - (fun vbd_record -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records - in (* If the VBD is currently_attached then some operations can still be performed ie: VDI.clone (if the VM is suspended we have to have the 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; @@ -275,9 +281,6 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in - let sm_features = - Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type - in let* () = check_sm_feature_error op sm_features sr in let allowed_for_cbt_metadata_vdi = match op with @@ -341,16 +344,7 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) else Ok () in - let vdi_is_ha_state_or_redolog = - List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - in let check_destroy () = - let ha_enable_in_progress = - Xapi_pool_helpers.ha_enable_in_progress ~__context - in - let ha_disable_in_progress = - Xapi_pool_helpers.ha_disable_in_progress ~__context - in if sr_type = "udev" then Error (Api_errors.vdi_is_a_physical_device, [_ref]) else if is_tools_sr then diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 7e95933d8d..5ec4ca6d79 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -393,8 +393,7 @@ let nested_virt ~__context vm metrics = let key = "nested-virt" in Vm_platform.is_true ~key ~platformdata ~default:false -let is_mobile ~__context vm strict = - let metrics = Db.VM.get_metrics ~__context ~self:vm in +let is_mobile ~__context vm strict metrics = (not @@ nomigrate ~__context vm metrics) && (not @@ nested_virt ~__context vm metrics) || not strict @@ -447,6 +446,42 @@ let check_operation_error ~__context ~ref = vmr.Db_actions.vM_VBDs |> List.filter (Db.is_valid_ref __context) in + let current_ops = vmr.Db_actions.vM_current_operations in + let metrics = Db.VM.get_metrics ~__context ~self:ref in + let is_nested_virt = nested_virt ~__context ref metrics in + let is_domain_zero = + Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid + |> Helpers.is_domain_zero ~__context + in + let vdis_reset_and_caching = + List.filter_map + (fun vdi -> + try + let sm_config = Db.VDI.get_sm_config ~__context ~self:vdi in + Some + ( List.assoc_opt "on_boot" sm_config = Some "reset" + , bool_of_assoc "caching" sm_config + ) + with _ -> None + ) + vdis + in + let sriov_pcis = nvidia_sriov_pcis ~__context vmr.Db_actions.vM_VGPUs in + let is_not_sriov pci = not @@ List.mem pci sriov_pcis in + let pcis = vmr.Db_actions.vM_attached_PCIs in + let is_appliance_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_appliance + in + let is_protection_policy_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy + in + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context + in + let is_snapshort_schedule_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule + in + fun ~op ~strict -> let current_error = None in let check c f = match c with Some e -> Some e | None -> f () in @@ -470,7 +505,6 @@ let check_operation_error ~__context ~ref = (* if other operations are in progress, check that the new operation is allowed concurrently with them. *) let current_error = check current_error (fun () -> - let current_ops = vmr.Db_actions.vM_current_operations in if List.length current_ops <> 0 && not (is_allowed_concurrently ~op ~current_ops) @@ -520,15 +554,13 @@ let check_operation_error ~__context ~ref = check current_error (fun () -> match op with | (`suspend | `checkpoint | `pool_migrate | `migrate_send) - when not (is_mobile ~__context ref strict) -> + when not (is_mobile ~__context ref strict metrics) -> Some (Api_errors.vm_is_immobile, [ref_str]) | _ -> None ) in let current_error = - let metrics = Db.VM.get_metrics ~__context ~self:ref in - let is_nested_virt = nested_virt ~__context ref metrics in check current_error (fun () -> match op with | `changing_dynamic_range when is_nested_virt && strict -> @@ -542,10 +574,6 @@ let check_operation_error ~__context ~ref = (* make use of the Helpers.ballooning_enabled_for_vm function. *) let current_error = check current_error (fun () -> - let is_domain_zero = - Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid - |> Helpers.is_domain_zero ~__context - in if (op = `changing_VCPUs || op = `destroy) && is_domain_zero then Some ( Api_errors.operation_not_allowed @@ -592,19 +620,6 @@ let check_operation_error ~__context ~ref = (* Check for an error due to VDI caching/reset behaviour *) let current_error = check current_error (fun () -> - let vdis_reset_and_caching = - List.filter_map - (fun vdi -> - try - let sm_config = Db.VDI.get_sm_config ~__context ~self:vdi in - Some - ( List.assoc_opt "on_boot" sm_config = Some "reset" - , bool_of_assoc "caching" sm_config - ) - with _ -> None - ) - vdis - in if op = `checkpoint || op = `snapshot @@ -633,9 +648,6 @@ let check_operation_error ~__context ~ref = (* If a PCI device is passed-through, check if the operation is allowed *) let current_error = check current_error @@ fun () -> - let sriov_pcis = nvidia_sriov_pcis ~__context vmr.Db_actions.vM_VGPUs in - let is_not_sriov pci = not @@ List.mem pci sriov_pcis in - let pcis = vmr.Db_actions.vM_attached_PCIs in match op with | (`suspend | `checkpoint | `pool_migrate | `migrate_send) when List.exists is_not_sriov pcis -> @@ -666,9 +678,6 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being in an appliance. *) let current_error = - let is_appliance_valid = - Db.is_valid_ref __context vmr.Db_actions.vM_appliance - in check current_error (fun () -> if is_appliance_valid then check_appliance ~vmr ~op ~ref_str @@ -678,9 +687,6 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a protection policy. *) let current_error = - let is_protection_policy_valid = - Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy - in check current_error (fun () -> if is_protection_policy_valid then check_protection_policy ~vmr ~op ~ref_str @@ -690,9 +696,6 @@ let check_operation_error ~__context ~ref = in (* Check for errors caused by VM being assigned to a snapshot schedule. *) let current_error = - let is_snapshort_schedule_valid = - Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule - in check current_error (fun () -> if is_snapshort_schedule_valid then check_snapshot_schedule ~vmr ~ref_str op @@ -716,9 +719,6 @@ let check_operation_error ~__context ~ref = ) in let current_error = - let rolling_upgrade_in_progress = - Helpers.rolling_upgrade_in_progress ~__context - in check current_error (fun () -> if rolling_upgrade_in_progress