Skip to content

Improve other_operation_in_progress error users, fix handling of its delays #6588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
6005231
xapi_sr_operations: Report more useful info when raising other_operat…
last-genius Jul 11, 2025
2ec4461
xapi_cluster_helpers: Correctly report other_operation_in_progress error
last-genius Jul 11, 2025
b6fb47c
xapi_vm_lifecycle: Correctly report other_operation_in_progress error
last-genius Jul 11, 2025
e8ade77
xapi_vbd_helpers: Fix operation reporting when raising other_operatio…
last-genius Jul 11, 2025
d8a24ef
xapi_vdi: Report more useful information when raising other_operation…
last-genius Jul 11, 2025
8015edb
xapi_{vif,vusb}_helpers: Report more useful information when raising …
last-genius Jul 11, 2025
cbd5f17
message_forwarding: Report more info when raising other_operation_in_…
last-genius Jul 11, 2025
c5773da
xapi_pool_helpers: Report more info when raising other_operation_in_p…
last-genius Jul 11, 2025
8923f7a
xapi_pif: Report more info when raising other_operation_in_progress e…
last-genius Jul 11, 2025
443a317
xapi_vm_appliance_lifecycle: Report more info when raising other_oper…
last-genius Jul 11, 2025
0b843b6
xapi_vbd: Report more useful info when raising other_operation_in_pro…
last-genius Jul 11, 2025
d797d79
xapi_host_helpers: Report more useful info when raising other_operati…
last-genius Jul 14, 2025
3500de1
xapi/helpers: Fix handling of other_operation_in_progress delays
last-genius Jul 11, 2025
615aa21
idl/datamodel_errors: Add operation_{type,ref} to other_operation_in_…
last-genius Jul 14, 2025
5e895af
Adjust tests after other_operation_in_progress refactoring
last-genius Jul 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ocaml/idl/datamodel_errors.ml
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,8 @@ let _ =
~doc:"You attempted an operation on a VM which is not suspendable." () ;
error Api_errors.vm_is_template ["vm"]
~doc:"The operation attempted is not valid for a template VM" () ;
error Api_errors.other_operation_in_progress ["class"; "object"]
error Api_errors.other_operation_in_progress
["class"; "object"; "operation_type"; "operation_ref"]
~doc:"Another operation involving the object is currently in progress" () ;
error Api_errors.vbd_not_removable_media ["vbd"]
~doc:"Media could not be ejected because it is not removable" () ;
Expand Down
18 changes: 14 additions & 4 deletions ocaml/tests/test_clustering.ml
Original file line number Diff line number Diff line change
Expand Up @@ -581,11 +581,21 @@ let test_disallow_unplug_during_cluster_host_create () =
let key = Context.get_task_id __context |> Ref.string_of in
Db.Cluster.add_to_current_operations ~__context ~self:cluster ~key ~value
in
let check_disallow_unplug_false_fails self msg =
let check_disallow_unplug_false_fails self op msg =
let op_ref, _ =
List.hd (Db.Cluster.get_current_operations ~__context ~self:cluster)
in
Alcotest.check_raises msg
Api_errors.(
Server_error
(other_operation_in_progress, ["Cluster"; Ref.string_of cluster])
( other_operation_in_progress
, [
"Cluster"
; Ref.string_of cluster
; API.cluster_operation_to_string op
; op_ref
]
)
)
(fun () -> Xapi_pif.set_disallow_unplug ~__context ~self ~value:false)
in
Expand All @@ -598,14 +608,14 @@ let test_disallow_unplug_during_cluster_host_create () =
let test_with_current op =
Xapi_pif.set_disallow_unplug ~__context ~self:pIF ~value:true ;
add_op op ;
check_disallow_unplug_false_fails pIF
check_disallow_unplug_false_fails pIF op
"disallow_unplug cannot be set to false during cluster_host creation or \
enable on same PIF" ;
let other_pif = T.make_pif ~__context ~network ~host () in
check_successful_disallow_unplug true other_pif
"Should always be able to set disallow_unplug:true regardless of \
clustering operations" ;
check_disallow_unplug_false_fails other_pif
check_disallow_unplug_false_fails other_pif op
"disallow_unplug cannot be set to false during cluster_host creation or \
enable on any PIF" ;
let key = Context.get_task_id __context |> Ref.string_of in
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xapi/helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1685,7 +1685,7 @@ module Repeat_with_uniform_backoff : POLICY = struct
debug "Waiting for up to %f seconds before retrying..." this_timeout ;
let start = Unix.gettimeofday () in
( match e with
| Api_errors.Server_error (code, [cls; objref])
| Api_errors.Server_error (code, cls :: objref :: _)
when code = Api_errors.other_operation_in_progress ->
Early_wakeup.wait (cls, objref) this_timeout
| _ ->
Expand Down
23 changes: 15 additions & 8 deletions ocaml/xapi/message_forwarding.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5720,14 +5720,21 @@ functor
if Helpers.i_am_srmaster ~__context ~sr then
List.iter
(fun vdi ->
if Db.VDI.get_current_operations ~__context ~self:vdi <> []
then
raise
(Api_errors.Server_error
( Api_errors.other_operation_in_progress
, [Datamodel_common._vdi; Ref.string_of vdi]
)
)
match Db.VDI.get_current_operations ~__context ~self:vdi with
| (op_ref, op_type) :: _ ->
raise
(Api_errors.Server_error
( Api_errors.other_operation_in_progress
, [
Datamodel_common._vdi
; Ref.string_of vdi
; API.vdi_operations_to_string op_type
; op_ref
]
)
)
| [] ->
()
)
(Db.SR.get_VDIs ~__context ~self:sr) ;
SR.mark_sr ~__context ~sr ~doc ~op
Expand Down
14 changes: 9 additions & 5 deletions ocaml/xapi/xapi_cluster_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,23 @@ let is_allowed_concurrently ~op:_ ~current_ops:_ =
false

let report_concurrent_operations_error ~current_ops ~ref_str =
let current_ops_str =
let current_ops_ref_str, current_ops_str =
let op_to_str = Record_util.cluster_operation_to_string in
let ( >> ) f g x = g (f x) in
match current_ops with
| [] ->
failwith "No concurrent operation to report"
| [(_, cop)] ->
op_to_str cop
| [(op_ref, cop)] ->
(op_ref, op_to_str cop)
| l ->
"{" ^ String.concat "," (List.map op_to_str (List.map snd l)) ^ "}"
( Printf.sprintf "{%s}" (String.concat "," (List.map fst l))
, Printf.sprintf "{%s}"
(String.concat "," (List.map (snd >> op_to_str) l))
)
in
Some
( Api_errors.other_operation_in_progress
, ["Cluster." ^ current_ops_str; ref_str]
, ["Cluster"; ref_str; current_ops_str; current_ops_ref_str]
)

(** Take an internal Cluster record and a proposed operation. Return None iff the operation
Expand Down
14 changes: 9 additions & 5 deletions ocaml/xapi/xapi_cluster_host_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,23 @@ let is_allowed_concurrently ~op:_ ~current_ops:_ =
false

let report_concurrent_operations_error ~current_ops ~ref_str =
let current_ops_str =
let current_ops_ref_str, current_ops_str =
let op_to_str = Record_util.cluster_host_operation_to_string in
let ( >> ) f g x = g (f x) in
match current_ops with
| [] ->
failwith "No concurrent operation to report"
| [(_, cop)] ->
op_to_str cop
| [(op_ref, cop)] ->
(op_ref, op_to_str cop)
| l ->
"{" ^ String.concat "," (List.map op_to_str (List.map snd l)) ^ "}"
( Printf.sprintf "{%s}" (String.concat "," (List.map fst l))
, Printf.sprintf "{%s}"
(String.concat "," (List.map (snd >> op_to_str) l))
)
in
Some
( Api_errors.other_operation_in_progress
, ["Cluster_host." ^ current_ops_str; ref_str]
, ["Cluster_host"; ref_str; current_ops_str; current_ops_ref_str]
)

(** Take an internal Cluster_host record and a proposed operation. Return None iff the operation
Expand Down
65 changes: 39 additions & 26 deletions ocaml/xapi/xapi_host_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ let all_operations = API.host_allowed_operations__all
(** Returns a table of operations -> API error options (None if the operation would be ok) *)
let valid_operations ~__context record _ref' =
let _ref = Ref.string_of _ref' in
let current_ops = List.map snd record.Db_actions.host_current_operations in
let current_ops = record.Db_actions.host_current_operations in
let table = Hashtbl.create 10 in
List.iter (fun x -> Hashtbl.replace table x None) all_operations ;
let set_errors (code : string) (params : string list)
Expand All @@ -49,40 +49,53 @@ let valid_operations ~__context record _ref' =
let is_creating_new x = List.mem x [`provision; `vm_resume; `vm_migrate] in
let is_removing x = List.mem x [`evacuate; `reboot; `shutdown] in
let creating_new =
List.fold_left (fun acc op -> acc || is_creating_new op) false current_ops
in
let removing =
List.fold_left (fun acc op -> acc || is_removing op) false current_ops
List.find_opt (fun (_, op) -> is_creating_new op) current_ops
in
let removing = List.find_opt (fun (_, op) -> is_removing op) current_ops in
List.iter
(fun op ->
if (is_creating_new op && removing) || (is_removing op && creating_new)
then
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string (List.hd current_ops)]
[op]
match (is_creating_new op, removing, is_removing op, creating_new) with
| true, Some (op_ref, op_type), _, _ | _, _, true, Some (op_ref, op_type)
->
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string op_type; op_ref]
[op]
| _ ->
()
)
(List.filter (fun x -> x <> `power_on) all_operations) ;
(* reboot, shutdown and apply_updates cannot run concurrently *)
if List.mem `reboot current_ops then
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `reboot]
[`shutdown; `apply_updates] ;
if List.mem `shutdown current_ops then
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `shutdown]
[`reboot; `apply_updates] ;
if List.mem `apply_updates current_ops then
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `apply_updates]
[`reboot; `shutdown; `enable] ;
Option.iter
(fun (op_ref, _op_type) ->
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `reboot; op_ref]
[`shutdown; `apply_updates]
)
(List.find_opt (fun (_, op) -> op = `reboot) current_ops) ;
Option.iter
(fun (op_ref, _op_type) ->
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `shutdown; op_ref]
[`reboot; `apply_updates]
)
(List.find_opt (fun (_, op) -> op = `shutdown) current_ops) ;
Option.iter
(fun (op_ref, _op_type) ->
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `apply_updates; op_ref]
[`reboot; `shutdown; `enable]
)
(List.find_opt (fun (_, op) -> op = `apply_updates) current_ops) ;
(* Prevent more than one provision happening at a time to prevent extreme dom0
load (in the case of the debian template). Once the template becomes a 'real'
template we can relax this. *)
if List.mem `provision current_ops then
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `provision]
[`provision] ;
Option.iter
(fun (op_ref, _op_type) ->
set_errors Api_errors.other_operation_in_progress
["host"; _ref; host_operation_to_string `provision; op_ref]
[`provision]
)
(List.find_opt (fun (_, op) -> op = `provision) current_ops) ;
(* The host must be disabled before reboots or shutdowns are permitted *)
if record.Db_actions.host_enabled then
set_errors Api_errors.host_not_disabled []
Expand Down
30 changes: 19 additions & 11 deletions ocaml/xapi/xapi_pif.ml
Original file line number Diff line number Diff line change
Expand Up @@ -926,17 +926,25 @@ let assert_cluster_host_operation_not_in_progress ~__context =
match Db.Cluster.get_all ~__context with
| [] ->
()
| cluster :: _ ->
let ops =
Db.Cluster.get_current_operations ~__context ~self:cluster
|> List.map snd
in
if List.mem `enable ops || List.mem `add ops then
raise
Api_errors.(
Server_error
(other_operation_in_progress, ["Cluster"; Ref.string_of cluster])
)
| cluster :: _ -> (
let ops = Db.Cluster.get_current_operations ~__context ~self:cluster in
match List.find_opt (fun (_, op) -> op = `enable || op = `add) ops with
| Some (op_ref, op_type) ->
raise
Api_errors.(
Server_error
( other_operation_in_progress
, [
"Cluster"
; Ref.string_of cluster
; API.cluster_operation_to_string op_type
; op_ref
]
)
)
| None ->
()
)

(* Block allowing unplug if
- a cluster host is enabled on this PIF
Expand Down
33 changes: 24 additions & 9 deletions ocaml/xapi/xapi_pool_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ type validity = Unknown | Allowed | Disallowed of string * string list
let compute_valid_operations ~__context record pool :
API.pool_allowed_operations -> validity =
let ref = Ref.string_of pool in
let current_ops = List.map snd record.Db_actions.pool_current_operations in
let current_ops = record.Db_actions.pool_current_operations in
let table = (Hashtbl.create 32 : (all_operations, validity) Hashtbl.t) in
let set_validity = Hashtbl.replace table in
(* Start by assuming all operations are allowed. *)
Expand All @@ -118,30 +118,45 @@ let compute_valid_operations ~__context record pool :
in
List.iter populate ops
in
let other_operation_in_progress =
(Api_errors.other_operation_in_progress, [Datamodel_common._pool; ref])
let other_operation_in_progress waiting_op =
let additional_info =
match waiting_op with
| Some (op_ref, op_type) ->
[API.pool_allowed_operations_to_string op_type; op_ref]
| _ ->
[]
in
( Api_errors.other_operation_in_progress
, [Datamodel_common._pool; ref] @ additional_info
)
in
let is_current_op op =
List.exists (fun (_, current_op) -> op = current_op) current_ops
in
let is_current_op = Fun.flip List.mem current_ops in
let blocking =
List.find_opt (fun (op, _) -> is_current_op op) blocking_ops_table
in
let waiting = List.find_opt is_current_op waiting_ops in
let waiting =
List.find_opt
(fun (_, current_op) -> List.mem current_op waiting_ops)
current_ops
in
( match (blocking, waiting) with
| Some (_, reason), _ ->
| Some (_, reason), waiting_current_op ->
(* Mark all potentially blocking operations as invalid due
to the specific blocking operation's "in progress" error. *)
set_errors blocking_ops (reason, []) ;
(* Mark all waiting operations as invalid for the generic
"OTHER_OPERATION_IN_PROGRESS" reason. *)
set_errors waiting_ops other_operation_in_progress
set_errors waiting_ops (other_operation_in_progress waiting_current_op)
(* Note that all_operations ⊆ blocking_ops ∪ waiting_ops, so this
invalidates all operations (with the reason partitioned
between whether the operation is blocking or waiting). *)
| None, Some _ ->
| None, (Some _ as waiting_current_op) ->
(* If there's no blocking operation in current operations, but
there is a waiting operation, invalidate all operations for the
generic reason. Again, this covers every operation. *)
set_errors all_operations other_operation_in_progress
set_errors all_operations (other_operation_in_progress waiting_current_op)
| None, None -> (
(* If there's no blocking or waiting operation in current
operations (i.e. current operations is empty), we can report
Expand Down
41 changes: 26 additions & 15 deletions ocaml/xapi/xapi_sr_operations.ml
Original file line number Diff line number Diff line change
Expand Up @@ -200,24 +200,35 @@ let valid_operations ~__context ?op record _ref' : table =
let check_parallel_ops ~__context _record =
let safe_to_parallelise = [`plug] in
let current_ops =
Xapi_stdext_std.Listext.List.setify (List.map snd current_ops)
List.sort_uniq
(fun (_ref1, op1) (_ref2, op2) -> compare op1 op2)
current_ops
in
(* If there are any current operations, all the non_parallelisable operations
must definitely be stopped *)
if current_ops <> [] then
set_errors Api_errors.other_operation_in_progress
["SR"; _ref; sr_operation_to_string (List.hd current_ops)]
(Xapi_stdext_std.Listext.List.set_difference all_ops safe_to_parallelise) ;
let all_are_parallelisable =
List.fold_left ( && ) true
(List.map (fun op -> List.mem op safe_to_parallelise) current_ops)
in
(* If not all are parallelisable (eg a vdi_resize), ban the otherwise
parallelisable operations too *)
if not all_are_parallelisable then
set_errors Api_errors.other_operation_in_progress
["SR"; _ref; sr_operation_to_string (List.hd current_ops)]
safe_to_parallelise
match current_ops with
| (current_op_ref, current_op_type) :: _ ->
set_errors Api_errors.other_operation_in_progress
["SR"; _ref; sr_operation_to_string current_op_type; current_op_ref]
(Xapi_stdext_std.Listext.List.set_difference all_ops
safe_to_parallelise
) ;
let non_parallelisable_op =
List.find_opt
(fun (_, op) -> not (List.mem op safe_to_parallelise))
current_ops
in
(* If not all are parallelisable (eg a vdi_resize), ban the otherwise
parallelisable operations too *)
Option.iter
(fun (op_ref, op_type) ->
set_errors Api_errors.other_operation_in_progress
["SR"; _ref; sr_operation_to_string op_type; op_ref]
safe_to_parallelise
)
non_parallelisable_op
| [] ->
()
in
let check_cluster_stack_compatible ~__context _record =
(* Check whether there are any conflicts with HA that prevent us from
Expand Down
Loading
Loading