12
12
from crmsh import sh
13
13
from crmsh import xmlutil
14
14
from crmsh import constants
15
+ from crmsh import cibquery
15
16
from crmsh .service_manager import ServiceManager
16
17
17
18
@@ -88,8 +89,8 @@ class SBD(command.UI):
88
89
- sbd purge
89
90
'''
90
91
name = "sbd"
91
- TIMEOUT_TYPES = ("watchdog" , "allocate" , "loop" , "msgwait" )
92
- DISKLESS_TIMEOUT_TYPES = ("watchdog" ,)
92
+ TIMEOUT_TYPES = ("watchdog" , "allocate" , "loop" , "msgwait" , "crashdump-watchdog" )
93
+ DISKLESS_TIMEOUT_TYPES = ("watchdog" , "crashdump-watchdog" )
93
94
SHOW_TYPES = ("disk_metadata" , "sysconfig" , "property" )
94
95
DISKLESS_SHOW_TYPES = ("sysconfig" , "property" )
95
96
PCMK_ATTRS = (
@@ -102,12 +103,15 @@ class SBD(command.UI):
102
103
PCMK_ATTRS_DISKLESS = ('stonith-watchdog-timeout' ,)
103
104
PARSE_RE = re .compile (
104
105
# Match keys with non-empty values, capturing possible suffix
105
- r'(\w+)(?:-(\w+))?=("[^"]+"|[ \w/\d; ]+)'
106
+ r'([\w-]+)-([\w-]+)=([ \w/\d]+)'
106
107
)
107
108
108
109
class SyntaxError(Exception):
    '''
    Raised for malformed "sbd" sub-command arguments.

    NOTE: this intentionally keeps the original name, which shadows the
    builtin SyntaxError inside the enclosing class namespace; callers are
    expected to reference it through the class (e.g. self.SyntaxError).
    '''
110
111
112
class MissingRequiredException(Exception):
    '''
    Raised when a prerequisite for the requested SBD configuration is
    missing (in the visible code: no fence_sbd resource, or kdump.service
    not active on every cluster node).
    '''
114
+
111
115
def __init__ (self ):
112
116
self .device_list_from_config : list [str ] = None
113
117
self .device_meta_dict_runtime : dict [str , int ] = None
@@ -271,43 +275,63 @@ def _parse_args(self, args: tuple[str, ...]) -> dict[str, int|str]:
271
275
logger .debug ("Parsed arguments: %s" , parameter_dict )
272
276
return parameter_dict
273
277
274
@staticmethod
def _adjust_timeout_dict(timeout_dict: dict) -> dict:
    '''
    Reconcile the "watchdog" and "msgwait" entries of timeout_dict.

    - both given: only warn when msgwait < 2*watchdog, values are kept
    - only watchdog given: msgwait is derived as 2*watchdog
    - only msgwait given: watchdog is derived as msgwait // 2
    - neither given: nothing to adjust

    The dict is updated in place and the same object is always returned.
    Previously the function fell off the end (returning None) when no
    branch matched, which broke callers that unpack the result with **.
    '''
    watchdog_timeout = timeout_dict.get("watchdog")
    msgwait_timeout = timeout_dict.get("msgwait")

    if watchdog_timeout and msgwait_timeout:
        if msgwait_timeout < 2 * watchdog_timeout:
            logger.warning("It's recommended to set msgwait timeout >= 2*watchdog timeout")
    elif watchdog_timeout:
        timeout_dict["msgwait"] = 2 * watchdog_timeout
        logger.info("No msgwait timeout specified, use 2*watchdog timeout: %s", 2 * watchdog_timeout)
    elif msgwait_timeout:
        watchdog_timeout = msgwait_timeout // 2
        timeout_dict["watchdog"] = watchdog_timeout
        logger.info("No watchdog timeout specified, use msgwait timeout/2: %s", watchdog_timeout)

    return timeout_dict
278
def set_crashdump_option(self):
    '''
    Set crashdump option for fence_sbd resource

    Reads the CIB via "crm configure show xml", looks up the stonith
    fence_sbd primitive(s) and, when the "crashdump" parameter of the
    first one evaluates as boolean-false, sets it to 1.

    Raises self.MissingRequiredException when no fence_sbd resource is
    configured.
    '''
    shell = sh.LocalShell()
    cib_xml = shell.get_stdout_or_raise_error(None, 'crm configure show xml')
    cib = xmlutil.text2elem(cib_xml)

    fence_sbd_agent = cibquery.ResourceAgent("stonith", "", "fence_sbd")
    res_id_list = cibquery.has_primitive(cib, fence_sbd_agent)
    if not res_id_list:
        logger.error("No fence_sbd resource found")
        raise self.MissingRequiredException("No fence_sbd resource found")

    # Only the first fence_sbd resource is handled, as in the original code.
    res_id = res_id_list[0]
    crashdump_value = cibquery.get_parameter_value(cib, res_id, "crashdump")
    # NOTE(review): behaviour for an unset parameter depends on how
    # utils.is_boolean_false treats the value get_parameter_value returns
    # in that case - confirm it is considered "false" so the option is set.
    if utils.is_boolean_false(crashdump_value):
        cmd = f"crm resource param {res_id} set crashdump 1"
        shell.get_stdout_or_raise_error(None, cmd)
        logger.info("Set crashdump option for fence_sbd resource")
294
+
295
def is_kdump_service_active(self) -> bool:
    '''
    Check that kdump.service is active on every node in self.cluster_nodes

    Every inactive node is reported with its own error message (the check
    deliberately does not stop at the first failure).

    Returns True when no node reports an inactive kdump.service, which
    includes the case of an empty node list; False otherwise.
    '''
    inactive_nodes = [
        node for node in self.cluster_nodes
        if not self.service_manager.service_is_active("kdump.service", node)
    ]
    for node in inactive_nodes:
        logger.error("Kdump service is not active on %s", node)
    return not inactive_nodes
290
302
291
303
def _configure_diskbase (self , parameter_dict : dict ):
292
304
'''
293
305
Configure disk-based SBD based on input parameters and runtime config
294
306
'''
295
307
update_dict = {}
308
+ timeout_dict = {
309
+ item : parameter_dict .get (item ) or self .device_meta_dict_runtime .get (item )
310
+ for item in self .TIMEOUT_TYPES if item != "crashdump-watchdog"
311
+ }
312
+
313
+ crashdump_watchdog_timeout = parameter_dict .get ("crashdump-watchdog" )
314
+ if crashdump_watchdog_timeout :
315
+ if not self .is_kdump_service_active ():
316
+ raise self .MissingRequiredException
317
+ self .set_crashdump_option ()
318
+ timeout_dict ["msgwait" ] = 2 * timeout_dict ["watchdog" ] + crashdump_watchdog_timeout
319
+ logger .info ("Set msgwait timeout to 2*watchdog + crashdump-watchdog: %s" , timeout_dict ["msgwait" ])
320
+ update_dict ["SBD_TIMEOUT_ACTION" ] = "flush,crashdump"
321
+ update_dict ["SBD_OPTS" ] = f"-C { crashdump_watchdog_timeout } "
322
+
323
+ if timeout_dict ["msgwait" ] < 2 * timeout_dict ["watchdog" ]:
324
+ logger .warning ("It's recommended to set msgwait timeout >= 2*watchdog timeout" )
325
+ return
326
+
296
327
watchdog_device = parameter_dict .get ("watchdog-device" )
297
328
if watchdog_device != self .watchdog_device_from_config :
298
329
update_dict ["SBD_WATCHDOG_DEV" ] = watchdog_device
299
- timeout_dict = {k : v for k , v in parameter_dict .items () if k in self .TIMEOUT_TYPES }
300
- is_subdict_timeout = utils .is_subdict (timeout_dict , self .device_meta_dict_runtime )
301
330
302
- if is_subdict_timeout and not update_dict :
331
+ if timeout_dict == self . device_meta_dict_runtime and not update_dict :
303
332
logger .info ("No change in SBD configuration" )
304
333
return
305
334
306
- if not is_subdict_timeout :
307
- timeout_dict = self ._adjust_timeout_dict (timeout_dict )
308
- # merge runtime timeout dict into parameter timeout dict without overwriting
309
- timeout_dict = {** self .device_meta_dict_runtime , ** timeout_dict }
310
-
311
335
sbd_manager = sbd .SBDManager (
312
336
device_list_to_init = self .device_list_from_config ,
313
337
timeout_dict = timeout_dict ,
@@ -320,17 +344,29 @@ def _configure_diskless(self, parameter_dict: dict):
320
344
Configure diskless SBD based on input parameters and runtime config
321
345
'''
322
346
update_dict = {}
347
+ timeout_dict = {}
323
348
watchdog_timeout = parameter_dict .get ("watchdog" )
324
349
if watchdog_timeout and watchdog_timeout != self .watchdog_timeout_from_config :
325
350
update_dict ["SBD_WATCHDOG_TIMEOUT" ] = str (watchdog_timeout )
326
351
watchdog_device = parameter_dict .get ("watchdog-device" )
327
352
if watchdog_device != self .watchdog_device_from_config :
328
353
update_dict ["SBD_WATCHDOG_DEV" ] = watchdog_device
354
+ crashdump_watchdog_timeout = parameter_dict .get ("crashdump-watchdog" )
355
+ if crashdump_watchdog_timeout :
356
+ if not self .is_kdump_service_active ():
357
+ raise self .MissingRequiredException
358
+ update_dict ["SBD_TIMEOUT_ACTION" ] = "flush,crashdump"
359
+ update_dict ["SBD_OPTS" ] = f"-C { crashdump_watchdog_timeout } -Z"
360
+ sbd_watchdog_timeout = watchdog_timeout or self .watchdog_timeout_from_config
361
+ stonith_watchdog_timeout = sbd_watchdog_timeout + crashdump_watchdog_timeout
362
+ logger .info ("Set stonith-watchdog-timeout to SBD_WATCHDOG_TIMEOUT + crashdump-watchdog: %s" , stonith_watchdog_timeout )
363
+ timeout_dict ["stonith-watchdog" ] = stonith_watchdog_timeout
329
364
if not update_dict :
330
365
logger .info ("No change in SBD configuration" )
331
366
return
332
367
333
368
sbd_manager = sbd .SBDManager (
369
+ timeout_dict = timeout_dict ,
334
370
update_dict = update_dict ,
335
371
diskless_sbd = True
336
372
)
@@ -426,6 +462,7 @@ def do_configure(self, context, *args) -> bool:
426
462
if args [0 ] == "show" :
427
463
self ._configure_show (args )
428
464
return True
465
+
429
466
parameter_dict = self ._parse_args (args )
430
467
if sbd .SBDUtils .is_using_disk_based_sbd ():
431
468
self ._configure_diskbase (parameter_dict )
@@ -439,6 +476,8 @@ def do_configure(self, context, *args) -> bool:
439
476
if usage :
440
477
print (usage )
441
478
return False
479
+ except self .MissingRequiredException :
480
+ return False
442
481
443
482
def do_purge (self , context ) -> bool :
444
483
'''
0 commit comments