47
47
from .sh import ShellUtils
48
48
from .ui_node import NodeMgmt
49
49
from .user_of_host import UserOfHost , UserNotFoundError
50
+ import crmsh .healthcheck
50
51
51
52
logger = log .setup_logger (__name__ )
52
53
logger_utils = log .LoggerUtils (logger )
74
75
"/etc/drbd.conf" , "/etc/drbd.d" , "/etc/ha.d/ldirectord.cf" , "/etc/lvm/lvm.conf" , "/etc/multipath.conf" ,
75
76
"/etc/samba/smb.conf" , SYSCONFIG_NFS , SYSCONFIG_PCMK , SYSCONFIG_SBD , PCMK_REMOTE_AUTH , WATCHDOG_CFG ,
76
77
PROFILES_FILE , CRM_CFG , SBD_SYSTEMD_DELAY_START_DIR )
77
- INIT_STAGES = ("ssh" , "csync2" , "csync2_remote" , "qnetd_remote" , "corosync" , "remote_auth" , "sbd" , "cluster" , "ocfs2" , "admin" , "qdevice" )
78
+
79
# Stages the user may request on the command line for "init".
INIT_STAGES_EXTERNAL = (
    "ssh", "csync2", "corosync", "sbd", "cluster", "ocfs2", "admin", "qdevice",
)
# Stages invoked internally (e.g. driven from a peer node); these are
# accepted but not advertised in the user-facing stage list.
INIT_STAGES_INTERNAL = ("csync2_remote", "qnetd_remote", "remote_auth")
INIT_STAGES_ALL = INIT_STAGES_EXTERNAL + INIT_STAGES_INTERNAL
# Stages the user may request on the command line for "join".
JOIN_STAGES_EXTERNAL = ("ssh", "csync2", "ssh_merge", "cluster")
78
83
79
84
80
85
class Context (object ):
@@ -232,15 +237,40 @@ def _validate_cluster_node(self):
232
237
"""
233
238
Validate cluster_node on join side
234
239
"""
235
- if self .cluster_node and self . type == ' join' :
240
+ if self .type == " join" and self . cluster_node :
236
241
user , node = _parse_user_at_host (self .cluster_node , None )
237
242
try :
238
243
# self.cluster_node might be hostname or IP address
239
244
ip_addr = socket .gethostbyname (node )
240
245
if utils .InterfacesInfo .ip_in_local (ip_addr ):
241
246
utils .fatal ("Please specify peer node's hostname or IP address" )
242
247
except socket .gaierror as err :
243
- utils .fatal ("\" {}\" : {}" .format (node , err ))
248
+ utils .fatal (f"\" { node } \" : { err } " )
249
+
250
def _validate_stage(self):
    """
    Validate the stage argument against the bootstrap type.

    With no stage given, only require that the cluster is not already
    running.  With a stage, check that it is a known stage for the
    current bootstrap type ("init" or "join") and that the cluster's
    running state matches what the stage requires.

    Aborts via utils.fatal() on any violation.
    """
    if not self.stage:
        # A full bootstrap run requires an inactive cluster.
        if self.cluster_is_running:
            utils.fatal("Cluster is already running!")
        return

    if self.type == "init":
        # Internal stages are accepted, but deliberately omitted from
        # the error message listing available stages.
        if self.stage not in INIT_STAGES_ALL:
            utils.fatal(f"Invalid stage: {self.stage}(available stages: {', '.join(INIT_STAGES_EXTERNAL)})")
        if self.stage in ("admin", "qdevice", "ocfs2") and not self.cluster_is_running:
            utils.fatal(f"Cluster is inactive, can't run '{self.stage}' stage")
        if self.stage in ("corosync", "cluster") and self.cluster_is_running:
            utils.fatal(f"Cluster is active, can't run '{self.stage}' stage")

    elif self.type == "join":
        if self.stage not in JOIN_STAGES_EXTERNAL:
            utils.fatal(f"Invalid stage: {self.stage}(available stages: {', '.join(JOIN_STAGES_EXTERNAL)})")
        # self.stage is known truthy at this point (early return above),
        # so only the cluster_node check is needed here.
        if self.cluster_node is None:
            utils.fatal(f"Can't use stage({self.stage}) without specifying cluster node")
        if self.stage == "cluster" and self.cluster_is_running:
            utils.fatal(f"Cluster is active, can't run '{self.stage}' stage")
244
274
245
275
def validate_option (self ):
246
276
"""
@@ -263,6 +293,7 @@ def validate_option(self):
263
293
self .skip_csync2 = utils .get_boolean (os .getenv ("SKIP_CSYNC2_SYNC" ))
264
294
if self .skip_csync2 and self .stage :
265
295
utils .fatal ("-x option or SKIP_CSYNC2_SYNC can't be used with any stage" )
296
+ self ._validate_stage ()
266
297
self ._validate_cluster_node ()
267
298
self ._validate_nodes_option ()
268
299
self ._validate_sbd_option ()
@@ -553,7 +584,7 @@ def my_hostname_resolves():
553
584
return False
554
585
555
586
556
- def check_prereqs (stage ):
587
+ def check_prereqs ():
557
588
warned = False
558
589
559
590
if not my_hostname_resolves ():
@@ -1710,6 +1741,9 @@ def join_ssh_impl(local_user, seed_host, seed_user, ssh_public_keys: typing.List
1710
1741
change_user_shell ('hacluster' )
1711
1742
swap_public_ssh_key_for_secondary_user (sh .cluster_shell (), seed_host , 'hacluster' )
1712
1743
1744
+ if _context .stage :
1745
+ setup_passwordless_with_other_nodes (seed_host , seed_user )
1746
+
1713
1747
1714
1748
def join_ssh_with_ssh_agent (
1715
1749
local_shell : sh .LocalShell ,
@@ -2367,46 +2401,87 @@ def decrease_expected_votes():
2367
2401
corosync .set_value ("quorum.expected_votes" , str (new_quorum ))
2368
2402
2369
2403
2404
def ssh_stage_finished():
    """
    Detect whether the ssh stage has already been completed, i.e.
    passwordless hacluster authentication is set up on this node.
    """
    checker = crmsh.healthcheck.PasswordlessHaclusterAuthenticationFeature()
    return checker.check_quick() and checker.check_local([utils.this_node()])
2410
+
2411
+
2412
def csync2_stage_finished():
    """
    Detect whether the csync2 stage has already been completed, i.e.
    the csync2 service is active.
    """
    service_manager = ServiceManager()
    return service_manager.service_is_active(CSYNC2_SERVICE)
2417
+
2418
+
2419
def corosync_stage_finished():
    """
    Detect whether the corosync stage has already been completed, i.e.
    the corosync configuration file exists.
    """
    conf_path = corosync.conf()
    return os.path.exists(conf_path)
2424
+
2425
+
2426
# Ordered stage -> (function, is_internal) tables consumed by
# check_stage_dependency().  Dict insertion order defines the
# dependency order of the stages.
#
# For an external entry (is_internal=False) the function reports
# whether the stage is already finished; for an internal entry
# (is_internal=True) the function is the stage itself and is simply
# executed.
INIT_STAGE_CHECKER = {
    "ssh": (ssh_stage_finished, False),
    "csync2": (csync2_stage_finished, False),
    "corosync": (corosync_stage_finished, False),
    "remote_auth": (init_remote_auth, True),
    "sbd": (lambda: True, False),  # always treated as finished
    "upgradeutil": (init_upgradeutil, True),
    "cluster": (is_online, False),
}


JOIN_STAGE_CHECKER = {
    "ssh": (ssh_stage_finished, False),
    "csync2": (csync2_stage_finished, False),
    "ssh_merge": (lambda: True, False),  # always treated as finished
    "cluster": (is_online, False),
}
2445
+
2446
+
2447
def check_stage_dependency(stage):
    """
    Verify that every stage ordered before ``stage`` has been completed.

    Walks the relevant checker table (init or join, chosen from
    _context.type) in insertion order up to ``stage``: internal entries
    are executed directly, external entries are queried and a fatal
    error is raised for the first one that has not finished yet.
    Unknown stages are ignored.
    """
    checkers = INIT_STAGE_CHECKER if _context.type == "init" else JOIN_STAGE_CHECKER
    if stage not in checkers:
        return
    for name, (func, is_internal) in checkers.items():
        if name == stage:
            break
        if is_internal:
            func()
        elif not func():
            utils.fatal(f"Please run '{name}' stage first")
2460
+
2461
+
2370
2462
def bootstrap_init (context ):
2371
2463
"""
2372
2464
Init cluster process
2373
2465
"""
2374
2466
global _context
2375
2467
_context = context
2468
+ stage = _context .stage
2376
2469
2377
2470
init ()
2378
2471
2379
- stage = _context .stage
2380
- if stage is None :
2381
- stage = ""
2382
-
2383
- # vgfs stage requires running cluster, everything else requires inactive cluster,
2384
- # except ssh and csync2 (which don't care) and csync2_remote (which mustn't care,
2385
- # just in case this breaks ha-cluster-join on another node).
2386
- if stage in ("vgfs" , "admin" , "qdevice" , "ocfs2" ):
2387
- if not _context .cluster_is_running :
2388
- utils .fatal ("Cluster is inactive - can't run %s stage" % (stage ))
2389
- elif stage == "" :
2390
- if _context .cluster_is_running :
2391
- utils .fatal ("Cluster is currently active - can't run" )
2392
- elif stage not in ("ssh" , "csync2" , "csync2_remote" , "qnetd_remote" , "sbd" , "ocfs2" ):
2393
- if _context .cluster_is_running :
2394
- utils .fatal ("Cluster is currently active - can't run %s stage" % (stage ))
2395
-
2396
2472
_context .load_profiles ()
2397
2473
_context .init_sbd_manager ()
2398
2474
2399
- # Need hostname resolution to work, want NTP (but don't block csync2_remote)
2400
- if stage not in ('csync2_remote' , 'qnetd_remote' ):
2401
- check_tty ()
2402
- if not check_prereqs (stage ):
2403
- return
2404
- else :
2475
+ if stage in ('csync2_remote' , 'qnetd_remote' ):
2405
2476
args = _context .args
2406
- logger_utils .log_only_to_file ("args: {}" . format ( args ) )
2477
+ logger_utils .log_only_to_file (f "args: { args } " )
2407
2478
if len (args ) != 2 :
2408
- utils .fatal (f"Expected NODE argument to { stage } stage" )
2479
+ utils .fatal (f"Expected NODE argument for ' { stage } ' stage" )
2409
2480
_context .cluster_node = args [1 ]
2481
+ else :
2482
+ check_tty ()
2483
+ if not check_prereqs ():
2484
+ return
2410
2485
2411
2486
if stage and _context .cluster_is_running and \
2412
2487
not ServiceManager (shell = sh .ClusterShellAdaptorForLocalShell (sh .LocalShell ())).service_is_active (CSYNC2_SERVICE ):
@@ -2416,6 +2491,7 @@ def bootstrap_init(context):
2416
2491
_context .node_list_in_cluster = [utils .this_node ()]
2417
2492
2418
2493
if stage != "" :
2494
+ check_stage_dependency (stage )
2419
2495
globals ()["init_" + stage ]()
2420
2496
else :
2421
2497
init_ssh ()
@@ -2492,15 +2568,13 @@ def bootstrap_join(context):
2492
2568
2493
2569
check_tty ()
2494
2570
2495
- corosync_active = ServiceManager (sh .ClusterShellAdaptorForLocalShell (sh .LocalShell ())).service_is_active ("corosync.service" )
2496
- if corosync_active and _context .stage != "ssh" :
2497
- utils .fatal ("Abort: Cluster is currently active. Run this command on a node joining the cluster." )
2498
-
2499
- if not check_prereqs ("join" ):
2571
+ if not check_prereqs ():
2500
2572
return
2501
2573
2502
2574
if _context .stage != "" :
2503
2575
remote_user , cluster_node = _parse_user_at_host (_context .cluster_node , _context .current_user )
2576
+ init_upgradeutil ()
2577
+ check_stage_dependency (_context .stage )
2504
2578
globals ()["join_" + _context .stage ](cluster_node , remote_user )
2505
2579
else :
2506
2580
if not _context .yes_to_all and _context .cluster_node is None :
@@ -2527,7 +2601,6 @@ def bootstrap_join(context):
2527
2601
service_manager = ServiceManager ()
2528
2602
_context .node_list_in_cluster = utils .fetch_cluster_node_list_from_node (cluster_node )
2529
2603
setup_passwordless_with_other_nodes (cluster_node , remote_user )
2530
- join_remote_auth (cluster_node , remote_user )
2531
2604
_context .skip_csync2 = not service_manager .service_is_active (CSYNC2_SERVICE , cluster_node )
2532
2605
if _context .skip_csync2 :
2533
2606
service_manager .stop_service (CSYNC2_SERVICE , disable = True )
@@ -2557,14 +2630,6 @@ def join_ocfs2(peer_host, peer_user):
2557
2630
ocfs2_inst .join_ocfs2 (peer_host )
2558
2631
2559
2632
2560
- def join_remote_auth (node , user ):
2561
- if os .path .exists (PCMK_REMOTE_AUTH ):
2562
- utils .rmfile (PCMK_REMOTE_AUTH )
2563
- pcmk_remote_dir = os .path .dirname (PCMK_REMOTE_AUTH )
2564
- utils .mkdirs_owned (pcmk_remote_dir , mode = 0o750 , gid = "haclient" )
2565
- utils .touch (PCMK_REMOTE_AUTH )
2566
-
2567
-
2568
2633
def remove_qdevice ():
2569
2634
"""
2570
2635
Remove qdevice service and configuration from cluster
0 commit comments