diff --git a/.github/workflows/crmsh-ci.yml b/.github/workflows/crmsh-ci.yml
index 0bfe67ce80..6dc6b89781 100644
--- a/.github/workflows/crmsh-ci.yml
+++ b/.github/workflows/crmsh-ci.yml
@@ -415,6 +415,21 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
flags: integration
+ functional_test_migration:
+ runs-on: ubuntu-24.04
+ timeout-minutes: 40
+ steps:
+ - uses: actions/checkout@v4
+ - name: functional test for migration
+ run: |
+ echo '{ "exec-opts": ["native.cgroupdriver=systemd"] }' | sudo tee /etc/docker/daemon.json
+ sudo systemctl restart docker.service
+ $CONTAINER_SCRIPT `$GET_INDEX_OF migration` && $CONTAINER_SCRIPT -d && $CONTAINER_SCRIPT -u `$GET_INDEX_OF migration`
+ - uses: codecov/codecov-action@v4
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }}
+ flags: integration
+
original_regression_test:
runs-on: ubuntu-24.04
timeout-minutes: 40
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000..05a3e1ed69
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include crmsh *.txt
diff --git a/codecov.yml b/codecov.yml
index 47000a652d..9c67c4c4a8 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -8,7 +8,7 @@ coverage:
threshold: 0.35%
codecov:
notify:
- after_n_builds: 28
+ after_n_builds: 29
comment:
- after_n_builds: 28
+ after_n_builds: 29
layout: "condensed_header, flags, files, condensed_footer"
diff --git a/crmsh/cibquery.py b/crmsh/cibquery.py
new file mode 100644
index 0000000000..c2ffa9d347
--- /dev/null
+++ b/crmsh/cibquery.py
@@ -0,0 +1,51 @@
+"""utilities for parsing CIB xml"""
+import dataclasses
+import typing
+
+import lxml.etree
+
+from crmsh import constants
+
+
+@dataclasses.dataclass(frozen=True)
+class ResourceAgent:
+ m_class: str
+ m_provider: typing.Optional[str]
+ m_type: str
+
+
+@dataclasses.dataclass(frozen=True)
+class ClusterNode:
+ node_id: int
+ uname: str
+
+
+def get_configured_resource_agents(cib: lxml.etree.Element) -> typing.Set[ResourceAgent]:
+ return set(
+ ResourceAgent(e.get('class'), e.get('provider'), e.get('type'))
+ for e in cib.xpath('/cib/configuration/resources//primitive')
+ )
+
+
+def has_primitive_filesystem_with_fstype(cib: lxml.etree.Element, fstype: str) -> bool:
+ return bool(cib.xpath(
+ '/cib/configuration/resources//primitive[@class="ocf" and @provider="heartbeat" and @type="Filesystem"]'
+ f'/instance_attributes/nvpair[@name="fstype" and @value="{fstype}"]'
+ ))
+
+def get_cluster_nodes(cib: lxml.etree.Element) -> list[ClusterNode]:
+ """Return a list of cluster nodes, excluding pacemaker-remote nodes"""
+ result = list()
+ for element in cib.xpath(constants.XML_NODE_PATH):
+ node_id = element.get('id')
+ uname = element.get('uname')
+ if element.get('type') == 'remote':
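+            # skip remote nodes: they are backed by a pacemaker:remote
+            # primitive whose "server" attribute names this node entry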
+ xpath = "//primitive[@provider='pacemaker' and @type='remote']/instance_attributes/nvpair[@name='server' and @value='{}']".format(
+ uname if uname is not None else node_id
+ )
+ if cib.xpath(xpath):
+ continue
+ assert node_id
+ assert uname
+ result.append(ClusterNode(int(node_id), uname))
+ return result
diff --git a/crmsh/corosync.py b/crmsh/corosync.py
index e2919f7c4e..d73b26ec8f 100644
--- a/crmsh/corosync.py
+++ b/crmsh/corosync.py
@@ -85,7 +85,7 @@ def configure_two_node(removing: bool = False, qdevice_adding: bool = False) ->
def conf():
- return os.getenv('COROSYNC_MAIN_CONFIG_FILE', '/etc/corosync/corosync.conf')
+ return os.environ.get('COROSYNC_MAIN_CONFIG_FILE', '/etc/corosync/corosync.conf')
def check_tools():
diff --git a/crmsh/migration-supported-resource-agents.txt b/crmsh/migration-supported-resource-agents.txt
new file mode 100644
index 0000000000..af24a367f9
--- /dev/null
+++ b/crmsh/migration-supported-resource-agents.txt
@@ -0,0 +1,147 @@
+ocf:heartbeat:CTDB
+ocf:heartbeat:ClusterMon
+ocf:heartbeat:Delay
+ocf:heartbeat:Dummy
+ocf:heartbeat:Filesystem
+ocf:heartbeat:IPaddr2
+ocf:heartbeat:IPsrcaddr
+ocf:heartbeat:IPv6addr
+ocf:heartbeat:LVM-activate
+ocf:heartbeat:MailTo
+ocf:heartbeat:NodeUtilization
+ocf:heartbeat:Raid1
+ocf:heartbeat:Route
+ocf:heartbeat:SAPDatabase
+ocf:heartbeat:SAPInstance
+ocf:heartbeat:SendArp
+ocf:heartbeat:Squid
+ocf:heartbeat:Stateful
+ocf:heartbeat:VirtualDomain
+ocf:heartbeat:WAS
+ocf:heartbeat:WAS6
+ocf:heartbeat:Xinetd
+ocf:heartbeat:aliyun-vpc-move-ip
+ocf:heartbeat:apache
+ocf:heartbeat:aws-vpc-move-ip
+ocf:heartbeat:aws-vpc-route53
+ocf:heartbeat:awseip
+ocf:heartbeat:awsvip
+ocf:heartbeat:azure-events
+ocf:heartbeat:azure-events-az
+ocf:heartbeat:azure-lb
+ocf:heartbeat:conntrackd
+ocf:heartbeat:corosync-qnetd
+ocf:heartbeat:crypt
+ocf:heartbeat:db2
+ocf:heartbeat:dhcpd
+ocf:heartbeat:docker
+ocf:heartbeat:docker-compose
+ocf:heartbeat:dummypy
+ocf:heartbeat:ethmonitor
+ocf:heartbeat:exportfs
+ocf:heartbeat:galera
+ocf:heartbeat:garbd
+ocf:heartbeat:gcp-ilb
+ocf:heartbeat:gcp-pd-move
+ocf:heartbeat:gcp-vpc-move-ip
+ocf:heartbeat:gcp-vpc-move-vip
+ocf:heartbeat:iSCSILogicalUnit
+ocf:heartbeat:iSCSITarget
+ocf:heartbeat:iface-bridge
+ocf:heartbeat:iface-macvlan
+ocf:heartbeat:iface-vlan
+ocf:heartbeat:ldirectord
+ocf:heartbeat:lvmlockd
+ocf:heartbeat:mariadb
+ocf:heartbeat:mdraid
+ocf:heartbeat:mpathpersist
+ocf:heartbeat:mysql
+ocf:heartbeat:mysql-proxy
+ocf:heartbeat:named
+ocf:heartbeat:nfsnotify
+ocf:heartbeat:nfsserver
+ocf:heartbeat:nginx
+ocf:heartbeat:nvmet-namespace
+ocf:heartbeat:nvmet-port
+ocf:heartbeat:nvmet-subsystem
+ocf:heartbeat:oraasm
+ocf:heartbeat:oracle
+ocf:heartbeat:oralsnr
+ocf:heartbeat:osceip
+ocf:heartbeat:ovsmonitor
+ocf:heartbeat:pgagent
+ocf:heartbeat:pgsql
+ocf:heartbeat:podman
+ocf:heartbeat:portblock
+ocf:heartbeat:postfix
+ocf:heartbeat:powervs-subnet
+ocf:heartbeat:rabbitmq-cluster
+ocf:heartbeat:rabbitmq-server-ha
+ocf:heartbeat:redis
+ocf:heartbeat:rsyncd
+ocf:heartbeat:sfex
+ocf:heartbeat:sg_persist
+ocf:heartbeat:slapd
+ocf:heartbeat:storage-mon
+ocf:heartbeat:symlink
+ocf:heartbeat:tomcat
+ocf:suse:aws-vpc-move-ip
+ocf:suse:SAPHanaController
+ocf:suse:SAPHanaFilesystem
+ocf:suse:SAPHanaTopology
+stonith:fence_aliyun
+stonith:fence_alom
+stonith:fence_apc
+stonith:fence_apc-snmp
+stonith:fence_aws
+stonith:fence_azure-arm
+stonith:fence_bladecenter
+stonith:fence_brocade
+stonith:fence_cisco-mds
+stonith:fence_cisco-ucs
+stonith:fence_compute
+stonith:fence_docker
+stonith:fence_drac5
+stonith:fence_eaton-snmp
+stonith:fence_eaton-ssh
+stonith:fence_emerson
+stonith:fence_eps
+stonith:fence_gce
+stonith:fence_hds-cb
+stonith:fence_hpblade
+stonith:fence_ibm-powervs
+stonith:fence_ibm-vpc
+stonith:fence_ibmblade
+stonith:fence_ibmz
+stonith:fence_ifmib
+stonith:fence_ilo-moonshot
+stonith:fence_ilo-mp
+stonith:fence_ilo-ssh
+stonith:fence_ilo2
+stonith:fence_intelmodular
+stonith:fence_ipdu
+stonith:fence_ipmilan
+stonith:fence_ironic
+stonith:fence_kdump
+stonith:fence_ldom
+stonith:fence_lpar
+stonith:fence_mpath
+stonith:fence_netio
+stonith:fence_openstack
+stonith:fence_pve
+stonith:fence_raritan
+stonith:fence_rcd-serial
+stonith:fence_redfish
+stonith:fence_rhevm
+stonith:fence_rsa
+stonith:fence_rsb
+stonith:fence_sanbox2
+stonith:fence_sbd
+stonith:fence_scsi
+stonith:fence_vbox
+stonith:fence_virsh
+stonith:fence_vmware
+stonith:fence_vmware-rest
+stonith:fence_wti
+stonith:fence_xenapi
+stonith:fence_zvm
diff --git a/crmsh/migration.py b/crmsh/migration.py
new file mode 100644
index 0000000000..7e54cbd4c2
--- /dev/null
+++ b/crmsh/migration.py
@@ -0,0 +1,670 @@
+import argparse
+import dataclasses
+import enum
+import glob
+import importlib.resources
+import ipaddress
+import itertools
+import json
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import threading
+import tempfile
+import typing
+
+import lxml.etree
+
+from crmsh import cibquery
+from crmsh import constants
+from crmsh import corosync
+from crmsh import corosync_config_format
+from crmsh import iproute2
+from crmsh import parallax
+from crmsh import sh
+from crmsh import utils
+from crmsh import xmlutil
+from crmsh.prun import prun
+
+logger = logging.getLogger(__name__)
+
+
+SAP_HANA_RESOURCE_AGENTS = {
+ cibquery.ResourceAgent('ocf', 'suse', 'SAPHana'),
+ cibquery.ResourceAgent('ocf', 'suse', 'SAPHanaController'),
+ cibquery.ResourceAgent('ocf', 'suse', 'SAPHanaTopology'),
+}
+
+
+class MigrationFailure(Exception):
+ pass
+
+
+class CheckReturnCode(enum.IntEnum):
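+    # Ordered by severity: callers combine per-node results with max(), so
+    # the most severe outcome wins.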
+ ALREADY_MIGRATED = 0
+ PASS_NO_AUTO_FIX = 1
+ PASS_NEED_AUTO_FIX = 2
+ BLOCKED_NEED_MANUAL_FIX = 3
+ SSH_ERROR = 255
+
+
+class CheckResultHandler:
+ LEVEL_ERROR = 1
+ LEVEL_WARN = 2
+
+ def log_info(self, fmt: str, *args):
+ raise NotImplementedError
+
+ def handle_problem(self, need_auto_fix: bool, is_blocker: bool, level: int, title: str, detail: typing.Iterable[str]):
+ raise NotImplementedError
+
+ def end(self):
+ raise NotImplementedError
+
+ def to_check_return_code(self) -> CheckReturnCode:
+ raise NotImplementedError
+
+
+class CheckResultJsonHandler(CheckResultHandler):
+ def __init__(self, indent: typing.Optional[int] = None):
+ self._indent = indent
+ self.json_result = {
+ "problems": [],
+ }
+
+ def log_info(self, fmt: str, *args):
+ logger.debug(fmt, *args)
+
+ def handle_problem(self, need_auto_fix: bool, is_blocker: bool, level: int, title: str, detail: typing.Iterable[str]):
+ self.json_result["problems"].append({
+ "need_auto_fix": need_auto_fix,
+ "is_blocker": is_blocker,
+ "level": level,
+ "title": title,
+ "descriptions": detail if isinstance(detail, list) else list(detail),
+ })
+
+ def end(self):
+ json.dump(
+ self.json_result,
+ sys.stdout,
+ ensure_ascii=False,
+ indent=self._indent,
+ )
+ sys.stdout.write('\n')
+
+ def to_check_return_code(self) -> CheckReturnCode:
+ ret = CheckReturnCode.ALREADY_MIGRATED
+ for problem in self.json_result['problems']:
+ if problem.get('is_blocker', False):
+ ret = max(CheckReturnCode.BLOCKED_NEED_MANUAL_FIX, ret)
+ elif problem.get('need_auto_fix'):
+ ret = max(CheckReturnCode.PASS_NEED_AUTO_FIX, ret)
+ else:
+ ret = max(CheckReturnCode.PASS_NO_AUTO_FIX, ret)
+ return ret
+
+
+class CheckResultInteractiveHandler(CheckResultHandler):
+ def __init__(self):
+ self.block_migration = False
+ self.has_problems = False
+ self.need_auto_fix = False
+
+ def log_info(self, fmt: str, *args):
+ self.write_in_color(sys.stdout, constants.GREEN, '[INFO] ')
+ print(fmt % args)
+
+    def handle_problem(self, need_auto_fix: bool, is_blocker: bool, level: int, title: str, details: typing.Iterable[str]):
+ self.has_problems = True
+ self.block_migration = self.block_migration or is_blocker
+ self.need_auto_fix = self.need_auto_fix or need_auto_fix
+ match level:
+ case self.LEVEL_ERROR:
+ self.write_in_color(sys.stdout, constants.YELLOW, '[FAIL] ')
+ case self.LEVEL_WARN:
+ self.write_in_color(sys.stdout, constants.YELLOW, '[WARN] ')
+ print(title)
+ for line in details:
+ sys.stdout.write(' ')
+ print(line)
+
+ @staticmethod
+ def write_in_color(f, color: str, text: str):
+ if f.isatty():
+ f.write(color)
+ f.write(text)
+ f.write(constants.END)
+ else:
+ f.write(text)
+
+ def end(self):
+ sys.stdout.write('\n')
+
+ def to_check_return_code(self) -> CheckReturnCode:
+ if self.block_migration:
+ ret = CheckReturnCode.BLOCKED_NEED_MANUAL_FIX
+ elif self.need_auto_fix:
+ ret = CheckReturnCode.PASS_NEED_AUTO_FIX
+ elif self.has_problems:
+ ret = CheckReturnCode.PASS_NO_AUTO_FIX
+ else:
+ ret = CheckReturnCode.ALREADY_MIGRATED
+ return ret
+
+
+def migrate():
+ try:
+ match _check_impl(local=False, json='', summary=False):
+ case CheckReturnCode.ALREADY_MIGRATED:
+ logger.info("This cluster works on SLES 16. No migration is needed.")
+ return 0
+ case CheckReturnCode.PASS_NO_AUTO_FIX:
+ logger.info("This cluster works on SLES 16 with some warnings. Please fix the remaining warnings manually.")
+ return 0
+ case CheckReturnCode.PASS_NEED_AUTO_FIX:
+ logger.info('Starting migration...')
+ migrate_corosync_conf()
+ logger.info('Finished migration.')
+ return 0
+ case _:
+ raise MigrationFailure('Unable to start migration.')
+ except MigrationFailure as e:
+ logger.error('%s', e)
+ return 1
+
+
+def check(args: typing.Sequence[str]) -> int:
+ parser = argparse.ArgumentParser(args[0])
+ parser.add_argument('--json', nargs='?', const='pretty', choices=['oneline', 'pretty'])
+ parser.add_argument('--local', action='store_true')
+ parsed_args = parser.parse_args(args[1:])
+ ret = _check_impl(parsed_args.local or parsed_args.json is not None, parsed_args.json, parsed_args.json is None)
+ if not parsed_args.json:
+ print('****** summary ******')
+ match ret:
+ case CheckReturnCode.ALREADY_MIGRATED:
+ CheckResultInteractiveHandler.write_in_color(sys.stdout, constants.GREEN, '[INFO]')
+ sys.stdout.write(' This cluster works on SLES 16. No migration is needed.\n')
+ case CheckReturnCode.PASS_NO_AUTO_FIX:
+ CheckResultInteractiveHandler.write_in_color(sys.stdout, constants.GREEN, '[PASS]')
+ sys.stdout.write(' This cluster works on SLES 16 with some warnings. Please fix the remaining warnings manually.\n')
+ case CheckReturnCode.PASS_NEED_AUTO_FIX:
+ CheckResultInteractiveHandler.write_in_color(sys.stdout, constants.GREEN, '[INFO]')
+                sys.stdout.write(' Please run "crm cluster health sles16 --fix" on any one of the above nodes.\n')
+ CheckResultInteractiveHandler.write_in_color(sys.stdout, constants.GREEN, '[PASS]')
+ sys.stdout.write(' This cluster is good to migrate to SLES 16.\n')
+ case _:
+ CheckResultInteractiveHandler.write_in_color(sys.stdout, constants.RED, '[FAIL]')
+ sys.stdout.write(' The pacemaker cluster stack can not migrate to SLES 16.\n')
+ return ret
+
+
+def _check_impl(local: bool, json: str, summary: bool) -> CheckReturnCode:
+ assert not summary or not bool(json)
+ assert local or not bool(json)
+ match json:
+ case 'oneline':
+ handler = CheckResultJsonHandler()
+ case 'pretty':
+ handler = CheckResultJsonHandler(indent=2)
+ case _:
+ handler = CheckResultInteractiveHandler()
+ if local:
+ check_remote_yield = itertools.repeat(0)
+ check_local(handler)
+ else:
+ check_remote_yield = check_remote()
+ next(check_remote_yield)
+ print('------ node: localhost ------')
+ check_local(handler)
+ print('\n------ cib ------')
+ check_global(handler)
+ handler.end()
+ ret = handler.to_check_return_code()
+ if check_remote_yield:
+ remote_ret = next(check_remote_yield)
+ ret = max(remote_ret, ret)
+ return ret
+
+
+def check_local(handler: CheckResultHandler):
+ check_dependency_version(handler)
+ check_unsupported_corosync_features(handler)
+
+
+def check_remote():
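+    # Generator protocol: the first next() starts the remote checks in a
+    # background thread so local checks can run concurrently; the second
+    # next() joins the thread and yields the combined CheckReturnCode.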
+ handler = CheckResultInteractiveHandler()
+ class CheckRemoteThread(threading.Thread):
+ def run(self):
+ self.result = prun.prun({
+ node: 'crm cluster health sles16 --local --json=oneline'
+ for node in utils.list_cluster_nodes_except_me()
+ })
+ prun_thread = CheckRemoteThread()
+ prun_thread.start()
+ yield
+ prun_thread.join()
+ ret = CheckReturnCode.ALREADY_MIGRATED
+ for host, result in prun_thread.result.items():
+ sys.stdout.write(f'------ node: {host} ------\n')
+ match result:
+ case prun.SSHError() as e:
+ handler.write_in_color(
+ sys.stdout, constants.YELLOW,
+ str(e)
+ )
+ sys.stdout.write('\n')
+ ret = CheckReturnCode.SSH_ERROR
+ case prun.ProcessResult() as result:
+ try:
+ check_result = json.loads(result.stdout.decode('utf-8'))
+ except (UnicodeDecodeError, json.JSONDecodeError):
+ print(result.stdout.decode('utf-8', 'backslashreplace'))
+ handler.write_in_color(
+ sys.stderr, constants.YELLOW,
+ result.stderr.decode('utf-8', 'backslashreplace')
+ )
+ sys.stdout.write('\n')
+ # cannot pass the exit status through,
+ # as all failed exit status become 1 in ui_context.Context.run()
+                    ret = max(CheckReturnCode.BLOCKED_NEED_MANUAL_FIX, ret)
+ else:
+ handler = CheckResultInteractiveHandler()
+ problems = check_result.get("problems", list())
+ for problem in problems:
+ handler.handle_problem(
+ problem.get("need_auto_fix", False), problem.get("is_blocker", False),
+ problem.get("level", handler.LEVEL_ERROR),
+ problem.get("title", ""), problem.get("descriptions"),
+ )
+ handler.end()
+                    ret = max(handler.to_check_return_code(), ret)
+ yield ret
+
+
+def check_global(handler: CheckResultHandler):
+ cib = xmlutil.text2elem(sh.LocalShell().get_stdout_or_raise_error(None, 'crm configure show xml'))
+ check_cib_schema_version(handler, cib)
+ check_unsupported_resource_agents(handler, cib)
+
+
+def check_dependency_version(handler: CheckResultHandler):
+ handler.log_info('Checking dependency version...')
+ shell = sh.LocalShell()
+ out = shell.get_stdout_or_raise_error(None, 'corosync -v')
+ _check_version_range(
+ handler,
+ 'Corosync', (3,),
+ re.compile(r"version\s+'(\d+(?:\.\d+)*)'"),
+        out,
+ )
+
+
+def _check_version_range(
+ handler: CheckResultHandler, component_name: str,
+ minimum: tuple,
+ pattern,
+ text: str,
+):
+ match = pattern.search(text)
+ if not match:
+ handler.handle_problem(
+ True, True, handler.LEVEL_ERROR,
+ f'{component_name} version not supported',
+ [
+ 'Unknown version:',
+ text,
+ ],
+ )
+ else:
+ version = tuple(int(x) for x in match.group(1).split('.'))
+        if version < minimum:
+ handler.handle_problem(
+ True, True, handler.LEVEL_ERROR,
+ f'{component_name} version not supported', [
+ 'Supported version: {} >= {}'.format(
+ component_name,
+ '.'.join(str(x) for x in minimum),
+ ),
+ f'Actual version: {component_name} == {match.group(1)}',
+ ],
+ )
+
+
+def check_unsupported_corosync_features(handler: CheckResultHandler):
+ handler.log_info("Checking used corosync features...")
+ conf_path = corosync.conf()
+ with open(conf_path, 'r', encoding='utf-8') as f:
+ config = corosync_config_format.DomParser(f).dom()
+ corosync.ConfParser.transform_dom_with_list_schema(config)
+ if config['totem'].get('rrp_mode', None) in {'active', 'passive'}:
+ handler.handle_problem(
+ True, False, handler.LEVEL_WARN,
+ 'Corosync RRP is deprecated in corosync 3.', [
+ 'Run "crm health sles16 --fix" to migrate it to knet multilink.',
+ ],
+ )
+ _check_unsupported_corosync_transport(handler, config)
+
+
+def _check_unsupported_corosync_transport(handler: CheckResultHandler, dom):
+ transport = dom['totem'].get('transport', None)
+ if transport == 'knet':
+ return
+ if transport is None:
+ try:
+ dom['totem']['interface'][0]['bindnetaddr']
+ except KeyError:
+ # looks like a corosync 3 config
+ return
+ handler.handle_problem(
+ True, False, handler.LEVEL_WARN,
+ f'Corosync transport "{transport}" is deprecated in corosync 3. Please use knet.', [],
+ )
+
+
+def migrate_corosync_conf():
+ conf_path = corosync.conf()
+ with open(conf_path, 'r', encoding='utf-8') as f:
+ config = corosync_config_format.DomParser(f).dom()
+ corosync.ConfParser.transform_dom_with_list_schema(config)
+ logger.info('Migrating corosync configuration...')
+ migrate_corosync_conf_impl(config)
+ shutil.copy(conf_path, conf_path + '.bak')
+ with utils.open_atomic(conf_path, 'w', fsync=True, encoding='utf-8') as f:
+ corosync_config_format.DomSerializer(config, f)
+ os.fchmod(f.fileno(), 0o644)
+ logger.info(
+        'Finished migrating corosync configuration. The original configuration is saved as %s.bak',
+ os.path.basename(conf_path),
+ )
+ for host, result in prun.pcopy_to_remote(conf_path, utils.list_cluster_nodes_except_me(), conf_path).items():
+ match result:
+ case None:
+ pass
+ case prun.PRunError() as e:
+ logger.error("Failed to copy crmsh.conf to host %s: %s", host, e)
+
+
+def migrate_corosync_conf_impl(config):
+ assert 'totem' in config
+ corosync.ConfParser.transform_dom_with_list_schema(config)
+ migrate_transport(config)
+ migrate_crypto(config)
+ migrate_rrp(config)
+ # TODO: other migrations
+
+
+def migrate_transport(dom):
+ match dom['totem'].get('transport', None):
+ case 'knet':
+ return
+ case 'udpu':
+ migrate_udpu(dom)
+ case 'udp':
+ migrate_multicast(dom)
+ case _:
+ # corosync 2 defaults to "udp"
+ try:
+ dom['totem']['interface'][0]['bindnetaddr']
+            except KeyError:
+                # looks like a corosync 3 config
+                return
+ if 'nodelist' not in dom:
+ migrate_multicast(dom)
+ else:
+                # bindnetaddr is present alongside a nodelist; leave the config untouched
+ pass
+
+
+def migrate_udpu(dom):
+ dom['totem']['transport'] = 'knet'
+ if 'interface' in dom['totem']:
+ for interface in dom['totem']['interface']:
+ _migrate_totem_interface(interface)
+ if 'quorum' in dom:
+ dom['quorum'].pop('expected_votes', None)
+ logger.info("Upgrade totem.transport to knet.")
+
+
+def migrate_multicast(dom):
+ dom['totem']['transport'] = 'knet'
+ for interface in dom['totem']['interface']:
+ _migrate_totem_interface(interface)
+ logger.info("Generating nodelist according to CIB...")
+ with open(constants.CIB_RAW_FILE, 'rb') as f:
+ cib = lxml.etree.parse(f)
+ cib_nodes = cibquery.get_cluster_nodes(cib)
+ assert 'nodelist' not in dom
+ nodelist = list()
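+    # Collect `ip -j addr` output from every node, keeping only nodes that
+    # answered with exit status 0.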
+ node_interfaces = {
+ x[0]: iproute2.IPAddr(json.loads(x[1][1]))
+ for x in parallax.parallax_call([x.uname for x in cib_nodes], 'ip -j addr')
+ if x[1][0] == 0
+ }
+ with tempfile.TemporaryDirectory(prefix='crmsh-migration-') as dir_name:
+ node_configs = {
+ x[0]: x[1]
+ for x in parallax.parallax_slurp([x.uname for x in cib_nodes], dir_name, corosync.conf())
+ }
+ for node in cib_nodes:
+ assert node.uname in node_configs
+ bindnetaddr_fixer = _BindnetaddrFixer(node_interfaces[node.uname].interfaces())
+ with open(node_configs[node.uname], 'r', encoding='utf-8') as f:
+ root = corosync_config_format.DomParser(f).dom()
+ corosync.ConfParser.transform_dom_with_list_schema(root)
+ interfaces = root['totem']['interface']
+ addresses = {f'ring{i}_addr': bindnetaddr_fixer.fix_bindnetaddr(x['bindnetaddr']) for i, x in enumerate(interfaces)}
+ logger.info("Node %s: %s: %s", node.node_id, node.uname, addresses)
+ nodelist.append({
+ 'nodeid': node.node_id,
+ 'name': node.uname,
+ } | addresses)
+ dom['nodelist'] = {'node': nodelist}
+ if 'quorum' in dom:
+ dom['quorum'].pop('expected_votes', None)
+ logger.info("Unset quorum.expected_votes.")
+ logger.info("Upgrade totem.transport to knet.")
+
+
+def _migrate_totem_interface(interface):
+ # remove udp-only items
+ interface.pop('mcastaddr', None)
+ interface.pop('bindnetaddr', None)
+ interface.pop('broadcast', None)
+ interface.pop('ttl', None)
+ ringnumber = interface.pop('ringnumber', None)
+ if ringnumber is not None:
+ interface['linknumber'] = ringnumber
+
+
+class _BindnetaddrFixer:
+ # crmsh generates incorrect bindnetaddr when joining a corosync 2 multicast cluster
+ def __init__(self, interfaces: typing.Iterable[iproute2.IPInterface]):
+ self._interface_addresses = {addr_info for interface in interfaces for addr_info in interface.addr_info}
+
+ def fix_bindnetaddr(self, bindnetaddr: str):
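+        # bindnetaddr may hold a network address rather than this node's host
+        # address; replace it with the local interface address inside that
+        # network.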
+ bind_address = ipaddress.ip_address(bindnetaddr)
+ for interface_address in self._interface_addresses:
+ if bind_address in interface_address.network:
+ return str(interface_address.ip)
+ return bindnetaddr
+
+
+def migrate_crypto(dom):
+ try:
+        # corosync 3 changed the default hash algorithm to sha256 when `secauth` is enabled
+ if dom['totem'].get('crypto_hash', None) == 'sha1':
+ dom['totem']['crypto_hash'] = 'sha256'
+ logger.info('Upgrade totem.crypto_hash from "sha1" to "sha256".')
+ except KeyError:
+ dom['totem']['crypto_hash'] = 'sha256'
+
+
+def migrate_rrp(dom):
+ try:
+ nodes = dom['nodelist']['node']
+ except KeyError:
+ return
+ is_rrp = any('ring1_addr' in node for node in nodes)
+ if not is_rrp:
+ return
+ try:
+ rrp_mode = dom['totem']['rrp_mode']
+ del dom['totem']['rrp_mode']
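+        # knet's link_mode defaults to "passive", so only an explicit
+        # "active" rrp_mode needs to be carried over.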
+ if rrp_mode == 'active':
+ dom['totem']['link_mode'] = 'active'
+ except KeyError:
+ pass
+ assert all('nodeid' in node for node in nodes)
+ if any('name' not in node for node in nodes):
+ populate_node_name(nodes)
+
+
+def populate_node_name(nodelist):
+ # cannot use utils.list_cluster_nodes, as pacemaker is not running
+ with open(constants.CIB_RAW_FILE, 'rb') as f:
+ cib = lxml.etree.parse(f)
+ cib_nodes = {node.node_id: node for node in cibquery.get_cluster_nodes(cib)}
+ for node in nodelist:
+ node_id = int(node['nodeid'])
+ node['name'] = cib_nodes[node_id].uname
+
+
+def check_unsupported_resource_agents(handler: CheckResultHandler, cib: lxml.etree.Element):
+ handler.log_info("Checking used resource agents...")
+ ocf_resource_agents = list()
+ stonith_resource_agents = list()
+ for resource_agent in cibquery.get_configured_resource_agents(cib):
+ if resource_agent.m_class == 'ocf':
+ ocf_resource_agents.append(resource_agent)
+ elif resource_agent.m_class == 'stonith':
+ if resource_agent.m_type == 'external/sbd':
+ handler.handle_problem(
+ False, False, handler.LEVEL_ERROR,
+ 'stonith:external/sbd is removed. Please use stonith:fence_sbd', [
+ ])
+ else:
+ stonith_resource_agents.append(resource_agent)
+ else:
+ raise ValueError(f'Unrecognized resource agent {resource_agent}')
+ _check_saphana_resource_agent(handler, ocf_resource_agents)
+ class TitledCheckResourceHandler(CheckResultHandler):
+ def __init__(self, parent: CheckResultHandler, title: str):
+ self._parent = parent
+            self._title = title
+ def log_info(self, fmt: str, *args):
+ return self._parent.log_info(fmt, *args)
+ def handle_problem(self, need_auto_fix: bool, is_blocker: bool, level: int, title: str, detail: typing.Iterable[str]):
+ return self._parent.handle_problem(need_auto_fix, is_blocker, level, self._title, detail)
+ supported_resource_agents = _load_supported_resource_agents()
+ _check_removed_resource_agents(
+ TitledCheckResourceHandler(handler, "The following resource agents is removed in SLES 16."),
+ supported_resource_agents,
+ (agent for agent in ocf_resource_agents if agent not in SAP_HANA_RESOURCE_AGENTS),
+ )
+ _check_removed_resource_agents(
+ TitledCheckResourceHandler(handler, "The following fence agents is removed in SLES 16."),
+ supported_resource_agents,
+ stonith_resource_agents,
+ )
+ _check_ocfs2(handler, cib)
+
+
+def _check_saphana_resource_agent(handler: CheckResultHandler, resource_agents: typing.Iterable[cibquery.ResourceAgent]):
+ # "SAPHana" appears only in SAPHanaSR Classic
+ has_sap_hana_sr_resources = any(agent in SAP_HANA_RESOURCE_AGENTS for agent in resource_agents)
+ if has_sap_hana_sr_resources:
+ if 0 != subprocess.run(
+ ['rpm', '-q', 'SAPHanaSR-angi'],
+ stdin=subprocess.DEVNULL,
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL,
+ ).returncode:
+ handler.handle_problem(
+ False, True, handler.LEVEL_ERROR,
+ 'SAPHanaSR Classic is removed in SLES 16.', [
+ 'Before migrating to SLES 16, replace it with SAPHanaSR-angi.',
+ ],
+ )
+
+
+def _load_supported_resource_agents() -> typing.Set[cibquery.ResourceAgent]:
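+    # Each line of the data file is "class:provider:type" or, when there is
+    # no provider (e.g. stonith agents), "class:type".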
+ ret = set()
+ with importlib.resources.files('crmsh').joinpath('migration-supported-resource-agents.txt').open(
+ 'r', encoding='ascii',
+ ) as r:
+ for line in r:
+ parts = line.strip().split(':', 3)
+ m_class = parts[0]
+ m_provider = parts[1] if len(parts) == 3 else None
+ m_type = parts[-1]
+ ret.add(cibquery.ResourceAgent(m_class, m_provider, m_type))
+ return ret
+
+
+
+def _check_removed_resource_agents(
+ handler: CheckResultHandler,
+ supported_resource_agents: typing.Set[cibquery.ResourceAgent],
+ resource_agents: typing.Iterable[cibquery.ResourceAgent],
+):
+ unsupported_resource_agents = [x for x in resource_agents if x not in supported_resource_agents]
+ if unsupported_resource_agents:
+ handler.handle_problem(
+ False, True, handler.LEVEL_ERROR,
+ '', [
+ '* ' + ':'.join(x for x in dataclasses.astuple(resource_agent) if x is not None)
+ for resource_agent in unsupported_resource_agents
+ ],
+ )
+
+
+def _check_ocfs2(handler: CheckResultHandler, cib: lxml.etree.Element):
+ if cibquery.has_primitive_filesystem_with_fstype(cib, 'ocfs2'):
+ handler.handle_problem(
+ False, True, handler.LEVEL_ERROR,
+ 'OCFS2 is not supported in SLES 16. Please use GFS2.', [],
+ )
+
+def check_cib_schema_version(handler: CheckResultHandler, cib: lxml.etree.Element):
+ schema_version = cib.get('validate-with')
+ if schema_version is None:
+ handler.handle_problem(
+ False, False, handler.LEVEL_WARN,
+ "The CIB is validated with unknown schema version.", []
+ )
+ return
+ version_match = re.match(r'^pacemaker-(\d+)\.(\d+)$', schema_version)
+ if version_match is None:
+ handler.handle_problem(
+ False, False, handler.LEVEL_WARN,
+ f"The CIB is validated with unknown schema version {schema_version}", []
+ )
+ return
+ version = tuple(int(x) for x in version_match.groups())
+ latest_schema_version = _get_latest_cib_schema_version()
+ if version != latest_schema_version:
+ handler.handle_problem(
+ False, False, handler.LEVEL_WARN,
+ "The CIB is not validated with the latest schema version.", [
+ f'* Latest version: {".".join(str(i) for i in latest_schema_version)}',
+ f'* Current version: {".".join(str(i) for i in version)}',
+ ]
+ )
+
+
+def _get_latest_cib_schema_version() -> tuple[int, int]:
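+    # pacemaker ships one RelaxNG schema per schema version; take the highest
+    # numeric one and skip non-numeric names such as pacemaker-next.rng.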
+ return max(tuple(int(s) for s in x.groups()) for x in (
+ re.match(r'^pacemaker-(\d+)\.(\d+)\.rng$', filename)
+ for filename in glob.iglob('pacemaker-*.rng', root_dir='/usr/share/pacemaker')
+ ) if x is not None)
diff --git a/crmsh/prun/runner.py b/crmsh/prun/runner.py
index fc5f577c52..12682f4f7a 100644
--- a/crmsh/prun/runner.py
+++ b/crmsh/prun/runner.py
@@ -1,5 +1,6 @@
# runner.py - fork and exec multiple child processes concurrently
import asyncio
+import contextlib
import fcntl
import os
import select
@@ -46,16 +47,18 @@ def add_task(self, task: Task):
self._tasks.append(task)
def run(self, timeout_seconds: int = -1):
- awaitable = asyncio.gather(
- *[
- self._concurrency_limit(self._concurrency_limiter, self._run(task))
- for task in self._tasks
- ],
- return_exceptions=True,
- )
- if timeout_seconds > 0:
- awaitable = self._timeout_limit(timeout_seconds, awaitable)
- return asyncio.get_event_loop_policy().get_event_loop().run_until_complete(awaitable)
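+        # Use a fresh event loop per call and close it afterwards, rather
+        # than reusing the policy's global loop.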
+ with contextlib.closing(asyncio.new_event_loop()) as event_loop:
+ asyncio.set_event_loop(event_loop)
+ awaitable = asyncio.gather(
+ *[
+ self._concurrency_limit(self._concurrency_limiter, self._run(task))
+ for task in self._tasks
+ ],
+ return_exceptions=True,
+ )
+ if timeout_seconds > 0:
+ awaitable = self._timeout_limit(timeout_seconds, awaitable)
+ return event_loop.run_until_complete(awaitable)
async def _timeout_limit(self, timeout_seconds: int, awaitable: typing.Awaitable):
assert timeout_seconds > 0
diff --git a/crmsh/sh.py b/crmsh/sh.py
index c4f537eb37..006e964ae2 100644
--- a/crmsh/sh.py
+++ b/crmsh/sh.py
@@ -206,7 +206,7 @@ def get_stdout_or_raise_error(
user: typing.Optional[str],
cmd: str,
success_exit_status: typing.Optional[typing.Set[int]] = None,
- ):
+ ) -> str:
result = self.su_subprocess_run(
user, cmd,
stdout=subprocess.PIPE,
diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py
index 12b56fef5d..3406f166a3 100644
--- a/crmsh/ui_cluster.py
+++ b/crmsh/ui_cluster.py
@@ -9,7 +9,7 @@
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import crmsh.parallax
-from . import command, sh, healthcheck
+from . import command, sh, healthcheck, migration
from . import utils
from . import scripts
from . import completers as compl
@@ -780,45 +780,64 @@ def do_geo_init_arbitrator(self, context, *args):
bootstrap.bootstrap_arbitrator(geo_context)
return True
- @command.completers(compl.choice(['hawk2']))
+ @command.completers(compl.choice([
+ 'hawk2',
+ 'sles16',
+ ]))
def do_health(self, context, *args):
'''
Extensive health check.
'''
if not args:
return self._do_health_legacy(context, *args)
- parser = argparse.ArgumentParser()
- parser.add_argument('component', choices=['hawk2'])
+ parser = argparse.ArgumentParser('health')
+ parser.add_argument('component', choices=['hawk2', 'sles16'])
parser.add_argument('-f', '--fix', action='store_true')
- parsed_args = parser.parse_args(args)
- if parsed_args.component == 'hawk2':
- nodes = utils.list_cluster_nodes()
- if parsed_args.fix:
- if not healthcheck.feature_full_check(healthcheck.PasswordlessPrimaryUserAuthenticationFeature(), nodes):
+ parsed_args, remaining_args = parser.parse_known_args(args)
+ match parsed_args.component:
+ case 'hawk2':
+ if remaining_args:
+                    logger.error('Unknown arguments: %s', ' '.join(remaining_args))
+ return False
+ nodes = utils.list_cluster_nodes()
+ if parsed_args.fix:
+ if not healthcheck.feature_full_check(healthcheck.PasswordlessPrimaryUserAuthenticationFeature(), nodes):
+ try:
+ healthcheck.feature_fix(
+ healthcheck.PasswordlessPrimaryUserAuthenticationFeature(),
+ nodes,
+ utils.ask,
+ )
+ except healthcheck.FixFailure:
+ logger.error('Cannot fix automatically.')
+ return False
try:
- healthcheck.feature_fix(
- healthcheck.PasswordlessPrimaryUserAuthenticationFeature(),
- nodes,
- utils.ask,
- )
+ healthcheck.feature_fix(healthcheck.PasswordlessHaclusterAuthenticationFeature(), nodes, utils.ask)
+ logger.info("hawk2: passwordless ssh authentication: OK.")
+ return True
except healthcheck.FixFailure:
- logger.error('Cannot fix automatically.')
+ logger.error("hawk2: passwordless ssh authentication: FAIL.")
return False
- try:
- healthcheck.feature_fix(healthcheck.PasswordlessHaclusterAuthenticationFeature(), nodes, utils.ask)
- logger.info("hawk2: passwordless ssh authentication: OK.")
- return True
- except healthcheck.FixFailure:
- logger.error("hawk2: passwordless ssh authentication: FAIL.")
- return False
- else:
- if healthcheck.feature_full_check(healthcheck.PasswordlessHaclusterAuthenticationFeature(), nodes):
- logger.info("hawk2: passwordless ssh authentication: OK.")
- return True
else:
- logger.error("hawk2: passwordless ssh authentication: FAIL.")
- logger.warning('Please run "crm cluster health hawk2 --fix"')
+ if healthcheck.feature_full_check(healthcheck.PasswordlessHaclusterAuthenticationFeature(), nodes):
+ logger.info("hawk2: passwordless ssh authentication: OK.")
+ return True
+ else:
+ logger.error("hawk2: passwordless ssh authentication: FAIL.")
+ logger.warning('Please run "crm cluster health hawk2 --fix"')
+ return False
+ case 'sles16':
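+                # Both migration.migrate() and migration.check() return 0 when
+                # the cluster needs nothing further; map that to success.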
+ try:
+ if parsed_args.fix:
+                        return 0 == migration.migrate()
+ else:
+ return 0 == migration.check(['sles16'] + remaining_args)
+ except migration.MigrationFailure as e:
+ logger.error('%s', e)
return False
+ case _:
+ logger.error('Unknown component: %s', parsed_args.component)
+ return False
def _do_health_legacy(self, context, *args):
params = self._args_implicit(context, args, 'nodes')
diff --git a/crmsh/utils.py b/crmsh/utils.py
index 24f99549b7..27bafcf768 100644
--- a/crmsh/utils.py
+++ b/crmsh/utils.py
@@ -36,7 +36,7 @@
import crmsh.parallax
import crmsh.user_of_host
-from . import config, sh, corosync
+from . import config, sh, corosync, cibquery
from . import userdir
from . import constants
from . import options
@@ -1731,12 +1731,11 @@ def get_address_list_from_corosync_conf():
return corosync.get_values("nodelist.node.ring0_addr")
-def list_cluster_nodes(no_reg=False):
+def list_cluster_nodes(no_reg=False) -> list[str]:
'''
Returns a list of nodes in the cluster.
'''
from . import xmlutil
- cib = None
rc, out, err = ShellUtils().get_stdout_stderr(constants.CIB_QUERY, no_reg=no_reg)
# When cluster service running
if rc == 0:
@@ -1749,17 +1748,7 @@ def list_cluster_nodes(no_reg=False):
cib = xmlutil.file2cib_elem(cib_path)
if cib is None:
return None
-
- node_list = []
- for node in cib.xpath(constants.XML_NODE_PATH):
- name = node.get('uname') or node.get('id')
- # exclude remote node
- if node.get('type') == 'remote':
- xpath = f"//primitive[@provider='pacemaker' and @type='remote']/instance_attributes/nvpair[@name='server' and @value='{name}']"
- if cib.xpath(xpath):
- continue
- node_list.append(name)
- return node_list
+ return [x.uname for x in cibquery.get_cluster_nodes(cib)]
def cluster_run_cmd(cmd, node_list=[]):
diff --git a/crmsh/xmlutil.py b/crmsh/xmlutil.py
index f85a7d749a..eddf8b51cb 100644
--- a/crmsh/xmlutil.py
+++ b/crmsh/xmlutil.py
@@ -4,6 +4,8 @@
import os
import subprocess
+import typing
+
from lxml import etree, doctestcompare
import copy
import bz2
@@ -26,7 +28,7 @@
logger_utils = log.LoggerUtils(logger)
-def xmlparse(f):
+def xmlparse(f: typing.IO[typing.AnyStr]) -> etree.Element:
try:
cib_elem = etree.parse(f).getroot()
except Exception as msg:
@@ -122,7 +124,7 @@ def cibdump2tmp():
return None
-def text2elem(text):
+def text2elem(text: str) -> etree.Element:
"""
Convert a text format CIB to
an XML tree.
diff --git a/data-manifest b/data-manifest
index 1b549b114e..ce5f25d4d1 100644
--- a/data-manifest
+++ b/data-manifest
@@ -83,6 +83,7 @@ test/features/environment.py
test/features/geo_setup.feature
test/features/gfs2.feature
test/features/healthcheck.feature
+test/features/migration.feature
test/features/ocfs2.feature
test/features/qdevice_options.feature
test/features/qdevice_setup_remove.feature
@@ -187,6 +188,7 @@ test/unittests/scripts/workflows/10-webserver.xml
test/unittests/test_bootstrap.py
test/unittests/test_bugs.py
test/unittests/test_cib.py
+test/unittests/test_cibquery.py
test/unittests/test_cliformat.py
test/unittests/test_cluster_fs.py
test/unittests/test.conf
@@ -199,6 +201,7 @@ test/unittests/test_crashtest_utils.py
test/unittests/test_gv.py
test/unittests/test_handles.py
test/unittests/test_lock.py
+test/unittests/test_migration.py
test/unittests/test_objset.py
test/unittests/test_parallax.py
test/unittests/test_parse.py
diff --git a/doc/crm.8.adoc b/doc/crm.8.adoc
index 7a165f56c0..b4ce1f7339 100644
--- a/doc/crm.8.adoc
+++ b/doc/crm.8.adoc
@@ -996,14 +996,34 @@ See "crm cluster help geo_join" or "crm cluster geo_join --help"
[[cmdhelp.cluster.health,Cluster health check]]
==== `health`
+Usage 1: General Health Check
+
Runs a larger set of tests and queries on all nodes in the cluster to
verify the general system health and detect potential problems.
-Usage:
...............
health
...............
+Usage 2: Topic-Specific Health Check
+
+Verifies the health of a specified topic.
+
+...............
+health hawk2|sles16 [--fix]
+...............
+
+* `hawk2`: check or fix key-based ssh authentication for user hacluster, which
+is needed by hawk2.
+* `sles16`: check whether the cluster is good to migrate to SLES 16.
+
+The optional `--fix` argument attempts to automatically resolve any detected
+issues.
+
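+For example, to check whether the cluster is ready for SLES 16 and then apply
+the automatic fixes:
+
+...............
+crm cluster health sles16
+crm cluster health sles16 --fix
+...............
+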
+.Note on sles16
+****************************
+`--fix` is only available after the OS is migrated to SLES 16.
+****************************
+
[[cmdhelp.cluster.init,Initializes a new HA cluster,From Code]]
==== `init`
See "crm cluster help init" or "crm cluster init --help"
diff --git a/etc/profiles.yml b/etc/profiles.yml
index ad66cac5ab..3ab980c705 100644
--- a/etc/profiles.yml
+++ b/etc/profiles.yml
@@ -23,7 +23,7 @@ default:
sbd.watchdog_timeout: 15
knet-default:
- corosync.totem.crypto_hash: sha1
+ corosync.totem.crypto_hash: sha256
corosync.totem.crypto_cipher: aes256
microsoft-azure:
diff --git a/test/features/migration.feature b/test/features/migration.feature
new file mode 100644
index 0000000000..51d8a4507d
--- /dev/null
+++ b/test/features/migration.feature
@@ -0,0 +1,158 @@
+# vim: sw=2 sts=2
+Feature: migration
+
+ Test migration and pre-migration checks
+ Need nodes: hanode1 hanode2
+
+ Scenario: Run pre-migration checks when cluster services are running.
+ Given Cluster service is "stopped" on "hanode1"
+ And Cluster service is "stopped" on "hanode2"
+ And Run "crm cluster init -y -N hanode2" OK on "hanode1"
+ When Try "crm cluster health sles16" on "hanode1"
+ Then Expected return code is "0"
+ And Expected "[INFO] This cluster works on SLES 16. No migration is needed." in stdout
+
+ Scenario: Run pre-migration checks with cluster services stopped.
+ When Run "crm cluster stop --all" on "hanode1"
+ And Run "crm cluster stop --all" on "hanode2"
+ And Try "crm cluster health sles16" on "hanode1"
+ Then Expected return code is "0"
+ And Expected "[INFO] This cluster works on SLES 16. No migration is needed." in stdout
+
+
+ Scenario: Should run fixes.
+ When Try "crm cluster health sles16 --fix" on "hanode1"
+ Then Expected return code is "0"
+
+  Scenario: Run pre-migration checks against corosync.conf generated in crmsh-4.6
+ When Run "rm -f /etc/corosync/corosync.conf" on "hanode1"
+ And Write multi lines to file "/etc/corosync/corosync.conf" on "hanode1"
+ """
+ # Please read the corosync.conf.5 manual page
+ totem {
+ version: 2
+ cluster_name: hacluster
+ clear_node_high_bit: yes
+ interface {
+ ringnumber: 0
+ mcastport: 5405
+ ttl: 1
+ }
+
+ transport: udpu
+ crypto_hash: sha1
+ crypto_cipher: aes256
+ token: 5000
+ join: 60
+ max_messages: 20
+ token_retransmits_before_loss_const: 10
+ }
+
+ logging {
+ fileline: off
+ to_stderr: no
+ to_logfile: yes
+ logfile: /var/log/cluster/corosync.log
+ to_syslog: yes
+ debug: off
+ timestamp: on
+ logger_subsys {
+ subsys: QUORUM
+ debug: off
+ }
+
+ }
+
+ quorum {
+
+ # Enable and configure quorum subsystem (default: off)
+ # see also corosync.conf.5 and votequorum.5
+ provider: corosync_votequorum
+ expected_votes: 2
+ two_node: 1
+ }
+
+ nodelist {
+ node {
+ ring0_addr: @hanode1.ip.0
+ name: hanode1
+ nodeid: 1
+ }
+
+ node {
+ ring0_addr: @hanode2.ip.0
+ name: hanode2
+ nodeid: 2
+ }
+
+ }
+ """
+ And Run "crm cluster copy /etc/corosync/corosync.conf" on "hanode1"
+ And Try "crm cluster health sles16" on "hanode1"
+ Then Expected return code is "1"
+    And Expect stdout contains snippets ["[PASS] This cluster is good to migrate to SLES 16.", "[INFO] Please run \"crm cluster health sles16 --fix\" on any one of the above nodes.", "[WARN] Corosync transport \"udpu\" is deprecated in corosync 3. Please use knet.", "----- node: localhost -----", "----- node: hanode2 -----"].
+
+ Scenario: Run fixes against corosync.conf generated in crmsh-4.6
+ When Try "crm cluster health sles16 --fix" on "hanode1"
+ Then Expected return code is "0"
+
+ Scenario: Run fixes against multicast corosync.conf containing incorrect bindnetaddr.
+ When Run "rm -f /etc/corosync/corosync.conf" on "hanode1"
+ And Write multi lines to file "/etc/corosync/corosync.conf" on "hanode1"
+ """
+ # Please read the corosync.conf.5 manual page
+ totem {
+ version: 2
+ cluster_name: hacluster
+ clear_node_high_bit: yes
+ interface {
+ ringnumber: 0
+ bindnetaddr: @hanode1.ip.0
+ mcastaddr: 239.247.90.152
+ mcastport: 5405
+ ttl: 1
+ }
+
+ crypto_hash: sha1
+ crypto_cipher: aes256
+ token: 5000
+ join: 60
+ max_messages: 20
+ token_retransmits_before_loss_const: 10
+ }
+
+ logging {
+ fileline: off
+ to_stderr: no
+ to_logfile: no
+ logfile: /var/log/cluster/corosync.log
+ to_syslog: yes
+ debug: off
+ timestamp: on
+ logger_subsys {
+ subsys: QUORUM
+ debug: off
+ }
+
+ }
+
+ quorum {
+
+ # Enable and configure quorum subsystem (default: off)
+ # see also corosync.conf.5 and votequorum.5
+ provider: corosync_votequorum
+ expected_votes: 2
+ two_node: 1
+ }
+ """
+ And Run "crm cluster copy /etc/corosync/corosync.conf" on "hanode1"
+ And Try "crm cluster health sles16 --fix" on "hanode1"
+ Then Expected return code is "0"
+ And Run "grep -F 'ring0_addr: @hanode2.ip.0' /etc/corosync/corosync.conf" OK
+
+ Scenario: Run pre-migration checks when some of the nodes are offline.
+ When Run "systemctl stop sshd" on "hanode2"
+ And Try "crm cluster health sles16" on "hanode1"
+ Then Expected return code is "1"
+ And Expect stdout contains snippets ["Cannot create SSH connection to", "----- node: localhost -----", "----- node: hanode2 -----"].
+
diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py
index 0a8a5063d6..b44d1660cd 100644
--- a/test/features/steps/step_implementation.py
+++ b/test/features/steps/step_implementation.py
@@ -1,3 +1,4 @@
+import ast
import re
import time
import os
@@ -18,8 +19,8 @@
def _parse_str(text):
- return text[1:-1].encode('utf-8').decode('unicode_escape')
-_parse_str.pattern='".*"'
+ return ast.literal_eval(text)
+_parse_str.pattern='"([^"]|\\")*?"'
behave.use_step_matcher("cfparse")
@@ -170,6 +171,13 @@ def step_impl(context, msg):
context.stderr = None
+@then('Expect stdout contains snippets [{snippets:str+}].')
+def step_impl(context, snippets):
+ for snippet in snippets:
+ assert_in(snippet, context.stdout)
+ context.stdout = None
+
+
@then('Expected regex "{reg_str}" in stdout')
def step_impl(context, reg_str):
res = re.search(reg_str, context.stdout)
diff --git a/test/run-functional-tests b/test/run-functional-tests
index dbdc255455..20dfb20183 100755
--- a/test/run-functional-tests
+++ b/test/run-functional-tests
@@ -228,8 +228,6 @@ deploy_ha_node() {
create_node() {
- info "Loading container image $CONTAINER_IMAGE..."
-
echo 16777216 > /proc/sys/net/core/rmem_max
echo 16777216 > /proc/sys/net/core/wmem_max
info "Create ha specific container networks..."
diff --git a/test/unittests/test_cibquery.py b/test/unittests/test_cibquery.py
new file mode 100644
index 0000000000..e8b7e268ff
--- /dev/null
+++ b/test/unittests/test_cibquery.py
@@ -0,0 +1,110 @@
+import unittest
+
+import lxml.etree
+
+from crmsh import cibquery
+from crmsh.cibquery import ResourceAgent
+
+
+class TestDataObjectResourceAgent(unittest.TestCase):
+ def test_eq(self):
+ self.assertEqual(ResourceAgent('foo', None, 'bar'), ResourceAgent('foo', None, 'bar'))
+
+ def test_set_eq(self):
+ self.assertSetEqual({ResourceAgent('foo', None, 'bar')}, {ResourceAgent('foo', None, 'bar')})
+
+
+class TestCibQuery(unittest.TestCase):
+ _TEST_DATA = """
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+"""
+
+ def setUp(self) -> None:
+ self.cib = lxml.etree.fromstring(self._TEST_DATA)
+
+ def test_get_resource_agents(self):
+ self.assertSetEqual(
+ {
+ ResourceAgent('ocf', 'heartbeat', 'IPaddr2'),
+ ResourceAgent('stonith', None, 'external/sbd'),
+ ResourceAgent('ocf', 'heartbeat', 'Filesystem'),
+ ResourceAgent('ocf', 'pacemaker', 'controld'),
+ },
+ cibquery.get_configured_resource_agents(self.cib),
+ )
+
+ def test_has_primitive_filesystem_with_fstype(self):
+ self.assertTrue(cibquery.has_primitive_filesystem_with_fstype(self.cib, 'ocfs2'))
+ self.assertFalse(cibquery.has_primitive_filesystem_with_fstype(self.cib, 'foo'))
diff --git a/test/unittests/test_migration.py b/test/unittests/test_migration.py
new file mode 100644
index 0000000000..1ab78cad10
--- /dev/null
+++ b/test/unittests/test_migration.py
@@ -0,0 +1,81 @@
+import re
+import unittest
+from unittest import mock
+
+import lxml.etree
+
+from crmsh import migration, cibquery
+
+
+class TestCheckRemovedResourceAgents(unittest.TestCase):
+ def setUp(self):
+ self._handler = mock.Mock(migration.CheckResultHandler)
+
+ def test_load_supported_resource_agents(self):
+ s = migration._load_supported_resource_agents()
+ self.assertIn(cibquery.ResourceAgent('ocf', 'heartbeat', 'IPaddr2'), s)
+ self.assertIn(cibquery.ResourceAgent('stonith', None, 'fence_sbd'), s)
+ self.assertNotIn(cibquery.ResourceAgent('foo', None, 'bar'), s)
+
+ def test_check_removed_resource_agents(self):
+ migration._check_removed_resource_agents(
+ self._handler,
+ {cibquery.ResourceAgent('foo', None, 'bar')},
+ [cibquery.ResourceAgent('foo', 'bar', 'qux')]
+ )
+ self._handler.handle_problem.assert_called()
+
+ def test_check_version_range(self):
+ def check_fn(x):
+ migration._check_version_range(
+ self._handler,
+ 'foo',
+ (1, 1,),
+                re.compile(r'^foo\s+(\d+(?:\.\d+)*)'),
+ x,
+ )
+ check_fn('foo 0')
+ self._handler.handle_problem.assert_called()
+ self._handler.handle_problem.reset_mock()
+ check_fn('foo 0.9')
+ self._handler.handle_problem.assert_called()
+ self._handler.handle_problem.reset_mock()
+ check_fn('foo 0.9.99')
+ self._handler.handle_problem.assert_called()
+ self._handler.handle_problem.reset_mock()
+ check_fn('foo 1')
+ self._handler.handle_problem.assert_called()
+ self._handler.handle_problem.reset_mock()
+ check_fn('foo 1.1')
+ self._handler.handle_problem.assert_not_called()
+ check_fn('foo 1.1.0')
+ self._handler.handle_problem.assert_not_called()
+ check_fn('foo 1.1.1')
+ self._handler.handle_problem.assert_not_called()
+ check_fn('foo 1.2')
+ self._handler.handle_problem.assert_not_called()
+ check_fn('foo 2')
+ self._handler.handle_problem.assert_not_called()
+ check_fn('foo 2.0')
+ self._handler.handle_problem.assert_not_called()
+
+ @mock.patch('glob.iglob')
+ def test_get_latest_cib_schema_version(self, mock_iglob: mock.MagicMock):
+ mock_iglob.return_value = iter([
+ 'pacemaker-0.1.rng', 'pacemaker-1.9.rng', 'pacemaker-1.11.rng', 'pacemaker-next.rng',
+ ])
+ self.assertEqual((1, 11), migration._get_latest_cib_schema_version())
+
+ @mock.patch('crmsh.migration._get_latest_cib_schema_version')
+ def test_check_cib_schema_version(self, mock_get_latest_cib_schema_version):
+        cib = lxml.etree.fromstring('<cib validate-with="pacemaker-3.9"/>')
+ mock_get_latest_cib_schema_version.return_value = (3, 10)
+ handler = mock.Mock(migration.CheckResultHandler)
+ migration.check_cib_schema_version(handler, cib)
+ handler.handle_problem.assert_called_with(
+ False, False, handler.LEVEL_WARN,
+ "The CIB is not validated with the latest schema version.", [
+ '* Latest version: 3.10',
+ '* Current version: 3.9',
+ ]
+ )
diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py
index fd1237571c..8ecf5c9eda 100644
--- a/test/unittests/test_utils.py
+++ b/test/unittests/test_utils.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
# Copyright (C) 2014 Kristoffer Gronlund
# See COPYING for license information.
#
@@ -7,7 +6,6 @@
import os
import socket
import re
-import subprocess
import unittest
import pytest
import logging
@@ -1068,21 +1066,24 @@ def test_is_quorate(mock_run):
mock_run_inst.get_stdout_or_raise_error.assert_called_once_with("corosync-quorumtool -s", None, success_exit_status={0, 2})
+@mock.patch('crmsh.cibquery.get_cluster_nodes')
@mock.patch('crmsh.utils.etree.fromstring')
@mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr')
-def test_list_cluster_nodes_none(mock_run, mock_etree):
+def test_list_cluster_nodes_none(mock_run, mock_etree, mock_get_cluster_nodes):
mock_run.return_value = (0, "data", None)
mock_etree.return_value = None
res = utils.list_cluster_nodes()
assert res is None
mock_run.assert_called_once_with(constants.CIB_QUERY, no_reg=False)
mock_etree.assert_called_once_with("data")
+ mock_get_cluster_nodes.assert_not_called()
+@mock.patch('crmsh.cibquery.get_cluster_nodes')
@mock.patch('os.path.isfile')
@mock.patch('os.getenv')
@mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr')
-def test_list_cluster_nodes_cib_not_exist(mock_run, mock_env, mock_isfile):
+def test_list_cluster_nodes_cib_not_exist(mock_run, mock_env, mock_isfile, mock_get_cluster_nodes):
mock_run.return_value = (1, None, None)
mock_env.return_value = constants.CIB_RAW_FILE
mock_isfile.return_value = False
@@ -1091,35 +1092,28 @@ def test_list_cluster_nodes_cib_not_exist(mock_run, mock_env, mock_isfile):
mock_run.assert_called_once_with(constants.CIB_QUERY, no_reg=False)
mock_env.assert_called_once_with("CIB_file", constants.CIB_RAW_FILE)
mock_isfile.assert_called_once_with(constants.CIB_RAW_FILE)
+ mock_get_cluster_nodes.assert_not_called()
+@mock.patch('crmsh.cibquery.get_cluster_nodes')
@mock.patch('crmsh.xmlutil.file2cib_elem')
@mock.patch('os.path.isfile')
@mock.patch('os.getenv')
@mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr')
-def test_list_cluster_nodes(mock_run, mock_env, mock_isfile, mock_file2elem):
+def test_list_cluster_nodes(mock_run, mock_env, mock_isfile, mock_file2elem, mock_get_cluster_nodes):
mock_run.return_value = (1, None, None)
mock_env.return_value = constants.CIB_RAW_FILE
mock_isfile.return_value = True
mock_cib_inst = mock.Mock()
mock_file2elem.return_value = mock_cib_inst
- mock_node_inst1 = mock.Mock()
- mock_node_inst2 = mock.Mock()
- mock_node_inst1.get.side_effect = ["node1", "remote"]
- mock_node_inst2.get.side_effect = ["node2", "member"]
- mock_cib_inst.xpath.side_effect = [[mock_node_inst1, mock_node_inst2], "data"]
res = utils.list_cluster_nodes()
- assert res == ["node2"]
mock_run.assert_called_once_with(constants.CIB_QUERY, no_reg=False)
mock_env.assert_called_once_with("CIB_file", constants.CIB_RAW_FILE)
mock_isfile.assert_called_once_with(constants.CIB_RAW_FILE)
mock_file2elem.assert_called_once_with(constants.CIB_RAW_FILE)
- mock_cib_inst.xpath.assert_has_calls([
- mock.call(constants.XML_NODE_PATH),
- mock.call("//primitive[@provider='pacemaker' and @type='remote']/instance_attributes/nvpair[@name='server' and @value='node1']")
- ])
+ mock_get_cluster_nodes.assert_called_once_with(mock_cib_inst)
@mock.patch('os.getenv')