Skip to content

NO-ISSUE: Add filtered controller logs when timeout during installation. #2587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions src/assisted_test_infra/test_infra/helper_classes/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,18 @@
from collections import Counter
from typing import List, Optional, Set

import waiting
import yaml
from assisted_service_client import models
from assisted_service_client.models.operator_type import OperatorType

import consts

from junit_report import JunitTestCase
from netaddr import IPAddress, IPNetwork

import consts
import waiting

import yaml

from assisted_test_infra.test_infra import BaseClusterConfig, BaseInfraEnvConfig, ClusterName, exceptions, utils
from assisted_test_infra.test_infra.controllers.load_balancer_controller import LoadBalancerController
from assisted_test_infra.test_infra.helper_classes.base_cluster import BaseCluster
Expand All @@ -22,6 +26,7 @@
from assisted_test_infra.test_infra.helper_classes.nodes import Nodes
from assisted_test_infra.test_infra.tools import terraform_utils
from assisted_test_infra.test_infra.utils import logs_utils, network_utils, operators_utils
from assisted_test_infra.test_infra.utils.logs_utils import filter_controllers_logs
from assisted_test_infra.test_infra.utils.waiting import (
wait_till_all_hosts_are_in_status,
wait_till_all_hosts_use_agent_image,
Expand Down Expand Up @@ -689,12 +694,20 @@ def start_install_and_wait_for_installed(
fall_on_pending_status=False,
):
self.start_install()
if wait_for_hosts:
self.wait_for_hosts_to_install(fall_on_pending_status=fall_on_pending_status)
if wait_for_operators:
self.wait_for_operators_to_finish()
if wait_for_cluster_install:
self.wait_for_install()
try:
if wait_for_hosts:
self.wait_for_hosts_to_install(fall_on_pending_status=fall_on_pending_status)
if wait_for_operators:
self.wait_for_operators_to_finish()
if wait_for_cluster_install:
self.wait_for_install()
except Exception as e:
filtered_messages = filter_controllers_logs(self.api_client.client, self.id,
filters=["level=warning", "level=error"])
# In case of error or timeout we will add last filter_controllers_logs with more info
e.filter_message = str(filtered_messages)
raise Exception(e.__class__.__name__, e.__dict__)

if download_kubeconfig:
self.download_kubeconfig()

Expand Down
54 changes: 53 additions & 1 deletion src/assisted_test_infra/test_infra/utils/logs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@
import copy
import hashlib
import os
import random
import shutil
import tarfile
import time
from pathlib import Path
from string import ascii_lowercase
from tempfile import TemporaryDirectory

import waiting
from assisted_service_client.rest import ApiException

from consts import NUMBER_OF_MASTERS

from service_client import log

import waiting

OC_DOWNLOAD_LOGS_INTERVAL = 5 * 60
OC_DOWNLOAD_LOGS_TIMEOUT = 60 * 60
MAX_LOG_SIZE_BYTES = 1_000_000
Expand Down Expand Up @@ -342,3 +347,50 @@ def _are_logs_in_status(client, cluster_id, statuses, check_host_logs_only=False
except BaseException:
log.exception("Failed to get cluster %s log info", cluster_id)
return False


def filter_controllers_logs(client, cluster_id, filters, last_messages=10):
"""Get controller log file when possible when master's in finalized stage and filter messages.

Used to detect error or warning when timeout expires , will be added to the exception last known errors/warning
:param client:
:param cluster_id:
:param filters: list of string to match from assisted
:param last_messages:
:return:
"""
installer_controller = 'assisted-installer-controller.logs'
tmp_file = f"/tmp/{''.join(random.choice(ascii_lowercase) for _ in range(10))}.tar.gz"
filtered_content = []
try:
log.info(f"Collecting controllers info from {installer_controller}")
controller_data = client.v2_download_cluster_logs(cluster_id, logs_type="controller",
_preload_content=False).data
with open(tmp_file, "wb") as _file:
log.info(f"Write controllers content to file {tmp_file}")
_file.write(controller_data)

with tarfile.open(tmp_file, 'r') as tar:
log.info(f"Extract {tmp_file} and create object content")
content = tar.extractfile(installer_controller).readlines()
reversed_iterator = reversed(content)
content_lines = [(next(reversed_iterator).decode("utf-8")) for _ in range(last_messages)]

# filtered content - check if one of the filters in a line
log.info(f"Searching filters in object content {content_lines}")
for line in content_lines:
for f in filters:
if f in line:
filtered_content.append(line)

except ApiException as api_error:
log.info("Failed to get controller logs %s", api_error)

except Exception as e:
log.info("Error during controller log filtering %s", str(e))
finally:
try:
os.remove(tmp_file)
except FileNotFoundError:
log.info("Unable to remove tmp file %s", tmp_file)
return filtered_content