Skip to content

Commit 1399624

Browse files
jiyongjung, tfx-copybara
jiyongjung
authored and committed
Leaves log for failed components in e2e tests.
PiperOrigin-RevId: 327372570
1 parent 6457261 commit 1399624

File tree

2 files changed

+47
-6
lines changed

2 files changed

+47
-6
lines changed

tfx/experimental/templates/taxi/e2e_tests/kubeflow_e2e_test.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ def setUp(self):
6363
random_id = orchestration_test_utils.random_id()
6464
self._pipeline_name = 'taxi-template-kubeflow-e2e-test-' + random_id
6565
logging.info('Pipeline: %s', self._pipeline_name)
66-
self._endpoint = self._get_endpoint()
66+
self._namespace = 'kubeflow'
67+
self._endpoint = self._get_endpoint(self._namespace)
6768
self._kfp_client = kfp.Client(host=self._endpoint)
6869
logging.info('ENDPOINT: %s', self._endpoint)
6970

@@ -136,9 +137,10 @@ def _delete_target_container_image(self):
136137
'gcloud', 'container', 'images', 'delete', self._target_container_image
137138
])
138139

139-
def _get_endpoint(self):
140-
output = subprocess.check_output(
141-
'kubectl describe configmap inverse-proxy-config -n kubeflow'.split())
140+
def _get_endpoint(self, namespace):
141+
cmd = 'kubectl describe configmap inverse-proxy-config -n {}'.format(
142+
namespace)
143+
output = subprocess.check_output(cmd.split())
142144
for line in output.decode('utf-8').split('\n'):
143145
if line.endswith('googleusercontent.com'):
144146
return line
@@ -207,7 +209,10 @@ def _run_pipeline(self):
207209
self._endpoint,
208210
])
209211
self.assertEqual(0, result.exit_code)
210-
self._wait_until_completed(self._parse_run_id(result.output))
212+
run_id = self._parse_run_id(result.output)
213+
self._wait_until_completed(run_id)
214+
kubeflow_test_utils.print_failure_log_for_run(self._endpoint, run_id,
215+
self._namespace)
211216

212217
def _parse_run_id(self, output: str):
213218
run_id_lines = [

tfx/orchestration/kubeflow/test_utils.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from __future__ import print_function
2020

2121
import datetime
22+
import json
2223
import os
2324
import re
2425
import shutil
@@ -59,8 +60,10 @@
5960
from tfx.types import component_spec
6061
from tfx.types import standard_artifacts
6162
from tfx.types.standard_artifacts import Model
63+
from tfx.utils import kube_utils
6264

6365

66+
# TODO(jiyongjung): Merge with kube_utils.PodStatus
6467
# Various execution status of a KFP pipeline.
6568
KFP_RUNNING_STATUS = 'running'
6669
KFP_SUCCESS_STATUS = 'succeeded'
@@ -113,7 +116,7 @@ def poll_kfp_with_retry(host: Text, run_id: Text, retry_limit: int,
113116
# https://github.com/kubeflow/pipelines/issues/3669
114117
# by wait-and-retry when ApiException is hit.
115118
try:
116-
get_run_response = client._run_api.get_run(run_id=run_id) # pylint: disable=protected-access
119+
get_run_response = client.get_run(run_id=run_id)
117120
except rest.ApiException as api_err:
118121
# If get_run failed with ApiException, wait _POLLING_INTERVAL and retry.
119122
if retry_count < retry_limit:
@@ -144,6 +147,39 @@ def poll_kfp_with_retry(host: Text, run_id: Text, retry_limit: int,
144147
time.sleep(polling_interval)
145148

146149

150+
def print_failure_log_for_run(host: Text, run_id: Text, namespace: Text):
151+
"""Prints logs of failed components of a run.
152+
153+
Prints execution logs for failed components using `logging.info`.
154+
This resembles the behavior of `argo logs` but uses K8s API directly.
155+
Don't print anything if the run was successful.
156+
157+
Args:
158+
host: address of the KFP deployment.
159+
run_id: id of the execution of the pipeline.
160+
namespace: namespace of K8s cluster.
161+
"""
162+
client = kfp.Client(host=host)
163+
run = client.get_run(run_id=run_id)
164+
workflow_manifest = json.loads(run.pipeline_runtime.workflow_manifest)
165+
if kube_utils.PodPhase(
166+
workflow_manifest['status']['phase']) != kube_utils.PodPhase.FAILED:
167+
return
168+
169+
k8s_client = kube_utils.make_core_v1_api()
170+
pods = [i for i in workflow_manifest['status']['nodes'] if i['type'] == 'Pod']
171+
for pod in pods:
172+
if kube_utils.PodPhase(pod['phase']) != kube_utils.PodPhase.FAILED:
173+
continue
174+
display_name = pod['displayName']
175+
pod_id = pod['id']
176+
177+
log = k8s_client.read_namespaced_pod_log(
178+
pod_id, namespace=namespace, container='main')
179+
for line in log.splitlines():
180+
logging.info('%s:%s', display_name, line)
181+
182+
147183
# Custom component definitions for testing purpose.
148184
class _HelloWorldSpec(component_spec.ComponentSpec):
149185
INPUTS = {}

0 commit comments

Comments
 (0)