Skip to content

Commit dd0e83b

Browse files
Merge pull request #346 from supertetelman/kubeflow-6-2
Kubeflow 6 2
2 parents c8beb2d + b58936e commit dd0e83b

File tree

4 files changed

+384
-112
lines changed

4 files changed

+384
-112
lines changed

docs/kubeflow.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Kubeflow
2+
3+
[Kubeflow](https://www.kubeflow.org/docs/) is a Kubernetes-native tool that eases the deep learning and machine learning lifecycle.
4+
5+
Kubeflow allows users to request specific resources (such as number of GPUs and CPUs), specify Docker images, and easily launch and develop through Jupyter notebooks. Kubeflow makes it easy to create persistent home directories, mount data volumes, and share notebooks within a team.
6+
7+
Kubeflow also offers a full deep learning [pipeline](https://www.kubeflow.org/docs/pipelines/overview/pipelines-overview/) platform that allows you to run, track, and version experiments. Pipelines can be used to deploy code to production and can include all steps in the training process (data prep, training, tuning, etc.) each done through different Docker images.
8+
9+
Additionally, Kubeflow offers [hyper-parameter tuning](https://github.com/kubeflow/katib) options.
10+
11+
Kubeflow is an [open source project](https://github.com/kubeflow/kubeflow) and is regularly evolving and adding [new features](https://github.com/kubeflow/kubeflow/blob/master/ROADMAP.md).
12+
13+
## Installation
14+
15+
Deploy Kubernetes by following the [Kubernetes GPU Cluster Deployment Guide](kubernetes-cluster.md)
16+
17+
Deploy [Ceph](kubernetes-cluster.md#persistent-storage)
18+
19+
Deploy the [LoadBalancer](ingress.md#on-prem-loadbalancer). This step is not required if you pass the `-x` option to the deploy script; however, a `-x` install does not include built-in multi-user support.
20+
21+
22+
Deploy Kubeflow:
23+
24+
```sh
25+
# Deploy
26+
./scripts/k8s_deploy_kubeflow.sh
27+
28+
```
29+
30+
See the [install docs](https://www.kubeflow.org/docs/started/k8s/overview/) for additional install configuration options.
31+
32+
Deploy an older version of Kubeflow with built-in NGC support:
33+
34+
```sh
35+
./scripts/k8s_deploy_kubeflow.v0.5.1.sh
36+
```

docs/rapids-dask.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@ Dask has tight kubernetes integration that allows you to scale up/down your Dask
99

1010
## Installation
1111

12+
### Kubeflow
13+
14+
If Kubeflow has already been installed using the [Kubeflow Deployment Guide](kubeflow.md), no additional K8S setup steps are required.
15+
16+
When deploying through Kubeflow, make sure that a proper Docker image, entrypoint, and cmd have been specified; otherwise Kubeflow will not properly start Jupyter and the service will fail immediately. See the [Dask Kubernetes](../examples/k8s-dask-rapids/docker/Dockerfile) Dockerfile for an example.
17+
18+
### Stand-alone
19+
1220
Deploy Kubernetes by following the [Kubernetes GPU Cluster Deployment Guide](kubernetes-cluster.md)
1321

1422
Deploy the [LoadBalancer](ingress.md#on-prem-loadbalancer)

scripts/k8s_deploy_kubeflow.sh

Lines changed: 219 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -1,120 +1,227 @@
11
#!/usr/bin/env bash
22

3-
export KS_VER=0.13.1
4-
export KS_PKG=ks_${KS_VER}_linux_amd64
5-
export KS_INSTALL_DIR=/usr/local/bin
6-
7-
export KUBEFLOW_TAG=v0.5.1
8-
export KFAPP=kubeflow
9-
export KUBEFLOW_SRC=/opt/kubeflow
10-
11-
export DEEPOPS_DIR=$(dirname $(dirname $(readlink -f $0)))
12-
13-
KSONNET_URL="${KSONNET_URL:-https://github.com/ksonnet/ksonnet/releases/download/v${KS_VER}/${KS_PKG}.tar.gz}"
14-
KUBEFLOW_URL="${KUBEFLOW_URL:-https://raw.githubusercontent.com/kubeflow/kubeflow/${KUBEFLOW_TAG}/scripts/download.sh}"
15-
16-
###
17-
18-
# Install dependencies
19-
. /etc/os-release
20-
case "$ID_LIKE" in
21-
rhel*)
22-
type curl >/dev/null 2>&1
23-
if [ $? -ne 0 ] ; then
24-
sudo yum -y install curl wget
25-
fi
3+
# --- Local paths -----------------------------------------------------------
# Where the Kubeflow app directory and the kfctl binary are placed. Both may
# be overridden from the environment; the generated delete script lives
# inside the app directory.
KFAPP=${KFAPP:-~/kubeflow}
KFCTL=${KFCTL:-~/kfctl}
KUBEFLOW_DEL_SCRIPT="${KFAPP}/deepops-delete-kubeflow.sh"
export KFAPP KFCTL KUBEFLOW_DEL_SCRIPT

# --- Release and download locations ----------------------------------------
# CONFIG is the default (multi-user auth) kfctl config; NO_AUTH_CONFIG is the
# alternative config without multi-user auth.
KUBEFLOW_TAG=v0.6.2
KFCTL_URL=https://github.com/kubeflow/kubeflow/releases/download/${KUBEFLOW_TAG}/kfctl_${KUBEFLOW_TAG}_linux.tar.gz
CONFIG="https://raw.githubusercontent.com/kubeflow/kubeflow/v0.6-branch/bootstrap/config/kfctl_existing_arrikto.0.6.2.yaml"
NO_AUTH_CONFIG="https://raw.githubusercontent.com/kubeflow/kubeflow/v0.6-branch/bootstrap/config/kfctl_k8s_istio.0.6.2.yaml"
export KUBEFLOW_TAG KFCTL_URL CONFIG NO_AUTH_CONFIG

# --- Default user credentials ----------------------------------------------
# Fallbacks used only when the caller has not already set these variables.
KUBEFLOW_USER_EMAIL="${KUBEFLOW_USER_EMAIL:-admin@kubeflow.org}"
KUBEFLOW_PASSWORD="${KUBEFLOW_PASSWORD:-12341234}"
export KUBEFLOW_USER_EMAIL KUBEFLOW_PASSWORD
18+
19+
20+
# Print the command-line usage summary to stdout.
# Takes no arguments; the caller decides the exit status.
function help_me() {
    # Quoted delimiter: the text is emitted literally, with no expansion.
    cat <<'EOF'
Usage:
-h This message.
-p Print out the connection info for Kubeflow
-d Delete Kubeflow from your system (skipping the istio-system namespace that may have been installed with Kubeflow)
-D Delete Kubeflow from your system along with the istio-system namespace. WARNING, do not use this option if other components depend on istio.
-x Install Kubeflow without multi-user auth (this does not require loadbalancing)
-c Specify a different Kubeflow config to install with
EOF
}
29+
30+
31+
function get_opts() {
32+
while getopts "hpc:xdD" option; do
33+
case $option in
34+
p)
35+
KUBEFLOW_PRINT=true
2636
;;
27-
debian*)
28-
type curl >/dev/null 2>&1
29-
if [ $? -ne 0 ] ; then
30-
sudo apt-get -y install curl wget
31-
fi
37+
c)
38+
CONFIG=$OPTARG
3239
;;
33-
*)
34-
echo "Unsupported Operating System $ID_LIKE"
40+
x)
41+
CONFIG=${NO_AUTH_CONFIG}
42+
SKIP_LB=true
43+
;;
44+
d)
45+
KUBEFLOW_DELETE=true
46+
;;
47+
D)
48+
KUBEFLOW_DELETE=true
49+
KUBEFLOW_FULL_DELETE=true
50+
;;
51+
h)
52+
help_me
3553
exit 1
3654
;;
37-
esac
38-
39-
# Rook
40-
kubectl get storageclass 2>&1 | grep "No resources found." >/dev/null 2>&1
41-
if [ $? -eq 0 ] ; then
42-
echo "No storageclass found"
43-
echo "To provision Ceph storage, run: ./scripts/k8s_deploy_rook.sh"
44-
exit 1
45-
fi
46-
47-
# Ksonnet
48-
wget -O /tmp/${KS_PKG}.tar.gz "${KSONNET_URL}" \
49-
--no-check-certificate
50-
mkdir -p ${KS_INSTALL_DIR}
51-
tempd=$(mktemp -d)
52-
tar -xvf /tmp/${KS_PKG}.tar.gz -C ${tempd}
53-
sudo mv ${tempd}/${KS_PKG}/ks ${KS_INSTALL_DIR}
54-
rm -rf ${tempd} /tmp/${KS_PKG}.tar.gz
55-
56-
# Kubeflow
57-
if [ ! -d ${KUBEFLOW_SRC} ] ; then
58-
tempd=$(mktemp -d)
59-
cd ${tempd}
60-
curl "${KUBEFLOW_URL}" | bash
61-
cd -
62-
sudo mv ${tempd} ${KUBEFLOW_SRC}
63-
fi
64-
65-
# Get master ip
66-
master_ip=$(kubectl get nodes -l node-role.kubernetes.io/master= --no-headers -o custom-columns=IP:.status.addresses.*.address | cut -f1 -d, | head -1)
67-
68-
# Check for ingress controller
69-
ingress_name="nginx-ingress"
70-
ingress_ip_string="$(echo ${master_ip} | tr '.' '-')"
71-
if kubectl describe service -l "app=${ingress_name},component=controller" | grep 'LoadBalancer Ingress' >/dev/null 2>&1; then
72-
lb_ip="$(kubectl describe service -l "app=${ingress_name},component=controller" | grep 'LoadBalancer Ingress' | awk '{print $3}')"
73-
ingress_ip_string="$(echo ${lb_ip} | tr '.' '-').nip.io"
74-
echo "Using load balancer url: ${ingress_ip_string}"
75-
fi
76-
77-
# Initialize and generate kubeflow
78-
set -e # XXX: Fail if anything in the initialization or configuration fail
79-
pushd ${HOME}
80-
${KUBEFLOW_SRC}/scripts/kfctl.sh init ${KFAPP} --platform none
81-
cd ${KFAPP}
82-
83-
# Update the Kubeflow Jupyter UI
84-
export KSAPP_DIR="$(pwd)/ks_app"
85-
export KUBEFLOW_SRC
86-
${DEEPOPS_DIR}/scripts/update_kubeflow_config.py
87-
88-
${KUBEFLOW_SRC}/scripts/kfctl.sh generate k8s
89-
pushd ${KSAPP_DIR}
90-
set +e
91-
92-
# NOTE: temporarily using a custom image, to add custom command functionality
93-
ks param set jupyter-web-app image deepops/kubeflow-jupyter-web-app:v0.5-custom-command
94-
95-
# Use NodePort directly if the IP string uses the master IP, otherwise use Ingress URL
96-
if echo "${ingress_ip_string}" | grep "${master_ip}" >/dev/null 2>&1; then
97-
ks param set ambassador ambassadorServiceType NodePort
98-
popd
99-
${KUBEFLOW_SRC}/scripts/kfctl.sh apply k8s
100-
popd
101-
kf_ip=$master_ip
102-
kf_port=$(kubectl -n kubeflow get svc ambassador --no-headers -o custom-columns=:.spec.ports.*.nodePort)
103-
kf_url="http://${kf_ip}:${kf_port}"
55+
* )
56+
help_me
57+
exit 1
58+
;;
59+
esac
60+
done
61+
}
62+
63+
# Verify (and where possible install) everything the deployment needs:
#   1. curl/wget, installed through the distribution package manager if
#      curl is missing;
#   2. at least one Kubernetes storageclass;
#   3. a MetalLB helm release, unless SKIP_LB is set.
# Globals:  SKIP_LB (read); variables from /etc/os-release are sourced into
#           the current shell.
# Exits:    1 on an unsupported OS or missing storageclass,
#           2 when the load balancer is required but missing.
function install_dependencies() {
    # Install dependencies
    . /etc/os-release

    # ID_LIKE is optional in os-release (plain Debian does not set it, and
    # RHEL reports "fedora"), so match against both ID and ID_LIKE.
    case "${ID:-} ${ID_LIKE:-}" in
        *rhel*|*centos*|*fedora*)
            if ! command -v curl >/dev/null 2>&1; then
                sudo yum -y install curl wget
            fi
            ;;
        *debian*|*ubuntu*)
            if ! command -v curl >/dev/null 2>&1; then
                # apt-get rather than apt: apt's CLI is not meant for scripts.
                sudo apt-get -y install curl wget
            fi
            ;;
        *)
            echo "Unsupported Operating System ${ID_LIKE:-${ID:-unknown}}"
            exit 1
            ;;
    esac

    # Rook
    # kubectl reports an empty list on stderr; match without a trailing
    # character so both "No resources found." and "No resources found" hit.
    if kubectl get storageclass 2>&1 | grep -q "No resources found"; then
        echo "No storageclass found"
        echo "To provision Ceph storage, run: ./scripts/k8s_deploy_rook.sh"
        exit 1
    fi

    # MetalLB
    if ! helm list | grep -q metallb; then
        echo "LoadBalancer not found (MetalLB)"
        if [ -n "${SKIP_LB:-}" ]; then
            echo "LoadBalancer not required for alternative install"
        else
            echo "To support Kubeflow on-prem with multi-user-auth please install a load balancer by running"
            echo "./scripts/k8s_deploy_loadbalancer.sh"
            exit 2
        fi
    fi
}
106+
107+
108+
# Download kfctl, then initialise, generate and apply the Kubeflow app.
# Also writes the two delete helper scripts used by tear_down:
#   ${KUBEFLOW_DEL_SCRIPT}          -> "kfctl delete -V k8s"
#   ${KUBEFLOW_DEL_SCRIPT}_full.sh  -> "kfctl delete -V all"
# Globals:  KFCTL_URL, KUBEFLOW_TAG, KFCTL, KFAPP, CONFIG,
#           KUBEFLOW_DEL_SCRIPT (all read).
# Side effects: leaves the working directory at ${KFAPP}.
# Exits:    non-zero when the download, init, generate or apply step fails.
function stand_up() {
    local download_dir
    local rc=0

    # Download the kfctl binary and move it to the default location.
    # A private mktemp directory avoids clashes with a stale or foreign
    # /tmp/kf-download; the subshell keeps the caller's cwd untouched.
    download_dir=$(mktemp -d) || { echo "Unable to create a temporary directory" >&2; exit 1; }
    if ! (
        cd "${download_dir}" &&
        curl -f -O -L "${KFCTL_URL}" &&
        tar -xvf "kfctl_${KUBEFLOW_TAG}_linux.tar.gz" &&
        mv kfctl "${KFCTL}"
    ); then
        rm -rf -- "${download_dir}"
        echo "Failed to download or unpack kfctl from ${KFCTL_URL}" >&2
        exit 1
    fi
    rm -rf -- "${download_dir}"

    # Initialize and apply the Kubeflow project using the specified config
    if ! "${KFCTL}" init "${KFAPP}" --config="${CONFIG}" -V; then
        echo "kfctl init failed for ${KFAPP}" >&2
        exit 1
    fi
    cd "${KFAPP}" || { echo "Cannot enter ${KFAPP}" >&2; exit 1; }
    if ! { "${KFCTL}" generate all -V && "${KFCTL}" apply all -V; }; then
        rc=1
    fi

    # Write the delete helpers even after a failed apply so that a partial
    # install can still be removed. %q shell-quotes the embedded paths.
    printf 'cd %q && %q delete -V k8s; cd && sudo rm -rf %q; sudo rm -f %q\n' \
        "${KFAPP}" "${KFCTL}" "${KFAPP}" "${KFCTL}" > "${KUBEFLOW_DEL_SCRIPT}"
    printf 'cd %q && %q delete -V all; cd && sudo rm -rf %q; sudo rm -f %q\n' \
        "${KFAPP}" "${KFCTL}" "${KFAPP}" "${KFCTL}" > "${KUBEFLOW_DEL_SCRIPT}_full.sh"
    chmod +x "${KUBEFLOW_DEL_SCRIPT}" "${KUBEFLOW_DEL_SCRIPT}_full.sh"

    if [ "${rc}" -ne 0 ]; then
        echo "kfctl generate/apply failed; to clean up run: bash ${KUBEFLOW_DEL_SCRIPT}" >&2
        exit "${rc}"
    fi
}
129+
130+
131+
# Remove Kubeflow using the delete helper scripts written at install time.
# With KUBEFLOW_FULL_DELETE set, the "_full.sh" helper is used and the
# namespaces that kfctl leaves behind are deleted as well.
# Globals:  KUBEFLOW_FULL_DELETE, KUBEFLOW_DEL_SCRIPT, KUBEFLOW_EXTRA_NS,
#           KFCTL (all read).
# Returns:  1 when the delete helper script does not exist, otherwise 0.
function tear_down() {
    local delete_script="${KUBEFLOW_DEL_SCRIPT}"
    local -a extra_namespaces=()

    if [ -n "${KUBEFLOW_FULL_DELETE:-}" ]; then
        delete_script="${KUBEFLOW_DEL_SCRIPT}_full.sh"
    fi

    if [ ! -f "${delete_script}" ]; then
        echo "Delete script ${delete_script} not found; nothing to tear down" >&2
        return 1
    fi
    bash "${delete_script}"

    if [ -n "${KUBEFLOW_FULL_DELETE:-}" ]; then
        # Kubeflow use leads to some user created namespaces that are not torn down during kfctl delete
        # KUBEFLOW_EXTRA_NS is a whitespace-separated list; split it into an array.
        read -r -a extra_namespaces <<< "kubeflow-anonymous ${KUBEFLOW_EXTRA_NS:-}"
        echo "Deleting additional namespaces ${extra_namespaces[*]}, this may take several minutes"
        # --ignore-not-found: a namespace that was never created is not an error.
        kubectl delete ns --ignore-not-found "${extra_namespaces[@]}"
    fi

    # The delete helper normally removes the kfctl binary already, so a
    # missing file must not turn a successful teardown into a failure.
    rm -f -- "${KFCTL}"
}
144+
145+
146+
# Query the cluster for the addresses under which Kubeflow is reachable.
# Sets (globally): master_ip, nodePort, secure_nodePort, lb_ip.
# Exports:         kf_url        - http://<master ip>:<http2 node port>
#                  secure_kf_url - https://<master ip>:<https node port>
#                  lb_url        - https://<load balancer ip>
function get_url() {
    # Get LoadBalancer and NodePorts
    # Every custom-columns expression is single-quoted: the JSONPath filter
    # contains ( ) which are shell metacharacters, and * / [0] would
    # otherwise be open to pathname expansion.
    master_ip=$(kubectl get nodes -l node-role.kubernetes.io/master= --no-headers \
        -o 'custom-columns=IP:.status.addresses.*.address' | cut -f1 -d, | head -1)
    nodePort="$(kubectl get svc -n istio-system istio-ingressgateway --no-headers \
        -o 'custom-columns=PORT:.spec.ports[?(@.name=="http2")].nodePort')"
    secure_nodePort="$(kubectl get svc -n istio-system istio-ingressgateway --no-headers \
        -o 'custom-columns=PORT:.spec.ports[?(@.name=="https")].nodePort')"
    lb_ip="$(kubectl get svc -n istio-system istio-ingressgateway --no-headers \
        -o 'custom-columns=:.status.loadBalancer.ingress[0].ip')"

    export kf_url="http://${master_ip}:${nodePort}"
    export secure_kf_url="https://${master_ip}:${secure_nodePort}"
    export lb_url="https://${lb_ip}"
}
156+
157+
158+
# Print the install location, the removal instructions and the dashboard
# URLs to stdout.
# Globals: KFAPP, KFCTL, KUBEFLOW_DEL_SCRIPT, kf_url, secure_kf_url,
#          lb_url (all read).
function print_info() {
    # One argument per output line; the empty strings are the blank lines.
    printf '%s\n' \
        "" \
        "Kubeflow app installed to: ${KFAPP}" \
        "To remove, run: cd ${KFAPP} && ${KFCTL} delete -V k8s" \
        "To remove the kfctl binary: rm ${KFCTL}" \
        "To fully remove everything:" \
        "bash ${KUBEFLOW_DEL_SCRIPT}" \
        "" \
        "Kubeflow Dashboard (HTTP NodePort): ${kf_url}" \
        "Kubeflow Dashboard (HTTPS NodePort, required for auth): ${secure_kf_url}" \
        "Kubeflow Dashboard (DEFAULT - LoadBalancer, required for auth w/Dex): ${lb_url}" \
        ""
}
171+
172+
173+
function test_script() {
174+
# Don't test recursively
175+
if [ ${KUBEFLOW_TEST} ]; then
176+
export KUBEFLOW_TEST=""
177+
else
178+
return
179+
fi
180+
181+
./${0} -dp
182+
if [ ${?} -eq 0 ]; then
183+
exit 10
184+
fi
185+
./${0} -h
186+
if [ ${?} -eq 0 ]; then
187+
exit 11
188+
fi
189+
190+
./${0}
191+
if [ ${?} -ne 0 ]; then
192+
exit 12 # we should really test with a curl
193+
fi
194+
./${0} -D
195+
if [ ${?} -ne 0 ]; then
196+
exit 13
197+
fi
198+
./${0} -x
199+
if [ ${?} -ne 0 ]; then
200+
exit 14
201+
fi
202+
./${0} -e
203+
if [ ${?} -ne 0 ]; then
204+
exit 15
205+
fi
206+
207+
exit 0
208+
}
209+
210+
test_script
211+
212+
get_opts ${@}
213+
214+
if [ ${KUBEFLOW_PRINT} ] && [ ${KUBEFLOW_DELETE} ]; then
215+
echo "Cannot specify print flag and delete flag"
216+
exit 2
217+
elif [ ${KUBEFLOW_PRINT} ]; then
218+
get_url
219+
print_info
220+
elif [ ${KUBEFLOW_DELETE} ]; then
221+
tear_down
104222
else
105-
ks param set ambassador ambassadorServiceType LoadBalancer
106-
popd
107-
${KUBEFLOW_SRC}/scripts/kfctl.sh apply k8s
108-
popd
109-
kf_ip=$(kubectl -n kubeflow get svc ambassador --no-headers -o custom-columns=:.status.loadBalancer.ingress[0].ip)
110-
kf_url="http://${kf_ip}"
223+
install_dependencies
224+
stand_up
225+
get_url
226+
print_info
111227
fi
112-
113-
echo
114-
echo "Kubeflow app installed to: ${HOME}/${KFAPP}"
115-
echo "To remove, run: cd ${HOME}/${KFAPP} && ${KUBEFLOW_SRC}/scripts/kfctl.sh delete k8s"
116-
echo "To fully remove all source and application code run: cd ${HOME} && rm -rf ${KFAPP}; rm -rf ${KUBEFLOW_SRC}"
117-
echo "To fully remove everything: cd ${HOME}/${KFAPP} && ${KUBEFLOW_SRC}/scripts/kfctl.sh delete k8s; cd ${DEEPOPS_DIR} && sudo rm -rf ${KFAPP}; sudo rm -rf ${KUBEFLOW_SRC}"
118-
echo
119-
echo "Kubeflow Dashboard: ${kf_url}"
120-
echo

0 commit comments

Comments
 (0)