File tree 3 files changed +20
-8
lines changed
tools/kubernetes/terraform 3 files changed +20
-8
lines changed Original file line number Diff line number Diff line change @@ -7,23 +7,31 @@ tpu_node_pools = [{
7
7
machine_type = " ct5lp-hightpu-4t"
8
8
topology = " 16x16"
9
9
policy = " sb-compact-1"
10
+ disk_type = " pd-balanced"
11
+ disk_size_gb = 50
10
12
}, {
11
13
zone = " us-east5-b"
12
14
node_count = 64
13
15
machine_type = " ct5lp-hightpu-4t"
14
16
topology = " 16x16"
15
17
policy = " sb-compact-1"
18
+ disk_type = " pd-balanced"
19
+ disk_size_gb = 50
16
20
}, {
17
21
zone = " us-east5-b"
18
22
node_count = 64
19
23
machine_type = " ct5lp-hightpu-4t"
20
24
topology = " 16x16"
21
25
policy = " sb-compact-1"
26
+ disk_type = " pd-balanced"
27
+ disk_size_gb = 50
22
28
}, {
23
29
zone = " us-east5-b"
24
30
node_count = 64
25
31
machine_type = " ct5lp-hightpu-4t"
26
32
topology = " 16x16"
27
33
policy = " sb-compact-1"
28
- }]
34
+ disk_type = " pd-balanced"
35
+ disk_size_gb = 50
36
+ }]
29
37
maintenance_interval = " PERIODIC"
Original file line number Diff line number Diff line change @@ -58,7 +58,7 @@ resource "google_container_cluster" "tpu_cluster" {
58
58
release_channel {
59
59
channel = " UNSPECIFIED"
60
60
}
61
-
61
+
62
62
network = google_compute_network. vpc . name
63
63
subnetwork = google_compute_subnetwork. subnet . name
64
64
logging_service = " logging.googleapis.com/kubernetes"
@@ -81,7 +81,7 @@ resource "google_container_node_pool" "multihost_tpu" {
81
81
cluster = google_container_cluster. tpu_cluster . name
82
82
83
83
initial_node_count = var. tpu_node_pools [count . index ]. node_count
84
-
84
+
85
85
management {
86
86
auto_upgrade = false
87
87
}
@@ -104,16 +104,18 @@ resource "google_container_node_pool" "multihost_tpu" {
104
104
gcfs_config {
105
105
enabled = true
106
106
}
107
-
108
- image_type = " COS_CONTAINERD"
107
+
108
+ image_type = " COS_CONTAINERD"
109
109
machine_type = var. tpu_node_pools [count . index ]. machine_type
110
+ disk_type = var. tpu_node_pools [count . index ]. disk_type
111
+ disk_size_gb = var. tpu_node_pools [count . index ]. disk_size_gb
110
112
tags = [" gke-node" ]
111
113
metadata = {
112
114
disable-legacy-endpoints = " true"
113
115
}
114
116
}
115
117
placement_policy {
116
- type = " COMPACT"
117
- policy_name = var. tpu_node_pools [count . index ]. policy
118
+ type = " COMPACT"
119
+ policy_name = var. tpu_node_pools [count . index ]. policy
118
120
}
119
121
}
Original file line number Diff line number Diff line change @@ -35,10 +35,12 @@ variable "tpu_node_pools" {
35
35
machine_type = string ,
36
36
topology = string ,
37
37
policy = string ,
38
+ disk_type = string ,
39
+ disk_size_gb = number ,
38
40
}))
39
41
}
40
42
41
43
variable "maintenance_interval" {
42
- default = " AS_NEEDED"
44
+ default = " AS_NEEDED"
43
45
description = " maintenance interval for TPU machines."
44
46
}
You can’t perform that action at this time.
0 commit comments