1
# AWS provider configuration; both the named profile and the region are
# taken from input variables.
provider "aws" {
  profile = var.profile
  region  = var.region
}
5
+
6
# Hopsworks.ai provider with no inline settings.
# NOTE(review): presumably the API credentials are supplied through the
# environment — confirm against the provider documentation.
provider "hopsworksai" {}
9
+
10
+
11
+ # Step 1: Create the required AWS resources: an SSH key, an S3 bucket, and an instance profile with the required Hopsworks permissions
12
# Helper module from the Terraform registry, pinned to an exact version.
# Its outputs ssh_key_pair_name, bucket_name and instance_profile_arn are
# consumed by the Hopsworks cluster resource in this file.
module "aws" {
  source  = "logicalclocks/helpers/hopsworksai//modules/aws"
  version = "2.3.0"

  region = var.region
}
17
+
18
+
19
+ # Step 2: Create a VPC
20
# Availability zones for the configured AWS provider (no filters applied);
# the list of zone names is handed to the VPC module.
data "aws_availability_zones" "available" {}
22
+
23
# VPC with a single public subnet. The cluster and the load balancer in
# this file are both placed in that public subnet.
module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "3.1.0"

  name = "${var.cluster_name}-vpc"
  cidr = "172.16.0.0/16"
  azs  = data.aws_availability_zones.available.names

  public_subnets       = ["172.16.4.0/24"]
  enable_dns_hostnames = true
}
33
+
34
+ # Step 3: Create a security group and open required ports
35
# Security group for the Hopsworks cluster.
#
# Inbound: one TCP rule per service port listed below, open to any IPv4
# address, plus unrestricted traffic between members of this group.
# Outbound: everything, IPv4 and IPv6.
resource "aws_security_group" "security_group" {
  name        = "${var.cluster_name}-security-group"
  description = "Allow access for Hopsworks cluster"
  vpc_id      = module.vpc.vpc_id

  # One single-port TCP ingress rule per entry; the map key becomes the
  # rule description and the value is used as both from_port and to_port.
  dynamic "ingress" {
    for_each = {
      HTTPS         = 443
      HTTP          = 80
      MYSQL         = 3306
      ArrowFlight   = 5005
      HiveServer    = 9085
      HiveMetastore = 9083
      Kafka         = 9092
    }

    content {
      description = ingress.key
      from_port   = ingress.value
      to_port     = ingress.value
      protocol    = "tcp"
      cidr_blocks = ["0.0.0.0/0"]
    }
  }

  # All protocols and ports between instances that share this group.
  # The protocol is written as the string "-1" to match the egress rule.
  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    self      = true
  }

  # Unrestricted outbound traffic.
  egress {
    from_port        = 0
    to_port          = 0
    protocol         = "-1"
    cidr_blocks      = ["0.0.0.0/0"]
    ipv6_cidr_blocks = ["::/0"]
  }
}
111
+
112
+
113
+ # Step 3b: Create an internet-facing network load balancer in the public subnet
114
# Internet-facing network load balancer attached to the VPC's public
# subnets. Its DNS name is written into the cluster by the init script, and
# the listeners in this file forward to the target groups defined below.
resource "aws_lb" "lb" {
  name               = "${var.cluster_name}-lb"
  load_balancer_type = "network"
  internal           = false
  subnets            = module.vpc.public_subnets
}
120
+
121
+ # Step 4: Look up instance types and create a Hopsworks cluster with 1 worker and a RonDB deployment
122
# Instance type lookup for the cluster head node on AWS in var.region.
data "hopsworksai_instance_type" "head" {
  cloud_provider = "AWS"
  region         = var.region
  node_type      = "head"
}
127
+
128
# Instance type lookup for the RonDB management node on AWS in var.region.
data "hopsworksai_instance_type" "rondb_mgm" {
  cloud_provider = "AWS"
  region         = var.region
  node_type      = "rondb_management"
}
133
+
134
# Instance type lookup for the RonDB data nodes on AWS in var.region.
data "hopsworksai_instance_type" "rondb_data" {
  cloud_provider = "AWS"
  region         = var.region
  node_type      = "rondb_data"
}
139
+
140
# Instance type lookup for the RonDB MySQL server nodes on AWS in
# var.region, constrained to at least 8 CPUs and 16 GB of memory.
data "hopsworksai_instance_type" "rondb_mysql" {
  cloud_provider = "AWS"
  region         = var.region
  node_type      = "rondb_mysql"

  min_cpus      = 8
  min_memory_gb = 16
}
147
+
148
# Instance type lookup for worker nodes on AWS in var.region, constrained
# to at least 8 CPUs.
# NOTE(review): the name suggests the provider returns the smallest type
# that satisfies the constraint — confirm against the provider docs.
data "hopsworksai_instance_type" "smallest_worker" {
  cloud_provider = "AWS"
  region         = var.region
  node_type      = "worker"

  min_cpus = 8
}
154
+
155
# Hopsworks cluster: one head node, one worker, and a RonDB deployment with
# one management node block, two data nodes and var.num_mysql_servers MySQL
# nodes. The cluster is placed in the first public subnet of the VPC and
# uses the security group, bucket, SSH key and instance profile created
# elsewhere in this file.
resource "hopsworksai_cluster" "cluster" {
  name    = var.cluster_name
  ssh_key = module.aws.ssh_key_pair_name

  head {
    instance_type = data.hopsworksai_instance_type.head.id
  }

  workers {
    instance_type = data.hopsworksai_instance_type.smallest_worker.id
    count         = 1
  }

  aws_attributes {
    region               = var.region
    instance_profile_arn = module.aws.instance_profile_arn

    bucket {
      name = module.aws.bucket_name
    }

    network {
      vpc_id            = module.vpc.vpc_id
      subnet_id         = module.vpc.public_subnets[0]
      security_group_id = aws_security_group.security_group.id
    }
  }

  rondb {
    configuration {
      ndbd_default {
        # Matches the two data nodes declared below.
        replication_factor = 2
      }
    }

    management_nodes {
      instance_type = data.hopsworksai_instance_type.rondb_mgm.id
      disk_size     = 30
    }

    data_nodes {
      instance_type = data.hopsworksai_instance_type.rondb_data.id
      count         = 2
      disk_size     = 512
    }

    mysql_nodes {
      instance_type            = data.hopsworksai_instance_type.rondb_mysql.id
      count                    = var.num_mysql_servers
      disk_size                = 256
      arrow_flight_with_duckdb = true
    }
  }

  # Init script: stores the load balancer's DNS name in the Hopsworks
  # `variables` table under id 'loadbalancer_external_domain'.
  #
  # The script exits early with status 0 unless the cloud-init script
  # contains exactly two "INSTANCE_TYPE=master" lines.
  # NOTE(review): presumably a count of 2 identifies the head node —
  # confirm against the generated part-001 script.
  #
  # `grep -c` exits with status 1 when it counts zero matches; under
  # `set -e` that would abort the script with a failure before the
  # `exit 0` branch is reached, hence the `|| true`.
  #
  # `<<-` strips the common leading indentation, so the shebang still lands
  # in column 0. Only `${...}` is interpolated by Terraform; `$IS_MASTER`
  # and `$(...)` are passed through to bash unchanged.
  init_script = <<-EOF
    #!/usr/bin/env bash
    set -e
    IS_MASTER=$(grep -c "INSTANCE_TYPE=master" /var/lib/cloud/instance/scripts/part-001 || true)
    if [[ $IS_MASTER != 2 ]]; then
      exit 0
    fi
    /srv/hops/mysql-cluster/ndb/scripts/mysql-client.sh hopsworks -e "UPDATE variables SET value='${aws_lb.lb.dns_name}' WHERE id='loadbalancer_external_domain';"
  EOF
}
217
+
218
+ # Step 5: Create target groups and register them with the load balancer
219
+
220
# EC2 instances whose Name tag matches "<cluster name>-mysqld*". The name
# is read from the cluster resource rather than from var.cluster_name, so
# this lookup depends on the cluster resource.
data "aws_instances" "mysqld" {
  instance_tags = {
    Name = "${hopsworksai_cluster.cluster.name}-mysqld*"
  }
}
225
+
226
# TCP target group for MySQL (port 3306) in the cluster's VPC, with a TCP
# health check.
resource "aws_lb_target_group" "mysqld_target_group" {
  name     = "${hopsworksai_cluster.cluster.name}-mysqld"
  protocol = "TCP"
  port     = 3306
  vpc_id   = hopsworksai_cluster.cluster.aws_attributes[0].network[0].vpc_id

  health_check {
    enabled  = true
    protocol = "TCP"
  }
}
236
+
237
# Registers the matched mysqld instances with the MySQL target group, one
# attachment per expected MySQL server (var.num_mysql_servers), each
# indexing into the instance id list by count.index.
resource "aws_lb_target_group_attachment" "mysqld_target_group" {
  count = var.num_mysql_servers

  target_group_arn = aws_lb_target_group.mysqld_target_group.arn
  port             = 3306
  target_id        = data.aws_instances.mysqld.ids[count.index]
}
243
+
244
# TCP target group for ArrowFlight (port 5005) in the cluster's VPC, with a
# TCP health check.
resource "aws_lb_target_group" "arrow_flight_target_group" {
  name     = "${hopsworksai_cluster.cluster.name}-arrowflight"
  protocol = "TCP"
  port     = 5005
  vpc_id   = hopsworksai_cluster.cluster.aws_attributes[0].network[0].vpc_id

  health_check {
    enabled  = true
    protocol = "TCP"
  }
}
254
+
255
# Registers the same mysqld instances with the ArrowFlight target group on
# port 5005, one attachment per expected MySQL server.
resource "aws_lb_target_group_attachment" "arrow_flight_target_group" {
  count = var.num_mysql_servers

  target_group_arn = aws_lb_target_group.arrow_flight_target_group.arn
  port             = 5005
  target_id        = data.aws_instances.mysqld.ids[count.index]
}
261
+
262
# TCP target group for HiveServer (port 9085) in the cluster's VPC, with a
# TCP health check.
resource "aws_lb_target_group" "hiveserver_target_group" {
  name     = "${hopsworksai_cluster.cluster.name}-hiveserver"
  protocol = "TCP"
  port     = 9085
  vpc_id   = hopsworksai_cluster.cluster.aws_attributes[0].network[0].vpc_id

  health_check {
    enabled  = true
    protocol = "TCP"
  }
}
272
+
273
# Registers the cluster head node with the HiveServer target group.
# Uses bracket indexing (head[0]) to match the aws_attributes[0] /
# network[0] references used elsewhere in this file.
resource "aws_lb_target_group_attachment" "hiveserver_target_group" {
  target_group_arn = aws_lb_target_group.hiveserver_target_group.arn
  target_id        = hopsworksai_cluster.cluster.head[0].node_id
  port             = 9085
}
278
+
279
# TCP target group for the Hive metastore (port 9083) in the cluster's VPC,
# with a TCP health check.
resource "aws_lb_target_group" "hivemetastore_target_group" {
  name     = "${hopsworksai_cluster.cluster.name}-hivemetastore"
  protocol = "TCP"
  port     = 9083
  vpc_id   = hopsworksai_cluster.cluster.aws_attributes[0].network[0].vpc_id

  health_check {
    enabled  = true
    protocol = "TCP"
  }
}
289
+
290
# Registers the cluster head node with the Hive metastore target group.
# Uses bracket indexing (head[0]) to match the aws_attributes[0] /
# network[0] references used elsewhere in this file.
resource "aws_lb_target_group_attachment" "hivemetastore_target_group" {
  target_group_arn = aws_lb_target_group.hivemetastore_target_group.arn
  target_id        = hopsworksai_cluster.cluster.head[0].node_id
  port             = 9083
}
295
+
296
+
297
# Load balancer listener on TCP 3306 that forwards to the MySQL target
# group.
resource "aws_lb_listener" "mysqld" {
  load_balancer_arn = aws_lb.lb.arn
  port              = 3306
  protocol          = "TCP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.mysqld_target_group.arn
  }
}
306
+
307
# Load balancer listener on TCP 5005 that forwards to the ArrowFlight
# target group.
resource "aws_lb_listener" "arrowflight" {
  load_balancer_arn = aws_lb.lb.arn
  port              = 5005
  protocol          = "TCP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.arrow_flight_target_group.arn
  }
}
316
+
317
# Load balancer listener on TCP 9085 that forwards to the HiveServer target
# group.
resource "aws_lb_listener" "hiveserver" {
  load_balancer_arn = aws_lb.lb.arn
  port              = 9085
  protocol          = "TCP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.hiveserver_target_group.arn
  }
}
326
+
327
# Load balancer listener on TCP 9083 that forwards to the Hive metastore
# target group.
resource "aws_lb_listener" "hivemetastore" {
  load_balancer_arn = aws_lb.lb.arn
  port              = 9083
  protocol          = "TCP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.hivemetastore_target_group.arn
  }
}
0 commit comments