Skip to content

Commit

Permalink
Merge pull request #359 from DFE-Digital/clone-cluster
Browse files Browse the repository at this point in the history
Clone cluster
  • Loading branch information
RMcVelia authored Jan 30, 2025
2 parents 58797e9 + 3ff2149 commit e398954
Show file tree
Hide file tree
Showing 16 changed files with 330 additions and 28 deletions.
4 changes: 2 additions & 2 deletions cluster/terraform_aks_cluster/azure_metric_alerts.tf
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ data "azurerm_subscription" "current" {}
resource "azurerm_monitor_metric_alert" "port_exhaustion" {
name = "${var.resource_prefix}-tsc-${var.environment}-port-exhaustion"
resource_group_name = "${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg"
scopes = ["/subscriptions/${data.azurerm_subscription.current.subscription_id}/resourceGroups/${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg/providers/Microsoft.Network/loadBalancers/kubernetes"]
scopes = ["/subscriptions/${data.azurerm_subscription.current.subscription_id}/resourceGroups/${azurerm_kubernetes_cluster.main.node_resource_group}/providers/Microsoft.Network/loadBalancers/kubernetes"]
severity = 1
criteria {
metric_namespace = "microsoft.network/loadbalancers"
Expand All @@ -62,7 +62,7 @@ resource "azurerm_monitor_metric_alert" "port_exhaustion" {
resource "azurerm_monitor_metric_alert" "high_port_usage" {
name = "${var.resource_prefix}-tsc-${var.environment}-high-port-usage"
resource_group_name = "${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg"
scopes = ["/subscriptions/${data.azurerm_subscription.current.subscription_id}/resourceGroups/${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg/providers/Microsoft.Network/loadBalancers/kubernetes"]
scopes = ["/subscriptions/${data.azurerm_subscription.current.subscription_id}/resourceGroups/${azurerm_kubernetes_cluster.main.node_resource_group}/providers/Microsoft.Network/loadBalancers/kubernetes"]
severity = 2
criteria {
metric_namespace = "microsoft.network/loadbalancers"
Expand Down
3 changes: 1 addition & 2 deletions cluster/terraform_aks_cluster/config/development.tfvars.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@
"orchestrator_version": "1.30.6"
}
},
"admin_group_id": "f77b2daf-7ff4-4aa5-8138-cf983d0b4a18",
"enable_azure_RBAC": true
"admin_group_id": "f77b2daf-7ff4-4aa5-8138-cf983d0b4a18"
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@
"orchestrator_version": "1.30.6"
}
},
"admin_group_id": "f726cc54-78cb-4c98-89a6-b8e4396afb98",
"enable_azure_RBAC": true
"admin_group_id": "f726cc54-78cb-4c98-89a6-b8e4396afb98"
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,5 @@
}
},
"admin_group_id": "5b0f84de-54a8-481a-8689-f3c226597259",
"enable_azure_RBAC": true,
"ci_collection_interval": "1m"
}
3 changes: 1 addition & 2 deletions cluster/terraform_aks_cluster/config/test.tfvars.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,5 @@
"orchestrator_version": "1.30.6"
}
},
"admin_group_id": "21b2f2a6-231e-45cb-b624-d5521b820941",
"enable_azure_RBAC": true
"admin_group_id": "21b2f2a6-231e-45cb-b624-d5521b820941"
}
37 changes: 30 additions & 7 deletions cluster/terraform_aks_cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,14 @@ resource "azurerm_kubernetes_cluster_node_pool" "node_pools" {
resource "azurerm_kubernetes_cluster" "clone" {
count = var.clone_cluster ? 1 : 0

name = local.clone_cluster_name
location = azurerm_kubernetes_cluster.main.location
resource_group_name = azurerm_kubernetes_cluster.main.resource_group_name
node_resource_group = local.clone_node_resource_group_name
dns_prefix = "${azurerm_kubernetes_cluster.main.dns_prefix}-clone"
kubernetes_version = azurerm_kubernetes_cluster.main.kubernetes_version
name = local.clone_cluster_name
location = azurerm_kubernetes_cluster.main.location
resource_group_name = azurerm_kubernetes_cluster.main.resource_group_name
node_resource_group = local.clone_node_resource_group_name
dns_prefix = "${azurerm_kubernetes_cluster.main.dns_prefix}-clone"
kubernetes_version = azurerm_kubernetes_cluster.main.kubernetes_version
oidc_issuer_enabled = true
workload_identity_enabled = true

dynamic "azure_active_directory_role_based_access_control" {
for_each = var.enable_azure_RBAC_clone ? [1] : []
Expand All @@ -111,7 +113,17 @@ resource "azurerm_kubernetes_cluster" "clone" {
}

identity {
type = "SystemAssigned"
type = "UserAssigned"
identity_ids = [data.azurerm_user_assigned_identity.aks_control_plane.id]
}

network_profile {
network_plugin = "kubenet"
load_balancer_sku = "standard"

load_balancer_profile {
outbound_ip_address_ids = [azurerm_public_ip.egress-public-ip-clone[0].id]
}
}

lifecycle { ignore_changes = [tags] }
Expand Down Expand Up @@ -142,6 +154,17 @@ resource "azurerm_public_ip" "egress-public-ip" {
lifecycle { ignore_changes = [tags] }
}

# Static, Standard-SKU public IP that the clone cluster's load balancer
# profile references as its outbound address. Only instantiated when
# var.clone_cluster is true, so consumers must index it with [0].
resource "azurerm_public_ip" "egress-public-ip-clone" {
  count = var.clone_cluster ? 1 : 0

  name                = "${var.resource_prefix}-tsc-aks-nodes-${var.environment}-clone-egress-pip"
  resource_group_name = data.azurerm_resource_group.cluster.name
  location            = data.azurerm_resource_group.cluster.location

  sku               = "Standard"
  allocation_method = "Static"

  # Tag changes made outside this configuration are not reverted.
  lifecycle {
    ignore_changes = [tags]
  }
}

# resource "null_resource" "delayed_uncordon" {
# # Trigger uncordon after node pool creation/updation

Expand Down
4 changes: 2 additions & 2 deletions cluster/terraform_aks_cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ variable "admin_group_id" {
}
# Switch for Azure AD RBAC on the main cluster.
# The block previously carried two `default` arguments (false, then true);
# an argument may only be set once per block, so only the intended value
# (true) is kept. Environments that need RBAC disabled must now set this
# to false explicitly in their tfvars.
variable "enable_azure_RBAC" {
  type        = bool
  default     = true
  description = "Enable Azure AD RBAC on this cluster"
}
# Switch for Azure AD RBAC on the clone cluster; it gates the dynamic
# azure_active_directory_role_based_access_control block of
# azurerm_kubernetes_cluster.clone.
# The block previously carried two `default` arguments (false, then true);
# an argument may only be set once per block, so only the intended value
# (true) is kept.
variable "enable_azure_RBAC_clone" {
  type        = bool
  default     = true
  description = "Enable Azure AD RBAC on the clone cluster"
}

Expand Down
27 changes: 27 additions & 0 deletions cluster/terraform_kubernetes/analytics.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,30 @@ resource "kubernetes_config_map" "ama_logs" {
}

}

# Clone-cluster copy of the "container-azm-ms-agentconfig" ConfigMap in the
# kube-system namespace. It is created only when var.clone_cluster is true and
# is applied through the kubernetes.clone provider alias.
# NOTE(review): presumably consumed by the Azure Monitor container agent, as the
# resource name suggests - confirm against the agent documentation.
resource "kubernetes_config_map" "ama_logs_clone" {
count = var.clone_cluster ? 1 : 0
provider = kubernetes.clone
metadata {
name = "container-azm-ms-agentconfig"
namespace = "kube-system"
}

data = {
config-version = "ver1"
# Settings document passed verbatim to the agent: environment-variable
# collection is switched off and annotation-based log filtering is switched on.
# The heredoc body is runtime data; editing it (including whitespace) changes
# the ConfigMap content.
log-data-collection-settings = <<-EOT
# Log data collection settings
[log_collection_settings]
[log_collection_settings.env_var]
# In the absense of this configmap, default value for enabled is true
enabled = false
[log_collection_settings.filter_using_annotations]
# if enabled will exclude logs from pods with annotations fluentbit.io/exclude: "true".
# Read more: https://docs.fluentbit.io/manual/pipeline/filters/kubernetes#kubernetes-annotations
enabled = true
EOT
schema-version = "v1"
}

}
188 changes: 187 additions & 1 deletion cluster/terraform_kubernetes/filebeat.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ resource "kubernetes_service_account" "filebeat" {
}
}


resource "kubernetes_cluster_role" "filebeat" {
metadata {
name = "filebeat"
Expand All @@ -34,6 +33,7 @@ resource "kubernetes_cluster_role" "filebeat" {
}

}

resource "kubernetes_cluster_role_binding" "filebeat" {
metadata {
name = "filebeat"
Expand Down Expand Up @@ -179,3 +179,189 @@ resource "kubernetes_daemonset" "filebeat" {
}
}
}

#
# Clone definition
#

# ServiceAccount "filebeat" on the clone cluster, placed in the clone's
# "monitoring" namespace. Exists only when var.clone_cluster is true.
resource "kubernetes_service_account" "filebeat_clone" {
  provider = kubernetes.clone
  count    = var.clone_cluster ? 1 : 0

  metadata {
    namespace = kubernetes_namespace.default_list_clone["monitoring"].metadata[0].name
    name      = "filebeat"

    labels = {
      name = "filebeat"
    }
  }
}

# Read-only ClusterRole "filebeat" on the clone cluster. Grants get/list/watch
# on pods, nodes and namespaces (core API group) and on replicasets (apps
# group). Exists only when var.clone_cluster is true.
resource "kubernetes_cluster_role" "filebeat_clone" {
  provider = kubernetes.clone
  count    = var.clone_cluster ? 1 : 0

  metadata {
    name = "filebeat"

    labels = {
      name = "filebeat"
    }
  }

  # Core API group ("" is the core group).
  rule {
    verbs      = ["get", "list", "watch"]
    api_groups = [""]
    resources  = ["pods", "nodes", "namespaces"]
  }

  # apps API group.
  rule {
    verbs      = ["get", "list", "watch"]
    api_groups = ["apps"]
    resources  = ["replicasets"]
  }
}

# Binds the clone cluster's "filebeat" ClusterRole to the "filebeat"
# ServiceAccount in the clone's "monitoring" namespace. Exists only when
# var.clone_cluster is true.
resource "kubernetes_cluster_role_binding" "filebeat_clone" {
  provider = kubernetes.clone
  count    = var.clone_cluster ? 1 : 0

  metadata {
    name = "filebeat"

    labels = {
      name = "filebeat"
    }
  }

  # Role being granted: taken from the clone ClusterRole resource so the
  # binding is ordered after it.
  role_ref {
    kind      = "ClusterRole"
    name      = kubernetes_cluster_role.filebeat_clone[0].metadata[0].name
    api_group = "rbac.authorization.k8s.io"
  }

  # Grantee: the service account is referenced by its literal name.
  subject {
    kind      = "ServiceAccount"
    namespace = kubernetes_namespace.default_list_clone["monitoring"].metadata[0].name
    name      = "filebeat"
  }
}

# ConfigMap holding filebeat.yml for the clone cluster. The object name embeds
# local.config_map_hash and the payload comes from local.config_map_data (both
# defined outside this block). Exists only when var.clone_cluster is true.
# NOTE(review): presumably the hash is derived from the config content so that
# a config change yields a new ConfigMap name - confirm in the locals.
resource "kubernetes_config_map" "filebeat_clone" {
  provider = kubernetes.clone
  count    = var.clone_cluster ? 1 : 0

  metadata {
    namespace = kubernetes_namespace.default_list_clone["monitoring"].metadata[0].name
    name      = "filebeat-config-${local.config_map_hash}"
  }

  data = {
    "filebeat.yml" = local.config_map_data
  }
}

# Filebeat DaemonSet for the clone cluster, deployed to the clone's
# "monitoring" namespace through the kubernetes.clone provider alias.
# Exists only when var.clone_cluster is true.
resource "kubernetes_daemonset" "filebeat_clone" {
  provider = kubernetes.clone
  count    = var.clone_cluster ? 1 : 0

  metadata {
    namespace = kubernetes_namespace.default_list_clone["monitoring"].metadata[0].name
    name      = "filebeat"

    labels = {
      "app" = "filebeat"
    }
  }

  spec {
    # The selector must match the pod template labels below.
    selector {
      match_labels = {
        "app" = "filebeat"
      }
    }

    template {
      metadata {
        labels = {
          "app" = "filebeat"
        }
      }

      spec {
        termination_grace_period_seconds = 30
        service_account_name             = kubernetes_service_account.filebeat_clone[0].metadata[0].name

        # Schedule only onto Linux nodes labelled as the "applications" pool.
        node_selector = {
          "kubernetes.io/os"                = "linux"
          "teacherservices.cloud/node_pool" = "applications"
        }

        container {
          name  = "filebeat"
          image = "docker.elastic.co/beats/filebeat-oss:${var.filebeat_version}"
          args  = ["-c", "filebeat.yml", "-e"]

          # Runs as UID 0 (root).
          security_context {
            run_as_user = 0
          }

          resources {
            requests = {
              cpu    = "100m"
              memory = "100Mi"
            }
            limits = {
              cpu    = "200m"
              memory = "200Mi"
            }
          }

          # Single file from the ConfigMap volume, mounted over filebeat.yml.
          volume_mount {
            name       = "filebeat-config"
            mount_path = "/usr/share/filebeat/filebeat.yml"
            sub_path   = "filebeat.yml"
            read_only  = "true"
          }

          # Filebeat data directory, backed by a host path (see "data" volume).
          volume_mount {
            name       = "data"
            mount_path = "/usr/share/filebeat/data"
          }

          # Host /var/log, mounted read-only.
          volume_mount {
            name       = "varlog"
            mount_path = "/var/log"
            read_only  = "true"
          }
        }

        volume {
          name = "filebeat-config"

          config_map {
            name         = kubernetes_config_map.filebeat_clone[0].metadata[0].name
            default_mode = "0644"
          }
        }

        volume {
          name = "varlog"

          host_path {
            path = "/var/log"
          }
        }

        # Created on the node if it does not exist yet (DirectoryOrCreate).
        volume {
          name = "data"

          host_path {
            type = "DirectoryOrCreate"
            path = "/var/lib/filebeat-data"
          }
        }
      }
    }
  }
}
18 changes: 17 additions & 1 deletion cluster/terraform_kubernetes/gcp_wif.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,23 @@ resource "kubernetes_service_account" "gcp_wif" {

metadata {
name = "gcp-wif"
namespace = each.key
namespace = kubernetes_namespace.default_list[each.key].metadata[0].name

annotations = {
"azure.workload.identity/client-id" = azurerm_user_assigned_identity.gcp_wif[each.key].client_id
}
}
}

resource "kubernetes_service_account" "gcp_wif_clone" {

for_each = var.clone_cluster ? toset(var.gcp_wif_namespaces) : []

provider = kubernetes.clone

metadata {
name = "gcp-wif"
namespace = kubernetes_namespace.default_list[each.key].metadata[0].name

annotations = {
"azure.workload.identity/client-id" = azurerm_user_assigned_identity.gcp_wif[each.key].client_id
Expand Down
Loading

0 comments on commit e398954

Please sign in to comment.