diff --git a/cluster/terraform_kubernetes/alertmanager.tf b/cluster/terraform_kubernetes/alertmanager.tf
new file mode 100644
index 00000000..b58fee37
--- /dev/null
+++ b/cluster/terraform_kubernetes/alertmanager.tf
@@ -0,0 +1,175 @@
+# Slack webhook URL for Alertmanager, read from the cluster Key Vault.
+data "azurerm_key_vault" "key_vault_slack" {
+  name                = var.cluster_kv
+  resource_group_name = var.resource_group_name
+}
+
+data "azurerm_key_vault_secret" "slack_secret" {
+  key_vault_id = data.azurerm_key_vault.key_vault_slack.id
+  name         = "SLACK-SECRET"
+}
+
+locals {
+  # Alertmanager configuration rendered with the Slack webhook URL and channel.
+  alertmanager_config_path = "${path.module}/config/prometheus/alertmanager-config.yaml"
+  alertmanager_config_content = templatefile(local.alertmanager_config_path, {
+    slack_secret  = data.azurerm_key_vault_secret.slack_secret.value,
+    slack_channel = var.slack_channel
+  })
+
+  # ConfigMap key (file name inside the pod) => template source file.
+  template_files = {
+    "default.tmpl" = "${path.module}/config/prometheus/alertmanager-default.yaml",
+    "slack.tmpl"   = "${path.module}/config/prometheus/alertmanager-slack.yaml"
+  }
+
+  alertmanager_templates = { for k, v in local.template_files : k => file(v) }
+}
+
+# Main configuration, mounted by the Deployment below at /etc/alertmanager.
+# NOTE(review): the rendered config contains the Slack webhook URL, so the
+# secret ends up in a plain ConfigMap; consider a Kubernetes Secret instead.
+resource "kubernetes_manifest" "configmap_monitoring_alertmanager_config" {
+  manifest = {
+    apiVersion = "v1"
+    kind       = "ConfigMap"
+    metadata = {
+      name      = "alertmanager-config"
+      namespace = var.namespace
+    }
+    data = {
+      "config.yml" = local.alertmanager_config_content
+    }
+  }
+}
+
+# Notification templates, mounted at /etc/alertmanager-templates. The
+# `templates` glob in alertmanager-config.yaml must point at that directory.
+resource "kubernetes_manifest" "configmap_monitoring_alertmanager_templates" {
+  manifest = {
+    apiVersion = "v1"
+    kind       = "ConfigMap"
+    metadata = {
+      name      = "alertmanager-templates"
+      namespace = var.namespace
+    }
+    data = local.alertmanager_templates
+  }
+}
+
+# Deployment for AlertManager
+resource "kubernetes_manifest" "deployment_monitoring_alertmanager" {
+  manifest = {
+    "apiVersion" = "apps/v1"
+    "kind"       = "Deployment"
+    "metadata" = {
+      "name"      = "alertmanager"
+      "namespace" = var.namespace
+    }
+    "spec" = {
+      "replicas" = 1
+      "selector" = {
+        "matchLabels" = {
+          "app" = "alertmanager"
+        }
+      }
+      "template" = {
+        "metadata" = {
+          "labels" = {
+            "app" = "alertmanager"
+          }
+        }
+        "spec" = {
+          "containers" = [
+            {
+              "args" = [
+                "--config.file=/etc/alertmanager/config.yml",
+                "--storage.path=/alertmanager",
+              ]
+              "image" = "prom/alertmanager:${var.alertmanager_image_version}"
+              "name"  = "alertmanager"
+              "ports" = [
+                {
+                  "containerPort" = 9093
+                  "name"          = "alertmanager"
+                },
+              ]
+              "resources" = {
+                "limits" = {
+                  "cpu"    = var.alertmanager_cpu_limit
+                  "memory" = var.alertmanager_memory_limit
+                }
+                "requests" = {
+                  "cpu"    = var.alertmanager_cpu_request
+                  "memory" = var.alertmanager_memory_request
+                }
+              }
+              "volumeMounts" = [
+                {
+                  "mountPath" = "/etc/alertmanager"
+                  "name"      = "config-volume"
+                },
+                {
+                  "mountPath" = "/etc/alertmanager-templates"
+                  "name"      = "templates-volume"
+                },
+                {
+                  "mountPath" = "/alertmanager"
+                  "name"      = "alertmanager"
+                },
+              ]
+            },
+          ]
+          "volumes" = [
+            {
+              "configMap" = {
+                "name" = "alertmanager-config"
+              }
+              "name" = "config-volume"
+            },
+            {
+              "configMap" = {
+                "name" = "alertmanager-templates"
+              }
+              "name" = "templates-volume"
+            },
+            {
+              # emptyDir backs --storage.path; its contents do not outlive the pod.
+              "emptyDir" = {}
+              "name"     = "alertmanager"
+            },
+          ]
+        }
+      }
+    }
+  }
+}
+
+# Service for AlertManager
+resource "kubernetes_manifest" "service_monitoring_alertmanager" {
+  manifest = {
+    "apiVersion" = "v1"
+    "kind"       = "Service"
+    "metadata" = {
+      "annotations" = {
+        "prometheus.io/port"   = "9093"
+        "prometheus.io/scrape" = "true"
+      }
+      "name"      = "alertmanager"
+      "namespace" = var.namespace
+    }
+    "spec" = {
+      "ports" = [
+        {
+          "nodePort"   = 31000
+          "port"       = 9093
+          "targetPort" = 9093
+        },
+      ]
+      "selector" = {
+        "app" = "alertmanager"
+      }
+      "type" = "NodePort"
+    }
+  }
+}
diff --git a/cluster/terraform_kubernetes/config/prometheus/alertmanager-config.yaml b/cluster/terraform_kubernetes/config/prometheus/alertmanager-config.yaml
new file mode 100644
index 00000000..e5f20dc8
--- /dev/null
+++ b/cluster/terraform_kubernetes/config/prometheus/alertmanager-config.yaml
@@ -0,0 +1,26 @@
+global:
+# Template files come from the alertmanager-templates ConfigMap, which the
+# Deployment mounts at /etc/alertmanager-templates.
+templates:
+  - '/etc/alertmanager-templates/*.tmpl'
+route:
+  receiver: alert-default
+  group_by: ['alertname', 'priority']
+  group_wait: 10s
+  repeat_interval: 30m
+  routes:
+    # Alerts labelled severity=slack are sent to the Slack receiver.
+    - receiver: slack_demo
+      match:
+        severity: slack
+      group_wait: 10s
+      repeat_interval: 1m
+receivers:
+# No notifier is configured here: alerts routed to it are not sent anywhere.
+- name: alert-default
+- name: slack_demo
+  slack_configs:
+  # api_url and channel are substituted by Terraform templatefile().
+  - api_url: '${slack_secret}'
+    channel: '${slack_channel}'
+    send_resolved: true
diff --git a/cluster/terraform_kubernetes/config/prometheus/alertmanager-default.yaml b/cluster/terraform_kubernetes/config/prometheus/alertmanager-default.yaml
new file mode 100644
index 00000000..b1a8f591
--- /dev/null
+++ b/cluster/terraform_kubernetes/config/prometheus/alertmanager-default.yaml
@@ -0,0 +1,153 @@
+ {{ define "__alertmanager" }}AlertManager{{ end }}
+ {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}
+ {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
+ {{ define "__description" }}{{ end }}
+ {{ define "__text_alert_list" }}{{ range . }}Labels:
+ {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }}
+ {{ end }}Annotations:
+ {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }}
+ {{ end }}Source: {{ .GeneratorURL }}
+ {{ end }}{{ end }}
+ {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }}
+ {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }}
+ {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }}
+ {{ define "slack.default.pretext" }}{{ end }}
+ {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }}
+ {{ define "slack.default.iconemoji" }}{{ end }}
+ {{ define "slack.default.iconurl" }}{{ end }}
+ {{ define "slack.default.text" }}{{/* body follows; closed by the last "end" of this group */}}
+ {{ if gt (len .Alerts.Firing) 0 -}}
+ Alerts Firing:
+ {{ template "__text_alert_list" .Alerts.Firing }}
+ {{- end }}
+ {{ if gt (len .Alerts.Resolved) 0 -}}
+ Alerts Resolved:
+ {{ template "__text_alert_list" .Alerts.Resolved }}
+ {{- end }}
+ {{- end }}
+ {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }}
+ {{ define "email.default.html" }}
+
+
+
+
+
+
+ {{ template "__subject" . }}
+
+
+
+
+
+
+
+
+
+ + + + + + + +
+ {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }} + {{ .Name }}={{ .Value }} + {{ end }} +
+ + + + + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + {{ range .Alerts.Firing }} + + + + {{ end }} + {{ if gt (len .Alerts.Resolved) 0 }} + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + + + + {{ end }} + {{ range .Alerts.Resolved }} + + + + {{ end }} +
+ View in {{ template "__alertmanager" . }} +
+ [{{ .Alerts.Firing | len }}] Firing +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+
+ [{{ .Alerts.Resolved | len }}] Resolved +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+
+
+ {{ end }}
+ {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }}
+ {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
+ {{ if gt (len .Alerts.Firing) 0 }}
+ Alerts Firing:
+ {{ template "__text_alert_list" .Alerts.Firing }}
+ {{ end }}
+ {{ if gt (len .Alerts.Resolved) 0 }}
+ Alerts Resolved:
+ {{ template "__text_alert_list" .Alerts.Resolved }}
+ {{ end }}
+ {{ end }}
+ {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }}
diff --git a/cluster/terraform_kubernetes/config/prometheus/alertmanager-slack.yaml b/cluster/terraform_kubernetes/config/prometheus/alertmanager-slack.yaml
new file mode 100644
index 00000000..a4d301e5
--- /dev/null
+++ b/cluster/terraform_kubernetes/config/prometheus/alertmanager-slack.yaml
@@ -0,0 +1,4 @@
+ {{ define "slack.devops.text" }}
+ {{range .Alerts}}{{.Annotations.DESCRIPTION}}
+ {{end}}
+{{ end }}
diff --git a/cluster/terraform_kubernetes/variables.tf b/cluster/terraform_kubernetes/variables.tf
index 97c23b04..d6821d0a 100644
--- a/cluster/terraform_kubernetes/variables.tf
+++ b/cluster/terraform_kubernetes/variables.tf
@@ -182,3 +182,56 @@ locals {
     var.environment # cluster1, cluster2, etc
   )
 }
+
+# Define variables for namespace, key vault, slack channel, and resource configurations
+variable "namespace" {
+  type        = string
+  description = "Kubernetes namespace the Alertmanager resources are created in"
+  default     = "monitoring"
+}
+
+# NOTE(review): alertmanager.tf does not reference this variable (it looks the
+# vault up via var.cluster_kv), so it defaults to null instead of being a
+# mandatory input. Remove it if nothing else in the module uses it.
+variable "key_vault_id" {
+  type        = string
+  description = "The Azure Key Vault ID where secrets are stored"
+  default     = null
+}
+
+variable "slack_channel" {
+  type        = string
+  description = "Slack channel that Alertmanager notifications are posted to"
+  default     = "#teacher-services-infra"
+}
+
+variable "alertmanager_image_version" {
+  type        = string
+  description = "Tag of the prom/alertmanager image to deploy"
+  default     = "v0.19.0"
+}
+
+variable "alertmanager_cpu_limit" {
+  type        = string
+  description = "CPU limit for the Alertmanager container"
+  default     = "1"
+}
+
+variable "alertmanager_memory_limit" {
+  type        = string
+  description = "Memory limit for the Alertmanager container"
+  default     = "1Gi"
+}
+
+variable "alertmanager_cpu_request" {
+  type        = string
+  description = "CPU request for the Alertmanager container"
+  default     = "500m"
+}
+
+# NOTE(review): "500M" is decimal megabytes while the limit uses binary "1Gi"; confirm "500Mi" was not intended.
+variable "alertmanager_memory_request" {
+  type        = string
+  description = "Memory request for the Alertmanager container"
+  default     = "500M"
+}