-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
408 additions
and
0 deletions.
There are no files selected for viewing
15 changes: 15 additions & 0 deletions
15
cluster/terraform_kubernetes/config/prometheus/development.prometheus.rules
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#
# Prometheus alerting rules for the development configuration.
# see https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
#
# Example rule below, which should be removed once real rules added
#
groups:
- name: 'container restarts'
  rules:
  # Fires once the summed restart total has stayed above 1000 for 5 minutes.
  # NOTE(review): the expression compares the raw summed total rather than a
  # rate/increase over a window - confirm that is what the example intends.
  - alert: 'High number of restarted containers'
    expr: 'sum(kube_pod_container_status_restarts_total) > 1000'
    for: 5m
    labels:
      severity: slack
    annotations:
      summary: 'High number of restarted containers'
201 changes: 201 additions & 0 deletions
201
cluster/terraform_kubernetes/config/prometheus/development.prometheus.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
#
# Prometheus server configuration.
# see https://prometheus.io/docs/prometheus/latest/configuration/configuration
# and
# https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus-kubernetes.yml
#
# initial scrape configs added, these will be updated later as required
#
global:
  # Scrape targets and evaluate rules once a minute.
  scrape_interval: 1m
  evaluation_interval: 1m
  # Keep at most 100 sets of details of targets dropped by relabeling.
  # This information is used to display in the UI for troubleshooting.
  keep_dropped_targets: 100
# Rule files loaded by the server.
rule_files:
- '/etc/prometheus/prometheus.rules'
# Alertmanager instance, addressed over plain http.
alerting:
  alertmanagers:
  - scheme: http
    static_configs:
    - targets:
      - 'alertmanager.monitoring.svc:9093'
scrape_configs:
# Scrape config for node-exporter.
# Discovers endpoints and keeps only those whose endpoints object is named
# "node-exporter".
- job_name: node-exporter
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  - action: keep
    source_labels:
    - __meta_kubernetes_endpoints_name
    regex: node-exporter
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: kubernetes-apiservers
  scheme: https
  kubernetes_sd_configs:
  - role: endpoints
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # disabled certificate verification otherwise it fails with incorrect IP
    # NOTE(review): this switches off server certificate checks for this job -
    # revisit once the certificate/IP mismatch has been resolved.
    insecure_skip_verify: true
  # Keep only the default/kubernetes service endpoints for the https port.
  relabel_configs:
  - action: keep
    source_labels:
    - __meta_kubernetes_namespace
    - __meta_kubernetes_service_name
    - __meta_kubernetes_endpoint_port_name
    regex: 'default;kubernetes;https'
# Scrape config for nodes (kubelet).
# Each discovered node is scraped via the API server proxy path instead of
# being contacted directly.
- job_name: kubernetes-nodes
  kubernetes_sd_configs:
  - role: node
  scheme: https
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  relabel_configs:
  # Copy node labels onto the target.
  - regex: '__meta_kubernetes_node_label_(.+)'
    action: labelmap
  # Point every scrape at the in-cluster API server address.
  - replacement: 'kubernetes.default.svc:443'
    target_label: __address__
  # Build the per-node proxy metrics path from the node name.
  - source_labels:
    - __meta_kubernetes_node_name
    regex: '(.+)'
    replacement: '/api/v1/nodes/${1}/proxy/metrics'
    target_label: __metrics_path__
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape to be configured
# for all the declared ports (or port-free target if none is declared)
# or only some ports.
- job_name: 'kubernetes-pods'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Only scrape pods that have
  # "prometheus_io_scrape = true" annotation.
  # The regex is quoted so YAML reads it as the string "true", not a boolean.
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: 'true'
  # When the path annotation is non-empty, use it as the metrics path.
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: '(.+)'
  # Replace the port in the target address with the port annotation value.
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
  # Copy pod labels onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_pod_label_(.+)'
  # Record the namespace and pod name as target labels.
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name
# Scrape config for kube-state-metrics.
# One fixed target, addressed by its in-cluster service DNS name.
- job_name: kube-state-metrics
  static_configs:
  - targets:
    - 'kube-state-metrics.kube-system.svc.cluster.local:8080'
# Scrape config for Kubelet cAdvisor.
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
- job_name: kubernetes-cadvisor
  kubernetes_sd_configs:
  - role: node
  scheme: https
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  relabel_configs:
  # Copy node labels onto the target.
  - regex: '__meta_kubernetes_node_label_(.+)'
    action: labelmap
  # Point every scrape at the in-cluster API server address.
  - replacement: 'kubernetes.default.svc:443'
    target_label: __address__
  # Build the per-node cAdvisor proxy path from the node name.
  - source_labels:
    - __meta_kubernetes_node_name
    regex: '(.+)'
    replacement: '/api/v1/nodes/${1}/proxy/metrics/cadvisor'
    target_label: __metrics_path__
# Example scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# for all or only some endpoints.
- job_name: 'kubernetes-service-endpoints'
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  # Only scrape endpoints that have
  # "prometheus_io_scrape = true" annotation.
  # The regex is quoted so YAML reads it as the string "true", not a boolean.
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
    action: keep
    regex: 'true'
  # Take the scheme from the scheme annotation when it is http or https.
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
    action: replace
    target_label: __scheme__
    regex: '(https?)'
  # When the path annotation is non-empty, use it as the metrics path.
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: '(.+)'
  # Replace the port in the target address with the port annotation value.
  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
    action: replace
    target_label: __address__
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
  # Copy service labels onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_service_label_(.+)'
  # Record the namespace and service name as target labels.
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    action: replace
    target_label: kubernetes_name
# | ||
# Below were used in the spike and left here for info | ||
# They were a quick fix, so very likely can be improved | ||
# | ||
# - job_name: 'nginx-pods' | ||
# kubernetes_sd_configs: | ||
# - role: pod | ||
# relabel_configs: | ||
# - source_labels: [__meta_kubernetes_pod_container_port_number] | ||
# regex: '443' | ||
# action: drop | ||
# - source_labels: [__meta_kubernetes_pod_container_port_number] | ||
# regex: '8443' | ||
# action: drop | ||
# - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] | ||
# regex: 'ingress-nginx' | ||
# action: keep | ||
# - source_labels: [__address__, __meta_kubernetes_pod_container_port_number] | ||
# action: replace | ||
# regex: ([^:]+)(?::\d+)?;(\d+) | ||
# replacement: $1:10254 | ||
# target_label: __address__ | ||
# - job_name: 'apply-pods' | ||
# kubernetes_sd_configs: | ||
# - role: pod | ||
# relabel_configs: | ||
# - source_labels: [__meta_kubernetes_pod_label_app] | ||
# regex: 'apply-review-rm1' | ||
# action: keep | ||
# - job_name: 'apply-sidekiq-pods' | ||
# kubernetes_sd_configs: | ||
# - role: pod | ||
# relabel_configs: | ||
# - source_labels: [__meta_kubernetes_pod_label_app] | ||
# regex: 'apply-secondary-worker-review-rm1|apply-worker-review-rm1' | ||
# action: keep | ||
# - source_labels: [__address__] | ||
# action: replace | ||
# regex: ([^:]+) | ||
# replacement: "$${1}:9394" | ||
# target_label: __address__ | ||
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# ClusterRole with the read-only permissions listed in the rules below.
resource "kubernetes_cluster_role" "prometheus" {

  metadata {
    name = "prometheus"
  }

  # Core API group: nodes (including the proxy sub-resource), services,
  # endpoints and pods.
  rule {
    api_groups = [""]
    resources  = ["nodes", "nodes/proxy", "services", "endpoints", "pods", ]
    verbs      = ["get", "list", "watch", ]
  }

  # Ingresses are served from networking.k8s.io on current Kubernetes
  # versions; the legacy extensions group is kept so older clusters still match.
  rule {
    api_groups = ["extensions", "networking.k8s.io", ]
    resources  = ["ingresses", ]
    verbs      = ["get", "list", "watch", ]
  }

  # Non-resource /metrics URL.
  rule {
    non_resource_urls = ["/metrics", ]
    verbs             = ["get", ]
  }
}
|
||
# Binds the prometheus ClusterRole to a service account.
# NOTE(review): the subject is the "default" service account of the
# "monitoring" namespace, so any pod running as that account receives these
# permissions - confirm a dedicated service account is not wanted.
resource "kubernetes_cluster_role_binding" "prometheus" {

  metadata {
    name = "prometheus"
  }

  # Role name is taken from the cluster role resource.
  role_ref {
    kind      = "ClusterRole"
    name      = kubernetes_cluster_role.prometheus.metadata[0].name
    api_group = "rbac.authorization.k8s.io"
  }

  subject {
    kind      = "ServiceAccount"
    namespace = "monitoring"
    name      = "default"
  }

}
|
||
# ConfigMap holding the Prometheus rules and server configuration.
# Both files are read from config/prometheus/ in this module; var.config
# selects which pair of files is used.
resource "kubernetes_config_map" "prometheus" {

  metadata {
    name      = "prometheus-server-conf"
    namespace = "monitoring"
  }

  # file() is called directly; wrapping the call in "${...}" is a redundant
  # interpolation-only expression.
  data = {
    "prometheus.rules" = file("${path.module}/config/prometheus/${var.config}.prometheus.rules")
    "prometheus.yml"   = file("${path.module}/config/prometheus/${var.config}.prometheus.yml")
  }

}
|
||
# Single-replica Prometheus server in the "monitoring" namespace.
# Configuration is mounted from the prometheus config map; the TSDB path is
# backed by an emptyDir volume.
resource "kubernetes_deployment" "prometheus" {

  metadata {
    namespace = "monitoring"
    name      = "prometheus"
  }

  spec {
    replicas = 1

    # Must match the pod template labels below.
    selector {
      match_labels = {
        app = "prometheus-server"
      }
    }

    template {
      metadata {
        labels = {
          app = "prometheus-server"
        }
      }

      spec {
        container {
          name  = "prometheus"
          image = "prom/prometheus:${var.prometheus_version}"

          # Paths here line up with the two volume mounts below.
          args = [
            "--storage.tsdb.retention.time=12h",
            "--config.file=/etc/prometheus/prometheus.yml",
            "--storage.tsdb.path=/prometheus/",
          ]

          port {
            container_port = 9090
          }

          # NOTE(review): the memory request uses a decimal suffix ("500M")
          # while the limit uses a binary one ("1Gi") - confirm this is
          # intended.
          resources {
            requests = {
              cpu    = "500m"
              memory = "500M"
            }
            limits = {
              cpu    = 1
              memory = "1Gi"
            }
          }

          volume_mount {
            name       = "prometheus-config-volume"
            mount_path = "/etc/prometheus/"
          }

          volume_mount {
            name       = "prometheus-storage-volume"
            mount_path = "/prometheus/"
          }
        }

        # Config volume is populated from the prometheus config map.
        volume {
          name = "prometheus-config-volume"
          config_map {
            # default_mode = "0420"
            name = kubernetes_config_map.prometheus.metadata[0].name
          }
        }

        volume {
          name = "prometheus-storage-volume"
          empty_dir {}
        }

      }
    }
  }
}
|
||
# NodePort service in front of the Prometheus pods.
resource "kubernetes_service" "prometheus" {

  metadata {
    namespace = "monitoring"
    name      = "prometheus"
    # prometheus.io/* annotations on the service.
    annotations = {
      "prometheus.io/scrape" = "true"
      "prometheus.io/port"   = "9090"
    }
  }

  spec {
    type = "NodePort"
    selector = {
      app = "prometheus-server"
    }
    # Service port 8080 (node port 30000) forwards to the container port
    # declared on the deployment.
    port {
      port        = 8080
      node_port   = 30000
      target_port = kubernetes_deployment.prometheus.spec[0].template[0].spec[0].container[0].port[0].container_port
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.