diff --git a/cluster/terraform_aks_cluster/azure_metric_alerts.tf b/cluster/terraform_aks_cluster/azure_metric_alerts.tf
index 55104fd9..f92a11de 100644
--- a/cluster/terraform_aks_cluster/azure_metric_alerts.tf
+++ b/cluster/terraform_aks_cluster/azure_metric_alerts.tf
@@ -31,3 +31,60 @@ resource "azurerm_monitor_metric_alert" "node_availability" {
 
   lifecycle { ignore_changes = [tags] }
 }
+
+data "azurerm_subscription" "current" {} # Supplies subscription_id for the load balancer resource IDs used as alert scopes in this file.
+
+resource "azurerm_monitor_metric_alert" "port_exhaustion" { # Alerts when SNAT connections on the cluster load balancer are reported as failed.
+  name                = "${var.resource_prefix}-tsc-${var.environment}-port-exhaustion"
+  resource_group_name = "${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg" # NOTE(review): the alert is created in the AKS nodes resource group - confirm this is intended.
+  scopes              = ["/subscriptions/${data.azurerm_subscription.current.subscription_id}/resourceGroups/${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg/providers/Microsoft.Network/loadBalancers/kubernetes"] # Resource ID of the load balancer named "kubernetes" in the AKS nodes resource group.
+  severity            = 1 # documentation/monitoring.md describes this alert as an error.
+  criteria {
+    metric_namespace = "microsoft.network/loadbalancers"
+    metric_name      = "SnatConnectionCount"
+    aggregation      = "Total"
+    operator         = "GreaterThan"
+    threshold        = 0 # Any non-zero total trips the alert; frequency and window_size are not set here, so provider defaults apply.
+    dimension { # Restrict the count to connections whose state is "failed".
+      name     = "ConnectionState"
+      operator = "Include"
+      values   = ["failed"]
+    }
+  }
+  # Notifications go to the "main" action group data source, which is not defined in this hunk.
+  action {
+    action_group_id = data.azurerm_monitor_action_group.main.id
+  }
+  # Tag changes are ignored by Terraform. NOTE(review): presumably tags are applied outside Terraform - confirm.
+  lifecycle { ignore_changes = [tags] }
+}
+
+resource "azurerm_monitor_metric_alert" "high_port_usage" { # Early warning when average used SNAT ports on the cluster load balancer exceed a fixed threshold.
+  name                = "${var.resource_prefix}-tsc-${var.environment}-high-port-usage"
+  resource_group_name = "${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg" # NOTE(review): the alert is created in the AKS nodes resource group - confirm this is intended.
+  scopes              = ["/subscriptions/${data.azurerm_subscription.current.subscription_id}/resourceGroups/${var.resource_prefix}-tsc-aks-nodes-${var.environment}-rg/providers/Microsoft.Network/loadBalancers/kubernetes"] # Resource ID of the load balancer named "kubernetes" in the AKS nodes resource group.
+  severity            = 2 # documentation/monitoring.md describes this alert as a warning.
+  criteria {
+    metric_namespace = "microsoft.network/loadbalancers"
+    metric_name      = "UsedSnatPorts"
+    aggregation      = "Average"
+    operator         = "GreaterThan"
+    threshold        = 900 # NOTE(review): hard-coded; the SNAT port allocation per backend is not visible here - confirm 900 sits below it.
+    dimension { # Wildcard includes every backend IP address value.
+      name     = "BackendIPAddress"
+      operator = "Include"
+      values   = ["*"]
+    }
+    dimension { # Only TCP port usage is considered.
+      name     = "ProtocolType"
+      operator = "Include"
+      values   = ["TCP"]
+    }
+  }
+  # Notifications go to the "main" action group data source, which is not defined in this hunk.
+  action {
+    action_group_id = data.azurerm_monitor_action_group.main.id
+  }
+  # Tag changes are ignored by Terraform. NOTE(review): presumably tags are applied outside Terraform - confirm.
+  lifecycle { ignore_changes = [tags] }
+}
diff --git a/documentation/monitoring.md b/documentation/monitoring.md
index 806cf143..af3d9043 100644
--- a/documentation/monitoring.md
+++ b/documentation/monitoring.md
@@ -141,3 +141,19 @@ Configuration is managed through Terraform variables:
 - The action group name follows the format `[resource-prefix]-tsc`
 - Alert thresholds can be customized per environment
 - The metric namespace used is `microsoft.containerservice/managedclusters`
+
+### High Port Usage
+
+AKS uses an Azure Load Balancer for inbound and outbound connections, which can lead to port exhaustion if a node makes a lot of network requests.
+
+If port usage goes over a threshold we alert on this as a warning so we can take pre-emptive action.
+
+### Port Exhaustion
+
+If connections start failing because of port exhaustion we alert on this as an error.
+
+### Troubleshooting Port Exhaustion
+
+Unfortunately we can't alert on which Kubernetes service is using a high number of ports, so this is a troubleshooting exercise following:
+
+[Troubleshoot SNAT port exhaustion on Azure Kubernetes Service nodes](https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/connectivity/snat-port-exhaustion?tabs=for-a-linux-pod)