Skip to content

Commit 073764a

Browse files
authored
PPHA-645: create container app health checks (#359)
# What is the change? This adds infrastructure alerting on the database and the container web app. It also adds an Application Insights web test, which checks the availability of the container app by hitting the healthcheck endpoint. It adds a boolean option to enable or disable alerting, which is false by default. It also adds settings for the number of replicas for the container app and for the memory used by the container app (default 1). To test this out: - Add the infra secrets to the inf key vault `kv-lungcs-[environment]-inf`. - Create a monitoring email address to which alerts will be sent. - Set `monitoring-email-address` to the pre-created monitoring email address; if one has not been set up, set it to your own email address. <!-- Describe the intended changes. --> # Why are we making this change? <!-- Why is this change required? What problem does it solve? -->
2 parents 459bec0 + cf29b02 commit 073764a

16 files changed

Lines changed: 289 additions & 5 deletions

File tree

infrastructure/environments/dev/variables.tfvars

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ postgres_geo_redundant_backup_enabled = false
1515
protect_keyvault = false
1616
vnet_address_space = "10.12.0.0/16"
1717
seed_demo_data = true
18+
enable_alerting = false
19+
min_replicas = 1
20+
container_memory = "1"

infrastructure/environments/preprod/variables.tfvars

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ postgres_geo_redundant_backup_enabled = false
1515
protect_keyvault = true
1616
vnet_address_space = "10.14.0.0/16"
1717
seed_demo_data = true
18+
enable_alerting = false
19+
min_replicas = 2
20+
container_memory = "1"

infrastructure/environments/prod/variables.tfvars

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ protect_keyvault = true
1515
vnet_address_space = "10.15.0.0/16"
1616
use_apex_domain = true
1717
cae_zone_redundancy_enabled = true
18+
enable_alerting = true
19+
min_replicas = 2
20+
container_memory = "1"

infrastructure/modules/container-apps/main.tf

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ module "webapp" {
1313

1414
name = "${var.app_short_name}-web-${var.environment}"
1515
container_app_environment_id = var.container_app_environment_id
16+
17+
# alerts
18+
action_group_id = var.action_group_id
19+
enable_alerting = var.enable_alerting
20+
alert_memory_threshold = 80
21+
alert_cpu_threshold = 90
22+
1623
resource_group_name = azurerm_resource_group.main.name
1724
fetch_secrets_from_app_key_vault = var.fetch_secrets_from_app_key_vault
1825
infra_key_vault_name = "kv-${var.app_short_name}-${var.env_config}-inf"
@@ -32,4 +39,19 @@ module "webapp" {
3239
secret_variables = var.deploy_database_as_container ? { DATABASE_PASSWORD = resource.random_password.admin_password[0].result } : {}
3340
is_web_app = true
3441
port = 8000
42+
probe_path = "/healthcheck"
43+
min_replicas = var.min_replicas
44+
memory = var.container_memory
45+
}
46+
47+
# Application Insights availability test that requests the web app's healthcheck
# endpoint via its external (Front Door custom domain) URL.
module "azurerm_application_insights_standard_web_test" {
48+
# Create the test only when alerting is enabled AND Front Door is enabled:
# without Front Door there is no external URL, so the test would have no target.
count = (var.enable_alerting && var.features.front_door) ? 1 : 0
49+
50+
source = "../dtos-devops-templates/infrastructure/modules/application-insights-availability-test"
51+
name = "${var.app_short_name}-web-${var.environment}"
52+
resource_group_name = var.resource_group_name_infra
53+
location = var.region
54+
action_group_id = var.action_group_id
55+
application_insights_id = var.app_insights_id
56+
# local.external_url already ends with "/", so the path is appended without a leading slash.
target_url = "${local.external_url}healthcheck"
3557
}

infrastructure/modules/container-apps/output.tf

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ output "internal_url" {
22
value = module.webapp.url
33
}
44

5-
# Commented out as the front door endpoints is not being used at the moment (awaiting for DNS to be sorted), but this can be re-enabled if front door is added back in.
6-
# output "external_url" {
7-
# value = var.features.front_door ? "https://${module.frontdoor_endpoint[0].custom_domains["${var.environment}-domain"].host_name}/" : null
8-
# }
5+
# Exposes local.external_url (the Front Door custom-domain URL) when the
# front_door feature is enabled; null otherwise.
output "external_url" {
6+
value = var.features.front_door ? local.external_url : null
7+
}

infrastructure/modules/container-apps/postgres.tf

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ module "postgres" {
4848

4949
public_network_access_enabled = !var.features.private_networking
5050

51+
# alerts
52+
action_group_id = var.action_group_id
53+
enable_alerting = var.enable_alerting
54+
alert_memory_threshold = 80
55+
alert_cpu_threshold = 90
56+
alert_storage_threshold = 80
57+
5158
databases = {
5259
db1 = {
5360
collation = "en_US.utf8"
@@ -94,6 +101,13 @@ module "database_container" {
94101
POSTGRES_USER = local.database_user
95102
POSTGRES_DB = local.database_name
96103
}
104+
105+
# alerts
106+
action_group_id = var.action_group_id
107+
enable_alerting = var.enable_alerting
108+
alert_memory_threshold = 80
109+
alert_cpu_threshold = 90
110+
97111
resource_group_name = azurerm_resource_group.main.name
98112
is_tcp_app = true
99113
# postgres has a port of 5432

infrastructure/modules/container-apps/variables.tf

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ variable "app_short_name" {
1414
type = string
1515
}
1616

17+
# Used as resource_group_name for the Application Insights availability test module.
variable "resource_group_name_infra" {
18+
description = "Name of the infrastructure resource group in which the Application Insights availability test is created"
19+
type = string
20+
}
21+
1722
variable "container_app_environment_id" {
1823
description = "The ID of the container app environment where container apps are deployed"
1924
type = string
@@ -40,6 +45,7 @@ variable "enable_entra_id_authentication" {
4045
type = bool
4146
}
4247

48+
4349
variable "env_config" {
4450
description = "Environment configuration. Different environments may share the same environment config and the same infrastructure"
4551
type = string
@@ -128,6 +134,17 @@ variable "main_subnet_id" {
128134
type = string
129135
}
130136

137+
# Forwarded to the webapp module as min_replicas. Required: no default is declared.
variable "min_replicas" {
138+
description = "Minimum number of container replicas"
139+
type = number
140+
}
141+
142+
# Forwarded to the availability web test module as application_insights_id.
variable "app_insights_id" {
143+
description = "The Application Insights id."
144+
type = string
145+
}
146+
147+
131148
variable "region" {
132149
description = "The region to deploy in"
133150
type = string
@@ -144,6 +161,40 @@ variable "use_apex_domain" {
144161
type = bool
145162
}
146163

164+
# Master switch for alerting in this module: it gates creation of the availability
# web test and is forwarded to the webapp, postgres and database_container modules.
# Required: no default is declared here, so each environment must set it.
variable "enable_alerting" {
165+
description = "Whether monitoring and alerting is enabled."
166+
type = bool
167+
}
168+
169+
# Alert evaluation window, restricted to a fixed set of ISO 8601 durations.
# Required and non-nullable: no default is declared.
variable "alert_window_size" {
170+
description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
171+
type = string
172+
nullable = false
173+
validation {
174+
condition = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
175+
error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
176+
}
177+
}
178+
179+
# Forwarded to the webapp module as its memory argument. Declared as a string
# (the environment tfvars pass "1").
variable "container_memory" {
180+
description = "Memory allocated to the webapp container in Gi. CPU is automatically set to half the memory value by the container-app module."
181+
type = string
182+
}
183+
184+
# Action group passed to the webapp, postgres, database_container and
# availability web test modules as action_group_id.
variable "action_group_id" {
185+
description = "ID of the action group to notify."
186+
type = string
187+
}
188+
189+
# NOTE(review): the webapp module call builds its infra key vault name from
# app_short_name/env_config instead of reading this variable — confirm where
# this variable is consumed.
variable "infra_key_vault_name" {
190+
description = "Name of the infra key vault"
191+
type = string
192+
}
193+
194+
# NOTE(review): no consumer of this variable is visible in this change — confirm
# it is used within this module.
variable "infra_key_vault_rg" {
195+
description = "Name of the infra key vault resource group"
196+
type = string
197+
}
147198

148199
locals {
149200
resource_group_name = "rg-${var.app_short_name}-${var.environment}-container-app-uks"
@@ -179,7 +230,7 @@ locals {
179230
DATABASE_NAME = var.deploy_database_as_container ? null : module.postgres[0].database_names[0]
180231
DATABASE_USER = var.deploy_database_as_container ? null : module.db_connect_identity[0].name
181232
}
182-
233+
# Base URL (with trailing slash) of the Front Door custom domain for this environment.
# Resolved only when the front_door feature is on: indexing module.frontdoor_endpoint[0]
# is an error if that module has no instances. Null when Front Door is disabled.
# NOTE(review): assumes module.frontdoor_endpoint is count-gated on var.features.front_door — confirm.
external_url = var.features.front_door ? "https://${module.frontdoor_endpoint[0].custom_domains["${var.environment}-domain"].host_name}/" : null
183234
storage_account_name = "st${var.app_short_name}${var.environment}uks"
184235
storage_containers = {}
185236
storage_queues = []
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Activity-log alert on Azure Service Health events, scoped to the current
# subscription and notifying the monitor action group.
# NOTE(review): unlike the other alert-related modules in this change, no
# enable_alerting flag is passed here — confirm this alert is meant to be unconditional.
module "service_health_alert" {
2+
source = "../dtos-devops-templates/infrastructure/modules/monitor-activity-log-alert"
3+
4+
name = "service-health-alerts-${var.app_short_name}-${var.environment}"
5+
location = "global"
6+
resource_group_name = azurerm_resource_group.main.name
7+
description = "Azure Service Health alert for services impacting ${var.app_short_name} in ${var.environment}"
8+
9+
# Subscription-wide scope.
scopes = [data.azurerm_subscription.current.id]
10+
11+
criteria = {
12+
category = "ServiceHealth"
13+
level = null
14+
15+
service_health = {
16+
events = ["Incident", "Maintenance", "Informational", "ActionRequired", "Security"]
17+
# NOTE(review): events for non-regional services (e.g. Front Door, DNS) may be
# published under a "Global" location rather than var.region — confirm whether
# that location should be listed as well.
locations = [var.region]
18+
19+
# Only monitor Azure services used by this application
20+
# This reduces noise from unrelated service health events
21+
# NOTE(review): "Windows Virtual Desktop" does not obviously match the
# comment above — confirm it is used by this application.
services = [
22+
"Application Insights",
23+
"Azure Container Apps",
24+
"Azure Container Service",
25+
"Azure Container Storage",
26+
"Azure Database for PostgreSQL flexible servers",
27+
"Azure DNS",
28+
"Azure Frontdoor",
29+
"Azure Monitor",
30+
"Azure Private Link",
31+
"Key Vault",
32+
"Log Analytics",
33+
"Storage",
34+
"Virtual Network",
35+
"Windows Virtual Desktop"
36+
]
37+
}
38+
}
39+
40+
action_group_id = module.monitor_action_group.monitor_action_group.id
41+
}

infrastructure/modules/infra/data.tf

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# Current subscription; its id is referenced as the scope of the service health alert.
data "azurerm_subscription" "current" {}
2+
13
data "azuread_service_principal" "github-mi" {
24
display_name = var.github_mi_name
35
}
@@ -7,3 +9,15 @@ data "azuread_group" "kv_officers" {
79

810
display_name = each.value
911
}
12+
13+
# Looks up the existing infra key vault by name and resource group, using the
# aliased azurerm.hub provider.
data "azurerm_key_vault" "infra" {
14+
provider = azurerm.hub
15+
16+
name = var.infra_key_vault_name
17+
resource_group_name = var.infra_key_vault_rg
18+
}
19+
20+
# Reads the "monitoring-email-address" secret from the infra key vault.
# The secret must exist in the vault; this data source has no count/condition.
# NOTE(review): the vault lookup uses provider azurerm.hub but this data source
# uses the default provider — confirm the default provider can read the secret.
data "azurerm_key_vault_secret" "infra" {
21+
name = "monitoring-email-address"
22+
key_vault_id = data.azurerm_key_vault.infra.id
23+
}

infrastructure/modules/infra/main.tf

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,44 @@ module "container-app-environment" {
7070
private_dns_zone_rg_name = var.features.private_networking ? "rg-hub-${var.hub}-uks-private-dns-zones" : null
7171
zone_redundancy_enabled = var.cae_zone_redundancy_enabled
7272
}
73+
74+
# Application Insights instance (type "web") attached to the audit Log Analytics
# workspace. The monitor action group id and the enable_alerting flag are
# forwarded to the module.
module "app_insights_audit" {
75+
source = "../dtos-devops-templates/infrastructure/modules/app-insights"
76+
77+
name = "appi-${var.environment}-uks-${var.app_short_name}"
78+
location = var.region
79+
resource_group_name = azurerm_resource_group.main.name
80+
appinsights_type = "web"
81+
82+
log_analytics_workspace_id = module.log_analytics_workspace_audit.id
83+
84+
# alerts
85+
action_group_id = module.monitor_action_group.monitor_action_group.id
86+
enable_alerting = var.enable_alerting
87+
}
88+
89+
# Links the audit Log Analytics workspace to the hub private link scope
# "ampls-<hub>hub". Created through the azurerm.hub provider in the hub's
# private-endpoints resource group.
# NOTE(review): not gated on var.features.private_networking, unlike the private
# DNS zone setting on the container app environment — confirm this is intended.
module "private_link_scoped_service_law" {
90+
source = "../dtos-devops-templates/infrastructure/modules/private-link-scoped-service"
91+
92+
providers = {
93+
azurerm = azurerm.hub
94+
}
95+
96+
name = "pls-${var.app_short_name}-${var.environment}-law"
97+
resource_group_name = "rg-hub-${var.hub}-uks-hub-private-endpoints"
98+
linked_resource_id = module.log_analytics_workspace_audit.id
99+
scope_name = "ampls-${var.hub}hub"
100+
}
101+
102+
# Links the Application Insights instance (module.app_insights_audit) to the hub
# private link scope "ampls-<hub>hub". Created through the azurerm.hub provider
# in the hub's private-endpoints resource group.
# NOTE(review): not gated on var.features.private_networking — confirm this is intended.
module "private_link_scoped_service_app_insights" {
103+
source = "../dtos-devops-templates/infrastructure/modules/private-link-scoped-service"
104+
105+
providers = {
106+
azurerm = azurerm.hub
107+
}
108+
109+
name = "pls-${var.app_short_name}-${var.environment}-appinsights"
110+
resource_group_name = "rg-hub-${var.hub}-uks-hub-private-endpoints"
111+
linked_resource_id = module.app_insights_audit.id
112+
scope_name = "ampls-${var.hub}hub"
113+
}

0 commit comments

Comments
 (0)