diff --git a/infra/.gitignore b/infra/.gitignore new file mode 100644 index 0000000..d963d97 --- /dev/null +++ b/infra/.gitignore @@ -0,0 +1,12 @@ +.terraform/ +.terraform.lock.hcl +*.tfstate +*.tfstate.* +*.tfvars +!*.tfvars.example +crash.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json +backend.tf diff --git a/infra/README.md b/infra/README.md new file mode 100644 index 0000000..f479b31 --- /dev/null +++ b/infra/README.md @@ -0,0 +1,133 @@ +# Agentic Platform Engineering — Infrastructure + +This Terraform configuration provisions a complete Azure-hosted platform for the agentic-platform-engineering workshop: an AKS cluster with workload identity, a container registry, ArgoCD for GitOps, and the AKS MCP Server for AI-assisted cluster management — all wired together with GitHub Actions OIDC so no long-lived secrets are required. + +## Prerequisites + +| Tool | Minimum version | +|------|----------------| +| [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) (`az`) | Latest, authenticated (`az login`) | +| [Terraform](https://developer.hashicorp.com/terraform/install) | >= 1.7 | +| [kubectl](https://kubernetes.io/docs/tasks/tools/) | Latest | +| [helm](https://helm.sh/docs/intro/install/) | Latest | + +You also need a fork or clone of [MicrosoftGbb/agentic-platform-engineering](https://github.com/MicrosoftGbb/agentic-platform-engineering) — the `github_org` and `github_repo` variables must match your fork. + +## What Gets Provisioned + +| Resource | Details | +|----------|---------| +| **Resource Group** | `rg-agentic-demo` (configurable) | +| **Virtual Network** | `vnet-agentic-demo`, `10.0.0.0/8` | +| **AKS Subnet** | `snet-aks`, `10.240.0.0/16` | +| **AKS Cluster** | `aks-eastus2`, Kubernetes 1.30, `Standard_D4s_v3` × 3 nodes, OIDC issuer + workload identity enabled, Azure CNI | +| **Azure Container Registry** | Basic SKU, auto-named `acragentic<4-digit-suffix>` (or set `acr_name`). AKS kubelet identity gets `AcrPull`. 
| +| **User-Assigned Managed Identity** | `uami-agentic-workload` — Contributor on the resource group, AKS Cluster Admin | +| **Federated Identity Credentials** | 5 total: GitHub env `copilot`, GitHub env `demo`, branch `main`, pull requests, and the `aks-mcp` Kubernetes service account | +| **ArgoCD** | Helm chart `7.3.4`, namespace `argocd`, LoadBalancer service, notifications controller enabled, random 16-char admin password | +| **AKS MCP Server** | Helm chart `0.1.0` from `oci://ghcr.io/azure/aks-mcp/charts`, namespace `aks-mcp`, port 8000, workload identity via dedicated service account | + +## Quick Start + +```bash +# 1. Copy and edit variables +cp terraform.tfvars.example terraform.tfvars +# Edit terraform.tfvars — set github_org and github_repo at minimum + +# 2. Initialize +terraform init + +# 3. Plan +terraform plan + +# 4. Apply (~15 min) +terraform apply +``` + +## After Apply + +```bash +# Connect to the cluster +$(terraform output -raw get_credentials_command) + +# Get GitHub Actions secrets to configure +terraform output -json github_actions_env_vars + +# Get ArgoCD admin password (username: admin) +terraform output -raw argocd_admin_password + +# Access ArgoCD UI +kubectl port-forward -n argocd svc/argocd-server 8080:443 +# Open https://localhost:8080 + +# Access AKS MCP server +$(terraform output -raw aks_mcp_port_forward_command) +# Server listening on http://localhost:8000 +``` + +## GitHub Actions Setup + +After `terraform apply`, configure the following **repository secrets** in your GitHub fork (values come from `terraform output -json github_actions_env_vars`): + +| Secret | Value | +|--------|-------| +| `ARM_CLIENT_ID` | Client ID of the managed identity | +| `ARM_SUBSCRIPTION_ID` | Your Azure subscription ID | +| `ARM_TENANT_ID` | Your Azure tenant ID | +| `ARM_USE_OIDC` | `true` | + +Also create two **GitHub Environments** named exactly `copilot` and `demo` (Settings → Environments). 
The federated credentials are scoped to these environment names — workflows using other environment names will fail to authenticate. + +## Remote State (Optional) + +By default Terraform stores state locally. For team or CI use, migrate to Azure Storage: + +```bash +# 1. Create storage account (one-time) +az group create -n rg-terraform-state -l eastus2 +az storage account create -n <storage-account-name> -g rg-terraform-state --sku Standard_LRS +az storage container create -n tfstate --account-name <storage-account-name> + +# 2. Copy backend.tf.example to backend.tf and fill in values +cp backend.tf.example backend.tf +# Edit backend.tf — set storage_account_name + +# 3. Migrate existing state +terraform init -migrate-state +``` + +## Variables Reference + +| Variable | Description | Default | +|----------|-------------|---------| +| `location` | Azure region for all resources | `eastus2` | +| `resource_group_name` | Name of the Azure Resource Group | `rg-agentic-demo` | +| `cluster_name` | Name of the AKS cluster | `aks-eastus2` | +| `kubernetes_version` | Kubernetes version for the AKS cluster | `1.30` | +| `node_vm_size` | VM size for the AKS default node pool | `Standard_D4s_v3` | +| `node_count` | Number of nodes in the default node pool | `3` | +| `acr_name` | ACR name — auto-generated as `acragentic<4-digit-suffix>` if empty | `""` | +| `github_org` | GitHub org for OIDC federation (**required**) | — | +| `github_repo` | GitHub repo name for OIDC federation (**required**) | — | +| `argocd_chart_version` | Helm chart version for ArgoCD | `7.3.4` | +| `aks_mcp_chart_version` | Helm chart version for the AKS MCP Server | `0.1.0` | +| `tags` | Tags applied to all resources | `{project, managed_by}` | + +## Outputs Reference + +| Output | Description | Sensitive | +|--------|-------------|-----------| +| `resource_group_name` | Name of the Azure Resource Group | No | +| `cluster_name` | Name of the AKS cluster | No | +| `get_credentials_command` | `az aks get-credentials` command ready to run | No | +| `acr_login_server` | 
ACR login server hostname | No | +| `acr_id` | Resource ID of the ACR | No | +| `uami_client_id` | Client ID of the managed identity (`ARM_CLIENT_ID`) | No | +| `uami_principal_id` | Principal ID of the managed identity | No | +| `github_actions_env_vars` | Map of all GitHub Actions secrets to configure | No | +| `oidc_issuer_url` | OIDC issuer URL of the AKS cluster | No | +| `vnet_id` | Resource ID of the Virtual Network | No | +| `aks_subnet_id` | Resource ID of the AKS subnet | No | +| `kube_config` | Raw kubeconfig for the AKS cluster | **Yes** | +| `argocd_admin_password` | ArgoCD admin password (username: `admin`) | **Yes** | +| `aks_mcp_port_forward_command` | `kubectl port-forward` command for the AKS MCP server | No | diff --git a/infra/acr.tf b/infra/acr.tf new file mode 100644 index 0000000..9da896a --- /dev/null +++ b/infra/acr.tf @@ -0,0 +1,8 @@ +resource "azurerm_container_registry" "main" { + name = local.acr_name + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + sku = "Basic" + admin_enabled = false + tags = var.tags +} diff --git a/infra/aks-mcp.tf b/infra/aks-mcp.tf new file mode 100644 index 0000000..93d123e --- /dev/null +++ b/infra/aks-mcp.tf @@ -0,0 +1,48 @@ +# Installs the AKS MCP Server chart into the aks-mcp namespace, bound to the Terraform-managed service account. +resource "helm_release" "aks_mcp" { + name = "aks-mcp" + repository = "oci://ghcr.io/azure/aks-mcp/charts" + chart = "aks-mcp" + version = var.aks_mcp_chart_version + namespace = kubernetes_namespace.aks_mcp.metadata[0].name + create_namespace = false + wait = true + timeout = 300 + + set { + name = "serviceAccount.create" + value = "false" + } + + set { + name = "serviceAccount.name" + value = kubernetes_service_account.aks_mcp.metadata[0].name + } + + set { + # Kubernetes label values must be strings; without type = "string" Helm parses "true" as a boolean. + name = "podLabels.azure\\.workload\\.identity/use" + value = "true" + type = "string" + } + + set { + name = "env.AZURE_CLIENT_ID" + value = azurerm_user_assigned_identity.workload.client_id + } + + set { + name = "env.AZURE_TENANT_ID" + value = data.azurerm_client_config.current.tenant_id 
+ } + + set { + name = "service.port" + value = "8000" + } + + depends_on = [ + kubernetes_namespace.aks_mcp, + kubernetes_service_account.aks_mcp + ] +} diff --git a/infra/aks.tf b/infra/aks.tf new file mode 100644 index 0000000..7046964 --- /dev/null +++ b/infra/aks.tf @@ -0,0 +1,42 @@ +resource "azurerm_kubernetes_cluster" "main" { + name = var.cluster_name + location = azurerm_resource_group.main.location + resource_group_name = azurerm_resource_group.main.name + dns_prefix = var.cluster_name + kubernetes_version = var.kubernetes_version + + default_node_pool { + name = "system" + node_count = var.node_count + vm_size = var.node_vm_size + vnet_subnet_id = azurerm_subnet.aks.id + os_disk_size_gb = 128 + upgrade_settings { + max_surge = "10%" + } + } + + identity { + type = "SystemAssigned" + } + + # Enable OIDC issuer for workload identity + oidc_issuer_enabled = true + workload_identity_enabled = true + + network_profile { + network_plugin = "azure" + network_policy = "azure" + load_balancer_sku = "standard" + service_cidr = "10.0.0.0/16" + dns_service_ip = "10.0.0.10" + } + + tags = var.tags +} + +resource "azurerm_role_assignment" "aks_acr_pull" { + scope = azurerm_container_registry.main.id + role_definition_name = "AcrPull" + principal_id = azurerm_kubernetes_cluster.main.kubelet_identity[0].object_id +} diff --git a/infra/argocd.tf b/infra/argocd.tf new file mode 100644 index 0000000..2cb0865 --- /dev/null +++ b/infra/argocd.tf @@ -0,0 +1,36 @@ +resource "random_password" "argocd_admin" { + length = 16 + special = true +} + +resource "helm_release" "argocd" { + name = "argocd" + repository = "https://argoproj.github.io/argo-helm" + chart = "argo-cd" + version = var.argocd_chart_version + namespace = kubernetes_namespace.argocd.metadata[0].name + create_namespace = false + wait = true + timeout = 600 + + # bcrypt() re-salts on every evaluation, so the hash (and this release) would + # change on each plan; bcrypt_hash is computed once with the password and + # stored in state. set_sensitive keeps the value out of plan output. + set_sensitive { + name = "configs.secret.argocdServerAdminPassword" + value = random_password.argocd_admin.bcrypt_hash + } + + set { + name = 
"server.service.type" + value = "LoadBalancer" + } + + # Enable notifications controller for ArgoCD notifications + set { + name = "notifications.enabled" + value = "true" + } + + depends_on = [kubernetes_namespace.argocd] +} diff --git a/infra/backend.tf.example b/infra/backend.tf.example new file mode 100644 index 0000000..d4d4cfd --- /dev/null +++ b/infra/backend.tf.example @@ -0,0 +1,10 @@ +# Rename to backend.tf and fill in values to use Azure Storage remote state. +# Run: terraform init -migrate-state (moves any existing local state into the backend) +terraform { + backend "azurerm" { + resource_group_name = "rg-terraform-state" + storage_account_name = "" + container_name = "tfstate" + key = "agentic-platform-engineering.tfstate" + } +} diff --git a/infra/identity.tf b/infra/identity.tf new file mode 100644 index 0000000..34a7c4a --- /dev/null +++ b/infra/identity.tf @@ -0,0 +1,69 @@ +resource "azurerm_user_assigned_identity" "workload" { + name = "uami-agentic-workload" + resource_group_name = azurerm_resource_group.main.name + location = azurerm_resource_group.main.location + tags = var.tags +} + +# Environment: copilot +resource "azurerm_federated_identity_credential" "gh_env_copilot" { + name = "github-env-copilot" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.workload.id + audience = ["api://AzureADTokenExchange"] + issuer = "https://token.actions.githubusercontent.com" + subject = "repo:${var.github_org}/${var.github_repo}:environment:copilot" +} + +# Environment: demo +resource "azurerm_federated_identity_credential" "gh_env_demo" { + name = "github-env-demo" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.workload.id + audience = ["api://AzureADTokenExchange"] + issuer = "https://token.actions.githubusercontent.com" + subject = "repo:${var.github_org}/${var.github_repo}:environment:demo" +} + +# Branch: main +resource "azurerm_federated_identity_credential" "gh_branch_main" { + name = 
"github-branch-main" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.workload.id + audience = ["api://AzureADTokenExchange"] + issuer = "https://token.actions.githubusercontent.com" + subject = "repo:${var.github_org}/${var.github_repo}:ref:refs/heads/main" +} + +# Pull requests +resource "azurerm_federated_identity_credential" "gh_pr" { + name = "github-pull-request" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.workload.id + audience = ["api://AzureADTokenExchange"] + issuer = "https://token.actions.githubusercontent.com" + subject = "repo:${var.github_org}/${var.github_repo}:pull_request" +} + +resource "azurerm_federated_identity_credential" "aks_mcp_sa" { + name = "aks-mcp-service-account" + resource_group_name = azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.workload.id + audience = ["api://AzureADTokenExchange"] + issuer = azurerm_kubernetes_cluster.main.oidc_issuer_url + subject = "system:serviceaccount:aks-mcp:aks-mcp" +} + +# Contributor on the resource group (deploy AKS, ACR, etc.) 
+resource "azurerm_role_assignment" "workload_rg_contributor" { + scope = azurerm_resource_group.main.id + role_definition_name = "Contributor" + principal_id = azurerm_user_assigned_identity.workload.principal_id +} + +# AKS cluster admin (for kubectl access in workflows) +resource "azurerm_role_assignment" "workload_aks_admin" { + scope = azurerm_kubernetes_cluster.main.id + role_definition_name = "Azure Kubernetes Service Cluster Admin Role" + principal_id = azurerm_user_assigned_identity.workload.principal_id +} diff --git a/infra/kubernetes.tf b/infra/kubernetes.tf new file mode 100644 index 0000000..6baa1bd --- /dev/null +++ b/infra/kubernetes.tf @@ -0,0 +1,31 @@ +resource "kubernetes_namespace" "argocd" { + metadata { + name = "argocd" + labels = { + "app.kubernetes.io/managed-by" = "terraform" + } + } +} + +resource "kubernetes_namespace" "aks_mcp" { + metadata { + name = "aks-mcp" + labels = { + "app.kubernetes.io/managed-by" = "terraform" + "azure.workload.identity/use" = "true" + } + } +} + +resource "kubernetes_service_account" "aks_mcp" { + metadata { + name = "aks-mcp" + namespace = kubernetes_namespace.aks_mcp.metadata[0].name + annotations = { + "azure.workload.identity/client-id" = azurerm_user_assigned_identity.workload.client_id + } + labels = { + "azure.workload.identity/use" = "true" + } + } +} diff --git a/infra/main.tf b/infra/main.tf new file mode 100644 index 0000000..dc4e339 --- /dev/null +++ b/infra/main.tf @@ -0,0 +1,35 @@ +data "azurerm_client_config" "current" {} + +resource "azurerm_resource_group" "main" { + name = var.resource_group_name + location = var.location + tags = var.tags +} + +# 4-char numeric suffix for globally unique names (ACR, etc.) +resource "random_string" "suffix" { + length = 4 + upper = false + lower = false + numeric = true + special = false +} + +locals { + acr_name = var.acr_name != "" ? 
var.acr_name : "acragentic${random_string.suffix.result}" +} + +resource "azurerm_virtual_network" "main" { + name = "vnet-agentic-demo" + address_space = ["10.0.0.0/8"] + location = azurerm_resource_group.main.location + resource_group_name = azurerm_resource_group.main.name + tags = var.tags +} + +resource "azurerm_subnet" "aks" { + name = "snet-aks" + resource_group_name = azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = ["10.240.0.0/16"] +} diff --git a/infra/outputs.tf b/infra/outputs.tf new file mode 100644 index 0000000..fae2e3e --- /dev/null +++ b/infra/outputs.tf @@ -0,0 +1,76 @@ +output "resource_group_name" { + description = "Name of the Azure Resource Group" + value = azurerm_resource_group.main.name +} + +output "cluster_name" { + description = "Name of the AKS cluster" + value = azurerm_kubernetes_cluster.main.name +} + +output "get_credentials_command" { + description = "Command to fetch AKS kubeconfig" + value = "az aks get-credentials --resource-group ${azurerm_resource_group.main.name} --name ${azurerm_kubernetes_cluster.main.name}" +} + +output "acr_login_server" { + description = "ACR login server hostname" + value = azurerm_container_registry.main.login_server +} + +output "acr_id" { + description = "Resource ID of the Azure Container Registry (used for AKS role assignment)" + value = azurerm_container_registry.main.id +} + +output "uami_client_id" { + description = "Client ID of the User-Assigned Managed Identity (ARM_CLIENT_ID for GitHub Actions)" + value = azurerm_user_assigned_identity.workload.client_id +} + +output "uami_principal_id" { + description = "Principal ID of the User-Assigned Managed Identity" + value = azurerm_user_assigned_identity.workload.principal_id +} + +output "github_actions_env_vars" { + description = "Environment variables / secrets to configure in GitHub Actions" + value = { + ARM_CLIENT_ID = azurerm_user_assigned_identity.workload.client_id + 
ARM_SUBSCRIPTION_ID = data.azurerm_client_config.current.subscription_id + ARM_TENANT_ID = data.azurerm_client_config.current.tenant_id + ARM_USE_OIDC = "true" + } +} + +output "oidc_issuer_url" { + description = "OIDC issuer URL of the AKS cluster (for federated credential configuration)" + value = azurerm_kubernetes_cluster.main.oidc_issuer_url +} + +output "vnet_id" { + description = "ID of the Virtual Network" + value = azurerm_virtual_network.main.id +} + +output "aks_subnet_id" { + description = "ID of the AKS subnet" + value = azurerm_subnet.aks.id +} + +output "kube_config" { + description = "Raw kubeconfig for the AKS cluster" + value = azurerm_kubernetes_cluster.main.kube_config_raw + sensitive = true +} + +output "argocd_admin_password" { + description = "ArgoCD admin password — use with username 'admin'" + value = random_password.argocd_admin.result + sensitive = true +} + +output "aks_mcp_port_forward_command" { + description = "Command to port-forward AKS MCP server locally" + value = "kubectl port-forward -n aks-mcp svc/aks-mcp 8000:8000" +} diff --git a/infra/providers.tf b/infra/providers.tf new file mode 100644 index 0000000..9be3f23 --- /dev/null +++ b/infra/providers.tf @@ -0,0 +1,44 @@ +# --------------------------------------------------------------------------- +# OIDC authentication for GitHub Actions +# Set the following environment variables in your workflow (no secrets needed): +# ARM_USE_OIDC=true +# ARM_TENANT_ID=<tenant-id> +# ARM_SUBSCRIPTION_ID=<subscription-id> +# ARM_CLIENT_ID=<managed-identity-client-id> +# --------------------------------------------------------------------------- + +provider "azurerm" { + features {} + # Credentials are sourced from ARM_* environment variables. + # No hardcoded values here — safe for public repos. +} + +provider "azuread" { + # Tenant is sourced from ARM_TENANT_ID / AZURE_TENANT_ID env var. 
+} + +# --------------------------------------------------------------------------- +# Kubernetes and Helm providers are configured from the AKS cluster outputs +# defined in aks.tf. They reference the cluster resource directly (no try() +# fallback), so their credentials are only known after the cluster is created. +# --------------------------------------------------------------------------- + +locals { + kube_config = azurerm_kubernetes_cluster.main.kube_config[0] +} + +provider "kubernetes" { + host = local.kube_config.host + client_certificate = base64decode(local.kube_config.client_certificate) + client_key = base64decode(local.kube_config.client_key) + cluster_ca_certificate = base64decode(local.kube_config.cluster_ca_certificate) +} + +provider "helm" { + kubernetes { + host = local.kube_config.host + client_certificate = base64decode(local.kube_config.client_certificate) + client_key = base64decode(local.kube_config.client_key) + cluster_ca_certificate = base64decode(local.kube_config.cluster_ca_certificate) + } +} diff --git a/infra/terraform.tfvars.example b/infra/terraform.tfvars.example new file mode 100644 index 0000000..4681704 --- /dev/null +++ b/infra/terraform.tfvars.example @@ -0,0 +1,8 @@ +location = "eastus2" +resource_group_name = "rg-agentic-demo" +cluster_name = "aks-eastus2" +kubernetes_version = "1.30" +node_vm_size = "Standard_D4s_v3" +node_count = 3 +github_org = "MicrosoftGbb" +github_repo = "agentic-platform-engineering" diff --git a/infra/variables.tf b/infra/variables.tf new file mode 100644 index 0000000..4f571c9 --- /dev/null +++ b/infra/variables.tf @@ -0,0 +1,72 @@ +variable "location" { + description = "Azure region for all resources" + type = string + default = "eastus2" +} + +variable "resource_group_name" { + description = "Name of the Azure Resource Group" + type = string + default = "rg-agentic-demo" +} + +variable "cluster_name" { + description = "Name of the AKS cluster" + type = string + default = "aks-eastus2" +} + +variable 
"kubernetes_version" { + description = "Kubernetes version for the AKS cluster" + type = string + default = "1.30" +} + +variable "node_vm_size" { + description = "VM size for AKS default node pool" + type = string + default = "Standard_D4s_v3" +} + +variable "node_count" { + description = "Number of nodes in the AKS default node pool" + type = number + default = 3 +} + +variable "acr_name" { + description = "Azure Container Registry name. If empty, auto-generated as acragentic<4-digit-suffix>" + type = string + default = "" +} + +variable "github_org" { + description = "GitHub org for OIDC federation" + type = string +} + +variable "github_repo" { + description = "GitHub repo name for OIDC federation" + type = string +} + +variable "argocd_chart_version" { + description = "Helm chart version for ArgoCD" + type = string + default = "7.3.4" +} + +variable "aks_mcp_chart_version" { + description = "Helm chart version for the AKS MCP Server" + type = string + default = "0.1.0" +} + +variable "tags" { + description = "Tags to apply to all resources" + type = map(string) + default = { + project = "agentic-platform-engineering" + managed_by = "terraform" + } +} diff --git a/infra/versions.tf b/infra/versions.tf new file mode 100644 index 0000000..9a27724 --- /dev/null +++ b/infra/versions.tf @@ -0,0 +1,26 @@ +terraform { + required_version = ">= 1.7" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 3.110" + } + azuread = { + source = "hashicorp/azuread" + version = "~> 2.53" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.31" + } + helm = { + source = "hashicorp/helm" + version = "~> 2.14" + } + random = { + source = "hashicorp/random" + version = "~> 3.6" + } + } +}