diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 1ac2be6..27f67a8 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -6,14 +6,14 @@ }, "metadata": { "description": "A curated marketplace of Claude Code plugins for AWS development — skills, sub-agents, MCP servers, and hooks for building on AWS.", - "version": "0.1.0" + "version": "0.2.0" }, "plugins": [ { "name": "aws-dev-toolkit", "source": "./plugins/aws-dev-toolkit", "description": "AWS development toolkit — 34 skills, 11 agents, 3 MCP servers, and hooks for building, migrating, and reviewing well-architected applications on AWS.", - "version": "0.1.0", + "version": "0.2.0", "author": { "name": "aws-samples" }, diff --git a/plugins/aws-dev-toolkit/.claude-plugin/plugin.json b/plugins/aws-dev-toolkit/.claude-plugin/plugin.json index 0836e44..207d56d 100644 --- a/plugins/aws-dev-toolkit/.claude-plugin/plugin.json +++ b/plugins/aws-dev-toolkit/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "aws-dev-toolkit", - "version": "0.1.0", + "version": "0.2.0", "description": "AWS development toolkit — 34 skills, 11 agents, 3 MCP servers, and hooks for building, migrating, and reviewing well-architected applications on AWS.", "author": { "name": "aws-samples" diff --git a/plugins/aws-dev-toolkit/skills/ecs/SKILL.md b/plugins/aws-dev-toolkit/skills/ecs/SKILL.md index 30c6eda..ff0e5dd 100644 --- a/plugins/aws-dev-toolkit/skills/ecs/SKILL.md +++ b/plugins/aws-dev-toolkit/skills/ecs/SKILL.md @@ -1,6 +1,6 @@ --- name: ecs -description: Design, deploy, and troubleshoot Amazon ECS workloads. Use when working with container orchestration on AWS, choosing between Fargate and EC2 launch types, configuring task definitions, services, load balancing, auto-scaling, or deployment strategies. 
+description: This skill should be used when the user asks to "deploy containers on ECS", "set up an ECS service", "choose between Fargate and EC2", "configure ECS task definitions", "set up ECS auto-scaling", "use ECS Express Mode", "migrate from App Runner", or mentions ECS load balancing, deployment strategies, or container orchestration on AWS. --- You are an AWS ECS specialist. When advising on ECS workloads: @@ -51,6 +51,18 @@ You are an AWS ECS specialist. When advising on ECS workloads: - Set `minCapacity` >= 2 for production services (multi-AZ resilience). - Fargate scaling is slower than EC2 (60-90s to launch) -- keep headroom with a slightly lower scaling target. +## Express Mode + +**ECS Express Mode** deploys a production-ready, load-balanced Fargate service from a single API call with just three parameters: container image, task execution role, and infrastructure role. There is no additional charge beyond the underlying AWS resources. AWS recommends Express Mode as the replacement for App Runner, which closes to new customers on April 30, 2026. + +**Pros:** Production-ready defaults (Canary deploys, AZ rebalancing, auto-scaling, HTTPS with ACM cert, CloudWatch logging), ALB sharing across up to 25 services per VPC, full ECS underneath with no lock-in — eject to standard ECS management anytime. Supports Console, CLI, SDKs, CloudFormation, Terraform, and MCP Server. + +**Cons:** HTTP/HTTPS only (no TCP/UDP, queue workers, or batch), Fargate only (no EC2 launch type or GPUs; Graviton/ARM only via a manual task definition change), Canary deployment locked (no rolling or Blue/Green), LB config immutable after create, single container (no sidecars via Express API), subnet lock-in per VPC for shared ALB. + +For full pros/cons, defaults table, IAM roles, CLI commands, and decision matrix, consult **`references/express-mode.md`**. + +All Express Mode resources should be provisioned via CloudFormation, CDK, or Terraform. Use the `aws-docs` MCP tools or consult **`references/express-mode.md`** for API parameters and IaC examples.
+ ## Deployment Strategies - **Rolling update** (default): Good for most workloads. Set `minimumHealthyPercent: 100` and `maximumPercent: 200` to deploy with zero downtime. @@ -58,41 +70,41 @@ You are an AWS ECS specialist. When advising on ECS workloads: - **Canary**: Use CodeDeploy with `CodeDeployDefault.ECSCanary10Percent5Minutes` for high-risk changes. - Circuit breaker: Always enable `deploymentCircuitBreaker` with `rollback: true` to auto-rollback failed deployments. -## Copilot CLI +## Provisioning -AWS Copilot is the fastest path from code to running ECS service. Use it for greenfield projects: +All ECS resources (clusters, task definitions, services, load balancers, auto-scaling) should be provisioned via IaC — CloudFormation, CDK, or Terraform. Never create or mutate infrastructure with imperative CLI commands. Use the `cdk-docs` or `cloudformation-docs` MCP tools for current resource properties. -``` -copilot init # Initialize app, service, and environment -copilot svc deploy # Deploy service -copilot svc logs --follow # Stream logs -copilot svc status # Health and task status -copilot pipeline init # CI/CD pipeline with CodePipeline -``` +## Observability & Debugging CLI -## Common CLI Commands +CLI usage should be limited to read-only operations, observability, and interactive debugging: ```bash -# Create a cluster -aws ecs create-cluster --cluster-name my-cluster --capacity-providers FARGATE FARGATE_SPOT +# Describe cluster status +aws ecs describe-clusters --clusters my-cluster --include STATISTICS ATTACHMENTS -# Register a task definition -aws ecs register-task-definition --cli-input-json file://task-def.json +# List services in a cluster +aws ecs list-services --cluster my-cluster -# Create/update a service -aws ecs create-service --cluster my-cluster --service-name my-svc --task-definition my-task:1 --desired-count 2 --launch-type FARGATE --network-configuration
"awsvpcConfiguration={subnets=[subnet-xxx],securityGroups=[sg-xxx],assignPublicIp=DISABLED}" +# Describe a service (deployment status, events, task counts) +aws ecs describe-services --cluster my-cluster --services my-svc -# Force new deployment (pulls latest image for :latest tag) -aws ecs update-service --cluster my-cluster --service my-svc --force-new-deployment +# List running tasks +aws ecs list-tasks --cluster my-cluster --service-name my-svc --desired-status RUNNING -# Run a one-off task -aws ecs run-task --cluster my-cluster --task-definition my-task --launch-type FARGATE --network-configuration "..." +# Describe a task (container status, stopped reason, network) +aws ecs describe-tasks --cluster my-cluster --tasks <task-id> # Exec into a running container (requires ECS Exec enabled) aws ecs execute-command --cluster my-cluster --task <task-id> --container my-container --interactive --command "/bin/sh" # Tail logs aws logs tail /ecs/my-task --follow + +# Describe task definition (inspect current config) +aws ecs describe-task-definition --task-definition my-task + +# Check service events for deployment issues +aws ecs describe-services --cluster my-cluster --services my-svc --query "services[].events[:5]" ``` ## Output Format @@ -127,3 +139,10 @@ aws logs tail /ecs/my-task --follow - **No deployment circuit breaker**: Without it, a bad deployment will keep cycling failing tasks indefinitely, consuming capacity and generating noise. - **Putting secrets in environment variables**: Use the `secrets` field with Secrets Manager or SSM Parameter Store references. Environment variables are visible in the console and API. - **Running as root**: Set `user` in the task definition to a non-root user. Combine with `readonlyRootFilesystem` for defense in depth.
+ +## Additional Resources + +### Reference Files + +For detailed documentation and decision guidance, consult: +- **`references/express-mode.md`** — Full Express Mode pros/cons, defaults table, IAM roles, CLI commands, resource sharing details, Express Mode vs standard ECS decision matrix, and official AWS documentation links diff --git a/plugins/aws-dev-toolkit/skills/ecs/references/express-mode.md b/plugins/aws-dev-toolkit/skills/ecs/references/express-mode.md new file mode 100644 index 0000000..6ba3356 --- /dev/null +++ b/plugins/aws-dev-toolkit/skills/ecs/references/express-mode.md @@ -0,0 +1,131 @@ +# Amazon ECS Express Mode — Detailed Reference + +ECS Express Mode provisions a complete production application stack from a single API call requiring only three parameters: a container image, task execution role, and infrastructure role. No additional charge beyond the underlying AWS resources. AWS recommends Express Mode as the migration path from App Runner (closing to new customers April 30, 2026). + +## Pros + +- **3-parameter deployment** — Container image, execution role, infrastructure role. Everything else gets sensible defaults. +- **Production-ready from day one** — Canary deployments, AZ rebalancing, auto-scaling (CPU/memory/request count), health checks, HTTPS with auto-provisioned ACM certificate, CloudWatch logging — all configured automatically. +- **Full ECS underneath** — All underlying resources (service, task definition, ALB, security groups, scaling policies) are created in your account and remain directly accessible. Customize any resource after creation without leaving Express Mode. +- **ALB sharing across services** — Up to 25 Express Mode services in the same VPC share an ALB via host-header routing, significantly reducing per-service cost. Express Mode auto-provisions and deprovisions ALBs as services are added/removed. +- **Cluster sharing** — Express Mode services can coexist in the same cluster with standard ECS services. 
+- **Tooling and IaC support** — Available via the Console, CLI, SDKs, CloudFormation, Terraform, and the AWS Labs MCP Server for ECS. +- **No vendor lock-in risk** — Unlike App Runner, Express Mode is just ECS. "Eject" to standard ECS management at any time by managing the underlying resources directly. + +## Cons / Limitations + +- **HTTP/HTTPS workloads only** — Express Mode provisions an ALB and expects HTTP traffic. Not suitable for TCP/UDP services (use NLB + standard ECS), queue workers, batch jobs, gRPC without HTTP/2, or non-web workloads. +- **Fargate only** — No EC2 launch type. Rules out GPU instances, Graviton EC2 instance selection, host-level access, Docker-in-Docker, EBS volume mounts, or custom AMIs. +- **Canary deployment locked** — Deployment strategy is set to Canary and cannot be changed after creation. No rolling update or Blue/Green (CodeDeploy) option. +- **Load balancer config immutable** — Load balancer configurations cannot be updated on Express Mode services. If NLB, custom listener rules, or multi-protocol support is needed, use standard ECS. +- **Service name and cluster immutable after create** — Cannot be changed on updates. +- **Subnet lock-in per VPC** — The first Express Mode service in a VPC defines the subnets for that VPC's shared ALB (internet-facing or internal). Subsequent services must match those AZs. +- **Single container only** — No sidecar support in the Express Mode API. Envoy proxies, log routers, or Datadog agents as sidecars require editing the task definition directly after creation. +- **Default VPC requirements** — If no subnets are specified, requires a default VPC with at least two public subnets in two AZs with at least 8 free IPs per CIDR block per subnet. +- **x86_64 Linux only by default** — Defaults to X86_64 architecture on Linux. ARM/Graviton requires post-creation task definition changes. +- **Container name sensitivity** — The default container is named "Main".
Renaming it can break Express Mode's ability to manage subsequent updates via the Express Mode Console or APIs. + +## Defaults Table + +All underlying resources remain accessible for direct management. + +| Resource | Default | Customizable via Express Mode? | +|----------|---------|-------------------------------| +| Launch type | Fargate capacity provider | No | +| Task CPU/Memory | 1 vCPU / 2 GB | Yes (`--cpu`, `--memory`) | +| Deployment strategy | Canary | No (locked) | +| AZ rebalancing | Enabled | No (editable on service directly) | +| Auto-scaling metric | CPU at 60% target | Yes (`--scaling-target`) | +| Min/Max tasks | 1 / 20 | Yes (`--scaling-target`) | +| Health check grace | 300s | No (editable on service directly) | +| Container port | 80 | Yes (`--primary-container`) | +| Health check path | `/` | Yes (`--health-check-path`) | +| Logging | CloudWatch Logs, non-blocking, 25MB buffer | Yes (log group, prefix) | +| Subnets | Default VPC public subnets | Yes (`--network-configuration`) | +| ALB scheme | Internet-facing (public) or Internal (private) | Based on subnet type | + +## Resources Created by Express Mode + +Express Mode automatically provisions and configures: + +- ECS default cluster (if not already existing) with Fargate capacity providers +- Task definition with container, logging, and networking configurations +- Service with canary deployment and auto-scaling +- Application Load Balancer with HTTPS listener, listener rules, and target groups +- Security groups with minimal required ingress (service SG + LB SG) +- Service Linked Roles for auto-scaling and load balancing +- Application Auto Scaling scalable target and target tracking scaling policy +- CloudWatch Log group specific to the service +- Metric alarm for detecting faulty deployments +- ACM certificate for HTTPS + +## Resource Sharing and Cost Optimization + +- **Load balancer sharing** — Up to 25 Express Mode services in the same VPC share an ALB. 
Express Mode auto-provisions additional ALBs as needed and deprovisions unused ones as services are removed. +- **Cluster sharing** — Express Mode services can be grouped in ECS Clusters alongside standard (non-Express) ECS services. + +## IAM Roles + +| Role | Required? | Purpose | +|------|-----------|---------| +| `ecsTaskExecutionRole` | Yes | Pull images from ECR, send logs to CloudWatch, retrieve secrets | +| `ecsInfrastructureRoleForExpressServices` | Yes | Manage AWS resources (ALB, SGs, scaling) on your behalf | +| Task Role (`--task-role-arn`) | Optional | Allow application code to call other AWS services (S3, DynamoDB, etc.) | + +Auto-created service-linked roles: `AWSServiceRoleForECS`, `AWSServiceRoleForElasticLoadBalancing`, `AWSServiceRoleForApplicationAutoScaling_ECSService`. + +## CLI Commands + +```bash +# Create an Express Mode service (minimal — 3 required params) +aws ecs create-express-gateway-service \ + --execution-role-arn arn:aws:iam::123456789012:role/ecsTaskExecutionRole \ + --infrastructure-role-arn arn:aws:iam::123456789012:role/ecsInfrastructureRoleForExpressServices \ + --primary-container 'image=nginx' + +# Create with custom scaling, port, and service name +aws ecs create-express-gateway-service \ + --execution-role-arn arn:aws:iam::123456789012:role/ecsTaskExecutionRole \ + --infrastructure-role-arn arn:aws:iam::123456789012:role/ecsInfrastructureRoleForExpressServices \ + --primary-container 'image=my-app:v1,port=8080' \ + --scaling-target '{"minTaskCount": 2}' \ + --service-name my-api + +# Monitor an Express Mode deployment (interactive terminal UI) +aws ecs monitor-express-gateway-service \ + --service-arn arn:aws:ecs:us-east-1:123456789012:service/my-cluster/my-svc + +# Monitor with custom timeout (default 30 min) +aws ecs monitor-express-gateway-service \ + --service-arn my-express-gateway-service \ + --timeout 60 + +# Delete an Express Mode service and its managed resources +aws ecs delete-express-gateway-service --service-arn arn:aws:ecs:us-east-1:123456789012:service/my-cluster/my-svc +``` + +## When to Use Express Mode vs Standard ECS
+ +| Scenario | Express Mode | Standard ECS | +|----------|-------------|-------------| +| Stateless HTTP/HTTPS web apps and APIs | Yes | Yes | +| Rapid prototyping | Yes (fastest path) | Possible but more config | +| App Runner migration | Yes (recommended) | Possible | +| TCP/UDP services | No | Yes (NLB) | +| Queue workers / batch jobs | No | Yes | +| GPU workloads | No | Yes (EC2 launch type) | +| Graviton / ARM | No (x86 default, manual change) | Yes | +| Custom deployment strategy | No (Canary locked) | Yes (Rolling, Blue/Green, Canary) | +| Sidecar containers | No (manual post-creation) | Yes | +| NLB or custom LB config | No | Yes | +| Service-to-service (non-HTTP) | No | Yes (Service Connect, NLB) | + +## Official Documentation + +- [Amazon ECS Express Mode Overview](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/express-service-overview.html) +- [Resources created by Express Mode](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/express-service-work.html) +- [Creating an Express Mode service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/express-service-create-full.html) +- [App Runner to Express Mode migration](https://docs.aws.amazon.com/apprunner/latest/dg/apprunner-availability-change.html) +- [CLI: create-express-gateway-service](https://docs.aws.amazon.com/cli/latest/reference/ecs/create-express-gateway-service.html) +- [CLI: monitor-express-gateway-service](https://docs.aws.amazon.com/cli/latest/reference/ecs/monitor-express-gateway-service.html) +- [CLI: delete-express-gateway-service](https://docs.aws.amazon.com/cli/latest/reference/ecs/delete-express-gateway-service.html)