From 3e5e866b3970eea2f5f4560f01336ccfb6e7d9fe Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Fri, 23 Jan 2026 14:27:23 +0100 Subject: [PATCH 1/6] OCPEDGE-2286: Add instance availability check for AWS hypervisor deployment Add pre-deployment capacity validation before CloudFormation stack creation to prevent failures due to EC2 capacity constraints. Changes: - Add create_capacity_reservation() function that auto-detects available AZs and creates a targeted capacity reservation - Add cancel_capacity_reservation() for cleanup on destroy or failure - Update CloudFormation template with CapacityReservationId and AvailabilityZone parameters with conditional usage - Add ENABLE_CAPACITY_RESERVATION config variable (default: true) - Add error handling flags to destroy.sh for consistency The capacity check runs before stack creation, trying each AZ in the region until one with available capacity is found. On failure, users get actionable error messages suggesting alternative regions or instance types. 
Co-Authored-By: Claude Opus 4.5 --- deploy/aws-hypervisor/instance.env.template | 5 + deploy/aws-hypervisor/scripts/common.sh | 111 ++++++++++++++++++++ deploy/aws-hypervisor/scripts/create.sh | 65 +++++++++++- deploy/aws-hypervisor/scripts/destroy.sh | 16 +++ 4 files changed, 194 insertions(+), 3 deletions(-) diff --git a/deploy/aws-hypervisor/instance.env.template b/deploy/aws-hypervisor/instance.env.template index da7746d..1e4e5d6 100644 --- a/deploy/aws-hypervisor/instance.env.template +++ b/deploy/aws-hypervisor/instance.env.template @@ -7,6 +7,11 @@ export REGION=us-west-2 export EC2_INSTANCE_TYPE="c5n.metal" export AWS_DEFAULT_REGION=us-west-2 +# EC2 Capacity Reservation Settings +# Enable capacity reservation check before instance creation (recommended) +# Set to 'false' to disable the pre-flight capacity check +export ENABLE_CAPACITY_RESERVATION=true + export SSH_PUBLIC_KEY=/home/${USER}/.ssh/id_ed25519.pub #It is suggested that this key has no passphrase for ease of use # Leave empty for auto-detect ami for images, defaults to RHEL 9.6 GA diff --git a/deploy/aws-hypervisor/scripts/common.sh b/deploy/aws-hypervisor/scripts/common.sh index bbdb212..28d447b 100755 --- a/deploy/aws-hypervisor/scripts/common.sh +++ b/deploy/aws-hypervisor/scripts/common.sh @@ -10,6 +10,9 @@ export RHEL_HOST_ARCHITECTURE="${RHEL_HOST_ARCHITECTURE:-x86_64}" export EC2_INSTANCE_TYPE="${EC2_INSTANCE_TYPE:-c5n.metal}" export RHEL_VERSION="${RHEL_VERSION:-9.6}" +# Capacity reservation defaults +export ENABLE_CAPACITY_RESERVATION="${ENABLE_CAPACITY_RESERVATION:-true}" + readonly COLOR_RED='\033[0;31m' readonly COLOR_YELLOW='\033[0;33m' readonly COLOR_BLUE='\033[0;34m' @@ -74,3 +77,111 @@ function set_aws_machine_hostname() { msg_info "setting machine hostname to aws-${STACK_NAME}" ssh "$instance_ip" "sudo hostnamectl set-hostname aws-$STACK_NAME" } + +# Creates a capacity reservation and returns the reservation ID and availability zone. 
+# Auto-detects the first available AZ in the configured region. +# Exits with error if capacity is unavailable. +# Usage: result=$(create_capacity_reservation "instance_type" "region") +# reservation_id=$(echo "$result" | awk '{print $1}') +# availability_zone=$(echo "$result" | awk '{print $2}') +function create_capacity_reservation() { + local instance_type="$1" + local region="$2" + + msg_info "Checking EC2 capacity availability for ${instance_type} in ${region}..." + + # Auto-detect available AZs in region + local az_list + if ! az_list=$(aws ec2 describe-availability-zones \ + --region "${region}" \ + --filters "Name=state,Values=available" \ + --query 'AvailabilityZones[*].ZoneName' \ + --output text \ + --no-cli-pager); then + msg_err "Failed to query availability zones in region ${region}" + return 1 + fi + + if [[ -z "${az_list}" ]]; then + msg_err "No available availability zones found in region ${region}" + return 1 + fi + + # Try each AZ until we find one with capacity + local reservation_output + local create_status + local reservation_id + local availability_zone + + for az in ${az_list}; do + msg_info "Trying availability zone: ${az}..." + + set +e + reservation_output=$(aws ec2 create-capacity-reservation \ + --region "${region}" \ + --instance-type "${instance_type}" \ + --instance-platform "Linux/UNIX" \ + --instance-count 1 \ + --availability-zone "${az}" \ + --instance-match-criteria "targeted" \ + --output json \ + --no-cli-pager 2>&1) + create_status=$? 
+ set -e + + if [[ ${create_status} -eq 0 ]]; then + # Extract reservation ID + reservation_id=$(echo "${reservation_output}" | jq -r '.CapacityReservation.CapacityReservationId') + + if [[ -n "${reservation_id}" && "${reservation_id}" != "null" ]]; then + availability_zone="${az}" + msg_info "Capacity reservation created: ${reservation_id} in ${availability_zone}" + echo "${reservation_id} ${availability_zone}" + return 0 + fi + fi + + # Check if it's a capacity error (expected) vs other error (unexpected) + if echo "${reservation_output}" | grep -qi "InsufficientInstanceCapacity\|Unsupported"; then + msg_info "No capacity in ${az}, trying next..." + else + msg_warning "Unexpected error in ${az}: ${reservation_output}" + fi + done + + # No capacity found in any AZ + msg_err "Failed to reserve capacity for ${instance_type} in any availability zone in ${region}" + msg_err "" + msg_err "Possible solutions:" + msg_err " 1. Try a different region (set REGION in instance.env)" + msg_err " 2. Try a different instance type (set EC2_INSTANCE_TYPE in instance.env)" + msg_err " 3. Wait and retry (capacity constraints are often temporary)" + return 1 +} + +# Cancels a capacity reservation by ID. Handles already-cancelled reservations gracefully. +# Usage: cancel_capacity_reservation "reservation_id" "region" +function cancel_capacity_reservation() { + local reservation_id="$1" + local region="$2" + + if [[ -z "${reservation_id}" || "${reservation_id}" == "null" ]]; then + return 0 # Nothing to cancel + fi + + msg_info "Canceling capacity reservation ${reservation_id}..." + + set +e + aws ec2 cancel-capacity-reservation \ + --region "${region}" \ + --capacity-reservation-id "${reservation_id}" \ + --no-cli-pager >/dev/null 2>&1 + local cancel_status=$? 
+ set -e + + if [[ ${cancel_status} -eq 0 ]]; then + msg_info "Capacity reservation canceled successfully" + else + msg_warning "Failed to cancel capacity reservation (may already be canceled)" + fi +} diff --git a/deploy/aws-hypervisor/scripts/create.sh b/deploy/aws-hypervisor/scripts/create.sh index 99d5737..b1f1f57 100755 --- a/deploy/aws-hypervisor/scripts/create.sh +++ b/deploy/aws-hypervisor/scripts/create.sh @@ -8,8 +8,23 @@ set -o nounset set -o errexit set -o pipefail -#Save stacks events -trap 'save_stack_events' EXIT TERM INT +#Save stacks events and cleanup capacity reservation on failure +trap 'save_stack_events; cleanup_capacity_on_error' EXIT TERM INT + +# Cleanup function for capacity reservation on error +function cleanup_capacity_on_error() { + set +o errexit + local reservation_file="${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id" + # Only cleanup if stack creation didn't complete successfully + if [[ -f "${reservation_file}" && ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/.stack-created" ]]; then + local reservation_id + reservation_id=$(cat "${reservation_file}") + cancel_capacity_reservation "${reservation_id}" "${REGION}" + rm -f "${reservation_file}" + rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone" + fi + set -o errexit +} mkdir -p "${SCRIPT_DIR}/../${SHARED_DIR}" @@ -41,6 +56,28 @@ echo "ec2-user" > "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" echo -e "AMI ID: $RHEL_HOST_AMI" echo -e "Machine Type: $EC2_INSTANCE_TYPE" +# Create capacity reservation to validate and guarantee instance availability +CAPACITY_RESERVATION_ID="" +AVAILABILITY_ZONE="" + +if [[ "${ENABLE_CAPACITY_RESERVATION}" == "true" ]]; then + if reservation_result=$(create_capacity_reservation "${EC2_INSTANCE_TYPE}" "${REGION}"); then + CAPACITY_RESERVATION_ID=$(echo "${reservation_result}" | awk '{print $1}') + AVAILABILITY_ZONE=$(echo "${reservation_result}" | awk '{print $2}') + + # Store for cleanup + echo "${CAPACITY_RESERVATION_ID}" > 
"${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id" + echo "${AVAILABILITY_ZONE}" > "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone" + + msg_info "Capacity guaranteed in ${AVAILABILITY_ZONE}" + else + msg_err "Failed to reserve capacity. Aborting deployment." + exit 1 + fi +else + msg_info "Capacity reservation disabled, skipping pre-flight check" +fi + ec2Type="VirtualMachine" if [[ "$EC2_INSTANCE_TYPE" =~ c[0-9]+[gn].metal ]]; then ec2Type="MetalMachine" @@ -53,6 +90,8 @@ Description: Template for RHEL machine Launch Conditions: # If IsMetal parameter == metal, then do not add a secondary volume AddSecondaryVolume: !Not [!Equals [!Ref EC2Type, 'MetalMachine']] + UseCapacityReservation: !Not [!Equals [!Ref CapacityReservationId, '']] + UseSpecificAZ: !Not [!Equals [!Ref AvailabilityZone, '']] Mappings: VolumeSize: MetalMachine: @@ -91,6 +130,14 @@ Parameters: PublicKeyString: Type: String Description: The public key used to connect to the EC2 instance + CapacityReservationId: + Type: String + Description: EC2 Capacity Reservation ID (optional) + Default: "" + AvailabilityZone: + Type: String + Description: Specific AZ for instance placement (optional) + Default: "" Metadata: AWS::CloudFormation::Interface: @@ -144,6 +191,7 @@ Resources: VpcId: !Ref RHELVPC CidrBlock: !Ref PublicSubnetCidr MapPublicIpOnLaunch: true + AvailabilityZone: !If [UseSpecificAZ, !Ref AvailabilityZone, !Ref 'AWS::NoValue'] Tags: - Key: Name Value: RHELPublicSubnet @@ -277,6 +325,11 @@ Resources: ImageId: !Ref AmiId IamInstanceProfile: !Ref RHELInstanceProfile InstanceType: !Ref HostInstanceType + CapacityReservationSpecification: !If + - UseCapacityReservation + - CapacityReservationTarget: + CapacityReservationId: !Ref CapacityReservationId + - !Ref AWS::NoValue NetworkInterfaces: - AssociatePublicIpAddress: "False" DeviceIndex: "0" @@ -362,7 +415,9 @@ aws --region "$REGION" cloudformation create-stack --stack-name "${STACK_NAME}" 
"ParameterKey=Machinename,ParameterValue=${STACK_NAME}" \ "ParameterKey=AmiId,ParameterValue=${RHEL_HOST_AMI}" \ "ParameterKey=EC2Type,ParameterValue=${ec2Type}" \ - "ParameterKey=PublicKeyString,ParameterValue=$(cat "${SSH_PUBLIC_KEY}")" + "ParameterKey=PublicKeyString,ParameterValue=$(cat "${SSH_PUBLIC_KEY}")" \ + "ParameterKey=CapacityReservationId,ParameterValue=${CAPACITY_RESERVATION_ID}" \ + "ParameterKey=AvailabilityZone,ParameterValue=${AVAILABILITY_ZONE}" echo "Created stack" @@ -401,3 +456,7 @@ copy_configure_script set_aws_machine_hostname scp "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}:/tmp/init_output.txt" "${SCRIPT_DIR}/../${SHARED_DIR}/init_output.txt" + +# Mark stack creation as successful (prevents capacity cleanup on exit) +touch "${SCRIPT_DIR}/../${SHARED_DIR}/.stack-created" +msg_info "Instance creation completed successfully" diff --git a/deploy/aws-hypervisor/scripts/destroy.sh b/deploy/aws-hypervisor/scripts/destroy.sh index 9bd8d3d..fba37c4 100755 --- a/deploy/aws-hypervisor/scripts/destroy.sh +++ b/deploy/aws-hypervisor/scripts/destroy.sh @@ -4,6 +4,10 @@ SCRIPT_DIR=$(dirname "$0") # shellcheck source=/dev/null source "${SCRIPT_DIR}/common.sh" +set -o nounset +set -o errexit +set -o pipefail + # Check if instance data directory exists and has the required files instance_data_dir="${SCRIPT_DIR}/../${SHARED_DIR}" public_address_file="${instance_data_dir}/public_address" @@ -34,6 +38,18 @@ else ssh "$ssh_host_ip" "sudo subscription-manager unregister" || echo "Warning: Failed to unregister subscription manager (instance may be unreachable or not registered)" fi +# Cancel capacity reservation if it exists +reservation_file="${instance_data_dir}/capacity-reservation-id" +if [[ -f "${reservation_file}" ]]; then + reservation_id=$(cat "${reservation_file}") + if [[ -n "${reservation_id}" && "${reservation_id}" != "null" ]]; then + cancel_capacity_reservation "${reservation_id}" "${REGION}" + fi + # Clean up capacity 
reservation files + rm -f "${reservation_file}" + rm -f "${instance_data_dir}/availability-zone" +fi + # Delete the CloudFormation stack echo "Deleting CloudFormation stack '${STACK_NAME}'..." aws --region "$REGION" cloudformation delete-stack --stack-name "${STACK_NAME}" From 4571b7901f6d2ea47f8a0f5b6138df1bf4bdd2a6 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Tue, 27 Jan 2026 15:39:49 +0100 Subject: [PATCH 2/6] OCPEDGE-2286: Fix capacity reservation bugs and extract CloudFormation template Bug fixes: - Fix platform mismatch: RHEL requires "Red Hat Enterprise Linux" platform, not "Linux/UNIX" for capacity reservations - Fix CloudFormation error: CapacityReservationSpecification is a LaunchTemplate property, not EC2::Instance property. Added conditional RHELLaunchTemplate resource that's created when capacity reservation is used Refactoring: - Extract CloudFormation template from heredoc in create.sh to separate file at templates/rhel-instance.yaml for better maintainability and validation - Clean up YAML formatting and comments Tested in us-east-1, eu-west-1, and eu-north-1 regions successfully. 
Co-Authored-By: Claude Opus 4.5 --- deploy/aws-hypervisor/scripts/common.sh | 6 +- deploy/aws-hypervisor/scripts/create.sh | 321 +---------------- .../templates/rhel-instance.yaml | 329 ++++++++++++++++++ 3 files changed, 335 insertions(+), 321 deletions(-) create mode 100644 deploy/aws-hypervisor/templates/rhel-instance.yaml diff --git a/deploy/aws-hypervisor/scripts/common.sh b/deploy/aws-hypervisor/scripts/common.sh index 28d447b..553541d 100755 --- a/deploy/aws-hypervisor/scripts/common.sh +++ b/deploy/aws-hypervisor/scripts/common.sh @@ -31,6 +31,7 @@ function msg_info() { } function aws_ec2_describe_images() { + # shellcheck disable=SC2153 # REGION is an env var from instance.env, not a misspelling of local 'region' aws ec2 describe-images \ --query 'reverse(sort_by(Images, &CreationDate))[].[Name, ImageId, CreationDate]' \ --filters "Name=name,Values=RHEL-${RHEL_VERSION}.*GA*${RHEL_HOST_ARCHITECTURE}*" \ @@ -87,8 +88,9 @@ function set_aws_machine_hostname() { function create_capacity_reservation() { local instance_type="$1" local region="$2" + local instance_platform="${3:-Red Hat Enterprise Linux}" - msg_info "Checking EC2 capacity availability for ${instance_type} in ${region}..." + msg_info "Checking EC2 capacity availability for ${instance_type} (${instance_platform}) in ${region}..." 
# Auto-detect available AZs in region local az_list @@ -120,7 +122,7 @@ function create_capacity_reservation() { reservation_output=$(aws ec2 create-capacity-reservation \ --region "${region}" \ --instance-type "${instance_type}" \ - --instance-platform "Linux/UNIX" \ + --instance-platform "${instance_platform}" \ --instance-count 1 \ --availability-zone "${az}" \ --instance-match-criteria "targeted" \ diff --git a/deploy/aws-hypervisor/scripts/create.sh b/deploy/aws-hypervisor/scripts/create.sh index b1f1f57..49f4eb0 100755 --- a/deploy/aws-hypervisor/scripts/create.sh +++ b/deploy/aws-hypervisor/scripts/create.sh @@ -83,325 +83,8 @@ if [[ "$EC2_INSTANCE_TYPE" =~ c[0-9]+[gn].metal ]]; then ec2Type="MetalMachine" fi -# shellcheck disable=SC2154 -cat > "${cf_tpl_file}" << EOF -AWSTemplateFormatVersion: 2010-09-09 -Description: Template for RHEL machine Launch -Conditions: -# If IsMetal parameter == metal, then do not add a secondary volume - AddSecondaryVolume: !Not [!Equals [!Ref EC2Type, 'MetalMachine']] - UseCapacityReservation: !Not [!Equals [!Ref CapacityReservationId, '']] - UseSpecificAZ: !Not [!Equals [!Ref AvailabilityZone, '']] -Mappings: - VolumeSize: - MetalMachine: - PrimaryVolumeSize: "300" - SecondaryVolumeSize: "0" - VirtualMachine: - PrimaryVolumeSize: "200" - SecondaryVolumeSize: "100" -Parameters: - EC2Type: - Default: 'VirtualMachine' - Type: String - VpcCidr: - AllowedPattern: ^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/(1[6-9]|2[0-4]))$ - ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-24. - Default: 10.192.0.0/16 - Description: CIDR block for VPC. - Type: String - PublicSubnetCidr: - Description: Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone - Type: String - Default: 10.192.10.0/24 - AmiId: - Description: Current RHEL AMI to use. 
- Type: AWS::EC2::Image::Id - Machinename: - MaxLength: 27 - MinLength: 1 - ConstraintDescription: Machinename - Description: Machinename - Type: String - Default: rhel-testbed-ec2-instance - HostInstanceType: - Default: t2.medium - Type: String - PublicKeyString: - Type: String - Description: The public key used to connect to the EC2 instance - CapacityReservationId: - Type: String - Description: EC2 Capacity Reservation ID (optional) - Default: "" - AvailabilityZone: - Type: String - Description: Specific AZ for instance placement (optional) - Default: "" - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: "Host Information" - Parameters: - - HostInstanceType - - Label: - default: "Network Configuration" - Parameters: - - PublicSubnet - ParameterLabels: - PublicSubnet: - default: "Worker Subnet" - HostInstanceType: - default: "Worker Instance Type" - -Resources: -## VPC Creation - - RHELVPC: - Type: AWS::EC2::VPC - Properties: - CidrBlock: !Ref VpcCidr - Tags: - - Key: Name - Value: RHELVPC - -## Setup internet access - - RHELInternetGateway: - Type: AWS::EC2::InternetGateway - DeletionPolicy: Delete - Properties: - Tags: - - Key: Name - Value: RHELInternetGateway - - RHELGatewayAttachment: - Type: AWS::EC2::VPCGatewayAttachment - DeletionPolicy: Delete - Properties: - VpcId: !Ref RHELVPC - InternetGatewayId: !Ref RHELInternetGateway - - RHELPublicSubnet: - Type: AWS::EC2::Subnet - DeletionPolicy: Delete - Properties: - VpcId: !Ref RHELVPC - CidrBlock: !Ref PublicSubnetCidr - MapPublicIpOnLaunch: true - AvailabilityZone: !If [UseSpecificAZ, !Ref AvailabilityZone, !Ref 'AWS::NoValue'] - Tags: - - Key: Name - Value: RHELPublicSubnet - - RHELNatGatewayEIP: - Type: AWS::EC2::EIP - DeletionPolicy: Delete - DependsOn: RHELGatewayAttachment - Properties: - Domain: vpc - - RHELNatGateway: - Type: AWS::EC2::NatGateway - DeletionPolicy: Delete - DependsOn: RHELNatGatewayEIP - Properties: - AllocationId: !GetAtt 
RHELNatGatewayEIP.AllocationId - SubnetId: !Ref RHELPublicSubnet - - RHELRouteTable: - Type: AWS::EC2::RouteTable - DeletionPolicy: Delete - Properties: - VpcId: !Ref RHELVPC - Tags: - - Key: Name - Value: RHELRouteTable - - RHELPublicRoute: - Type: AWS::EC2::Route - DependsOn: RHELGatewayAttachment - Properties: - RouteTableId: !Ref RHELRouteTable - DestinationCidrBlock: "0.0.0.0/0" - GatewayId: !Ref RHELInternetGateway - - RHELPublicSubnetRouteTableAssociation: - Type: AWS::EC2::SubnetRouteTableAssociation - DependsOn: RHELRouteTable - Properties: - RouteTableId: !Ref RHELRouteTable - SubnetId: !Ref RHELPublicSubnet - -# Setup EC2 Roles and security - - RHELIamRole: - Type: AWS::IAM::Role - DeletionPolicy: Delete - Properties: - AssumeRolePolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: "Allow" - Principal: - Service: - - "ec2.amazonaws.com" - Action: - - "sts:AssumeRole" - Path: "/" - - RHELInstanceProfile: - Type: "AWS::IAM::InstanceProfile" - Properties: - Path: "/" - Roles: - - Ref: "RHELIamRole" - - RHELSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: RHEL Host Security Group - SecurityGroupIngress: - - IpProtocol: icmp - FromPort: -1 - ToPort: -1 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 22 - ToPort: 22 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 5678 - ToPort: 5678 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 80 - ToPort: 80 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 443 - ToPort: 443 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 8080 - ToPort: 8080 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 9090 - ToPort: 9090 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 5353 - ToPort: 5353 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 6443 - ToPort: 6443 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 8213 - ToPort: 8213 - CidrIp: 0.0.0.0/0 - - IpProtocol: tcp - FromPort: 30000 - ToPort: 32767 - CidrIp: 0.0.0.0/0 - - IpProtocol: udp - FromPort: 30000 - 
ToPort: 32767 - CidrIp: 0.0.0.0/0 - VpcId: !Ref RHELVPC - - RHELInstance: - Type: AWS::EC2::Instance - DeletionPolicy: Delete - DependsOn: - - RHELPublicSubnet - - RHELGatewayAttachment - - RHELInstanceProfile - Properties: - ImageId: !Ref AmiId - IamInstanceProfile: !Ref RHELInstanceProfile - InstanceType: !Ref HostInstanceType - CapacityReservationSpecification: !If - - UseCapacityReservation - - CapacityReservationTarget: - CapacityReservationId: !Ref CapacityReservationId - - !Ref AWS::NoValue - NetworkInterfaces: - - AssociatePublicIpAddress: "False" - DeviceIndex: "0" - GroupSet: - - !GetAtt RHELSecurityGroup.GroupId - SubnetId: !Ref RHELPublicSubnet - Tags: - - Key: Name - Value: !Join ["", [!Ref Machinename]] - BlockDeviceMappings: - - DeviceName: /dev/sda1 - Ebs: - VolumeSize: !FindInMap [VolumeSize, !Ref EC2Type, PrimaryVolumeSize] - VolumeType: gp3 - Iops: 16000 - - !If - - AddSecondaryVolume - - DeviceName: /dev/sdc - Ebs: - VolumeSize: !FindInMap [VolumeSize, !Ref EC2Type, SecondaryVolumeSize] - VolumeType: gp3 - Iops: 16000 - - !Ref AWS::NoValue - UserData: - Fn::Base64: !Sub | - #!/bin/bash -xe - - log_output_file=/tmp/init_output.txt - - echo "====== Authorizing public key ======" | tee -a "\$log_output_file" - echo "\${PublicKeyString}" >> /home/ec2-user/.ssh/authorized_keys - - sudo dnf install -y git make cockpit lvm2 jq |& tee -a "\$log_output_file" - sudo systemctl enable --now cockpit.socket |& tee -a "\$log_output_file" - - echo "====== Getting Disk Path ======" | tee -a "\$log_output_file" - pv_location=\$(sudo lsblk -Jd | jq -r '.blockdevices[] | select(.size == "200G") | "/dev/\(.name)"') - echo "discovered pv location of (\$pv_location)" | tee -a "\$log_output_file" - - # NOTE: wrappig script vars with {} since the cloudformation will see - # them as cloudformation vars instead. 
- echo "====== Creating PV ======" | tee -a "\$log_output_file" - sudo pvcreate "\$pv_location" |& tee -a "\$log_output_file" - - echo "====== Creating VG ======" | tee -a "\$log_output_file" - sudo vgcreate rhel "\$pv_location" |& tee -a "\$log_output_file" - - RHELElasticIP: - Type: AWS::EC2::EIP - Properties: - Domain: vpc - Tags: - - Key: Name - Value: !Ref Machinename - - RHELEIPAssociation: - Type: AWS::EC2::EIPAssociation - Properties: - InstanceId: !Ref RHELInstance - AllocationId: !GetAtt RHELElasticIP.AllocationId - -Outputs: - InstanceId: - Description: RHEL Host Instance ID - Value: !Ref RHELInstance - PrivateIp: - Description: The bastion host Private DNS, will be used for cluster install pulling release image - Value: !GetAtt RHELInstance.PrivateIp - PublicIp: - Description: The bastion host Public IP, will be used for registering minIO server DNS - Value: !GetAtt RHELInstance.PublicIp -EOF +# Copy CloudFormation template from templates directory +cp "${SCRIPT_DIR}/../templates/rhel-instance.yaml" "${cf_tpl_file}" echo -e "==== Start to create rhel host ====" diff --git a/deploy/aws-hypervisor/templates/rhel-instance.yaml b/deploy/aws-hypervisor/templates/rhel-instance.yaml new file mode 100644 index 0000000..9c71969 --- /dev/null +++ b/deploy/aws-hypervisor/templates/rhel-instance.yaml @@ -0,0 +1,329 @@ +AWSTemplateFormatVersion: 2010-09-09 +Description: Template for RHEL machine Launch + +Conditions: + # If EC2Type parameter == MetalMachine, then do not add a secondary volume + AddSecondaryVolume: !Not [!Equals [!Ref EC2Type, 'MetalMachine']] + UseCapacityReservation: !Not [!Equals [!Ref CapacityReservationId, '']] + UseSpecificAZ: !Not [!Equals [!Ref AvailabilityZone, '']] + +Mappings: + VolumeSize: + MetalMachine: + PrimaryVolumeSize: "300" + SecondaryVolumeSize: "0" + VirtualMachine: + PrimaryVolumeSize: "200" + SecondaryVolumeSize: "100" + +Parameters: + EC2Type: + Default: 'VirtualMachine' + Type: String + VpcCidr: + AllowedPattern: 
^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/(1[6-9]|2[0-4]))$ + ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/16-24. + Default: 10.192.0.0/16 + Description: CIDR block for VPC. + Type: String + PublicSubnetCidr: + Description: Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone + Type: String + Default: 10.192.10.0/24 + AmiId: + Description: Current RHEL AMI to use. + Type: AWS::EC2::Image::Id + Machinename: + MaxLength: 27 + MinLength: 1 + ConstraintDescription: Machinename + Description: Machinename + Type: String + Default: rhel-testbed-ec2-instance + HostInstanceType: + Default: t2.medium + Type: String + PublicKeyString: + Type: String + Description: The public key used to connect to the EC2 instance + CapacityReservationId: + Type: String + Description: EC2 Capacity Reservation ID (optional) + Default: "" + AvailabilityZone: + Type: String + Description: Specific AZ for instance placement (optional) + Default: "" + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: "Host Information" + Parameters: + - HostInstanceType + - Label: + default: "Network Configuration" + Parameters: + - PublicSubnet + ParameterLabels: + PublicSubnet: + default: "Worker Subnet" + HostInstanceType: + default: "Worker Instance Type" + +Resources: + # VPC Creation + RHELVPC: + Type: AWS::EC2::VPC + Properties: + CidrBlock: !Ref VpcCidr + Tags: + - Key: Name + Value: RHELVPC + + # Internet Gateway + RHELInternetGateway: + Type: AWS::EC2::InternetGateway + DeletionPolicy: Delete + Properties: + Tags: + - Key: Name + Value: RHELInternetGateway + + RHELGatewayAttachment: + Type: AWS::EC2::VPCGatewayAttachment + DeletionPolicy: Delete + Properties: + VpcId: !Ref RHELVPC + InternetGatewayId: !Ref RHELInternetGateway + + RHELPublicSubnet: + Type: AWS::EC2::Subnet + DeletionPolicy: Delete + Properties: + VpcId: !Ref RHELVPC + 
CidrBlock: !Ref PublicSubnetCidr + MapPublicIpOnLaunch: true + AvailabilityZone: !If [UseSpecificAZ, !Ref AvailabilityZone, !Ref 'AWS::NoValue'] + Tags: + - Key: Name + Value: RHELPublicSubnet + + RHELNatGatewayEIP: + Type: AWS::EC2::EIP + DeletionPolicy: Delete + DependsOn: RHELGatewayAttachment + Properties: + Domain: vpc + + RHELNatGateway: + Type: AWS::EC2::NatGateway + DeletionPolicy: Delete + DependsOn: RHELNatGatewayEIP + Properties: + AllocationId: !GetAtt RHELNatGatewayEIP.AllocationId + SubnetId: !Ref RHELPublicSubnet + + RHELRouteTable: + Type: AWS::EC2::RouteTable + DeletionPolicy: Delete + Properties: + VpcId: !Ref RHELVPC + Tags: + - Key: Name + Value: RHELRouteTable + + RHELPublicRoute: + Type: AWS::EC2::Route + DependsOn: RHELGatewayAttachment + Properties: + RouteTableId: !Ref RHELRouteTable + DestinationCidrBlock: "0.0.0.0/0" + GatewayId: !Ref RHELInternetGateway + + RHELPublicSubnetRouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + DependsOn: RHELRouteTable + Properties: + RouteTableId: !Ref RHELRouteTable + SubnetId: !Ref RHELPublicSubnet + + # IAM Roles and Instance Profile + RHELIamRole: + Type: AWS::IAM::Role + DeletionPolicy: Delete + Properties: + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: "Allow" + Principal: + Service: + - "ec2.amazonaws.com" + Action: + - "sts:AssumeRole" + Path: "/" + + RHELInstanceProfile: + Type: AWS::IAM::InstanceProfile + Properties: + Path: "/" + Roles: + - !Ref RHELIamRole + + # Security Group + RHELSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: RHEL Host Security Group + SecurityGroupIngress: + - IpProtocol: icmp + FromPort: -1 + ToPort: -1 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 22 + ToPort: 22 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 5678 + ToPort: 5678 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 80 + ToPort: 80 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 443 + ToPort: 443 + CidrIp: 
0.0.0.0/0 + - IpProtocol: tcp + FromPort: 8080 + ToPort: 8080 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 9090 + ToPort: 9090 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 5353 + ToPort: 5353 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 6443 + ToPort: 6443 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 8213 + ToPort: 8213 + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: 30000 + ToPort: 32767 + CidrIp: 0.0.0.0/0 + - IpProtocol: udp + FromPort: 30000 + ToPort: 32767 + CidrIp: 0.0.0.0/0 + VpcId: !Ref RHELVPC + + # Launch Template for Capacity Reservation (only created when capacity reservation is used) + RHELLaunchTemplate: + Type: AWS::EC2::LaunchTemplate + Condition: UseCapacityReservation + Properties: + LaunchTemplateData: + CapacityReservationSpecification: + CapacityReservationTarget: + CapacityReservationId: !Ref CapacityReservationId + + # EC2 Instance + RHELInstance: + Type: AWS::EC2::Instance + DeletionPolicy: Delete + DependsOn: + - RHELPublicSubnet + - RHELGatewayAttachment + - RHELInstanceProfile + Properties: + ImageId: !Ref AmiId + IamInstanceProfile: !Ref RHELInstanceProfile + InstanceType: !Ref HostInstanceType + LaunchTemplate: !If + - UseCapacityReservation + - LaunchTemplateId: !Ref RHELLaunchTemplate + Version: !GetAtt RHELLaunchTemplate.LatestVersionNumber + - !Ref AWS::NoValue + NetworkInterfaces: + - AssociatePublicIpAddress: "False" + DeviceIndex: "0" + GroupSet: + - !GetAtt RHELSecurityGroup.GroupId + SubnetId: !Ref RHELPublicSubnet + Tags: + - Key: Name + Value: !Join ["", [!Ref Machinename]] + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: !FindInMap [VolumeSize, !Ref EC2Type, PrimaryVolumeSize] + VolumeType: gp3 + Iops: 16000 + - !If + - AddSecondaryVolume + - DeviceName: /dev/sdc + Ebs: + VolumeSize: !FindInMap [VolumeSize, !Ref EC2Type, SecondaryVolumeSize] + VolumeType: gp3 + Iops: 16000 + - !Ref AWS::NoValue + UserData: + Fn::Base64: !Sub | + #!/bin/bash -xe + + 
log_output_file=/tmp/init_output.txt + + echo "====== Authorizing public key ======" | tee -a "$log_output_file" + echo "${PublicKeyString}" >> /home/ec2-user/.ssh/authorized_keys + + sudo dnf install -y git make cockpit lvm2 jq |& tee -a "$log_output_file" + sudo systemctl enable --now cockpit.socket |& tee -a "$log_output_file" + + echo "====== Getting Disk Path ======" | tee -a "$log_output_file" + pv_location=$(sudo lsblk -Jd | jq -r '.blockdevices[] | select(.size == "200G") | "/dev/\(.name)"') + echo "discovered pv location of ($pv_location)" | tee -a "$log_output_file" + + # NOTE: avoid wrapping script vars with {} (use plain $var), since + # CloudFormation's !Sub would treat them as CloudFormation vars instead. + echo "====== Creating PV ======" | tee -a "$log_output_file" + sudo pvcreate "$pv_location" |& tee -a "$log_output_file" + + echo "====== Creating VG ======" | tee -a "$log_output_file" + sudo vgcreate rhel "$pv_location" |& tee -a "$log_output_file" + + # Elastic IP + RHELElasticIP: + Type: AWS::EC2::EIP + Properties: + Domain: vpc + Tags: + - Key: Name + Value: !Ref Machinename + + RHELEIPAssociation: + Type: AWS::EC2::EIPAssociation + Properties: + InstanceId: !Ref RHELInstance + AllocationId: !GetAtt RHELElasticIP.AllocationId + +Outputs: + InstanceId: + Description: RHEL Host Instance ID + Value: !Ref RHELInstance + PrivateIp: + Description: The bastion host Private DNS, will be used for cluster install pulling release image + Value: !GetAtt RHELInstance.PrivateIp + PublicIp: + Description: The bastion host Public IP, will be used for registering minIO server DNS + Value: !GetAtt RHELInstance.PublicIp From 42261b9b5704afd5ff6d6951290db7ad7a311d7f Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Fri, 30 Jan 2026 14:09:12 +0100 Subject: [PATCH 3/6] OCPEDGE-2286: Make capacity reservations time-limited to prevent orphaned reservations Capacity reservations now auto-expire after 60 minutes (configurable via CAPACITY_RESERVATION_DURATION_MINUTES). 
This prevents orphaned reservations that could accumulate costs if the deployment script crashes or is terminated. The reservation only needs to exist until the EC2 instance launches - once running, the instance is independent of the reservation. Co-Authored-By: Claude Opus 4.5 --- deploy/aws-hypervisor/instance.env.template | 3 +++ deploy/aws-hypervisor/scripts/common.sh | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/deploy/aws-hypervisor/instance.env.template b/deploy/aws-hypervisor/instance.env.template index 1e4e5d6..6fa028a 100644 --- a/deploy/aws-hypervisor/instance.env.template +++ b/deploy/aws-hypervisor/instance.env.template @@ -11,6 +11,9 @@ export AWS_DEFAULT_REGION=us-west-2 # Enable capacity reservation check before instance creation (recommended) # Set to 'false' to disable the pre-flight capacity check export ENABLE_CAPACITY_RESERVATION=true +# Duration in minutes for capacity reservation (auto-expires to prevent orphaned reservations) +# Default: 60 minutes - should be enough for CloudFormation stack creation +# export CAPACITY_RESERVATION_DURATION_MINUTES=60 export SSH_PUBLIC_KEY=/home/${USER}/.ssh/id_ed25519.pub #It is suggested that this key has no passphrase for ease of use diff --git a/deploy/aws-hypervisor/scripts/common.sh b/deploy/aws-hypervisor/scripts/common.sh index 553541d..8096f89 100755 --- a/deploy/aws-hypervisor/scripts/common.sh +++ b/deploy/aws-hypervisor/scripts/common.sh @@ -12,6 +12,7 @@ export RHEL_VERSION="${RHEL_VERSION:-9.6}" # Capacity reservation defaults export ENABLE_CAPACITY_RESERVATION="${ENABLE_CAPACITY_RESERVATION:-true}" +export CAPACITY_RESERVATION_DURATION_MINUTES="${CAPACITY_RESERVATION_DURATION_MINUTES:-60}" readonly COLOR_RED='\033[0;31m' readonly COLOR_YELLOW='\033[0;33m' @@ -79,8 +80,9 @@ function set_aws_machine_hostname() { ssh "$instance_ip" "sudo hostnamectl set-hostname aws-$STACK_NAME" } -# Creates a capacity reservation and returns the reservation ID and availability 
zone. +# Creates a time-limited capacity reservation and returns the reservation ID and availability zone. # Auto-detects the first available AZ in the configured region. +# The reservation expires after CAPACITY_RESERVATION_DURATION_MINUTES (default: 60 minutes). # Exits with error if capacity is unavailable. # Usage: result=$(create_capacity_reservation "instance_type" "region") # reservation_id=$(echo "$result" | awk '{print $1}') @@ -89,8 +91,14 @@ function create_capacity_reservation() { local instance_type="$1" local region="$2" local instance_platform="${3:-Red Hat Enterprise Linux}" + local duration_minutes="${CAPACITY_RESERVATION_DURATION_MINUTES:-60}" + + # Calculate end date (current time + duration) + local end_date + end_date=$(date -u -d "+${duration_minutes} minutes" '+%Y-%m-%dT%H:%M:%SZ') msg_info "Checking EC2 capacity availability for ${instance_type} (${instance_platform}) in ${region}..." + msg_info "Reservation will expire at ${end_date} (${duration_minutes} minutes from now)" # Auto-detect available AZs in region local az_list @@ -126,6 +134,8 @@ function create_capacity_reservation() { --instance-count 1 \ --availability-zone "${az}" \ --instance-match-criteria "targeted" \ + --end-date-type "limited" \ + --end-date "${end_date}" \ --output json \ --no-cli-pager 2>&1) create_status=$? From b4d99e3517c9b701b016f2a3417446e71ee597fb Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Tue, 3 Feb 2026 10:11:56 +0100 Subject: [PATCH 4/6] OCPEDGE-2286: Release capacity reservation after successful instance deployment When an EC2 instance is created with a targeted capacity reservation, the instance remains bound to that reservation. If the reservation expires while the instance is stopped, the instance cannot be restarted because AWS requires the targeted reservation to be active. 
This change releases the capacity reservation immediately after the instance is successfully deployed: - Modifies the instance to use "open" capacity preference (on-demand) - Cancels the capacity reservation (no longer needed) - Cleans up local tracking files The reservation's purpose is to guarantee capacity during creation - once the instance exists, it's no longer needed and releasing it allows the instance to start/stop freely. Co-Authored-By: Claude Opus 4.5 --- deploy/aws-hypervisor/scripts/create.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/deploy/aws-hypervisor/scripts/create.sh b/deploy/aws-hypervisor/scripts/create.sh index 49f4eb0..b3d86fe 100755 --- a/deploy/aws-hypervisor/scripts/create.sh +++ b/deploy/aws-hypervisor/scripts/create.sh @@ -142,4 +142,37 @@ scp "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}:/tmp/ini # Mark stack creation as successful (prevents capacity cleanup on exit) touch "${SCRIPT_DIR}/../${SHARED_DIR}/.stack-created" + +# Release capacity reservation now that instance is running +# The reservation served its purpose (guaranteeing capacity at creation time) +# Releasing it allows the instance to start/stop freely without reservation dependency +if [[ -n "${CAPACITY_RESERVATION_ID:-}" ]]; then + msg_info "Releasing capacity reservation (no longer needed)..." 
+ + # Remove the instance's association with the specific reservation + # This changes the instance to use "open" preference (on-demand capacity) + detach_ok=true + aws --region "${REGION}" ec2 modify-instance-capacity-reservation-attributes \ + --instance-id "${INSTANCE_ID}" \ + --capacity-reservation-specification "CapacityReservationPreference=open" \ + --no-cli-pager || { + msg_warning "Failed to modify instance capacity reservation attributes" + detach_ok=false + } + + # Cancel the capacity reservation + cancel_capacity_reservation "${CAPACITY_RESERVATION_ID}" "${REGION}" + + # Clean up local files + rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id" + rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone" + + # Report success only if the instance was detached; otherwise it still targets the cancelled reservation + if [[ "${detach_ok}" == "true" ]]; then + msg_info "Capacity reservation released successfully" + else + msg_warning "Instance ${INSTANCE_ID} still targets reservation ${CAPACITY_RESERVATION_ID}; it may fail to start after being stopped" + fi +fi + msg_info "Instance creation completed successfully" From 69bd5ffe1e106ad68f0768d1a9267980eb50eccc Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Tue, 3 Feb 2026 10:28:56 +0100 Subject: [PATCH 5/6] OCPEDGE-2286: Add capacity error detection to instance start Add enhanced error handling in start.sh to detect InsufficientInstanceCapacity errors when starting a stopped instance. When capacity issues occur, provide clear, actionable error messages explaining: - EC2 instances are permanently bound to their AZ and cannot be moved - The resolution path: make destroy && make create - The trade-off: data loss warning for hypervisor contents This complements the pre-deployment capacity reservation system by handling the edge case where an instance was created successfully but later encounters capacity issues when restarting after being stopped. 
Co-Authored-By: Claude Opus 4.5 --- deploy/aws-hypervisor/scripts/start.sh | 35 ++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/deploy/aws-hypervisor/scripts/start.sh b/deploy/aws-hypervisor/scripts/start.sh index b2445fc..1de6a5f 100755 --- a/deploy/aws-hypervisor/scripts/start.sh +++ b/deploy/aws-hypervisor/scripts/start.sh @@ -8,6 +8,37 @@ set -o nounset set -o errexit set -o pipefail +# Start instance $1 in region $2; exits 1 with recovery guidance when AWS reports an Insufficient*Capacity error code +# Any other start failure (e.g. one that merely mentions a capacity reservation) is reported verbatim and also exits 1 +function start_instance_with_capacity_check() { + local instance_id="$1" + local region="$2" + + local output + set +e + output=$(aws --region "${region}" ec2 start-instances --instance-ids "${instance_id}" --no-cli-pager 2>&1) + local status=$? + set -e + + if [[ ${status} -ne 0 ]]; then + if grep -qiE "InsufficientInstanceCapacity|InsufficientCapacity" <<<"${output}"; then + msg_err "Cannot start instance: No capacity available in this Availability Zone" + msg_err "" + msg_err "EC2 instances are permanently bound to their original AZ and cannot be moved." + msg_err "The AZ where this instance was created currently has no available capacity" + msg_err "for this instance type." + msg_err "" + msg_err "To resolve, destroy and recreate the instance (will find an AZ with capacity):" + msg_err " make destroy && make create" + msg_err "" + msg_err "Note: This will delete any data on the hypervisor (clusters, images, etc.)" + exit 1 + fi + msg_err "Failed to start instance: ${output}" + exit 1 + fi +} + # Check if the instance exists and get its ID if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." @@ -27,7 +58,7 @@ case "${INSTANCE_STATE}" in ;; "stopped") echo "Starting instance..." 
- aws --region "${REGION}" ec2 start-instances --instance-ids "${INSTANCE_ID}" --no-cli-pager + start_instance_with_capacity_check "${INSTANCE_ID}" "${REGION}" echo "Waiting for instance to start..." aws --region "${REGION}" ec2 wait instance-running --instance-ids "${INSTANCE_ID}" --no-cli-pager echo "Waiting for instance to be ready..." @@ -37,7 +68,7 @@ case "${INSTANCE_STATE}" in echo "Instance is currently stopping. Waiting for it to stop completely..." aws --region "${REGION}" ec2 wait instance-stopped --instance-ids "${INSTANCE_ID}" --no-cli-pager echo "Now starting instance..." - aws --region "${REGION}" ec2 start-instances --instance-ids "${INSTANCE_ID}" --no-cli-pager + start_instance_with_capacity_check "${INSTANCE_ID}" "${REGION}" echo "Waiting for instance to start..." aws --region "${REGION}" ec2 wait instance-running --instance-ids "${INSTANCE_ID}" --no-cli-pager echo "Waiting for instance to be ready..." From c88e41dcd1e908b50bc7eb4b88a8646f0ad897e6 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Tue, 3 Feb 2026 10:34:02 +0100 Subject: [PATCH 6/6] OCPEDGE-2286: Fix shellcheck warning for REGION variable Add targeted SC2153 disable for the line where shellcheck incorrectly flags REGION as a possible misspelling of the local 'region' variable. REGION is sourced from instance.env via common.sh. Co-Authored-By: Claude Opus 4.5 --- deploy/aws-hypervisor/scripts/start.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/deploy/aws-hypervisor/scripts/start.sh b/deploy/aws-hypervisor/scripts/start.sh index 1de6a5f..6058232 100755 --- a/deploy/aws-hypervisor/scripts/start.sh +++ b/deploy/aws-hypervisor/scripts/start.sh @@ -49,6 +49,7 @@ INSTANCE_ID=$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id") echo "Starting instance ${INSTANCE_ID}..." 
# Check current instance state +# shellcheck disable=SC2153 # REGION is sourced from instance.env via common.sh, not a misspelling of local 'region' INSTANCE_STATE=$(aws --region "${REGION}" ec2 describe-instances --instance-ids "${INSTANCE_ID}" --query 'Reservations[0].Instances[0].State.Name' --output text --no-cli-pager) echo "Current instance state: ${INSTANCE_STATE}"