Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions deploy/aws-hypervisor/instance.env.template
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
export SHARED_DIR="instance-data"

# Node identifier for multi-node deployments (default: node-0)
# export NODE_ID="node-0"

export AWS_PROFILE=microshift-dev
export STACK_NAME=${USER}-dev
export RHEL_HOST_ARCHITECTURE=x86_64
Expand Down
24 changes: 22 additions & 2 deletions deploy/aws-hypervisor/scripts/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,22 @@ export RHEL_VERSION="${RHEL_VERSION:-9.6}"
export ENABLE_CAPACITY_RESERVATION="${ENABLE_CAPACITY_RESERVATION:-true}"
export CAPACITY_RESERVATION_DURATION_MINUTES="${CAPACITY_RESERVATION_DURATION_MINUTES:-60}"

export NODE_ID="${NODE_ID:-node-0}"

# Print the instance-data root directory, i.e. ${SCRIPT_DIR}/../${SHARED_DIR}.
# Globals: SCRIPT_DIR (read), SHARED_DIR (read)
# Outputs: the path on stdout, followed by a newline
get_shared_dir() {
  printf '%s\n' "${SCRIPT_DIR}/../${SHARED_DIR}"
}

# Print the data directory for the current node.
# Globals: SCRIPT_DIR (read), SHARED_DIR (read), NODE_ID (read)
# Outputs: ${SCRIPT_DIR}/../${SHARED_DIR}/${NODE_ID} on stdout, except when
#          that directory is missing and a flat-layout aws-instance-id file
#          exists directly under the shared directory; in that case the
#          shared directory itself is printed.
get_node_dir() {
  local base="${SCRIPT_DIR}/../${SHARED_DIR}"
  local candidate="${base}/${NODE_ID}"
  # Fallback for unported scripts: flat layout means pre-subdir deployment
  if [[ -d "${candidate}" || ! -f "${base}/aws-instance-id" ]]; then
    printf '%s\n' "${candidate}"
  else
    printf '%s\n' "${base}"
  fi
}

readonly COLOR_RED='\033[0;31m'
readonly COLOR_YELLOW='\033[0;33m'
readonly COLOR_BLUE='\033[0;34m'
Expand Down Expand Up @@ -65,17 +81,21 @@ function get_rhel_ami() {
}

# Copy configure.sh to the current node's instance and mark it executable.
# Globals:   SCRIPT_DIR (read)
# Inputs:    ssh_user and public_address files in the directory printed by
#            get_node_dir
# Returns:   exit status of the final ssh command
function copy_configure_script() {
  local node_dir
  node_dir="$(get_node_dir)"
  local instance_ip
  # Build user@host only from the node directory. The earlier read from the
  # flat ${SHARED_DIR} layout was overwritten right away and only produced
  # spurious cat errors on per-node deployments.
  instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")"
  msg_info "copying over config ${SCRIPT_DIR}/configure.sh and making it executable"
  # The tilde stays quoted so it is passed literally to the remote side.
  scp "${SCRIPT_DIR}/configure.sh" "$instance_ip:~/configure.sh"
  ssh "$instance_ip" 'chmod +x ~/configure.sh'
}

# Set the remote machine's hostname to aws-${STACK_NAME} over ssh.
# Globals:   STACK_NAME (read)
# Inputs:    ssh_user and public_address files in the directory printed by
#            get_node_dir
# Returns:   exit status of the ssh command
# shellcheck disable=SC2029 # we want interpolation for the stack name in the ssh command
function set_aws_machine_hostname() {
  local node_dir
  node_dir="$(get_node_dir)"
  local instance_ip
  # Build user@host only from the node directory. The earlier read from the
  # flat ${SHARED_DIR} layout was overwritten right away and only produced
  # spurious cat errors on per-node deployments.
  instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")"
  msg_info "setting machine hostname to aws-${STACK_NAME}"
  ssh "$instance_ip" "sudo hostnamectl set-hostname aws-$STACK_NAME"
}
Expand Down
46 changes: 26 additions & 20 deletions deploy/aws-hypervisor/scripts/create.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,25 @@ trap 'save_stack_events; cleanup_capacity_on_error' EXIT TERM INT
# Cleanup function for capacity reservation on error.
# If the node directory holds a capacity-reservation-id file and no
# .stack-created marker, cancel that reservation and remove the local
# capacity-reservation-id and availability-zone files.
# Globals:   REGION (read)
# Inputs:    files in the directory printed by get_node_dir
# Note:      errexit is turned off for the duration of the cleanup and is
#            turned on again before returning.
function cleanup_capacity_on_error() {
  set +o errexit
  local ndir
  ndir="$(get_node_dir)"
  local reservation_file="${ndir}/capacity-reservation-id"
  # Only cleanup if stack creation didn't complete successfully
  if [[ -f "${reservation_file}" && ! -f "${ndir}/.stack-created" ]]; then
    local reservation_id
    reservation_id=$(cat "${reservation_file}")
    cancel_capacity_reservation "${reservation_id}" "${REGION}"
    rm -f "${reservation_file}"
    rm -f "${ndir}/availability-zone"
  fi
  set -o errexit
}

mkdir -p "${SCRIPT_DIR}/../${SHARED_DIR}"
mkdir -p "$(get_shared_dir)"
mkdir -p "$(get_node_dir)"

node_dir="$(get_node_dir)"
shared_dir="$(get_shared_dir)"

NETWORK_STACK_NAME="${STACK_NAME}-network"
TEMPLATES_DIR="${SCRIPT_DIR}/../templates"
Expand All @@ -36,10 +42,10 @@ function save_stack_events()
set +o errexit
aws --region "${REGION}" cloudformation describe-stack-events \
--stack-name "${STACK_NAME}" --output json \
> "${SCRIPT_DIR}/../${SHARED_DIR}/stack-events-${STACK_NAME}.json" 2>/dev/null
> "$(get_node_dir)/stack-events-${STACK_NAME}.json" 2>/dev/null
aws --region "${REGION}" cloudformation describe-stack-events \
--stack-name "${NETWORK_STACK_NAME}" --output json \
> "${SCRIPT_DIR}/../${SHARED_DIR}/stack-events-${NETWORK_STACK_NAME}.json" 2>/dev/null
> "$(get_shared_dir)/stack-events-${NETWORK_STACK_NAME}.json" 2>/dev/null
set -o errexit
}

Expand All @@ -57,7 +63,7 @@ if [[ -z "${RHEL_HOST_AMI}" ]]; then
exit 1
fi

echo "ec2-user" > "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user"
echo "ec2-user" > "${node_dir}/ssh_user"

echo -e "AMI ID: $RHEL_HOST_AMI"
echo -e "Machine Type: $EC2_INSTANCE_TYPE"
Expand All @@ -72,8 +78,8 @@ if [[ "${ENABLE_CAPACITY_RESERVATION}" == "true" ]]; then
AVAILABILITY_ZONE=$(echo "${reservation_result}" | awk '{print $2}')

# Store for cleanup
echo "${CAPACITY_RESERVATION_ID}" > "${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id"
echo "${AVAILABILITY_ZONE}" > "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone"
echo "${CAPACITY_RESERVATION_ID}" > "${node_dir}/capacity-reservation-id"
echo "${AVAILABILITY_ZONE}" > "${node_dir}/availability-zone"

msg_info "Capacity guaranteed in ${AVAILABILITY_ZONE}"
else
Expand All @@ -90,7 +96,7 @@ if [[ "$EC2_INSTANCE_TYPE" =~ c[0-9]+[a-z]*.metal ]]; then
fi

echo -e "==== Creating network stack ===="
echo "${STACK_NAME}" >> "${SCRIPT_DIR}/../${SHARED_DIR}/to_be_removed_cf_stack_list"
echo "${STACK_NAME}" >> "${shared_dir}/to_be_removed_cf_stack_list"
aws --region "$REGION" cloudformation create-stack \
--stack-name "${NETWORK_STACK_NAME}" \
--template-body "file://${TEMPLATES_DIR}/network-stack.yaml" \
Expand All @@ -103,10 +109,10 @@ echo "Waiting for network stack..."
aws --region "${REGION}" cloudformation wait stack-create-complete \
--stack-name "${NETWORK_STACK_NAME}"

echo "${NETWORK_STACK_NAME}" > "${SCRIPT_DIR}/../${SHARED_DIR}/network_stack_name"
echo "${NETWORK_STACK_NAME}" > "${shared_dir}/network_stack_name"

echo -e "==== Creating compute stack ===="
echo "${NETWORK_STACK_NAME}" >> "${SCRIPT_DIR}/../${SHARED_DIR}/to_be_removed_cf_stack_list"
echo "${NETWORK_STACK_NAME}" >> "${shared_dir}/to_be_removed_cf_stack_list"
aws --region "$REGION" cloudformation create-stack \
--stack-name "${STACK_NAME}" \
--template-body "file://${TEMPLATES_DIR}/compute-stack.yaml" \
Expand All @@ -124,21 +130,21 @@ aws --region "$REGION" cloudformation create-stack \
echo "Waiting for compute stack..."
aws --region "${REGION}" cloudformation wait stack-create-complete --stack-name "${STACK_NAME}"

echo "$STACK_NAME" > "${SCRIPT_DIR}/../${SHARED_DIR}/rhel_host_stack_name"
echo "$STACK_NAME" > "${node_dir}/rhel_host_stack_name"
# shellcheck disable=SC2016
INSTANCE_ID="$(aws --region "${REGION}" cloudformation describe-stacks --stack-name "${STACK_NAME}" \
--query 'Stacks[].Outputs[?OutputKey == `InstanceId`].OutputValue' --output text)"
echo "Instance ${INSTANCE_ID}"
echo "${INSTANCE_ID}" > "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id"
echo "${INSTANCE_ID}" > "${node_dir}/aws-instance-id"
# shellcheck disable=SC2016
HOST_PUBLIC_IP="$(aws --region "${REGION}" cloudformation describe-stacks --stack-name "${STACK_NAME}" \
--query 'Stacks[].Outputs[?OutputKey == `PublicIp`].OutputValue' --output text)"
# shellcheck disable=SC2016
HOST_PRIVATE_IP="$(aws --region "${REGION}" cloudformation describe-stacks --stack-name "${STACK_NAME}" \
--query 'Stacks[].Outputs[?OutputKey == `PrivateIp`].OutputValue' --output text)"

echo "${HOST_PUBLIC_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/public_address"
echo "${HOST_PRIVATE_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/private_address"
echo "${HOST_PUBLIC_IP}" > "${node_dir}/public_address"
echo "${HOST_PRIVATE_IP}" > "${node_dir}/private_address"

echo "Waiting up to 10 mins for RHEL host to be up."
timeout 10m aws ec2 wait instance-status-ok --instance-id "${INSTANCE_ID}" --no-cli-pager
Expand Down Expand Up @@ -166,10 +172,10 @@ echo "updating sshconfig for aws-hypervisor"
copy_configure_script
set_aws_machine_hostname

scp "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}:/tmp/init_output.txt" "${SCRIPT_DIR}/../${SHARED_DIR}/init_output.txt"
scp "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}:/tmp/init_output.txt" "${node_dir}/init_output.txt"

# Mark stack creation as successful (prevents capacity cleanup on exit)
touch "${SCRIPT_DIR}/../${SHARED_DIR}/.stack-created"
touch "${node_dir}/.stack-created"

# Release capacity reservation now that instance is running
# The reservation served its purpose (guaranteeing capacity at creation time)
Expand All @@ -188,8 +194,8 @@ if [[ -n "${CAPACITY_RESERVATION_ID}" ]]; then
cancel_capacity_reservation "${CAPACITY_RESERVATION_ID}" "${REGION}"

# Clean up local files
rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id"
rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone"
rm -f "${node_dir}/capacity-reservation-id"
rm -f "${node_dir}/availability-zone"

msg_info "Capacity reservation released successfully"
fi
Expand Down
21 changes: 11 additions & 10 deletions deploy/aws-hypervisor/scripts/destroy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ set -o nounset
set -o errexit
set -o pipefail

instance_data_dir="${SCRIPT_DIR}/../${SHARED_DIR}"
public_address_file="${instance_data_dir}/public_address"
ssh_user_file="${instance_data_dir}/ssh_user"
network_stack_name_file="${instance_data_dir}/network_stack_name"
shared_dir="$(get_shared_dir)"
node_dir="$(get_node_dir)"
public_address_file="${node_dir}/public_address"
ssh_user_file="${node_dir}/ssh_user"
network_stack_name_file="${shared_dir}/network_stack_name"
NETWORK_STACK_NAME="${STACK_NAME}-network"

# Check if we have a deployed instance
Expand Down Expand Up @@ -51,14 +52,14 @@ else
fi

# Cancel capacity reservation if it exists
reservation_file="${instance_data_dir}/capacity-reservation-id"
reservation_file="${node_dir}/capacity-reservation-id"
if [[ -f "${reservation_file}" ]]; then
reservation_id=$(cat "${reservation_file}")
if [[ -n "${reservation_id}" && "${reservation_id}" != "null" ]]; then
cancel_capacity_reservation "${reservation_id}" "${REGION}"
fi
rm -f "${reservation_file}"
rm -f "${instance_data_dir}/availability-zone"
rm -f "${node_dir}/availability-zone"
fi

# Delete compute stack first (CF prevents deleting network while its exports are imported)
Expand All @@ -84,10 +85,10 @@ if aws --region "$REGION" cloudformation describe-stacks --stack-name "${NETWORK
fi

# Clean up instance data directory
if [[ -d "$instance_data_dir" ]]; then
if [[ -d "$shared_dir" ]]; then
echo "Cleaning up instance data..."
rm -rf "${instance_data_dir:?}/"*
rm -rf "${shared_dir:?}/"*
echo "Stacks deleted successfully." > "${shared_dir}/.done"
fi

echo "Stacks deleted successfully." > "${instance_data_dir}/.done"
echo "Destroy operation completed successfully."
echo "Destroy operation completed successfully."
5 changes: 3 additions & 2 deletions deploy/aws-hypervisor/scripts/force-stop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,13 @@ wait_for_instance_stopped() {
}

# Check if the instance exists and get its ID
if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then
node_dir="$(get_node_dir)"
if [[ ! -f "${node_dir}/aws-instance-id" ]]; then
echo "Error: No instance found. Please run 'make deploy' first."
exit 1
fi

INSTANCE_ID=$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id")
INSTANCE_ID=$(cat "${node_dir}/aws-instance-id")

# Get current instance state
INSTANCE_STATE=$(aws --region "${REGION}" ec2 describe-instances --instance-ids "${INSTANCE_ID}" --query 'Reservations[0].Instances[0].State.Name' --output text --no-cli-pager)
Expand Down
5 changes: 3 additions & 2 deletions deploy/aws-hypervisor/scripts/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ SCRIPT_DIR=$(dirname "$0")
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/common.sh"

instance_ip="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")"
instance_host="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")"
node_dir="$(get_node_dir)"
instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")"
instance_host="$(cat "${node_dir}/public_address")"

# Add the host key to known_hosts to avoid prompts while maintaining security
echo "Adding host key for $instance_host to known_hosts..."
Expand Down
10 changes: 6 additions & 4 deletions deploy/aws-hypervisor/scripts/inventory.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,22 @@ INVENTORY_DIR="${SCRIPT_DIR}/../../openshift-clusters"
INVENTORY_FILE="${INVENTORY_DIR}/inventory.ini"
INVENTORY_TEMPLATE="${INVENTORY_DIR}/inventory.ini.sample"

node_dir="$(get_node_dir)"

# Check if instance data exists
if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/public_address" ]]; then
if [[ ! -f "${node_dir}/public_address" ]]; then
echo "Error: No public address found. Please run 'make deploy' first."
exit 1
fi

if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" ]]; then
if [[ ! -f "${node_dir}/ssh_user" ]]; then
echo "Error: No ssh user found. Please run 'make deploy' first."
exit 1
fi

# Read instance data
PUBLIC_IP="$(< "${SCRIPT_DIR}/../${SHARED_DIR}/public_address" tr -d '\n')"
SSH_USER="$(< "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" tr -d '\n')"
PUBLIC_IP="$(< "${node_dir}/public_address" tr -d '\n')"
SSH_USER="$(< "${node_dir}/ssh_user" tr -d '\n')"

echo "Updating inventory with:"
echo " User: ${SSH_USER}"
Expand Down
9 changes: 5 additions & 4 deletions deploy/aws-hypervisor/scripts/print_instance_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ SCRIPT_DIR=$(dirname "$0")
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/common.sh"

echo "Stack: $(cat "${SCRIPT_DIR}/../${SHARED_DIR}/rhel_host_stack_name")"
echo "Host: $(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")"
echo "User: $(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")"
echo "Cockpit URL: http://$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address"):9090"
node_dir="$(get_node_dir)"
echo "Stack: $(cat "${node_dir}/rhel_host_stack_name")"
echo "Host: $(cat "${node_dir}/public_address")"
echo "User: $(cat "${node_dir}/ssh_user")"
echo "Cockpit URL: http://$(cat "${node_dir}/public_address"):9090"
3 changes: 2 additions & 1 deletion deploy/aws-hypervisor/scripts/ssh.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ SCRIPT_DIR=$(dirname "$0")
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/common.sh"

instance_ip="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")"
node_dir="$(get_node_dir)"
instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")"

# Use the private key corresponding to the configured public key
if [[ -n "${SSH_PUBLIC_KEY}" ]]; then
Expand Down
11 changes: 6 additions & 5 deletions deploy/aws-hypervisor/scripts/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ function ensure_open_capacity_preference() {
}

# Check if the instance exists and get its ID
if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then
node_dir="$(get_node_dir)"
if [[ ! -f "${node_dir}/aws-instance-id" ]]; then
echo "Error: No instance found. Please run 'make deploy' first."
exit 1
fi

INSTANCE_ID=$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id")
INSTANCE_ID=$(cat "${node_dir}/aws-instance-id")
echo "Starting instance ${INSTANCE_ID}..."

# Check current instance state
Expand Down Expand Up @@ -84,8 +85,8 @@ esac
HOST_PUBLIC_IP=$(aws --region "${REGION}" ec2 describe-instances --instance-ids "${INSTANCE_ID}" --query 'Reservations[0].Instances[0].PublicIpAddress' --output text --no-cli-pager)
HOST_PRIVATE_IP=$(aws --region "${REGION}" ec2 describe-instances --instance-ids "${INSTANCE_ID}" --query 'Reservations[0].Instances[0].PrivateIpAddress' --output text --no-cli-pager)

echo "${HOST_PUBLIC_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/public_address"
echo "${HOST_PRIVATE_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/private_address"
echo "${HOST_PUBLIC_IP}" > "${node_dir}/public_address"
echo "${HOST_PRIVATE_IP}" > "${node_dir}/private_address"

echo "Instance ${INSTANCE_ID} is now running."
echo "Public IP: ${HOST_PUBLIC_IP}"
Expand All @@ -98,7 +99,7 @@ echo "Updating SSH config for aws-hypervisor..."
# Check and restart the proxy container for immediate proxy capabilities
echo "Checking proxy container status..."
set +e # Allow commands to fail for proxy container checks
ssh -o ConnectTimeout=10 "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF'
ssh -o ConnectTimeout=10 "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF'
echo "Checking external-squid proxy container..."

# Check if the container exists and get its status
Expand Down
9 changes: 5 additions & 4 deletions deploy/aws-hypervisor/scripts/stop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@ wait_for_instance_stopped() {
}

# Check if the instance exists and get its ID
if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then
node_dir="$(get_node_dir)"
if [[ ! -f "${node_dir}/aws-instance-id" ]]; then
echo "Error: No instance found. Please run 'make deploy' first."
exit 1
fi

INSTANCE_ID=$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id")
INSTANCE_ID=$(cat "${node_dir}/aws-instance-id")
echo "Stopping instance ${INSTANCE_ID}..."

# Check current instance state
Expand All @@ -67,7 +68,7 @@ if [[ "${INSTANCE_STATE}" == "running" ]]; then

# Check if there are running dev-scripts deployments
set +e # Allow commands to fail
ssh -o ConnectTimeout=10 "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null
ssh -o ConnectTimeout=10 "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null
DEV_SCRIPTS_EXISTS=$?
set -e

Expand All @@ -76,7 +77,7 @@ if [[ "${INSTANCE_STATE}" == "running" ]]; then

# Check for running VMs
set +e # Allow commands to fail
RUNNING_VMS=$(ssh -o ConnectTimeout=10 "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF'
RUNNING_VMS=$(ssh -o ConnectTimeout=10 "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF'
set -e
cd ~/openshift-metal3/dev-scripts

Expand Down
Loading