diff --git a/deploy/aws-hypervisor/instance.env.template b/deploy/aws-hypervisor/instance.env.template index 6fa028a..19d7917 100644 --- a/deploy/aws-hypervisor/instance.env.template +++ b/deploy/aws-hypervisor/instance.env.template @@ -1,5 +1,8 @@ export SHARED_DIR="instance-data" +# Node identifier for multi-node deployments (default: node-0) +# export NODE_ID="node-0" + export AWS_PROFILE=microshift-dev export STACK_NAME=${USER}-dev export RHEL_HOST_ARCHITECTURE=x86_64 diff --git a/deploy/aws-hypervisor/scripts/common.sh b/deploy/aws-hypervisor/scripts/common.sh index 8096f89..0ffa5bf 100755 --- a/deploy/aws-hypervisor/scripts/common.sh +++ b/deploy/aws-hypervisor/scripts/common.sh @@ -14,6 +14,22 @@ export RHEL_VERSION="${RHEL_VERSION:-9.6}" export ENABLE_CAPACITY_RESERVATION="${ENABLE_CAPACITY_RESERVATION:-true}" export CAPACITY_RESERVATION_DURATION_MINUTES="${CAPACITY_RESERVATION_DURATION_MINUTES:-60}" +export NODE_ID="${NODE_ID:-node-0}" + +get_shared_dir() { + echo "${SCRIPT_DIR}/../${SHARED_DIR}" +} + +get_node_dir() { + local node_dir="${SCRIPT_DIR}/../${SHARED_DIR}/${NODE_ID}" + # Fallback for unported scripts: flat layout means pre-subdir deployment + if [[ ! -d "$node_dir" && -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then + echo "${SCRIPT_DIR}/../${SHARED_DIR}" + return + fi + echo "$node_dir" +} + readonly COLOR_RED='\033[0;31m' readonly COLOR_YELLOW='\033[0;33m' readonly COLOR_BLUE='\033[0;34m' @@ -65,8 +81,10 @@ function get_rhel_ami() { } function copy_configure_script() { + local node_dir + node_dir="$(get_node_dir)" local instance_ip - instance_ip="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")" + instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")" msg_info "copying over config ${SCRIPT_DIR}/configure.sh and making it executable" scp "${SCRIPT_DIR}/configure.sh" "$instance_ip:~/configure.sh" ssh "$instance_ip" 'chmod +x ~/configure.sh' @@ -74,8 +92,10 @@ function copy_configure_script() { # shellcheck disable=SC2029 # we want interpolation for the stack name in the ssh command function set_aws_machine_hostname() { + local node_dir + node_dir="$(get_node_dir)" local instance_ip - instance_ip="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")" + instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")" msg_info "setting machine hostname to aws-${STACK_NAME}" ssh "$instance_ip" "sudo hostnamectl set-hostname aws-$STACK_NAME" } diff --git a/deploy/aws-hypervisor/scripts/create.sh b/deploy/aws-hypervisor/scripts/create.sh index 38a85f1..ac2804b 100755 --- a/deploy/aws-hypervisor/scripts/create.sh +++ b/deploy/aws-hypervisor/scripts/create.sh @@ -14,19 +14,25 @@ trap 'save_stack_events; cleanup_capacity_on_error' EXIT TERM INT # Cleanup function for capacity reservation on error function cleanup_capacity_on_error() { set +o errexit - local reservation_file="${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id" + local ndir + ndir="$(get_node_dir)" + local reservation_file="${ndir}/capacity-reservation-id" # Only cleanup if stack creation didn't complete successfully - if [[ -f "${reservation_file}" && ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/.stack-created" ]]; then + if [[ -f "${reservation_file}" && ! -f "${ndir}/.stack-created" ]]; then local reservation_id reservation_id=$(cat "${reservation_file}") cancel_capacity_reservation "${reservation_id}" "${REGION}" rm -f "${reservation_file}" - rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone" + rm -f "${ndir}/availability-zone" fi set -o errexit } -mkdir -p "${SCRIPT_DIR}/../${SHARED_DIR}" +mkdir -p "$(get_shared_dir)" +mkdir -p "$(get_node_dir)" + +node_dir="$(get_node_dir)" +shared_dir="$(get_shared_dir)" NETWORK_STACK_NAME="${STACK_NAME}-network" TEMPLATES_DIR="${SCRIPT_DIR}/../templates" @@ -36,10 +42,10 @@ function save_stack_events() set +o errexit aws --region "${REGION}" cloudformation describe-stack-events \ --stack-name "${STACK_NAME}" --output json \ - > "${SCRIPT_DIR}/../${SHARED_DIR}/stack-events-${STACK_NAME}.json" 2>/dev/null + > "$(get_node_dir)/stack-events-${STACK_NAME}.json" 2>/dev/null aws --region "${REGION}" cloudformation describe-stack-events \ --stack-name "${NETWORK_STACK_NAME}" --output json \ - > "${SCRIPT_DIR}/../${SHARED_DIR}/stack-events-${NETWORK_STACK_NAME}.json" 2>/dev/null + > "$(get_shared_dir)/stack-events-${NETWORK_STACK_NAME}.json" 2>/dev/null set -o errexit } @@ -57,7 +63,7 @@ if [[ -z "${RHEL_HOST_AMI}" ]]; then exit 1 fi -echo "ec2-user" > "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" +echo "ec2-user" > "${node_dir}/ssh_user" echo -e "AMI ID: $RHEL_HOST_AMI" echo -e "Machine Type: $EC2_INSTANCE_TYPE" @@ -72,8 +78,8 @@ if [[ "${ENABLE_CAPACITY_RESERVATION}" == "true" ]]; then AVAILABILITY_ZONE=$(echo "${reservation_result}" | awk '{print $2}') # Store for cleanup - echo "${CAPACITY_RESERVATION_ID}" > "${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id" - echo "${AVAILABILITY_ZONE}" > "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone" + echo "${CAPACITY_RESERVATION_ID}" > "${node_dir}/capacity-reservation-id" + echo "${AVAILABILITY_ZONE}" > "${node_dir}/availability-zone" msg_info "Capacity guaranteed in ${AVAILABILITY_ZONE}" else @@ -90,7 +96,7 @@ if [[ "$EC2_INSTANCE_TYPE" =~ c[0-9]+[a-z]*.metal ]]; then fi echo -e "==== Creating network stack ====" -echo "${STACK_NAME}" >> "${SCRIPT_DIR}/../${SHARED_DIR}/to_be_removed_cf_stack_list" +echo "${STACK_NAME}" >> "${shared_dir}/to_be_removed_cf_stack_list" aws --region "$REGION" cloudformation create-stack \ --stack-name "${NETWORK_STACK_NAME}" \ --template-body "file://${TEMPLATES_DIR}/network-stack.yaml" \ @@ -103,10 +109,10 @@ echo "Waiting for network stack..." aws --region "${REGION}" cloudformation wait stack-create-complete \ --stack-name "${NETWORK_STACK_NAME}" -echo "${NETWORK_STACK_NAME}" > "${SCRIPT_DIR}/../${SHARED_DIR}/network_stack_name" +echo "${NETWORK_STACK_NAME}" > "${shared_dir}/network_stack_name" echo -e "==== Creating compute stack ====" -echo "${NETWORK_STACK_NAME}" >> "${SCRIPT_DIR}/../${SHARED_DIR}/to_be_removed_cf_stack_list" +echo "${NETWORK_STACK_NAME}" >> "${shared_dir}/to_be_removed_cf_stack_list" aws --region "$REGION" cloudformation create-stack \ --stack-name "${STACK_NAME}" \ --template-body "file://${TEMPLATES_DIR}/compute-stack.yaml" \ @@ -124,12 +130,12 @@ aws --region "$REGION" cloudformation create-stack \ echo "Waiting for compute stack..." aws --region "${REGION}" cloudformation wait stack-create-complete --stack-name "${STACK_NAME}" -echo "$STACK_NAME" > "${SCRIPT_DIR}/../${SHARED_DIR}/rhel_host_stack_name" +echo "$STACK_NAME" > "${node_dir}/rhel_host_stack_name" # shellcheck disable=SC2016 INSTANCE_ID="$(aws --region "${REGION}" cloudformation describe-stacks --stack-name "${STACK_NAME}" \ --query 'Stacks[].Outputs[?OutputKey == `InstanceId`].OutputValue' --output text)" echo "Instance ${INSTANCE_ID}" -echo "${INSTANCE_ID}" > "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" +echo "${INSTANCE_ID}" > "${node_dir}/aws-instance-id" # shellcheck disable=SC2016 HOST_PUBLIC_IP="$(aws --region "${REGION}" cloudformation describe-stacks --stack-name "${STACK_NAME}" \ --query 'Stacks[].Outputs[?OutputKey == `PublicIp`].OutputValue' --output text)" @@ -137,8 +143,8 @@ HOST_PUBLIC_IP="$(aws --region "${REGION}" cloudformation describe-stacks --stac HOST_PRIVATE_IP="$(aws --region "${REGION}" cloudformation describe-stacks --stack-name "${STACK_NAME}" \ --query 'Stacks[].Outputs[?OutputKey == `PrivateIp`].OutputValue' --output text)" -echo "${HOST_PUBLIC_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/public_address" -echo "${HOST_PRIVATE_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/private_address" +echo "${HOST_PUBLIC_IP}" > "${node_dir}/public_address" +echo "${HOST_PRIVATE_IP}" > "${node_dir}/private_address" echo "Waiting up to 10 mins for RHEL host to be up." timeout 10m aws ec2 wait instance-status-ok --instance-id "${INSTANCE_ID}" --no-cli-pager @@ -166,10 +172,10 @@ echo "updating sshconfig for aws-hypervisor" copy_configure_script set_aws_machine_hostname -scp "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}:/tmp/init_output.txt" "${SCRIPT_DIR}/../${SHARED_DIR}/init_output.txt" +scp "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}:/tmp/init_output.txt" "${node_dir}/init_output.txt" # Mark stack creation as successful (prevents capacity cleanup on exit) -touch "${SCRIPT_DIR}/../${SHARED_DIR}/.stack-created" +touch "${node_dir}/.stack-created" # Release capacity reservation now that instance is running # The reservation served its purpose (guaranteeing capacity at creation time) @@ -188,8 +194,8 @@ if [[ -n "${CAPACITY_RESERVATION_ID}" ]]; then cancel_capacity_reservation "${CAPACITY_RESERVATION_ID}" "${REGION}" # Clean up local files - rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/capacity-reservation-id" - rm -f "${SCRIPT_DIR}/../${SHARED_DIR}/availability-zone" + rm -f "${node_dir}/capacity-reservation-id" + rm -f "${node_dir}/availability-zone" msg_info "Capacity reservation released successfully" fi diff --git a/deploy/aws-hypervisor/scripts/destroy.sh b/deploy/aws-hypervisor/scripts/destroy.sh index ca70c4f..84b3c28 100755 --- a/deploy/aws-hypervisor/scripts/destroy.sh +++ b/deploy/aws-hypervisor/scripts/destroy.sh @@ -8,10 +8,11 @@ set -o nounset set -o errexit set -o pipefail -instance_data_dir="${SCRIPT_DIR}/../${SHARED_DIR}" -public_address_file="${instance_data_dir}/public_address" -ssh_user_file="${instance_data_dir}/ssh_user" -network_stack_name_file="${instance_data_dir}/network_stack_name" +shared_dir="$(get_shared_dir)" +node_dir="$(get_node_dir)" +public_address_file="${node_dir}/public_address" +ssh_user_file="${node_dir}/ssh_user" +network_stack_name_file="${shared_dir}/network_stack_name" NETWORK_STACK_NAME="${STACK_NAME}-network" # Check if we have a deployed instance @@ -51,14 +52,14 @@ else fi # Cancel capacity reservation if it exists -reservation_file="${instance_data_dir}/capacity-reservation-id" +reservation_file="${node_dir}/capacity-reservation-id" if [[ -f "${reservation_file}" ]]; then reservation_id=$(cat "${reservation_file}") if [[ -n "${reservation_id}" && "${reservation_id}" != "null" ]]; then cancel_capacity_reservation "${reservation_id}" "${REGION}" fi rm -f "${reservation_file}" - rm -f "${instance_data_dir}/availability-zone" + rm -f "${node_dir}/availability-zone" fi # Delete compute stack first (CF prevents deleting network while its exports are imported) @@ -84,10 +85,10 @@ if aws --region "$REGION" cloudformation describe-stacks --stack-name "${NETWORK fi # Clean up instance data directory -if [[ -d "$instance_data_dir" ]]; then +if [[ -d "$shared_dir" ]]; then echo "Cleaning up instance data..." - rm -rf "${instance_data_dir:?}/"* + rm -rf "${shared_dir:?}/"* + echo "Stacks deleted successfully." > "${shared_dir}/.done" fi -echo "Stacks deleted successfully." > "${instance_data_dir}/.done" -echo "Destroy operation completed successfully." \ No newline at end of file +echo "Destroy operation completed successfully." diff --git a/deploy/aws-hypervisor/scripts/force-stop.sh b/deploy/aws-hypervisor/scripts/force-stop.sh index c026e77..0a7e099 100755 --- a/deploy/aws-hypervisor/scripts/force-stop.sh +++ b/deploy/aws-hypervisor/scripts/force-stop.sh @@ -44,12 +44,13 @@ wait_for_instance_stopped() { } # Check if the instance exists and get its ID -if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then +node_dir="$(get_node_dir)" +if [[ ! -f "${node_dir}/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi -INSTANCE_ID=$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id") +INSTANCE_ID=$(cat "${node_dir}/aws-instance-id") # Get current instance state INSTANCE_STATE=$(aws --region "${REGION}" ec2 describe-instances --instance-ids "${INSTANCE_ID}" --query 'Reservations[0].Instances[0].State.Name' --output text --no-cli-pager) diff --git a/deploy/aws-hypervisor/scripts/init.sh b/deploy/aws-hypervisor/scripts/init.sh index 332efb8..a7af87b 100755 --- a/deploy/aws-hypervisor/scripts/init.sh +++ b/deploy/aws-hypervisor/scripts/init.sh @@ -3,8 +3,9 @@ SCRIPT_DIR=$(dirname "$0") # shellcheck source=/dev/null source "${SCRIPT_DIR}/common.sh" -instance_ip="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")" -instance_host="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")" +node_dir="$(get_node_dir)" +instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")" +instance_host="$(cat "${node_dir}/public_address")" # Add the host key to known_hosts to avoid prompts while maintaining security echo "Adding host key for $instance_host to known_hosts..." diff --git a/deploy/aws-hypervisor/scripts/inventory.sh b/deploy/aws-hypervisor/scripts/inventory.sh index b186ab6..9f8169f 100755 --- a/deploy/aws-hypervisor/scripts/inventory.sh +++ b/deploy/aws-hypervisor/scripts/inventory.sh @@ -13,20 +13,22 @@ INVENTORY_DIR="${SCRIPT_DIR}/../../openshift-clusters" INVENTORY_FILE="${INVENTORY_DIR}/inventory.ini" INVENTORY_TEMPLATE="${INVENTORY_DIR}/inventory.ini.sample" +node_dir="$(get_node_dir)" + # Check if instance data exists -if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/public_address" ]]; then +if [[ ! -f "${node_dir}/public_address" ]]; then echo "Error: No public address found. Please run 'make deploy' first." exit 1 fi -if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" ]]; then +if [[ ! -f "${node_dir}/ssh_user" ]]; then echo "Error: No ssh user found. Please run 'make deploy' first." exit 1 fi # Read instance data -PUBLIC_IP="$(< "${SCRIPT_DIR}/../${SHARED_DIR}/public_address" tr -d '\n')" -SSH_USER="$(< "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user" tr -d '\n')" +PUBLIC_IP="$(< "${node_dir}/public_address" tr -d '\n')" +SSH_USER="$(< "${node_dir}/ssh_user" tr -d '\n')" echo "Updating inventory with:" echo " User: ${SSH_USER}" diff --git a/deploy/aws-hypervisor/scripts/print_instance_data.sh b/deploy/aws-hypervisor/scripts/print_instance_data.sh index 7f2323c..8ddce8a 100755 --- a/deploy/aws-hypervisor/scripts/print_instance_data.sh +++ b/deploy/aws-hypervisor/scripts/print_instance_data.sh @@ -3,7 +3,8 @@ SCRIPT_DIR=$(dirname "$0") # shellcheck source=/dev/null source "${SCRIPT_DIR}/common.sh" -echo "Stack: $(cat "${SCRIPT_DIR}/../${SHARED_DIR}/rhel_host_stack_name")" -echo "Host: $(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")" -echo "User: $(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")" -echo "Cockpit URL: http://$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address"):9090" +node_dir="$(get_node_dir)" +echo "Stack: $(cat "${node_dir}/rhel_host_stack_name")" +echo "Host: $(cat "${node_dir}/public_address")" +echo "User: $(cat "${node_dir}/ssh_user")" +echo "Cockpit URL: http://$(cat "${node_dir}/public_address"):9090" diff --git a/deploy/aws-hypervisor/scripts/ssh.sh b/deploy/aws-hypervisor/scripts/ssh.sh index 30a94cf..a1411a0 100755 --- a/deploy/aws-hypervisor/scripts/ssh.sh +++ b/deploy/aws-hypervisor/scripts/ssh.sh @@ -3,7 +3,8 @@ SCRIPT_DIR=$(dirname "$0") # shellcheck source=/dev/null source "${SCRIPT_DIR}/common.sh" -instance_ip="$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/public_address")" +node_dir="$(get_node_dir)" +instance_ip="$(cat "${node_dir}/ssh_user")@$(cat "${node_dir}/public_address")" # Use the private key corresponding to the configured public key if [[ -n "${SSH_PUBLIC_KEY}" ]]; then diff --git a/deploy/aws-hypervisor/scripts/start.sh b/deploy/aws-hypervisor/scripts/start.sh index 3f35931..4e56928 100755 --- a/deploy/aws-hypervisor/scripts/start.sh +++ b/deploy/aws-hypervisor/scripts/start.sh @@ -31,12 +31,13 @@ function ensure_open_capacity_preference() { } # Check if the instance exists and get its ID -if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then +node_dir="$(get_node_dir)" +if [[ ! -f "${node_dir}/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi -INSTANCE_ID=$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id") +INSTANCE_ID=$(cat "${node_dir}/aws-instance-id") echo "Starting instance ${INSTANCE_ID}..." # Check current instance state @@ -84,8 +85,8 @@ esac HOST_PUBLIC_IP=$(aws --region "${REGION}" ec2 describe-instances --instance-ids "${INSTANCE_ID}" --query 'Reservations[0].Instances[0].PublicIpAddress' --output text --no-cli-pager) HOST_PRIVATE_IP=$(aws --region "${REGION}" ec2 describe-instances --instance-ids "${INSTANCE_ID}" --query 'Reservations[0].Instances[0].PrivateIpAddress' --output text --no-cli-pager) -echo "${HOST_PUBLIC_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/public_address" -echo "${HOST_PRIVATE_IP}" > "${SCRIPT_DIR}/../${SHARED_DIR}/private_address" +echo "${HOST_PUBLIC_IP}" > "${node_dir}/public_address" +echo "${HOST_PRIVATE_IP}" > "${node_dir}/private_address" echo "Instance ${INSTANCE_ID} is now running." echo "Public IP: ${HOST_PUBLIC_IP}" @@ -98,7 +99,7 @@ echo "Updating SSH config for aws-hypervisor..." # Check and restart the proxy container for immediate proxy capabilities echo "Checking proxy container status..." set +e # Allow commands to fail for proxy container checks -ssh -o ConnectTimeout=10 "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' +ssh -o ConnectTimeout=10 "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' echo "Checking external-squid proxy container..." # Check if the container exists and get its status diff --git a/deploy/aws-hypervisor/scripts/stop.sh b/deploy/aws-hypervisor/scripts/stop.sh index d997906..d889db7 100755 --- a/deploy/aws-hypervisor/scripts/stop.sh +++ b/deploy/aws-hypervisor/scripts/stop.sh @@ -43,12 +43,13 @@ wait_for_instance_stopped() { } # Check if the instance exists and get its ID -if [[ ! -f "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id" ]]; then +node_dir="$(get_node_dir)" +if [[ ! -f "${node_dir}/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi -INSTANCE_ID=$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/aws-instance-id") +INSTANCE_ID=$(cat "${node_dir}/aws-instance-id") echo "Stopping instance ${INSTANCE_ID}..." # Check current instance state @@ -67,7 +68,7 @@ if [[ "${INSTANCE_STATE}" == "running" ]]; then # Check if there are running dev-scripts deployments set +e # Allow commands to fail - ssh -o ConnectTimeout=10 "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null + ssh -o ConnectTimeout=10 "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null DEV_SCRIPTS_EXISTS=$? set -e @@ -76,7 +77,7 @@ if [[ "${INSTANCE_STATE}" == "running" ]]; then # Check for running VMs set +e # Allow commands to fail - RUNNING_VMS=$(ssh -o ConnectTimeout=10 "$(cat "${SCRIPT_DIR}/../${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' + RUNNING_VMS=$(ssh -o ConnectTimeout=10 "$(cat "${node_dir}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' set -e cd ~/openshift-metal3/dev-scripts diff --git a/deploy/openshift-clusters/scripts/clean-spoke.sh b/deploy/openshift-clusters/scripts/clean-spoke.sh index 9d3ee73..a8d7a7e 100755 --- a/deploy/openshift-clusters/scripts/clean-spoke.sh +++ b/deploy/openshift-clusters/scripts/clean-spoke.sh @@ -15,7 +15,8 @@ set -o pipefail source "${DEPLOY_DIR}/aws-hypervisor/instance.env" # Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then +if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/aws-instance-id" ]] \ +&& [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi @@ -38,8 +39,10 @@ fi SPOKE_NETWORK="${SPOKE_CLUSTER_NAME}" # Get SSH connection info -INSTANCE_IP=$(cat "${DEPLOY_DIR}/aws-hypervisor/instance-data/public_address" 2>/dev/null) -SSH_USER=$(cat "${DEPLOY_DIR}/aws-hypervisor/instance-data/ssh_user" 2>/dev/null || echo "ec2-user") +INSTANCE_IP=$(cat "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/public_address" 2>/dev/null \ + || cat "${DEPLOY_DIR}/aws-hypervisor/instance-data/public_address" 2>/dev/null || echo "") +SSH_USER=$(cat "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/ssh_user" 2>/dev/null \ + || cat "${DEPLOY_DIR}/aws-hypervisor/instance-data/ssh_user" 2>/dev/null || echo "ec2-user") if [[ -z "${INSTANCE_IP}" ]]; then echo "Error: Could not determine instance IP." diff --git a/deploy/openshift-clusters/scripts/clean.sh b/deploy/openshift-clusters/scripts/clean.sh index 783845b..a8f21a5 100755 --- a/deploy/openshift-clusters/scripts/clean.sh +++ b/deploy/openshift-clusters/scripts/clean.sh @@ -10,7 +10,8 @@ set -o errexit set -o pipefail # Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then +if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/aws-instance-id" ]] \ +&& [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi diff --git a/deploy/openshift-clusters/scripts/deploy-cluster.sh b/deploy/openshift-clusters/scripts/deploy-cluster.sh index 72536c1..0dd0fe9 100755 --- a/deploy/openshift-clusters/scripts/deploy-cluster.sh +++ b/deploy/openshift-clusters/scripts/deploy-cluster.sh @@ -68,7 +68,8 @@ if [[ "${METHOD}" != "ipi" && "${METHOD}" != "agent" && "${METHOD}" != "kcli" ]] fi # Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then +if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/aws-instance-id" ]] \ +&& [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi diff --git a/deploy/openshift-clusters/scripts/deploy-fencing-assisted.sh b/deploy/openshift-clusters/scripts/deploy-fencing-assisted.sh index 120d13b..87c8ee5 100755 --- a/deploy/openshift-clusters/scripts/deploy-fencing-assisted.sh +++ b/deploy/openshift-clusters/scripts/deploy-fencing-assisted.sh @@ -10,7 +10,8 @@ set -o errexit set -o pipefail # Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then +if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/aws-instance-id" ]] \ +&& [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi diff --git a/deploy/openshift-clusters/scripts/full-clean.sh b/deploy/openshift-clusters/scripts/full-clean.sh index c630b96..e8eab3b 100755 --- a/deploy/openshift-clusters/scripts/full-clean.sh +++ b/deploy/openshift-clusters/scripts/full-clean.sh @@ -10,7 +10,8 @@ set -o errexit set -o pipefail # Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then +if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/aws-instance-id" ]] \ +&& [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi diff --git a/deploy/openshift-clusters/scripts/patch-nodes.sh b/deploy/openshift-clusters/scripts/patch-nodes.sh index bd4a5a5..a2d39af 100755 --- a/deploy/openshift-clusters/scripts/patch-nodes.sh +++ b/deploy/openshift-clusters/scripts/patch-nodes.sh @@ -10,7 +10,8 @@ set -o errexit set -o pipefail # Check if instance data exists -if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then +if [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/node-0/aws-instance-id" ]] \ +&& [[ ! -f "${DEPLOY_DIR}/aws-hypervisor/instance-data/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi diff --git a/deploy/openshift-clusters/scripts/redeploy-cluster.sh b/deploy/openshift-clusters/scripts/redeploy-cluster.sh index 02a8db1..4cc3fa5 100755 --- a/deploy/openshift-clusters/scripts/redeploy-cluster.sh +++ b/deploy/openshift-clusters/scripts/redeploy-cluster.sh @@ -17,6 +17,12 @@ set -o nounset set -o errexit set -o pipefail +# Resolve per-node directory (multi-node layout) with fallback for legacy deployments +NODE_DIR="${SHARED_DIR}/node-0" +if [[ ! -d "${NODE_DIR}" ]]; then + NODE_DIR="${SHARED_DIR}" +fi + # Function: Check if VM infrastructure needs to change and determine cleanup strategy check_vm_infrastructure_change() { local topology="$1" @@ -105,12 +111,12 @@ check_vm_infrastructure_change() { # Note: Cluster state is now managed by the Ansible playbook # Check if the instance exists and get its ID -if [[ ! -f "${SHARED_DIR}/aws-instance-id" ]]; then +if [[ ! -f "${NODE_DIR}/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi -INSTANCE_ID=$(cat "${SHARED_DIR}/aws-instance-id") +INSTANCE_ID=$(cat "${NODE_DIR}/aws-instance-id") echo "Redeploying OpenShift cluster on instance ${INSTANCE_ID}..." # Check current instance state diff --git a/deploy/openshift-clusters/scripts/shutdown-cluster.sh b/deploy/openshift-clusters/scripts/shutdown-cluster.sh index 16c2a91..5b69027 100755 --- a/deploy/openshift-clusters/scripts/shutdown-cluster.sh +++ b/deploy/openshift-clusters/scripts/shutdown-cluster.sh @@ -17,13 +17,19 @@ set -o nounset set -o errexit set -o pipefail +# Resolve per-node directory (multi-node layout) with fallback for legacy deployments +NODE_DIR="${SHARED_DIR}/node-0" +if [[ ! -d "${NODE_DIR}" ]]; then + NODE_DIR="${SHARED_DIR}" +fi + # Check if the instance exists and get its ID -if [[ ! -f "${SHARED_DIR}/aws-instance-id" ]]; then +if [[ ! -f "${NODE_DIR}/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi -INSTANCE_ID=$(cat "${SHARED_DIR}/aws-instance-id") +INSTANCE_ID=$(cat "${NODE_DIR}/aws-instance-id") echo "Shutting down OpenShift cluster VMs on instance ${INSTANCE_ID}..." # Check current instance state @@ -47,7 +53,7 @@ echo "Connecting to instance at ${HOST_PUBLIC_IP}..." # Check if dev-scripts directory exists set +e # Allow commands to fail -ssh -o ConnectTimeout=10 "$(cat "${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null +ssh -o ConnectTimeout=10 "$(cat "${NODE_DIR}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null DEV_SCRIPTS_EXISTS=$? set -e @@ -74,10 +80,10 @@ else fi # Perform orderly shutdown of the cluster VMs -ssh "$(cat "${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' +ssh "$(cat "${NODE_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' set -e cd ~/openshift-metal3/dev-scripts - + # Source the config to get cluster name source common.sh diff --git a/deploy/openshift-clusters/scripts/startup-cluster.sh b/deploy/openshift-clusters/scripts/startup-cluster.sh index 95adb52..fb08df9 100755 --- a/deploy/openshift-clusters/scripts/startup-cluster.sh +++ b/deploy/openshift-clusters/scripts/startup-cluster.sh @@ -17,13 +17,19 @@ set -o nounset set -o errexit set -o pipefail +# Resolve per-node directory (multi-node layout) with fallback for legacy deployments +NODE_DIR="${SHARED_DIR}/node-0" +if [[ ! -d "${NODE_DIR}" ]]; then + NODE_DIR="${SHARED_DIR}" +fi + # Check if the instance exists and get its ID -if [[ ! -f "${SHARED_DIR}/aws-instance-id" ]]; then +if [[ ! -f "${NODE_DIR}/aws-instance-id" ]]; then echo "Error: No instance found. Please run 'make deploy' first." exit 1 fi -INSTANCE_ID=$(cat "${SHARED_DIR}/aws-instance-id") +INSTANCE_ID=$(cat "${NODE_DIR}/aws-instance-id") echo "Starting up OpenShift cluster VMs on instance ${INSTANCE_ID}..." # Check cluster topology from state file @@ -55,7 +61,7 @@ echo "Connecting to instance at ${HOST_PUBLIC_IP}..." # Check if dev-scripts directory exists set +e # Allow commands to fail -ssh -o ConnectTimeout=10 "$(cat "${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null +ssh -o ConnectTimeout=10 "$(cat "${NODE_DIR}/ssh_user")@${HOST_PUBLIC_IP}" "test -d ~/openshift-metal3" 2>/dev/null DEV_SCRIPTS_EXISTS=$? set -e @@ -68,7 +74,7 @@ fi echo "Found dev-scripts directory. Starting up OpenShift cluster VMs..." # Start the cluster VMs remotely -ssh "$(cat "${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' +ssh "$(cat "${NODE_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' set -e cd ~/openshift-metal3/dev-scripts @@ -170,7 +176,7 @@ if [[ "${CLUSTER_TOPOLOGY}" == "fencing" ]]; then echo "" echo "Fencing topology detected. Ensuring sushy-tools BMC simulator is running..." - ssh "$(cat "${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' + ssh "$(cat "${NODE_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF' # Check if sushy-tools container exists (dev-scripts deployment) if sudo podman container exists sushy-tools 2>/dev/null; then CONTAINER_STATUS=$(sudo podman inspect sushy-tools --format '{{.State.Status}}' 2>/dev/null || echo "unknown")