From 1379542309ca7635d4ce5d464e15b9b96a255cf1 Mon Sep 17 00:00:00 2001
From: dashinfraclaw
Date: Wed, 15 Apr 2026 13:26:11 +0300
Subject: [PATCH 1/3] ansible: codify dashmon status monitoring access

---
Review notes (ignored by git-am):
- dashmon-check.sh keeps the existing `|| true` guard on `dashmate status`.
  The script runs under `set -euo pipefail`, so without the guard a failing
  status call would end the script before the ===TENDERDASH=== and
  ===SYSMETRICS=== sections are printed.
- The diffstat below reflects that change; the `index` blob ids were not
  regenerated for this hand edit.

 .../roles/status_dashboard/defaults/main.yml  |   2 +
 ansible/roles/status_dashboard/tasks/main.yml |   4 +-
 .../templates/docker-compose.yml.j2           |   2 +-
 .../roles/status_monitoring/defaults/main.yml |   5 +
 .../status_monitoring/files/dashmon-check.sh  |  22 ++++
 .../status_monitoring/files/dashmon-sudoers   |  10 ++
 .../files/dashmon-testnet.pub                 |   1 +
 .../roles/status_monitoring/tasks/main.yml    |  34 +++++-
 flake.lock                                    |  27 +++++
 flake.nix                                     | 102 ++++++++++++++++++
 10 files changed, 205 insertions(+), 4 deletions(-)
 create mode 100644 ansible/roles/status_monitoring/defaults/main.yml
 create mode 100644 ansible/roles/status_monitoring/files/dashmon-sudoers
 create mode 100644 ansible/roles/status_monitoring/files/dashmon-testnet.pub
 create mode 100644 flake.lock
 create mode 100644 flake.nix

diff --git a/ansible/roles/status_dashboard/defaults/main.yml b/ansible/roles/status_dashboard/defaults/main.yml
index 8bb76a6b..dc3291e1 100644
--- a/ansible/roles/status_dashboard/defaults/main.yml
+++ b/ansible/roles/status_dashboard/defaults/main.yml
@@ -3,5 +3,7 @@
 status_dashboard_image: dashpay/status:latest
 status_dashboard_port: 3010
 status_dashboard_path: "{{ dashd_home }}/status_dashboard"
+status_dashboard_ssh_private_key_path: "{{ lookup('env', 'STATUS_DASHBOARD_SSH_KEY_PATH') | default('~/.ssh/dashmon-testnet', true) }}"
+status_dashboard_ssh_user: dashmon
 status_dashboard_poll_interval: 10000
 status_dashboard_poll_concurrency: 20
diff --git a/ansible/roles/status_dashboard/tasks/main.yml b/ansible/roles/status_dashboard/tasks/main.yml
index 1b6136fc..b7893a7a 100644
--- a/ansible/roles/status_dashboard/tasks/main.yml
+++ b/ansible/roles/status_dashboard/tasks/main.yml
@@ -12,9 +12,9 @@
     dest: "{{ status_dashboard_path }}/inventory"
     mode: "0644"

-- name: Copy SSH deploy key for status dashboard
+- name: Copy SSH monitoring key for status dashboard
   ansible.builtin.copy:
-    src: "{{ lookup('env', 'PRIVATE_KEY_PATH') | default('~/.ssh/evo-app-deploy.rsa', true) }}"
+    src: "{{ status_dashboard_ssh_private_key_path }}"
     dest: "{{ status_dashboard_path }}/ssh_key"
     mode: "0600"
     owner: root
diff --git a/ansible/roles/status_dashboard/templates/docker-compose.yml.j2 b/ansible/roles/status_dashboard/templates/docker-compose.yml.j2
index 1df1b4c0..c1bd866a 100644
--- a/ansible/roles/status_dashboard/templates/docker-compose.yml.j2
+++ b/ansible/roles/status_dashboard/templates/docker-compose.yml.j2
@@ -10,7 +10,7 @@ services:
     environment:
       - INVENTORY_PATH=/app/data/inventory
       - SSH_KEY_PATH=/app/data/ssh_key
-      - SSH_USER=ubuntu
+      - SSH_USER={{ status_dashboard_ssh_user }}
       - SSH_COMMAND=/usr/local/bin/dashmon-check
       - SSH_PORT=22
       - POLL_INTERVAL_MS={{ status_dashboard_poll_interval }}
diff --git a/ansible/roles/status_monitoring/defaults/main.yml b/ansible/roles/status_monitoring/defaults/main.yml
new file mode 100644
index 00000000..cf7367eb
--- /dev/null
+++ b/ansible/roles/status_monitoring/defaults/main.yml
@@ -0,0 +1,5 @@
+---
+
+status_monitoring_user: dashmon
+status_monitoring_home: "/home/{{ status_monitoring_user }}"
+status_monitoring_forced_command: /usr/local/bin/dashmon-check
diff --git a/ansible/roles/status_monitoring/files/dashmon-check.sh b/ansible/roles/status_monitoring/files/dashmon-check.sh
index 23af06cd..eb2abf6b 100644
--- a/ansible/roles/status_monitoring/files/dashmon-check.sh
+++ b/ansible/roles/status_monitoring/files/dashmon-check.sh
@@ -7,7 +7,29 @@ set -euo pipefail

 if [[ -f /home/dashmate/.dashmate/config.json ]]; then
   # HP masternode: dashmate status as the dashmate user
   sudo -u dashmate dashmate status 2>&1 || true
+  echo "===TENDERDASH==="
+  # Query Tenderdash RPC for proposer info (localhost only, no sudo needed)
+  python3 -c '
+import json, urllib.request
+try:
+    def fetch(path):
+        return json.loads(urllib.request.urlopen(
+            "http://127.0.0.1:36657" + path, timeout=5
+        ).read())
+    validators = fetch("/validators?per_page=100")
+    sorted_ptx = sorted(v["pro_tx_hash"] for v in validators["validators"])
+    block = fetch("/block")
+    header = block["block"]["header"]
+    cur_prop = header["proposer_pro_tx_hash"]
+    height = int(header["height"])
+    idx = sorted_ptx.index(cur_prop)
+    next_prop = sorted_ptx[(idx + 1) % len(sorted_ptx)]
+    print(json.dumps({"currentProposer": cur_prop,
+                      "nextProposer": next_prop, "platformHeight": height}))
+except Exception as e:
+    print(json.dumps({"error": str(e)}))
+' 2>/dev/null || echo '{"error":"tenderdash-unavailable"}'
   echo "===SYSMETRICS==="
 else
   # Regular masternode: dash-cli as the ubuntu user
diff --git a/ansible/roles/status_monitoring/files/dashmon-sudoers b/ansible/roles/status_monitoring/files/dashmon-sudoers
new file mode 100644
index 00000000..ade7b535
--- /dev/null
+++ b/ansible/roles/status_monitoring/files/dashmon-sudoers
@@ -0,0 +1,10 @@
+# /etc/sudoers.d/dashmon
+# Allow dashmon user to run read-only monitoring commands only.
+# Both rule sets present on all nodes; unused rules are harmless.
+
+# HP masternodes: dashmate status as dashmate user
+dashmon ALL=(dashmate) NOPASSWD: /usr/bin/dashmate status
+
+# Regular masternodes: dash-cli commands as ubuntu user
+dashmon ALL=(ubuntu) NOPASSWD: /usr/local/bin/dash-cli getblockchaininfo
+dashmon ALL=(ubuntu) NOPASSWD: /usr/local/bin/dash-cli masternode status
diff --git a/ansible/roles/status_monitoring/files/dashmon-testnet.pub b/ansible/roles/status_monitoring/files/dashmon-testnet.pub
new file mode 100644
index 00000000..af2fd586
--- /dev/null
+++ b/ansible/roles/status_monitoring/files/dashmon-testnet.pub
@@ -0,0 +1 @@
+ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOZRnc5hqc+WjCLt9PHiVVfFPfkWSlWNscOwSZrUnRAu dashmon-readonly@testnet-dashboard
diff --git a/ansible/roles/status_monitoring/tasks/main.yml b/ansible/roles/status_monitoring/tasks/main.yml
index 2cf25cde..39b6a505 100644
--- a/ansible/roles/status_monitoring/tasks/main.yml
+++ b/ansible/roles/status_monitoring/tasks/main.yml
@@ -1,9 +1,41 @@
 ---

+- name: Create dashmon monitoring user
+  ansible.builtin.user:
+    name: "{{ status_monitoring_user }}"
+    shell: /bin/bash
+    password: "!"
+    create_home: true
+
+- name: Create dashmon SSH directory
+  ansible.builtin.file:
+    path: "{{ status_monitoring_home }}/.ssh"
+    state: directory
+    owner: "{{ status_monitoring_user }}"
+    group: "{{ status_monitoring_user }}"
+    mode: "0700"
+
+- name: Install dashmon authorized key with forced command
+  ansible.builtin.copy:
+    dest: "{{ status_monitoring_home }}/.ssh/authorized_keys"
+    content: "command=\"{{ status_monitoring_forced_command }}\",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty {{ lookup('file', role_path + '/files/dashmon-testnet.pub') | trim }}\n"
+    owner: "{{ status_monitoring_user }}"
+    group: "{{ status_monitoring_user }}"
+    mode: "0600"
+
 - name: Copy dashmon-check monitoring script
   ansible.builtin.copy:
     src: dashmon-check.sh
-    dest: /usr/local/bin/dashmon-check
+    dest: "{{ status_monitoring_forced_command }}"
     mode: "0755"
     owner: root
     group: root
+
+- name: Install dashmon sudoers rules
+  ansible.builtin.copy:
+    src: dashmon-sudoers
+    dest: /etc/sudoers.d/dashmon
+    mode: "0440"
+    owner: root
+    group: root
+    validate: /usr/sbin/visudo -cf %s
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 00000000..167663e6
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,27 @@
+{
+  "nodes": {
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1773821835,
+        "narHash": "sha256-TJ3lSQtW0E2JrznGVm8hOQGVpXjJyXY2guAxku2O9A4=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "b40629efe5d6ec48dd1efba650c797ddbd39ace0",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 00000000..d72670aa
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,102 @@
+{
+  description = "FHS development environment for dash-network-deploy";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+  };
+
+  outputs = { self, nixpkgs }:
+    let
+      system = "x86_64-linux";
+      pkgs = nixpkgs.legacyPackages.${system};
+
+      fhsEnv = pkgs.buildFHSEnv {
+        name = "dash-network-deploy-env";
+        targetPkgs = pkgs: with pkgs; [
+          # Node.js
+          nodejs_22
+          corepack_22
+
+          # Infrastructure
+          terraform
+          ansible
+          docker-client
+
+          # Python 3 (Ansible interpreter at /usr/bin/python3)
+          python3
+
+          # AWS
+          awscli2
+
+          # Network / VPN
+          openssh
+          openvpn
+          curl
+          wget
+
+          # Utilities
+          git
+          jq
+          bash
+          coreutils
+          gnugrep
+          gnused
+          gawk
+          findutils
+          gnutar
+          gzip
+          which
+
+          # Libraries for native node modules
+          stdenv.cc.cc.lib
+          openssl
+          zlib
+          cacert
+        ];
+        profile = ''
+          # Ensure Python 3 is at /usr/bin/python3 for Ansible
+          if [ ! -e /usr/bin/python3 ]; then
+            mkdir -p /usr/bin 2>/dev/null || true
+            ln -sf "$(command -v python3)" /usr/bin/python3 2>/dev/null || true
+          fi
+
+          # SSL certs
+          export SSL_CERT_FILE="${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt"
+          export NIX_SSL_CERT_FILE="$SSL_CERT_FILE"
+
+          # Pass through SSH agent
+          export SSH_AUTH_SOCK="''${SSH_AUTH_SOCK:-}"
+
+          # Docker socket
+          export DOCKER_HOST="unix:///var/run/docker.sock"
+        '';
+        runScript = pkgs.writeShellScript "dash-network-deploy-run" ''
+          cd "$HOME/code/dash-network-deploy" || exit 1
+          if [ $# -eq 0 ]; then
+            echo "Entered dash-network-deploy FHS environment"
+            echo "Working directory: $(pwd)"
+            exec bash
+          else
+            exec "$@"
+          fi
+        '';
+      };
+    in
+    {
+      packages.${system}.default = fhsEnv;
+
+      # `nix develop` drops you into the FHS env
+      devShells.${system}.default = pkgs.mkShell {
+        buildInputs = [ fhsEnv ];
+        shellHook = ''
+          echo "Run 'dash-network-deploy-env' to enter the FHS environment"
+        '';
+      };
+
+      # `nix run` launches the FHS env directly
+      apps.${system}.default = {
+        type = "app";
+        program = "${fhsEnv}/bin/dash-network-deploy-env";
+      };
+    };
+}
From 97afc9780ee2e5f40bccae5f84c065babbcbd07a Mon Sep 17 00:00:00 2001
From: vivekgsharma
Date: Wed, 15 Apr 2026 12:31:59 +0000
Subject: [PATCH 2/3] feat: poll testnet status and open infra recovery issues

---
Review notes (ignored by git-am):
- parseExpectedNodeNames now skips lines starting with `;` as well as `#`.
  Ansible's INI inventory format accepts both as comment markers, and a
  `;` comment under [masternodes] or [hp_masternodes] would otherwise be
  recorded as an expected node name.
- The file's line count is unchanged; the `index` blob id for
  lib/testnetStatus/pollTestnetStatus.js was not regenerated for this hand
  edit.

 .github/workflows/poll-testnet-status.yml |  46 ++++
 bin/poll-testnet-status.js                |  32 +++
 lib/testnetStatus/pollTestnetStatus.js    | 257 ++++++++++++++++++++++
 test/pollTestnetStatus.spec.js            | 157 +++++++++++++
 4 files changed, 492 insertions(+)
 create mode 100644 .github/workflows/poll-testnet-status.yml
 create mode 100644 bin/poll-testnet-status.js
 create mode 100644 lib/testnetStatus/pollTestnetStatus.js
 create mode 100644 test/pollTestnetStatus.spec.js

diff --git a/.github/workflows/poll-testnet-status.yml b/.github/workflows/poll-testnet-status.yml
new file mode 100644
index 00000000..61a41934
--- /dev/null
+++ b/.github/workflows/poll-testnet-status.yml
@@ -0,0 +1,46 @@
+name: Poll Testnet Status
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 */2 * * *'
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  poll-testnet-status:
+    name: Poll testnet status and open recovery issues
+    runs-on: ubuntu-22.04
+    timeout-minutes: 10
+    concurrency:
+      group: poll-testnet-status
+      cancel-in-progress: false
+
+    env:
+      GH_TOKEN: ${{ secrets.INFRA_ISSUES_TOKEN }}
+      TESTNET_RECOVERY_ASSIGNEE: dashinfraclaw
+      TESTNET_RECOVERY_ISSUE_REPOSITORY: dashpay/infra
+
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Validate cross-repo issue token
+        run: |
+          if [[ -z "${GH_TOKEN}" ]]; then
+            echo "INFRA_ISSUES_TOKEN secret is required to create issues in dashpay/infra"
+            exit 1
+          fi
+
+      - name: Poll status API and open recovery issues
+        run: node bin/poll-testnet-status.js
diff --git a/bin/poll-testnet-status.js b/bin/poll-testnet-status.js
new file mode 100644
index 00000000..c558687e
--- /dev/null
+++ b/bin/poll-testnet-status.js
@@ -0,0 +1,32 @@
+/* eslint-disable no-console */
+
+const {
+  pollTestnetStatus,
+} = require('../lib/testnetStatus/pollTestnetStatus');
+
+async function main() {
+  const result = await pollTestnetStatus();
+
+  console.log(`Checked ${result.expectedNodeCount} expected testnet masternodes.`);
+  console.log(`Detected ${result.incidentCount} active incidents.`);
+
+  for (const incident of result.skippedIncidents) {
+    console.log(`Skipped existing issue for ${incident.nodeName} (${incident.observedState}).`);
+  }
+
+  for (const createdIssue of result.createdIssues) {
+    if (createdIssue.dryRun) {
+      console.log(`Would create issue for ${createdIssue.nodeName} (${createdIssue.observedState}).`);
+    } else {
+      console.log(
+        `Created recovery issue for ${createdIssue.nodeName} `
+        + `(${createdIssue.observedState}): ${createdIssue.issueUrl}`,
+      );
+    }
+  }
+}
+
+main().catch((error) => {
+  console.error(error.message);
+  process.exit(1);
+});
diff --git a/lib/testnetStatus/pollTestnetStatus.js b/lib/testnetStatus/pollTestnetStatus.js
new file mode 100644
index 00000000..d5d8306c
--- /dev/null
+++ b/lib/testnetStatus/pollTestnetStatus.js
@@ -0,0 +1,257 @@
+const fs = require('fs').promises;
+const path = require('path');
+const { execFile } = require('child_process');
+const { promisify } = require('util');
+
+const DEFAULT_STATUS_API_URL = 'https://status.testnet.networks.dash.org/api/nodes';
+const DEFAULT_INVENTORY_PATH = path.resolve(process.cwd(), 'networks/testnet.inventory');
+const DEFAULT_REPOSITORY = 'dashpay/infra';
+const DEFAULT_ASSIGNEE = 'dashinfraclaw';
+const ISSUE_TITLE_PREFIX = '[Testnet Recovery]';
+
+const execFileAsync = promisify(execFile);
+
+function parseExpectedNodeNames(inventoryContents) {
+  const expectedNodeNames = [];
+  const trackedGroups = new Set(['masternodes', 'hp_masternodes']);
+  let currentGroup = null;
+
+  for (const rawLine of inventoryContents.split('\n')) {
+    const line = rawLine.trim();
+
+    if (line && !line.startsWith('#') && line.startsWith('[') && line.endsWith(']')) {
+      const groupName = line.slice(1, -1);
+      currentGroup = trackedGroups.has(groupName) ? groupName : null;
+    } else if (line && !line.startsWith('#') && !line.startsWith(';') && currentGroup) {
+      expectedNodeNames.push(line.split(/\s+/)[0]);
+    }
+  }
+
+  return expectedNodeNames;
+}
+
+function buildObservedState(statusNode) {
+  if (!statusNode) {
+    return 'missing from status API';
+  }
+
+  const observedStates = [];
+  const masternodeState = statusNode.status?.masternodeState;
+  const { health } = statusNode;
+  const coreServiceStatus = statusNode.status?.coreServiceStatus;
+  const platformStatus = statusNode.status?.platformStatus;
+
+  if (masternodeState === 'POSE_BANNED') {
+    observedStates.push(`masternodeState=${masternodeState}`);
+  }
+
+  if (health && health !== 'healthy') {
+    observedStates.push(`health=${health}`);
+  }
+
+  if (coreServiceStatus && coreServiceStatus !== 'up') {
+    observedStates.push(`coreServiceStatus=${coreServiceStatus}`);
+  }
+
+  if (platformStatus && platformStatus !== 'up') {
+    observedStates.push(`platformStatus=${platformStatus}`);
+  }
+
+  return observedStates.join(', ');
+}
+
+function findRecoveryIncidents(expectedNodeNames, statusNodes) {
+  const statusByName = new Map(statusNodes.map((statusNode) => [statusNode.name, statusNode]));
+
+  return expectedNodeNames.reduce((incidents, nodeName) => {
+    const statusNode = statusByName.get(nodeName);
+    const observedState = buildObservedState(statusNode);
+
+    if (!observedState) {
+      return incidents;
+    }
+
+    incidents.push({
+      nodeName,
+      observedState,
+    });
+
+    return incidents;
+  }, []);
+}
+
+function buildIssueTitle(nodeName) {
+  return `${ISSUE_TITLE_PREFIX} ${nodeName}`;
+}
+
+function buildIssueBody(nodeName, observedState) {
+  return `${nodeName} requires automated recovery investigation.
+
+Observed state from the testnet status page: ${observedState}.
+
+Use the automated-recovery skill for investigation and any safe first-response actions.
+
+If the incident is outside that skill's safe scope, stop and leave a comment describing what you checked and why it was escalated.
+
+If recovery succeeds, leave a comment with what you did and then close the issue.
+
+`;
+}
+
+function filterIncidentsWithoutOpenIssues(incidents, openIssues) {
+  const openIssueTitles = new Set(openIssues.map((issue) => issue.title));
+
+  return incidents.filter((incident) => !openIssueTitles.has(buildIssueTitle(incident.nodeName)));
+}
+
+async function readExpectedNodeNames(inventoryPath = DEFAULT_INVENTORY_PATH) {
+  const inventoryContents = await fs.readFile(inventoryPath, 'utf8');
+  return parseExpectedNodeNames(inventoryContents);
+}
+
+async function fetchStatusNodes(statusApiUrl = DEFAULT_STATUS_API_URL, fetchImpl = fetch) {
+  const response = await fetchImpl(statusApiUrl);
+
+  if (!response.ok) {
+    throw new Error(`Status API request failed with ${response.status} ${response.statusText}`);
+  }
+
+  const payload = await response.json();
+
+  if (!Array.isArray(payload)) {
+    throw new Error('Status API response must be an array');
+  }
+
+  return payload;
+}
+
+async function listOpenRecoveryIssues(
+  repository = DEFAULT_REPOSITORY,
+  execFileImpl = execFileAsync,
+) {
+  const { stdout } = await execFileImpl('gh', [
+    'issue',
+    'list',
+    '--repo',
+    repository,
+    '--state',
+    'open',
+    '--limit',
+    '500',
+    '--json',
+    'number,title',
+  ]);
+
+  return JSON.parse(stdout);
+}
+
+async function createRecoveryIssue(
+  incident,
+  repository = DEFAULT_REPOSITORY,
+  assignee = DEFAULT_ASSIGNEE,
+  execFileImpl = execFileAsync,
+) {
+  const issueTitle = buildIssueTitle(incident.nodeName);
+  const issueBody = buildIssueBody(incident.nodeName, incident.observedState);
+
+  const { stdout } = await execFileImpl('gh', [
+    'issue',
+    'create',
+    '--repo',
+    repository,
+    '--title',
+    issueTitle,
+    '--body',
+    issueBody,
+    '--assignee',
+    assignee,
+  ]);
+
+  return stdout.trim();
+}
+
+async function pollTestnetStatus({
+  statusApiUrl = DEFAULT_STATUS_API_URL,
+  inventoryPath = DEFAULT_INVENTORY_PATH,
+  repository = process.env.TESTNET_RECOVERY_ISSUE_REPOSITORY || DEFAULT_REPOSITORY,
+  assignee = process.env.TESTNET_RECOVERY_ASSIGNEE || DEFAULT_ASSIGNEE,
+  dryRun = process.env.DRY_RUN === '1',
+  fetchImpl = fetch,
+  execFileImpl = execFileAsync,
+} = {}) {
+  const [expectedNodeNames, statusNodes] = await Promise.all([
+    readExpectedNodeNames(inventoryPath),
+    fetchStatusNodes(statusApiUrl, fetchImpl),
+  ]);
+
+  const incidents = findRecoveryIncidents(expectedNodeNames, statusNodes);
+
+  if (incidents.length === 0) {
+    return {
+      expectedNodeCount: expectedNodeNames.length,
+      incidentCount: 0,
+      createdIssues: [],
+      skippedIncidents: [],
+    };
+  }
+
+  const openIssues = await listOpenRecoveryIssues(repository, execFileImpl);
+  const incidentsToCreate = filterIncidentsWithoutOpenIssues(incidents, openIssues);
+  const skippedIncidents = incidents.filter((incident) => !incidentsToCreate.includes(incident));
+
+  if (dryRun) {
+    return {
+      expectedNodeCount: expectedNodeNames.length,
+      incidentCount: incidents.length,
+      createdIssues: incidentsToCreate.map((incident) => ({
+        nodeName: incident.nodeName,
+        observedState: incident.observedState,
+        dryRun: true,
+      })),
+      skippedIncidents,
+    };
+  }
+
+  const createdIssues = [];
+
+  for (const incident of incidentsToCreate) {
+    const issueUrl = await createRecoveryIssue(
+      incident,
+      repository,
+      assignee,
+      execFileImpl,
+    );
+
+    createdIssues.push({
+      nodeName: incident.nodeName,
+      observedState: incident.observedState,
+      issueUrl,
+    });
+  }
+
+  return {
+    expectedNodeCount: expectedNodeNames.length,
+    incidentCount: incidents.length,
+    createdIssues,
+    skippedIncidents,
+  };
+}
+
+module.exports = {
+  ISSUE_TITLE_PREFIX,
+  buildIssueBody,
+  buildIssueTitle,
+  buildObservedState,
+  createRecoveryIssue,
+  DEFAULT_ASSIGNEE,
+  DEFAULT_INVENTORY_PATH,
+  DEFAULT_REPOSITORY,
+  DEFAULT_STATUS_API_URL,
+  fetchStatusNodes,
+  filterIncidentsWithoutOpenIssues,
+  findRecoveryIncidents,
+  listOpenRecoveryIssues,
+  parseExpectedNodeNames,
+  pollTestnetStatus,
+  readExpectedNodeNames,
+};
diff --git a/test/pollTestnetStatus.spec.js b/test/pollTestnetStatus.spec.js
new file mode 100644
index 00000000..058d4949
--- /dev/null
+++ b/test/pollTestnetStatus.spec.js
@@ -0,0 +1,157 @@
+const { expect } = require('chai');
+
+const {
+  buildIssueBody,
+  buildIssueTitle,
+  buildObservedState,
+  filterIncidentsWithoutOpenIssues,
+  findRecoveryIncidents,
+  parseExpectedNodeNames,
+} = require('../lib/testnetStatus/pollTestnetStatus');
+
+describe('pollTestnetStatus', () => {
+  describe('parseExpectedNodeNames', () => {
+    it('should only return testnet masternode groups from the inventory', () => {
+      const inventoryContents = `
+web-1 ansible_host=1.2.3.4
+
+[web]
+web-1
+
+[masternodes]
+masternode-1
+masternode-2
+
+[hp_masternodes]
+hp-masternode-1
+
+[seed_nodes]
+seed-1
+`;
+
+      expect(parseExpectedNodeNames(inventoryContents)).to.deep.equal([
+        'masternode-1',
+        'masternode-2',
+        'hp-masternode-1',
+      ]);
+    });
+  });
+
+  describe('buildObservedState', () => {
+    it('should report a missing node as absent from the status API', () => {
+      expect(buildObservedState()).to.equal('missing from status API');
+    });
+
+    it('should combine failing status fields into a single observed state', () => {
+      const observedState = buildObservedState({
+        health: 'unreachable',
+        status: {
+          masternodeState: 'POSE_BANNED',
+          coreServiceStatus: 'down',
+          platformStatus: 'degraded',
+        },
+      });
+
+      expect(observedState).to.equal(
+        'masternodeState=POSE_BANNED, health=unreachable, coreServiceStatus=down, platformStatus=degraded',
+      );
+    });
+
+    it('should ignore healthy ready nodes', () => {
+      const observedState = buildObservedState({
+        health: 'healthy',
+        status: {
+          masternodeState: 'READY',
+          coreServiceStatus: 'up',
+          platformStatus: 'up',
+        },
+      });
+
+      expect(observedState).to.equal('');
+    });
+  });
+
+  describe('findRecoveryIncidents', () => {
+    it('should flag banned, down, and missing expected nodes', () => {
+      const incidents = findRecoveryIncidents(
+        ['masternode-1', 'masternode-2', 'hp-masternode-1', 'hp-masternode-2'],
+        [
+          {
+            name: 'masternode-1',
+            health: 'healthy',
+            status: {
+              masternodeState: 'READY',
+              coreServiceStatus: 'up',
+            },
+          },
+          {
+            name: 'masternode-2',
+            health: 'healthy',
+            status: {
+              masternodeState: 'POSE_BANNED',
+              coreServiceStatus: 'up',
+            },
+          },
+          {
+            name: 'hp-masternode-1',
+            health: 'unreachable',
+            status: {
+              masternodeState: 'READY',
+              coreServiceStatus: 'up',
+              platformStatus: 'up',
+            },
+          },
+        ],
+      );
+
+      expect(incidents).to.deep.equal([
+        {
+          nodeName: 'masternode-2',
+          observedState: 'masternodeState=POSE_BANNED',
+        },
+        {
+          nodeName: 'hp-masternode-1',
+          observedState: 'health=unreachable',
+        },
+        {
+          nodeName: 'hp-masternode-2',
+          observedState: 'missing from status API',
+        },
+      ]);
+    });
+  });
+
+  describe('issue helpers', () => {
+    it('should use a deterministic issue title and public body', () => {
+      expect(buildIssueTitle('hp-masternode-7')).to.equal('[Testnet Recovery] hp-masternode-7');
+      expect(buildIssueBody('hp-masternode-7', 'health=unreachable')).to.equal(
+        `hp-masternode-7 requires automated recovery investigation.
+
+Observed state from the testnet status page: health=unreachable.
+
+Use the automated-recovery skill for investigation and any safe first-response actions.
+
+If the incident is outside that skill's safe scope, stop and leave a comment describing what you checked and why it was escalated.
+
+If recovery succeeds, leave a comment with what you did and then close the issue.
+
+`,
+      );
+    });
+
+    it('should only create issues for incidents without an open matching issue', () => {
+      const incidents = [
+        { nodeName: 'masternode-1', observedState: 'health=unreachable' },
+        { nodeName: 'hp-masternode-1', observedState: 'masternodeState=POSE_BANNED' },
+      ];
+
+      const openIssues = [
+        { number: 28, title: '[Testnet Recovery] hp-masternode-1' },
+      ];
+
+      expect(filterIncidentsWithoutOpenIssues(incidents, openIssues)).to.deep.equal([
+        { nodeName: 'masternode-1', observedState: 'health=unreachable' },
+      ]);
+    });
+  });
+});
From 4bdd09a8c871191cdc2ed612a1fe02efdc6e7f58 Mon Sep 17 00:00:00 2001
From: vivekgsharma
Date: Wed, 15 Apr 2026 12:39:08 +0000
Subject: [PATCH 3/3] fix: shorten dashmon authorized key ansible line

---
Review notes (ignored by git-am):
- `content` uses the folded scalar `>` rather than `>-`. The line being
  replaced ended in an explicit "\n"; `>-` strips the final newline, so
  authorized_keys would lose its trailing newline. `>` keeps exactly one.
- The `index` blob id was not regenerated for this hand edit.

 ansible/roles/status_monitoring/tasks/main.yml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/status_monitoring/tasks/main.yml b/ansible/roles/status_monitoring/tasks/main.yml
index 39b6a505..1e4e8fb0 100644
--- a/ansible/roles/status_monitoring/tasks/main.yml
+++ b/ansible/roles/status_monitoring/tasks/main.yml
@@ -18,10 +18,19 @@
 - name: Install dashmon authorized key with forced command
   ansible.builtin.copy:
     dest: "{{ status_monitoring_home }}/.ssh/authorized_keys"
-    content: "command=\"{{ status_monitoring_forced_command }}\",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty {{ lookup('file', role_path + '/files/dashmon-testnet.pub') | trim }}\n"
+    content: >
+      {{ status_monitoring_authorized_key_options | join(',') }}
+      {{ lookup('file', role_path + '/files/dashmon-testnet.pub') | trim }}
     owner: "{{ status_monitoring_user }}"
     group: "{{ status_monitoring_user }}"
     mode: "0600"
+  vars:
+    status_monitoring_authorized_key_options:
+      - 'command="{{ status_monitoring_forced_command }}"'
+      - no-port-forwarding
+      - no-X11-forwarding
+      - no-agent-forwarding
+      - no-pty

 - name: Copy dashmon-check monitoring script
   ansible.builtin.copy: