11<?php
22
3+ declare (strict_types=1 );
4+
35namespace Hypernode \Deploy \Brancher ;
46
57use Hypernode \Api \Exception \HypernodeApiClientException ;
1416
1517class BrancherHypernodeManager
1618{
19+ /**
20+ * Relevant flow names to poll for delivery
21+ *
22+ * @var string[]
23+ */
24+ public const RELEVANT_FLOW_NAMES = ['ensure_app ' , 'ensure_copied_app ' ];
25+ public const PRE_POLL_SUCCESS_COUNT = 3 ;
26+ public const PRE_POLL_FAIL_COUNT = 5 ;
27+
1728 private LoggerInterface $ log ;
1829 private HypernodeClient $ hypernodeClient ;
30+ private SshPoller $ sshPoller ;
1931
/**
 * Build the manager with its logger and optional collaborators.
 *
 * Both collaborators can be injected (e.g. for testing). When no client is given, one is
 * created through HypernodeClientFactory using the HYPERNODE_API_TOKEN environment variable,
 * falling back to an empty token when the variable is unset or empty. When no poller is
 * given, a default SshPoller is instantiated.
 *
 * @param LoggerInterface      $log             Logger used for progress messages
 * @param HypernodeClient|null $hypernodeClient API client to use, or null to create one from the environment
 * @param SshPoller|null       $sshPoller       Poller used for reachability checks, timing and sleeping
 */
public function __construct(
    LoggerInterface $log,
    ?HypernodeClient $hypernodeClient = null,
    ?SshPoller $sshPoller = null
) {
    $this->log = $log;
    // getenv() yields false when the variable is missing; `?: ''` turns that into an empty token.
    $this->hypernodeClient = $hypernodeClient
        ?? HypernodeClientFactory::create(getenv('HYPERNODE_API_TOKEN') ?: '');
    $this->sshPoller = $sshPoller ?? new SshPoller();
}
2542
2643 /**
@@ -105,6 +122,11 @@ public function createForHypernode(string $hypernode, array $data = []): string
105122 /**
106123 * Wait for brancher Hypernode to become available.
107124 *
125+ * This method first attempts a quick SSH connectivity check. If the brancher is already
126+ * reachable (e.g., when reusing an existing brancher), it returns early. Otherwise, it
127+ * falls back to polling the API logbook for delivery status, then performs a final SSH
128+ * reachability check.
129+ *
108130 * @param string $brancherHypernode Name of the brancher Hypernode
109131 * @param int $timeout Maximum time to wait for availability
110132 * @param int $reachabilityCheckCount Number of consecutive successful checks required
@@ -121,24 +143,58 @@ public function waitForAvailability(
121143 int $ reachabilityCheckCount = 6 ,
122144 int $ reachabilityCheckInterval = 10
123145 ): void {
124- $ latest = microtime (true );
125- $ timeElapsed = 0 ;
146+ $ latest = $ this ->sshPoller ->microtime ();
147+ $ timeElapsed = 0.0 ;
148+
149+ // Phase 1: SSH-first check, early return for reused delivered branchers
150+ $ this ->log ->info (
151+ sprintf ('Attempting SSH connectivity check for brancher Hypernode %s... ' , $ brancherHypernode )
152+ );
153+
154+ $ isReachable = $ this ->pollSshConnectivity (
155+ $ brancherHypernode ,
156+ self ::PRE_POLL_SUCCESS_COUNT ,
157+ self ::PRE_POLL_FAIL_COUNT ,
158+ $ reachabilityCheckInterval ,
159+ $ timeElapsed ,
160+ $ latest ,
161+ $ timeout
162+ );
163+ if ($ isReachable ) {
164+ $ this ->log ->info (
165+ sprintf ('Brancher Hypernode %s is reachable! ' , $ brancherHypernode )
166+ );
167+ return ;
168+ }
169+
170+ $ this ->log ->info (
171+ sprintf (
172+ 'SSH check inconclusive for brancher Hypernode %s, falling back to delivery check... ' ,
173+ $ brancherHypernode
174+ )
175+ );
176+
177+ // Phase 2: Wait for delivery by polling the logbook
126178 $ resolved = false ;
127179 $ interval = 3 ;
128180 $ allowedErrorWindow = 3 ;
181+ $ logbookStartTime = $ timeElapsed ;
129182
130183 while ($ timeElapsed < $ timeout ) {
131- $ now = microtime (true );
184+ $ now = $ this -> sshPoller -> microtime ();
132185 $ timeElapsed += $ now - $ latest ;
133186 $ latest = $ now ;
134187
135188 try {
136189 $ flows = $ this ->hypernodeClient ->logbook ->getList ($ brancherHypernode );
137- $ relevantFlows = array_filter ($ flows , fn (Flow $ flow ) => in_array ($ flow ->name , ["ensure_app " , "ensure_copied_app " ], true ));
190+ $ relevantFlows = array_filter (
191+ $ flows ,
192+ fn (Flow $ flow ) => in_array ($ flow ->name , self ::RELEVANT_FLOW_NAMES , true )
193+ );
138194 $ failedFlows = array_filter ($ relevantFlows , fn (Flow $ flow ) => $ flow ->isReverted ());
139195 $ completedFlows = array_filter ($ relevantFlows , fn (Flow $ flow ) => $ flow ->isComplete ());
140196
141- if (count ($ failedFlows ) === count ($ relevantFlows )) {
197+ if (count ($ relevantFlows ) > 0 && count ( $ failedFlows ) === count ($ relevantFlows )) {
142198 throw new CreateBrancherHypernodeFailedException ();
143199 }
144200
@@ -151,21 +207,26 @@ public function waitForAvailability(
151207 // Otherwise, there's an error, and it should be propagated.
152208 if ($ e ->getCode () !== 404 ) {
153209 throw $ e ;
154- } elseif ($ timeElapsed < $ allowedErrorWindow ) {
210+ } elseif (( $ timeElapsed - $ logbookStartTime ) < $ allowedErrorWindow ) {
155211 // Sometimes we get an error where the logbook is not yet available, but it will be soon.
156- // We allow a small window for this to happen, and then we throw an exception .
212+ // We allow a small window for this to happen, and then we continue polling .
157213 $ this ->log ->info (
158214 sprintf (
159215 'Got an expected exception during the allowed error window of HTTP code %d, waiting for %s to become available. ' ,
160216 $ e ->getCode (),
161217 $ brancherHypernode
162218 )
163219 );
164- continue ;
165220 }
166221 }
167222
168- sleep ($ interval );
223+ $ this ->sshPoller ->sleep ($ interval );
224+ }
225+
226+ if (!$ resolved ) {
227+ throw new TimeoutException (
228+ sprintf ('Timed out waiting for brancher Hypernode %s to be delivered ' , $ brancherHypernode )
229+ );
169230 }
170231
171232 $ this ->log ->info (
@@ -175,63 +236,94 @@ public function waitForAvailability(
175236 )
176237 );
177238
178- if (!$ resolved ) {
239+ // Phase 3: Final SSH reachability check
240+ $ isReachable = $ this ->pollSshConnectivity (
241+ $ brancherHypernode ,
242+ $ reachabilityCheckCount ,
243+ 0 , // No max failures, rely on timeout
244+ $ reachabilityCheckInterval ,
245+ $ timeElapsed ,
246+ $ latest ,
247+ $ timeout
248+ );
249+ if (!$ isReachable ) {
179250 throw new TimeoutException (
180- sprintf ('Timed out waiting for brancher Hypernode %s to be delivered ' , $ brancherHypernode )
251+ sprintf ('Timed out waiting for brancher Hypernode %s to become reachable ' , $ brancherHypernode )
181252 );
182253 }
183254
255+ $ this ->log ->info (
256+ sprintf ('Brancher Hypernode %s became reachable! ' , $ brancherHypernode )
257+ );
258+ }
259+
/**
 * Poll SSH connectivity until we get enough consecutive successes or hit a limit.
 *
 * Every iteration first adds the time passed since $latest to $timeElapsed (so time spent
 * sleeping or polling counts against the timeout), then asks the injected SshPoller whether
 * the host is reachable. A failed check resets the consecutive-success counter and counts
 * towards the failure limit. When the failure limit is reached the method returns right
 * away, without sleeping another interval first.
 *
 * @param string $brancherHypernode Hostname to check
 * @param int $requiredConsecutiveSuccesses Number of consecutive successes required
 * @param int $maxFailedAttempts Maximum failed attempts before giving up (0 = no limit, use timeout only)
 * @param int $checkInterval Seconds between checks
 * @param float $timeElapsed Running total of elapsed time, updated by reference
 * @param float $latest Timestamp of the latest measurement, updated by reference
 * @param int $timeout Maximum time allowed
 * @return bool True once the required number of consecutive checks succeeded, false when the
 *              failure limit or the timeout was reached first
 */
private function pollSshConnectivity(
    string $brancherHypernode,
    int $requiredConsecutiveSuccesses,
    int $maxFailedAttempts,
    int $checkInterval,
    float &$timeElapsed,
    float &$latest,
    int $timeout
): bool {
    $consecutiveSuccesses = 0;
    $failedAttempts = 0;

    while ($timeElapsed < $timeout) {
        // Account for the time spent since the previous measurement (including sleeps).
        $now = $this->sshPoller->microtime();
        $timeElapsed += $now - $latest;
        $latest = $now;

        if ($this->sshPoller->poll($brancherHypernode)) {
            $consecutiveSuccesses++;
            $this->log->info(
                sprintf(
                    'Brancher Hypernode %s reachability check %d/%d succeeded.',
                    $brancherHypernode,
                    $consecutiveSuccesses,
                    $requiredConsecutiveSuccesses
                )
            );

            if ($consecutiveSuccesses >= $requiredConsecutiveSuccesses) {
                return true;
            }
        } else {
            if ($consecutiveSuccesses > 0) {
                $this->log->info(
                    sprintf(
                        'Brancher Hypernode %s reachability check failed, resetting counter (was at %d/%d).',
                        $brancherHypernode,
                        $consecutiveSuccesses,
                        $requiredConsecutiveSuccesses
                    )
                );
            }
            $consecutiveSuccesses = 0;
            $failedAttempts++;

            // Stop as soon as the failure budget is used up (0 = unlimited) instead of
            // sleeping a full interval only to return false on the next iteration.
            if ($maxFailedAttempts > 0 && $failedAttempts >= $maxFailedAttempts) {
                return false;
            }
        }

        $this->sshPoller->sleep($checkInterval);
    }

    // Timeout reached before enough consecutive successes were observed.
    return false;
}
236328
237329 /**
0 commit comments