11<?php
22
3+ declare (strict_types=1 );
4+
35namespace Hypernode \Deploy \Brancher ;
46
57use Hypernode \Api \Exception \HypernodeApiClientException ;
1416
1517class BrancherHypernodeManager
1618{
/**
 * Names of the logbook flows that are inspected while polling for delivery.
 *
 * Flow names returned by the logbook are compared against this list with a
 * strict in_array() check, so every entry must match the flow name exactly
 * (no surrounding whitespace).
 *
 * @var string[]
 */
public const RELEVANT_FLOW_NAMES = ['ensure_app', 'ensure_copied_app'];
25+
1726 private LoggerInterface $ log ;
1827 private HypernodeClient $ hypernodeClient ;
28+ private SshPoller $ sshPoller ;
1929
/**
 * Wire up the manager's collaborators.
 *
 * Both the API client and the SSH poller are optional so they can be injected
 * (for example by tests); when omitted, defaults are created here.
 *
 * @param LoggerInterface      $log             Logger used for progress messages
 * @param HypernodeClient|null $hypernodeClient API client; when null, one is built by
 *                                              HypernodeClientFactory from the
 *                                              HYPERNODE_API_TOKEN environment variable
 *                                              (an empty token is used when it is unset or empty)
 * @param SshPoller|null       $sshPoller       SSH poller; when null, a new SshPoller is created
 */
public function __construct(
    LoggerInterface $log,
    ?HypernodeClient $hypernodeClient = null,
    ?SshPoller $sshPoller = null
) {
    $this->log = $log;
    // getenv() returns false when the variable is missing; fall back to an empty token.
    $this->hypernodeClient = $hypernodeClient
        ?? HypernodeClientFactory::create(getenv('HYPERNODE_API_TOKEN') ?: '');
    $this->sshPoller = $sshPoller ?? new SshPoller();
}
2540
2641 /**
@@ -105,6 +120,11 @@ public function createForHypernode(string $hypernode, array $data = []): string
105120 /**
106121 * Wait for brancher Hypernode to become available.
107122 *
123+ * This method first attempts a quick SSH connectivity check. If the brancher is already
124+ * reachable (e.g., when reusing an existing brancher), it returns early. Otherwise, it
125+ * falls back to polling the API logbook for delivery status, then performs a final SSH
126+ * reachability check.
127+ *
108128 * @param string $brancherHypernode Name of the brancher Hypernode
109129 * @param int $timeout Maximum time to wait for availability
110130 * @param int $reachabilityCheckCount Number of consecutive successful checks required
@@ -121,24 +141,58 @@ public function waitForAvailability(
121141 int $ reachabilityCheckCount = 6 ,
122142 int $ reachabilityCheckInterval = 10
123143 ): void {
124- $ latest = microtime (true );
125- $ timeElapsed = 0 ;
144+ $ latest = $ this ->sshPoller ->microtime ();
145+ $ timeElapsed = 0.0 ;
146+
147+ // Phase 1: SSH-first check, early return for reused delivered branchers
148+ $ this ->log ->info (
149+ sprintf ('Attempting SSH connectivity check for brancher Hypernode %s... ' , $ brancherHypernode )
150+ );
151+
152+ $ isReachable = $ this ->pollSshConnectivity (
153+ $ brancherHypernode ,
154+ 3 ,
155+ 5 ,
156+ $ reachabilityCheckInterval ,
157+ $ timeElapsed ,
158+ $ latest ,
159+ $ timeout
160+ );
161+ if ($ isReachable ) {
162+ $ this ->log ->info (
163+ sprintf ('Brancher Hypernode %s is reachable! ' , $ brancherHypernode )
164+ );
165+ return ;
166+ }
167+
168+ $ this ->log ->info (
169+ sprintf (
170+ 'SSH check inconclusive for brancher Hypernode %s, falling back to delivery check... ' ,
171+ $ brancherHypernode
172+ )
173+ );
174+
175+ // Phase 2: Wait for delivery by polling the logbook
126176 $ resolved = false ;
127177 $ interval = 3 ;
128178 $ allowedErrorWindow = 3 ;
179+ $ logbookStartTime = $ timeElapsed ;
129180
130181 while ($ timeElapsed < $ timeout ) {
131- $ now = microtime (true );
182+ $ now = $ this -> sshPoller -> microtime ();
132183 $ timeElapsed += $ now - $ latest ;
133184 $ latest = $ now ;
134185
135186 try {
136187 $ flows = $ this ->hypernodeClient ->logbook ->getList ($ brancherHypernode );
137- $ relevantFlows = array_filter ($ flows , fn (Flow $ flow ) => in_array ($ flow ->name , ["ensure_app " , "ensure_copied_app " ], true ));
188+ $ relevantFlows = array_filter (
189+ $ flows ,
190+ fn (Flow $ flow ) => in_array ($ flow ->name , self ::RELEVANT_FLOW_NAMES , true )
191+ );
138192 $ failedFlows = array_filter ($ relevantFlows , fn (Flow $ flow ) => $ flow ->isReverted ());
139193 $ completedFlows = array_filter ($ relevantFlows , fn (Flow $ flow ) => $ flow ->isComplete ());
140194
141- if (count ($ failedFlows ) === count ($ relevantFlows )) {
195+ if (count ($ relevantFlows ) > 0 && count ( $ failedFlows ) === count ($ relevantFlows )) {
142196 throw new CreateBrancherHypernodeFailedException ();
143197 }
144198
@@ -151,21 +205,26 @@ public function waitForAvailability(
151205 // Otherwise, there's an error, and it should be propagated.
152206 if ($ e ->getCode () !== 404 ) {
153207 throw $ e ;
154- } elseif ($ timeElapsed < $ allowedErrorWindow ) {
208+ } elseif (( $ timeElapsed - $ logbookStartTime ) < $ allowedErrorWindow ) {
155209 // Sometimes we get an error where the logbook is not yet available, but it will be soon.
156- // We allow a small window for this to happen, and then we throw an exception .
210+ // We allow a small window for this to happen, and then we continue polling .
157211 $ this ->log ->info (
158212 sprintf (
159213 'Got an expected exception during the allowed error window of HTTP code %d, waiting for %s to become available. ' ,
160214 $ e ->getCode (),
161215 $ brancherHypernode
162216 )
163217 );
164- continue ;
165218 }
166219 }
167220
168- sleep ($ interval );
221+ $ this ->sshPoller ->sleep ($ interval );
222+ }
223+
224+ if (!$ resolved ) {
225+ throw new TimeoutException (
226+ sprintf ('Timed out waiting for brancher Hypernode %s to be delivered ' , $ brancherHypernode )
227+ );
169228 }
170229
171230 $ this ->log ->info (
@@ -175,63 +234,94 @@ public function waitForAvailability(
175234 )
176235 );
177236
178- if (!$ resolved ) {
237+ // Phase 3: Final SSH reachability check
238+ $ isReachable = $ this ->pollSshConnectivity (
239+ $ brancherHypernode ,
240+ $ reachabilityCheckCount ,
241+ 0 , // No max failures, rely on timeout
242+ $ reachabilityCheckInterval ,
243+ $ timeElapsed ,
244+ $ latest ,
245+ $ timeout
246+ );
247+ if (!$ isReachable ) {
179248 throw new TimeoutException (
180- sprintf ('Timed out waiting for brancher Hypernode %s to be delivered ' , $ brancherHypernode )
249+ sprintf ('Timed out waiting for brancher Hypernode %s to become reachable ' , $ brancherHypernode )
181250 );
182251 }
183252
253+ $ this ->log ->info (
254+ sprintf ('Brancher Hypernode %s became reachable! ' , $ brancherHypernode )
255+ );
256+ }
257+
/**
 * Poll SSH connectivity until enough consecutive checks succeed or a limit is hit.
 *
 * Elapsed-time bookkeeping is shared with the caller through $timeElapsed and $latest
 * (both passed by reference), so several polling phases can draw from one overall timeout.
 *
 * @param string $brancherHypernode            Name of the brancher Hypernode handed to the SSH poller
 * @param int    $requiredConsecutiveSuccesses Number of consecutive successful checks required
 * @param int    $maxFailedAttempts            Maximum failed checks before giving up
 *                                             (0 = no limit, rely on the timeout only)
 * @param int    $checkInterval                Seconds to sleep between checks
 * @param float  $timeElapsed                  Running total of elapsed seconds, updated in place
 * @param float  $latest                       Timestamp of the previous measurement, updated in place
 * @param int    $timeout                      Maximum total elapsed seconds allowed
 * @return bool True once the required number of consecutive checks succeeded; false when the
 *              failure limit or the timeout was reached first
 */
private function pollSshConnectivity(
    string $brancherHypernode,
    int $requiredConsecutiveSuccesses,
    int $maxFailedAttempts,
    int $checkInterval,
    float &$timeElapsed,
    float &$latest,
    int $timeout
): bool {
    $consecutiveSuccesses = 0;
    $failedAttempts = 0;

    while ($timeElapsed < $timeout) {
        // Account for the time spent since the previous measurement (poll + sleep).
        $now = $this->sshPoller->microtime();
        $timeElapsed += $now - $latest;
        $latest = $now;

        if ($this->sshPoller->poll($brancherHypernode)) {
            $consecutiveSuccesses++;
            $this->log->info(
                sprintf(
                    'Brancher Hypernode %s reachability check %d/%d succeeded.',
                    $brancherHypernode,
                    $consecutiveSuccesses,
                    $requiredConsecutiveSuccesses
                )
            );

            if ($consecutiveSuccesses >= $requiredConsecutiveSuccesses) {
                return true;
            }
        } else {
            if ($consecutiveSuccesses > 0) {
                $this->log->info(
                    sprintf(
                        'Brancher Hypernode %s reachability check failed, resetting counter (was at %d/%d).',
                        $brancherHypernode,
                        $consecutiveSuccesses,
                        $requiredConsecutiveSuccesses
                    )
                );
            }
            // A single failure breaks the streak; successes must be consecutive.
            $consecutiveSuccesses = 0;
            $failedAttempts++;

            // Give up as soon as the failure budget is spent (0 = unlimited) instead of
            // sleeping one more interval only to return false on the next iteration.
            if ($maxFailedAttempts > 0 && $failedAttempts >= $maxFailedAttempts) {
                return false;
            }
        }

        $this->sshPoller->sleep($checkInterval);
    }

    // Overall timeout reached without the required streak of successes.
    return false;
}
236326
237327 /**
0 commit comments