Skip to content

Commit 5b638bc

Browse files
committed
Brancher: ping host before logbook poll
This makes it possible to reuse a Brancher node that no longer has any items in its logbook, which typically happens on older Brancher nodes. Fixes #185
1 parent 8957106 commit 5b638bc

File tree

6 files changed

+505
-36
lines changed

6 files changed

+505
-36
lines changed

grumphp.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ grumphp:
55
warning_severity: 0
66
whitelist_patterns:
77
- /^src\/(.*)/
8+
- /^tests\/(.*)/
89
triggered_by: [php]
910
psalm:
1011
config: psalm.xml

psalm.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<?xml version="1.0"?>
22
<psalm
3-
phpVersion="8.4"
3+
phpVersion="8.1"
44
errorLevel="4"
55
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
66
xmlns="https://getpsalm.org/schema/config"

src/Brancher/BrancherHypernodeManager.php

Lines changed: 125 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
<?php
22

3+
declare(strict_types=1);
4+
35
namespace Hypernode\Deploy\Brancher;
46

57
use Hypernode\Api\Exception\HypernodeApiClientException;
@@ -14,13 +16,26 @@
1416

1517
class BrancherHypernodeManager
1618
{
19+
/**
20+
* Relevant flow names to poll for delivery
21+
*
22+
* @var string[]
23+
*/
24+
public const RELEVANT_FLOW_NAMES = ['ensure_app', 'ensure_copied_app'];
25+
1726
private LoggerInterface $log;
1827
private HypernodeClient $hypernodeClient;
28+
private SshPoller $sshPoller;
1929

20-
public function __construct(LoggerInterface $log)
21-
{
30+
public function __construct(
31+
LoggerInterface $log,
32+
?HypernodeClient $hypernodeClient = null,
33+
?SshPoller $sshPoller = null
34+
) {
2235
$this->log = $log;
23-
$this->hypernodeClient = HypernodeClientFactory::create(getenv('HYPERNODE_API_TOKEN') ?: '');
36+
$this->hypernodeClient = $hypernodeClient
37+
?? HypernodeClientFactory::create(getenv('HYPERNODE_API_TOKEN') ?: '');
38+
$this->sshPoller = $sshPoller ?? new SshPoller();
2439
}
2540

2641
/**
@@ -105,6 +120,11 @@ public function createForHypernode(string $hypernode, array $data = []): string
105120
/**
106121
* Wait for brancher Hypernode to become available.
107122
*
123+
* This method first attempts a quick SSH connectivity check. If the brancher is already
124+
* reachable (e.g., when reusing an existing brancher), it returns early. Otherwise, it
125+
* falls back to polling the API logbook for delivery status, then performs a final SSH
126+
* reachability check.
127+
*
108128
* @param string $brancherHypernode Name of the brancher Hypernode
109129
* @param int $timeout Maximum time to wait for availability
110130
* @param int $reachabilityCheckCount Number of consecutive successful checks required
@@ -121,24 +141,58 @@ public function waitForAvailability(
121141
int $reachabilityCheckCount = 6,
122142
int $reachabilityCheckInterval = 10
123143
): void {
124-
$latest = microtime(true);
125-
$timeElapsed = 0;
144+
$latest = $this->sshPoller->microtime();
145+
$timeElapsed = 0.0;
146+
147+
// Phase 1: SSH-first check, early return for reused delivered branchers
148+
$this->log->info(
149+
sprintf('Attempting SSH connectivity check for brancher Hypernode %s...', $brancherHypernode)
150+
);
151+
152+
$isReachable = $this->pollSshConnectivity(
153+
$brancherHypernode,
154+
3,
155+
5,
156+
$reachabilityCheckInterval,
157+
$timeElapsed,
158+
$latest,
159+
$timeout
160+
);
161+
if ($isReachable) {
162+
$this->log->info(
163+
sprintf('Brancher Hypernode %s is reachable!', $brancherHypernode)
164+
);
165+
return;
166+
}
167+
168+
$this->log->info(
169+
sprintf(
170+
'SSH check inconclusive for brancher Hypernode %s, falling back to delivery check...',
171+
$brancherHypernode
172+
)
173+
);
174+
175+
// Phase 2: Wait for delivery by polling the logbook
126176
$resolved = false;
127177
$interval = 3;
128178
$allowedErrorWindow = 3;
179+
$logbookStartTime = $timeElapsed;
129180

130181
while ($timeElapsed < $timeout) {
131-
$now = microtime(true);
182+
$now = $this->sshPoller->microtime();
132183
$timeElapsed += $now - $latest;
133184
$latest = $now;
134185

135186
try {
136187
$flows = $this->hypernodeClient->logbook->getList($brancherHypernode);
137-
$relevantFlows = array_filter($flows, fn(Flow $flow) => in_array($flow->name, ["ensure_app", "ensure_copied_app"], true));
188+
$relevantFlows = array_filter(
189+
$flows,
190+
fn(Flow $flow) => in_array($flow->name, self::RELEVANT_FLOW_NAMES, true)
191+
);
138192
$failedFlows = array_filter($relevantFlows, fn(Flow $flow) => $flow->isReverted());
139193
$completedFlows = array_filter($relevantFlows, fn(Flow $flow) => $flow->isComplete());
140194

141-
if (count($failedFlows) === count($relevantFlows)) {
195+
if (count($relevantFlows) > 0 && count($failedFlows) === count($relevantFlows)) {
142196
throw new CreateBrancherHypernodeFailedException();
143197
}
144198

@@ -151,21 +205,26 @@ public function waitForAvailability(
151205
// Otherwise, there's an error, and it should be propagated.
152206
if ($e->getCode() !== 404) {
153207
throw $e;
154-
} elseif ($timeElapsed < $allowedErrorWindow) {
208+
} elseif (($timeElapsed - $logbookStartTime) < $allowedErrorWindow) {
155209
// Sometimes we get an error where the logbook is not yet available, but it will be soon.
156-
// We allow a small window for this to happen, and then we throw an exception.
210+
// We allow a small window for this to happen, and then we continue polling.
157211
$this->log->info(
158212
sprintf(
159213
'Got an expected exception during the allowed error window of HTTP code %d, waiting for %s to become available.',
160214
$e->getCode(),
161215
$brancherHypernode
162216
)
163217
);
164-
continue;
165218
}
166219
}
167220

168-
sleep($interval);
221+
$this->sshPoller->sleep($interval);
222+
}
223+
224+
if (!$resolved) {
225+
throw new TimeoutException(
226+
sprintf('Timed out waiting for brancher Hypernode %s to be delivered', $brancherHypernode)
227+
);
169228
}
170229

171230
$this->log->info(
@@ -175,63 +234,94 @@ public function waitForAvailability(
175234
)
176235
);
177236

178-
if (!$resolved) {
237+
// Phase 3: Final SSH reachability check
238+
$isReachable = $this->pollSshConnectivity(
239+
$brancherHypernode,
240+
$reachabilityCheckCount,
241+
0, // No max failures, rely on timeout
242+
$reachabilityCheckInterval,
243+
$timeElapsed,
244+
$latest,
245+
$timeout
246+
);
247+
if (!$isReachable) {
179248
throw new TimeoutException(
180-
sprintf('Timed out waiting for brancher Hypernode %s to be delivered', $brancherHypernode)
249+
sprintf('Timed out waiting for brancher Hypernode %s to become reachable', $brancherHypernode)
181250
);
182251
}
183252

253+
$this->log->info(
254+
sprintf('Brancher Hypernode %s became reachable!', $brancherHypernode)
255+
);
256+
}
257+
258+
/**
259+
* Poll SSH connectivity until we get enough consecutive successes or hit a limit.
260+
*
261+
* @param string $brancherHypernode Hostname to check
262+
* @param int $requiredConsecutiveSuccesses Number of consecutive successes required
263+
* @param int $maxFailedAttempts Maximum failed attempts before giving up (0 = no limit, use timeout only)
264+
* @param int $checkInterval Seconds between checks
265+
* @param float $timeElapsed Reference to track elapsed time
266+
* @param float $latest Reference to track latest timestamp
267+
* @param int $timeout Maximum time allowed
268+
* @return bool True if SSH check succeeded, false if we should fall back to other methods
269+
*/
270+
private function pollSshConnectivity(
271+
string $brancherHypernode,
272+
int $requiredConsecutiveSuccesses,
273+
int $maxFailedAttempts,
274+
int $checkInterval,
275+
float &$timeElapsed,
276+
float &$latest,
277+
int $timeout
278+
): bool {
184279
$consecutiveSuccesses = 0;
280+
$failedAttempts = 0;
281+
185282
while ($timeElapsed < $timeout) {
186-
$now = microtime(true);
283+
$now = $this->sshPoller->microtime();
187284
$timeElapsed += $now - $latest;
188285
$latest = $now;
189286

190-
$connection = @fsockopen(sprintf("%s.hypernode.io", $brancherHypernode), 22);
191-
if ($connection) {
192-
fclose($connection);
287+
// Check if we've hit the max failed attempts limit (0 = unlimited)
288+
if ($maxFailedAttempts > 0 && $failedAttempts >= $maxFailedAttempts) {
289+
return false;
290+
}
291+
292+
if ($this->sshPoller->poll($brancherHypernode)) {
193293
$consecutiveSuccesses++;
194294
$this->log->info(
195295
sprintf(
196296
'Brancher Hypernode %s reachability check %d/%d succeeded.',
197297
$brancherHypernode,
198298
$consecutiveSuccesses,
199-
$reachabilityCheckCount
299+
$requiredConsecutiveSuccesses
200300
)
201301
);
202302

203-
if ($consecutiveSuccesses >= $reachabilityCheckCount) {
204-
break;
303+
if ($consecutiveSuccesses >= $requiredConsecutiveSuccesses) {
304+
return true;
205305
}
206-
sleep($reachabilityCheckInterval);
207306
} else {
208307
if ($consecutiveSuccesses > 0) {
209308
$this->log->info(
210309
sprintf(
211310
'Brancher Hypernode %s reachability check failed, resetting counter (was at %d/%d).',
212311
$brancherHypernode,
213312
$consecutiveSuccesses,
214-
$reachabilityCheckCount
313+
$requiredConsecutiveSuccesses
215314
)
216315
);
217316
}
218317
$consecutiveSuccesses = 0;
219-
sleep($reachabilityCheckInterval);
318+
$failedAttempts++;
220319
}
221-
}
222320

223-
if ($consecutiveSuccesses < $reachabilityCheckCount) {
224-
throw new TimeoutException(
225-
sprintf('Timed out waiting for brancher Hypernode %s to become reachable', $brancherHypernode)
226-
);
321+
$this->sshPoller->sleep($checkInterval);
227322
}
228323

229-
$this->log->info(
230-
sprintf(
231-
'Brancher Hypernode %s became reachable!',
232-
$brancherHypernode
233-
)
234-
);
324+
return false;
235325
}
236326

237327
/**

src/Brancher/SshPoller.php

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Hypernode\Deploy\Brancher;
6+
7+
class SshPoller
{
    /**
     * TCP port probed by {@see SshPoller::poll()}.
     */
    private const SSH_PORT = 22;

    /**
     * Upper bound, in seconds, for a single TCP connection attempt.
     *
     * Without an explicit value fsockopen() falls back to the
     * default_socket_timeout ini setting (60 seconds unless reconfigured), so a
     * single probe against a host that silently drops packets could block far
     * longer than expected from a quick reachability check.
     */
    private const CONNECT_TIMEOUT_SECONDS = 10.0;

    /**
     * Check if the SSH port is reachable on the given hostname.
     *
     * Opens a TCP connection to "<hostname>.hypernode.io" on port 22 and closes
     * it again right away; no SSH handshake is performed.
     *
     * @param string $hostname The hostname to check (without .hypernode.io suffix)
     * @return bool True if a TCP connection to port 22 could be opened within the connect timeout
     */
    public function poll(string $hostname): bool
    {
        $errorCode = 0;
        $errorMessage = '';

        // fsockopen() emits a warning when the connection fails. An unreachable
        // host is an expected outcome here, so the warning is suppressed and the
        // failure is reported through the return value instead.
        $connection = @fsockopen(
            sprintf('%s.hypernode.io', $hostname),
            self::SSH_PORT,
            $errorCode,
            $errorMessage,
            self::CONNECT_TIMEOUT_SECONDS
        );

        if ($connection === false) {
            return false;
        }

        fclose($connection);

        return true;
    }

    /**
     * Sleep for the given number of seconds.
     *
     * Thin wrapper around the global sleep() function.
     *
     * @param int $seconds Number of seconds to sleep
     */
    public function sleep(int $seconds): void
    {
        sleep($seconds);
    }

    /**
     * Get the current Unix timestamp with microsecond precision.
     *
     * Thin wrapper around microtime(true).
     *
     * @return float Seconds since the Unix epoch, including the fractional part
     */
    public function microtime(): float
    {
        return microtime(true);
    }
}

0 commit comments

Comments
 (0)