@@ -215,46 +215,58 @@ function createState(string $dsn): State
215215
216216 $ healthy = true ;
217217 foreach ($ health ->run ()->getNodes () as $ node ) {
218- $ hostname = $ node ->getHostname ();
219- $ executor = $ executors [$ hostname ] ?? [];
220- $ newStatus = $ node ->isOnline () ? 'online ' : 'offline ' ;
221-
222- if ($ firstCheck || Http::isDevelopment () || $ executor ['status ' ] !== $ newStatus ) {
223- if ($ newStatus === 'online ' ) {
224- Console::info ('Executor " ' . $ hostname . '" went online ' );
225- } else {
226- $ message = $ node ->getState ()['message ' ] ?? 'Unexpected error. ' ;
227- $ error = new Exception ('Executor " ' . $ hostname . '" went offline: ' . $ message , 500 );
228- logError ($ error , "healthCheckError " , $ logger , null );
218+ try {
219+ $ hostname = $ node ->getHostname ();
220+ $ executor = $ executors [$ hostname ] ?? [];
221+ $ newStatus = $ node ->isOnline () ? 'online ' : 'offline ' ;
222+
223+ if ($ firstCheck || Http::isDevelopment () || $ executor ['status ' ] !== $ newStatus ) {
224+ if ($ newStatus === 'online ' ) {
225+ Console::info ('Executor " ' . $ hostname . '" went online ' );
226+ } else {
227+ $ message = $ node ->getState ()['message ' ] ?? 'Unexpected error. ' ;
228+ $ error = new Exception ('Executor " ' . $ hostname . '" went offline: ' . $ message , 500 );
229+ logError ($ error , "healthCheckError " , $ logger , null );
230+ }
229231 }
230- }
231232
232- if (!$ node ->isOnline ()) {
233- $ healthy = false ;
234- }
233+ if (!$ node ->isOnline ()) {
234+ $ healthy = false ;
235+ }
235236
236- $ state ->save (
237- resource: RESOURCE_EXECUTORS ,
238- name: $ hostname ,
239- status: $ node ->isOnline () ? 'online ' : 'offline ' ,
240- usage: $ node ->getState ()['usage ' ] ?? 0
241- );
237+ $ state ->save (
238+ resource: RESOURCE_EXECUTORS ,
239+ name: $ hostname ,
240+ status: $ node ->isOnline () ? 'online ' : 'offline ' ,
241+ usage: $ node ->getState ()['usage ' ] ?? 0
242+ );
242243
243- $ runtimes = [];
244+ $ runtimes = [];
244245
245- Console::log ('Executor " ' . $ hostname . '" healthcheck returned ' . \count ($ node ->getState ()['runtimes ' ] ?? []) . ' runtimes ' );
246- foreach ($ node ->getState ()['runtimes ' ] ?? [] as $ runtimeId => $ runtime ) {
247- if (!\is_string ($ runtimeId ) || !\is_array ($ runtime )) {
248- Console::warning ('Invalid runtime data for ' . $ hostname . ' runtime ' . $ runtimeId );
249- continue ;
250- }
246+ Console::log ('Executor " ' . $ hostname . '" healthcheck returned ' . \count ($ node ->getState ()['runtimes ' ] ?? []) . ' runtimes ' );
247+ foreach ($ node ->getState ()['runtimes ' ] ?? [] as $ runtimeId => $ runtime ) {
248+ if (!\is_string ($ runtimeId ) || !\is_array ($ runtime )) {
249+ Console::warning ('Invalid runtime data for ' . $ hostname . ' runtime ' . $ runtimeId );
250+ continue ;
251+ }
251252
252- $ runtimes [$ runtimeId ] = [
253- 'status ' => $ runtime ['status ' ] ?? 'offline ' ,
254- 'usage ' => $ runtime ['usage ' ] ?? 100 ,
255- ];
253+ $ runtimes [$ runtimeId ] = [
254+ 'status ' => $ runtime ['status ' ] ?? 'offline ' ,
255+ 'usage ' => $ runtime ['usage ' ] ?? 100 ,
256+ ];
257+ }
258+ $ state ->saveAll (RESOURCE_RUNTIMES . $ hostname , $ runtimes );
259+ } catch (\Throwable $ th ) {
260+ try {
261+ $ healthy = false ;
262+ Console::warning ('Health check failed for ' . $ node ->getHostname () . ': ' . $ th ->getMessage () . ' - removing from state ' );
263+
264+ $ state ->save (RESOURCE_EXECUTORS , $ node ->getHostname (), 'offline ' , 100 );
265+ $ state ->saveAll (RESOURCE_RUNTIMES . $ node ->getHostname (), []);
266+ } catch (\Throwable $ th ) {
267+ Console::warning ('Failed to remove executor from state: ' . $ th ->getMessage ());
268+ }
256269 }
257- $ state ->saveAll (RESOURCE_RUNTIMES . $ hostname , $ runtimes );
258270 }
259271
260272 if (Http::getEnv ('OPR_PROXY_HEALTHCHECK_URL ' , '' ) !== '' && $ healthy ) {
0 commit comments