Skip to content

Commit 10bb656

Browse files
author
Janosch Machowinski
committed
fix: Fixed spurious wake-ups on ROS_TIME timers with ROS_TIME enabled
Added special handling for timers with a clock that has time override enabled. For these timer we should not compute a timeout, as the waitset is waken up by the associated guard condition. Before this change, the waitset could wait up, because of an expected ready timer, that was acutally not ready, as the time update to the ROS_TIME had not yet arrived. Signed-off-by: Janosch Machowinski <j.machowinski@nospam.org>
1 parent de9c525 commit 10bb656

File tree

2 files changed

+243
-28
lines changed

2 files changed

+243
-28
lines changed

rcl/src/rcl/wait.c

Lines changed: 102 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -541,54 +541,123 @@ rcl_wait(rcl_wait_set_t * wait_set, int64_t timeout)
541541
// By default, set the timer to block indefinitely if none of the below conditions are met.
542542
rmw_time_t * timeout_argument = NULL;
543543
rmw_time_t temporary_timeout_storage;
544+
bool is_non_blocking = timeout == 0;
544545

545-
bool is_timer_timeout = false;
546-
int64_t min_timeout = timeout > 0 ? timeout : INT64_MAX;
547-
{ // scope to prevent i from colliding below
548-
uint64_t i = 0;
549-
for (i = 0; i < wait_set->impl->timer_index; ++i) {
550-
if (!wait_set->timers[i]) {
546+
for (uint64_t t_idx = 0; t_idx < wait_set->impl->timer_index; ++t_idx) {
547+
if (!wait_set->timers[t_idx]) {
548+
continue; // Skip NULL timers.
549+
}
550+
rmw_guard_conditions_t * rmw_gcs = &(wait_set->impl->rmw_guard_conditions);
551+
size_t gc_idx = wait_set->size_of_guard_conditions + t_idx;
552+
if (NULL != rmw_gcs->guard_conditions[gc_idx]) {
553+
// This timer has a guard condition, so move it to make a legal wait set.
554+
rmw_gcs->guard_conditions[rmw_gcs->guard_condition_count] =
555+
rmw_gcs->guard_conditions[gc_idx];
556+
++(rmw_gcs->guard_condition_count);
557+
}
558+
}
559+
560+
int64_t min_next_call_time[RCL_STEADY_TIME + 1];
561+
rcl_clock_t * clocks[RCL_STEADY_TIME + 1] = {0, 0, 0, 0};
562+
563+
min_next_call_time[RCL_ROS_TIME] = INT64_MAX;
564+
min_next_call_time[RCL_SYSTEM_TIME] = INT64_MAX;
565+
min_next_call_time[RCL_STEADY_TIME] = INT64_MAX;
566+
567+
if (!is_non_blocking) {
568+
for (size_t t_idx = 0; t_idx < wait_set->impl->timer_index; ++t_idx) {
569+
if (!wait_set->timers[t_idx]) {
551570
continue; // Skip NULL timers.
552571
}
553-
rmw_guard_conditions_t * rmw_gcs = &(wait_set->impl->rmw_guard_conditions);
554-
size_t gc_idx = wait_set->size_of_guard_conditions + i;
555-
if (NULL != rmw_gcs->guard_conditions[gc_idx]) {
556-
// This timer has a guard condition, so move it to make a legal wait set.
557-
rmw_gcs->guard_conditions[rmw_gcs->guard_condition_count] =
558-
rmw_gcs->guard_conditions[gc_idx];
559-
++(rmw_gcs->guard_condition_count);
572+
573+
rcl_clock_t * clock;
574+
if (rcl_timer_clock(wait_set->timers[t_idx], &clock) != RCL_RET_OK) {
575+
// should never happen
576+
return RCL_RET_ERROR;
560577
}
561-
// use timer time to to set the rmw_wait timeout
562-
// TODO(sloretz) fix spurious wake-ups on ROS_TIME timers with ROS_TIME enabled
563-
int64_t timer_timeout = INT64_MAX;
564-
rcl_ret_t ret = rcl_timer_get_time_until_next_call(wait_set->timers[i], &timer_timeout);
578+
579+
if (clock->type == RCL_ROS_TIME) {
580+
bool timer_override_active = false;
581+
if (rcl_is_enabled_ros_time_override(clock, &timer_override_active) != RCL_RET_OK) {
582+
// should never happen
583+
return RCL_RET_ERROR;
584+
}
585+
586+
if (timer_override_active) {
587+
//we need to check, it the timer is already ready
588+
bool override_timer_is_ready = false;
589+
if (rcl_timer_is_ready(wait_set->timers[t_idx], &override_timer_is_ready) != RCL_RET_OK) {
590+
// should never happen
591+
return RCL_RET_ERROR;
592+
}
593+
594+
if (override_timer_is_ready) {
595+
// no need to search further for the timeout, we need to wake up instantly
596+
is_non_blocking = true;
597+
break;
598+
}
599+
600+
// if the timer override is active, there is no point in computing a wait time,
601+
// as it might be on a total wrong time basis. In case this timer becomes ready,
602+
// the guard_condition above will wake us.
603+
continue;
604+
}
605+
}
606+
607+
// get the time of the next call to the timer
608+
int64_t next_call_time = INT64_MAX;
609+
rcl_ret_t ret = rcl_timer_get_next_call_time(wait_set->timers[t_idx], &next_call_time);
565610
if (ret == RCL_RET_TIMER_CANCELED) {
566-
wait_set->timers[i] = NULL;
611+
wait_set->timers[t_idx] = NULL;
567612
continue;
568613
}
569614
if (ret != RCL_RET_OK) {
570615
return ret; // The rcl error state should already be set.
571616
}
572-
if (timer_timeout < min_timeout) {
573-
is_timer_timeout = true;
574-
min_timeout = timer_timeout;
617+
if (next_call_time < min_next_call_time[clock->type]) {
618+
clocks[clock->type] = clock;
619+
min_next_call_time[clock->type] = next_call_time;
575620
}
576621
}
577622
}
578623

579-
if (timeout == 0) {
580-
// Then it is non-blocking, so set the temporary storage to 0, 0 and pass it.
624+
if (is_non_blocking) {
581625
temporary_timeout_storage.sec = 0;
582626
temporary_timeout_storage.nsec = 0;
583627
timeout_argument = &temporary_timeout_storage;
584-
} else if (timeout > 0 || is_timer_timeout) {
628+
} else {
629+
int64_t min_timeout = timeout > 0 ? timeout : INT64_MAX;
630+
bool has_valid_timeout = timeout > 0;
631+
632+
// determine the min timeout of all clocks
633+
for (size_t i = RCL_ROS_TIME; i <= RCL_STEADY_TIME; i++) {
634+
if (clocks[i] == 0) {
635+
continue;
636+
}
637+
638+
int64_t cur_time;
639+
rmw_ret_t ret = rcl_clock_get_now(clocks[i], &cur_time);
640+
if (ret != RCL_RET_OK) {
641+
return ret; // The rcl error state should already be set.
642+
}
643+
644+
int64_t timer_timeout = min_next_call_time[i] - cur_time;
645+
646+
if (timer_timeout <= min_timeout) {
647+
has_valid_timeout = true;
648+
min_timeout = timer_timeout;
649+
}
650+
}
651+
585652
// If min_timeout was negative, we need to wake up immediately.
586653
if (min_timeout < 0) {
587654
min_timeout = 0;
588655
}
589-
temporary_timeout_storage.sec = RCL_NS_TO_S(min_timeout);
590-
temporary_timeout_storage.nsec = min_timeout % 1000000000;
591-
timeout_argument = &temporary_timeout_storage;
656+
if (has_valid_timeout) {
657+
temporary_timeout_storage.sec = RCL_NS_TO_S(min_timeout);
658+
temporary_timeout_storage.nsec = min_timeout % 1000000000;
659+
timeout_argument = &temporary_timeout_storage;
660+
}
592661
}
593662

594663
// Wait.
@@ -604,20 +673,25 @@ rcl_wait(rcl_wait_set_t * wait_set, int64_t timeout)
604673
// Items that are not ready will have been set to NULL by rmw_wait.
605674
// We now update our handles accordingly.
606675

676+
bool timer_is_ready = false;
677+
607678
// Check for ready timers
608679
// and set not ready timers (which includes canceled timers) to NULL.
609680
size_t i;
610681
for (i = 0; i < wait_set->impl->timer_index; ++i) {
611682
if (!wait_set->timers[i]) {
612683
continue;
613684
}
685+
614686
bool is_ready = false;
615687
rcl_ret_t ret = rcl_timer_is_ready(wait_set->timers[i], &is_ready);
616688
if (ret != RCL_RET_OK) {
617689
return ret; // The rcl error state should already be set.
618690
}
619691
if (!is_ready) {
620692
wait_set->timers[i] = NULL;
693+
} else {
694+
timer_is_ready = true;
621695
}
622696
}
623697
// Check for timeout, return RCL_RET_TIMEOUT only if it wasn't a timer.
@@ -661,7 +735,7 @@ rcl_wait(rcl_wait_set_t * wait_set, int64_t timeout)
661735
}
662736
}
663737

664-
if (RMW_RET_TIMEOUT == ret && !is_timer_timeout) {
738+
if (RMW_RET_TIMEOUT == ret && !timer_is_ready) {
665739
return RCL_RET_TIMEOUT;
666740
}
667741
return RCL_RET_OK;

rcl/test/rcl/test_wait.cpp

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,147 @@ TEST_F(WaitSetTestFixture, zero_timeout_overrun_timer) {
321321
EXPECT_LE(diff, RCL_MS_TO_NS(50));
322322
}
323323

324+
// Test rcl_wait with a timeout value and an overrun timer
325+
TEST_F(WaitSetTestFixture, no_wakeup_on_override_timer) {
326+
rcl_wait_set_t wait_set = rcl_get_zero_initialized_wait_set();
327+
rcl_ret_t ret =
328+
rcl_wait_set_init(&wait_set, 0, 0, 1, 0, 0, 0, context_ptr, rcl_get_default_allocator());
329+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
330+
OSRF_TESTING_TOOLS_CPP_SCOPE_EXIT(
331+
{
332+
ret = rcl_wait_set_fini(&wait_set);
333+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
334+
});
335+
rcl_clock_t clock;
336+
rcl_allocator_t allocator = rcl_get_default_allocator();
337+
ret = rcl_clock_init(RCL_ROS_TIME, &clock, &allocator);
338+
OSRF_TESTING_TOOLS_CPP_SCOPE_EXIT(
339+
{
340+
ret = rcl_clock_fini(&clock);
341+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
342+
});
343+
ASSERT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
344+
345+
ret = rcl_enable_ros_time_override(&clock);
346+
ASSERT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
347+
348+
rcl_timer_t timer = rcl_get_zero_initialized_timer();
349+
ret = rcl_timer_init2(
350+
&timer, &clock, this->context_ptr, RCL_MS_TO_NS(10), nullptr, rcl_get_default_allocator(),
351+
true);
352+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
353+
OSRF_TESTING_TOOLS_CPP_SCOPE_EXIT(
354+
{
355+
ret = rcl_timer_fini(&timer);
356+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
357+
});
358+
ret = rcl_wait_set_add_timer(&wait_set, &timer, NULL);
359+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
360+
361+
// Time spent during wait should be negligible, definitely less than the given timeout
362+
std::chrono::steady_clock::time_point before_sc = std::chrono::steady_clock::now();
363+
ret = rcl_wait(&wait_set, RCL_MS_TO_NS(100));
364+
std::chrono::steady_clock::time_point after_sc = std::chrono::steady_clock::now();
365+
// We don't expect a timeout here (since the guard condition had already been triggered)
366+
ASSERT_EQ(RCL_RET_TIMEOUT, ret) << rcl_get_error_string().str;
367+
int64_t diff = std::chrono::duration_cast<std::chrono::nanoseconds>(after_sc - before_sc).count();
368+
EXPECT_GE(diff, RCL_MS_TO_NS(100));
369+
}
370+
371+
// Test rcl_wait with a timeout value and an overrun timer
372+
TEST_F(WaitSetTestFixture, wakeup_on_override_timer) {
373+
rcl_wait_set_t wait_set = rcl_get_zero_initialized_wait_set();
374+
rcl_ret_t ret =
375+
rcl_wait_set_init(&wait_set, 0, 0, 1, 0, 0, 0, context_ptr, rcl_get_default_allocator());
376+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
377+
OSRF_TESTING_TOOLS_CPP_SCOPE_EXIT(
378+
{
379+
ret = rcl_wait_set_fini(&wait_set);
380+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
381+
});
382+
rcl_clock_t clock;
383+
rcl_allocator_t allocator = rcl_get_default_allocator();
384+
ret = rcl_clock_init(RCL_ROS_TIME, &clock, &allocator);
385+
OSRF_TESTING_TOOLS_CPP_SCOPE_EXIT(
386+
{
387+
ret = rcl_clock_fini(&clock);
388+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
389+
});
390+
ASSERT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
391+
392+
ret = rcl_enable_ros_time_override(&clock);
393+
ASSERT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
394+
395+
rcl_timer_t timer = rcl_get_zero_initialized_timer();
396+
ret = rcl_timer_init2(
397+
&timer, &clock, this->context_ptr, RCL_MS_TO_NS(10), nullptr, rcl_get_default_allocator(),
398+
true);
399+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
400+
OSRF_TESTING_TOOLS_CPP_SCOPE_EXIT(
401+
{
402+
ret = rcl_timer_fini(&timer);
403+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
404+
});
405+
ret = rcl_wait_set_add_timer(&wait_set, &timer, NULL);
406+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
407+
408+
// Time spent during wait should be negligible, definitely less than the given timeout
409+
std::chrono::steady_clock::time_point before_sc = std::chrono::steady_clock::now();
410+
411+
rcl_ret_t wait_ret = RCL_RET_ERROR;
412+
std::atomic_bool wait_done = false;
413+
std::thread t([&wait_ret, &wait_set, &wait_done]() {
414+
wait_ret = rcl_wait(&wait_set, RCL_MS_TO_NS(100));
415+
wait_done = true;
416+
});
417+
418+
ret = rcl_set_ros_time_override(&clock, RCL_MS_TO_NS(5));
419+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
420+
421+
std::this_thread::sleep_for(std::chrono::milliseconds(15));
422+
EXPECT_EQ(wait_done, false) << "Error, wait terminated to early";
423+
424+
ret = rcl_set_ros_time_override(&clock, RCL_MS_TO_NS(15));
425+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
426+
427+
std::this_thread::sleep_for(std::chrono::milliseconds(5));
428+
429+
EXPECT_EQ(wait_done, true) << "Error, wait waken up by time jump";
430+
431+
EXPECT_NE(wait_ret, RCL_RET_TIMEOUT);
432+
433+
t.join();
434+
435+
wait_done = false;
436+
wait_ret = RCL_RET_ERROR;
437+
438+
ret = rcl_wait_set_clear(&wait_set);
439+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
440+
ret = rcl_wait_set_add_timer(&wait_set, &timer, NULL);
441+
EXPECT_EQ(RCL_RET_OK, ret) << rcl_get_error_string().str;
442+
443+
// now all guard conditions are cleared, if we wait again,
444+
// we should still wake up instantly as next call time was not advance yet
445+
// and therefore the timer should still be ready
446+
t = std::thread(
447+
[&wait_ret, &wait_set, &wait_done]() {
448+
wait_ret = rcl_wait(&wait_set, RCL_MS_TO_NS(100));
449+
wait_done = true;
450+
});
451+
452+
std::this_thread::sleep_for(std::chrono::milliseconds(5));
453+
454+
EXPECT_EQ(wait_done, true) << "Error, wait waken up by time jump";
455+
456+
EXPECT_NE(wait_ret, RCL_RET_TIMEOUT);
457+
458+
t.join();
459+
460+
std::chrono::steady_clock::time_point after_sc = std::chrono::steady_clock::now();
461+
int64_t diff = std::chrono::duration_cast<std::chrono::nanoseconds>(after_sc - before_sc).count();
462+
EXPECT_LT(diff, RCL_MS_TO_NS(100));
463+
}
464+
324465
// Check that a canceled timer doesn't wake up rcl_wait
325466
TEST_F(WaitSetTestFixture, canceled_timer) {
326467
rcl_wait_set_t wait_set = rcl_get_zero_initialized_wait_set();

0 commit comments

Comments
 (0)