Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added test cases for missing events & double call to take_data #2371

Closed
wants to merge 7 commits into from
228 changes: 226 additions & 2 deletions rclcpp/test/rclcpp/executors/test_executors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,32 @@

using namespace std::chrono_literals;


/// Test fixture that only initializes rclcpp and creates one node per test.
/**
 * The template parameter is the executor type under test; the fixture itself
 * does not instantiate it. The node is created with the name "node" and a
 * second constructor argument built from the gtest suite and test name, so
 * every test gets its own distinct value.
 */
template<typename T>
class TestExecutorsOnlyNode : public ::testing::Test
{
public:
  /// Initialize rclcpp and create the node used by the test body.
  void SetUp()
  {
    rclcpp::init(0, nullptr);

    const auto * current_test = ::testing::UnitTest::GetInstance()->current_test_info();

    // "<suite name>_<test name>" keeps nodes of different tests apart.
    std::stringstream unique_name;
    unique_name << current_test->test_case_name() << "_" << current_test->name();

    node = std::make_shared<rclcpp::Node>("node", unique_name.str());
  }

  /// Release the node first, then shut rclcpp down again.
  void TearDown()
  {
    node.reset();

    rclcpp::shutdown();
  }

  /// Node shared with the test body; valid between SetUp() and TearDown().
  rclcpp::Node::SharedPtr node;
};

template<typename T>
class TestExecutors : public ::testing::Test
{
Expand Down Expand Up @@ -122,6 +148,8 @@ class ExecutorTypeNames
// is updated.
TYPED_TEST_SUITE(TestExecutors, ExecutorTypes, ExecutorTypeNames);

// Run the node-only fixture against the same executor type list, using the
// same type-name generator as the TestExecutors suite registered above.
TYPED_TEST_SUITE(TestExecutorsOnlyNode, ExecutorTypes, ExecutorTypeNames);

// StaticSingleThreadedExecutor is not included in these tests for now, due to:
// https://github.com/ros2/rclcpp/issues/1219
using StandardExecutors =
Expand Down Expand Up @@ -392,13 +420,24 @@ class TestWaitable : public rclcpp::Waitable
/// Report whether this waitable's guard condition is part of the given wait set.
/**
 * Scans the guard-condition slots of the wait set for the address of gc_.
 * When it is found, the call is recorded in is_ready_called_before_take_data
 * so that take_data() can verify it was preceded by a successful is_ready().
 *
 * The previous unconditional `return true` is removed: it made the scan below
 * unreachable, so the flag was never set and take_data() would always throw.
 *
 * \param[in] wait_set wait set to inspect, must not be null
 * \return true if the guard condition was found in the wait set, else false
 */
bool
is_ready(rcl_wait_set_t * wait_set) override
{
  for (size_t i = 0; i < wait_set->size_of_guard_conditions; ++i) {
    if (&gc_.get_rcl_guard_condition() == wait_set->guard_conditions[i]) {
      is_ready_called_before_take_data = true;
      return true;
    }
  }
  return false;
}

/// Consume the "ready" mark left by is_ready() and hand back an empty payload.
/**
 * \return always nullptr, this waitable carries no data
 * \throws std::runtime_error if no successful is_ready() call happened since
 *   the last take_data()
 */
std::shared_ptr<void>
take_data() override
{
  if (is_ready_called_before_take_data) {
    // Each successful is_ready() entitles the caller to exactly one take_data().
    is_ready_called_before_take_data = false;
    return nullptr;
  }

  throw std::runtime_error(
          "TestWaitable : Internal error, take data was called, but is_ready was not called before");
}

Expand Down Expand Up @@ -442,10 +481,12 @@ class TestWaitable : public rclcpp::Waitable
}

private:
bool is_ready_called_before_take_data = false;
size_t count_ = 0;
rclcpp::GuardCondition gc_;
};


TYPED_TEST(TestExecutors, spinAll)
{
using ExecutorType = TypeParam;
Expand Down Expand Up @@ -488,6 +529,189 @@ TYPED_TEST(TestExecutors, spinAll)
spinner.join();
}

// Checks that an executor does not hand out the same ready waitable twice.
//
// Three TestWaitables share one mutually exclusive callback group. After the
// first one has been executed the group is blocked, the waitables are
// re-triggered and the group is unblocked right between
// get_next_ready_executable() and wait_for_work(). Spinning afterwards must
// not throw, i.e. TestWaitable::take_data() must never be called without a
// preceding successful is_ready().
TEST(TestExecutorsOnlyNode, double_take_data)
{
  rclcpp::init(0, nullptr);

  const auto test_info = ::testing::UnitTest::GetInstance()->current_test_info();
  std::stringstream test_name;
  test_name << test_info->test_case_name() << "_" << test_info->name();
  rclcpp::Node::SharedPtr node = std::make_shared<rclcpp::Node>("node", test_name.str());

  class MyExecutor : public rclcpp::executors::SingleThreadedExecutor
  {
public:
    /**
     * This is a copy of Executor::get_next_executable with a callback, to test
     * for a special race condition.
     *
     * \param[out] any_executable filled with the next ready executable, if any
     * \param[in] timeout maximum time to wait for work
     * \param[in] inbetween invoked after the first lookup found nothing and
     *   before wait_for_work() is entered
     * \return true if an executable was found
     */
    bool get_next_executable_with_callback(
      rclcpp::AnyExecutable & any_executable,
      std::chrono::nanoseconds timeout,
      const std::function<void(void)> & inbetween)
    {
      // Check to see if there are any subscriptions or timers needing service
      // TODO(wjwwood): improve run to run efficiency of this function
      bool success = get_next_ready_executable(any_executable);
      // If there are none
      if (!success) {
        inbetween();

        // Wait for subscriptions or timers to work on
        wait_for_work(timeout);
        if (!spinning.load()) {
          return false;
        }
        // Try again
        success = get_next_ready_executable(any_executable);
      }
      return success;
    }

    /// Like spin_once(), but routed through get_next_executable_with_callback().
    void spin_once_with_callback(
      std::chrono::nanoseconds timeout,
      const std::function<void(void)> & inbetween)
    {
      rclcpp::AnyExecutable any_exec;
      if (get_next_executable_with_callback(any_exec, timeout, inbetween)) {
        execute_any_executable(any_exec);
      }
    }
  };

  MyExecutor executor;

  auto callback_group = node->create_callback_group(
    rclcpp::CallbackGroupType::MutuallyExclusive,
    true);

  std::vector<std::shared_ptr<TestWaitable>> waitables;

  auto waitable_interfaces = node->get_node_waitables_interface();

  for (int i = 0; i < 3; i++) {
    auto waitable = std::make_shared<TestWaitable>();
    waitables.push_back(waitable);
    waitable_interfaces->add_waitable(waitable, callback_group);
  }
  executor.add_node(node);

  for (auto & waitable : waitables) {
    waitable->trigger();
  }

  // a node has some default subscribers, that need to get executed first, therefore the loop
  for (int i = 0; i < 10; i++) {
    executor.spin_once(std::chrono::milliseconds(10));
    if (waitables.front()->get_count() > 0) {
      // stop execution, after the first waitable has been executed
      break;
    }
  }

  // expected value first and unsigned, matching the count's type
  EXPECT_EQ(1u, waitables.front()->get_count());

  // block the callback group, this is something that may happen during multi threaded execution
  // This removes the remaining waitables from the list of ready events,
  // and triggers a call to wait_for_work
  callback_group->can_be_taken_from().exchange(false);

  bool no_ready_executable = false;

  // there should be no ready events now, so the callback must be invoked
  executor.spin_once_with_callback(
    std::chrono::milliseconds(10), [&]() {
      no_ready_executable = true;
    });

  EXPECT_TRUE(no_ready_executable);

  // rearm, so that rmw_wait will push a second entry into the queue
  for (auto & waitable : waitables) {
    waitable->trigger();
  }

  no_ready_executable = false;

  // spin until the callback has run once, i.e. until no executable was ready
  while (!no_ready_executable) {
    executor.spin_once_with_callback(
      std::chrono::milliseconds(10), [&]() {
        // unblock the callback group
        callback_group->can_be_taken_from().exchange(true);

        no_ready_executable = true;
      });
  }
  EXPECT_TRUE(no_ready_executable);

  // now we process all events from get_next_ready_executable
  EXPECT_NO_THROW(
    for (int i = 0; i < 10; i++) {
      executor.spin_once(std::chrono::milliseconds(1));
    }
  );

  node.reset();

  rclcpp::shutdown();
}


// Two triggered waitables share one mutually exclusive callback group. After
// the first one ran, the group is blocked for one spin and then unblocked
// again; the second waitable is expected to run only after the unblock.
TYPED_TEST(TestExecutorsOnlyNode, missing_event)
{
  using ExecutorType = TypeParam;
  ExecutorType executor;

  const auto spin_timeout = std::chrono::milliseconds(10);

  rclcpp::Node::SharedPtr node(this->node);
  auto group = node->create_callback_group(
    rclcpp::CallbackGroupType::MutuallyExclusive,
    true);

  auto node_waitables = node->get_node_waitables_interface();
  auto first_waitable = std::make_shared<TestWaitable>();
  auto second_waitable = std::make_shared<TestWaitable>();
  node_waitables->add_waitable(first_waitable, group);
  node_waitables->add_waitable(second_waitable, group);
  executor.add_node(this->node);

  first_waitable->trigger();
  second_waitable->trigger();

  // A node has some default subscribers that need to get executed first, so
  // spin up to ten times and stop as soon as the first waitable has run.
  int spins = 0;
  do {
    executor.spin_once(spin_timeout);
    ++spins;
  } while (spins < 10 && !(first_waitable->get_count() > 0));

  EXPECT_EQ(1u, first_waitable->get_count());
  EXPECT_EQ(0u, second_waitable->get_count());

  // Block the callback group, as may happen during multi threaded execution.
  // This removes the second waitable from the list of ready events and
  // triggers a call to wait_for_work.
  group->can_be_taken_from().exchange(false);

  // With the group blocked there should be no ready event.
  executor.spin_once(spin_timeout);

  EXPECT_EQ(1u, first_waitable->get_count());
  EXPECT_EQ(0u, second_waitable->get_count());

  // Unblock the callback group again.
  group->can_be_taken_from().exchange(true);

  // Now the second waitable should get processed.
  executor.spin_once(spin_timeout);

  EXPECT_EQ(1u, first_waitable->get_count());
  EXPECT_EQ(1u, second_waitable->get_count());
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least on my machine, this test only fails for the static single threaded executor and the events executor:

% ~/ros2_ws/build/rclcpp/test/rclcpp/test_executors --gtest_filter=\*missing_event
Running main() from /Users/william/ros2_ws/install/src/gtest_vendor/src/gtest_main.cc
Note: Google Test filter = *missing_event
[==========] Running 4 tests from 4 test suites.
[----------] Global test environment set-up.
[----------] 1 test from TestExecutorsOnlyNode/SingleThreadedExecutor, where TypeParam = rclcpp::executors::SingleThreadedExecutor
[ RUN      ] TestExecutorsOnlyNode/SingleThreadedExecutor.missing_event
[       OK ] TestExecutorsOnlyNode/SingleThreadedExecutor.missing_event (116 ms)
[----------] 1 test from TestExecutorsOnlyNode/SingleThreadedExecutor (116 ms total)

[----------] 1 test from TestExecutorsOnlyNode/MultiThreadedExecutor, where TypeParam = rclcpp::executors::MultiThreadedExecutor
[ RUN      ] TestExecutorsOnlyNode/MultiThreadedExecutor.missing_event
[       OK ] TestExecutorsOnlyNode/MultiThreadedExecutor.missing_event (17 ms)
[----------] 1 test from TestExecutorsOnlyNode/MultiThreadedExecutor (17 ms total)

[----------] 1 test from TestExecutorsOnlyNode/StaticSingleThreadedExecutor, where TypeParam = rclcpp::executors::StaticSingleThreadedExecutor
[ RUN      ] TestExecutorsOnlyNode/StaticSingleThreadedExecutor.missing_event
/Users/william/ros2_ws/src/ros2/rclcpp/rclcpp/test/rclcpp/executors/test_executors.cpp:692: Failure
Expected equality of these values:
  1u
    Which is: 1
  my_waitable->get_count()
    Which is: 0
/Users/william/ros2_ws/src/ros2/rclcpp/rclcpp/test/rclcpp/executors/test_executors.cpp:702: Failure
Expected equality of these values:
  1u
    Which is: 1
  my_waitable->get_count()
    Which is: 0
/Users/william/ros2_ws/src/ros2/rclcpp/rclcpp/test/rclcpp/executors/test_executors.cpp:711: Failure
Expected equality of these values:
  1u
    Which is: 1
  my_waitable->get_count()
    Which is: 0
/Users/william/ros2_ws/src/ros2/rclcpp/rclcpp/test/rclcpp/executors/test_executors.cpp:712: Failure
Expected equality of these values:
  1u
    Which is: 1
  my_waitable2->get_count()
    Which is: 0
[  FAILED  ] TestExecutorsOnlyNode/StaticSingleThreadedExecutor.missing_event, where TypeParam = rclcpp::executors::StaticSingleThreadedExecutor (107 ms)
[----------] 1 test from TestExecutorsOnlyNode/StaticSingleThreadedExecutor (107 ms total)

[----------] 1 test from TestExecutorsOnlyNode/EventsExecutor, where TypeParam = rclcpp::experimental::executors::EventsExecutor
[ RUN      ] TestExecutorsOnlyNode/EventsExecutor.missing_event
/Users/william/ros2_ws/src/ros2/rclcpp/rclcpp/test/rclcpp/executors/test_executors.cpp:703: Failure
Expected equality of these values:
  0u
    Which is: 0
  my_waitable2->get_count()
    Which is: 1
[  FAILED  ] TestExecutorsOnlyNode/EventsExecutor.missing_event, where TypeParam = rclcpp::experimental::executors::EventsExecutor (30 ms)
[----------] 1 test from TestExecutorsOnlyNode/EventsExecutor (30 ms total)

[----------] Global test environment tear-down
[==========] 4 tests from 4 test suites ran. (272 ms total)
[  PASSED  ] 2 tests.
[  FAILED  ] 2 tests, listed below:
[  FAILED  ] TestExecutorsOnlyNode/StaticSingleThreadedExecutor.missing_event, where TypeParam = rclcpp::executors::StaticSingleThreadedExecutor
[  FAILED  ] TestExecutorsOnlyNode/EventsExecutor.missing_event, where TypeParam = rclcpp::experimental::executors::EventsExecutor

 2 FAILED TESTS

Is that your experience too @jmachowinski?

Also, this is a bit more complicated than it needs to be. I'll open a pr (cellumation#1) to make it simpler (without breaking its purpose, I think) by only adding the callback group with the waitables to the executor, which you can take, leave, or discuss if you see an issue with it. With that change I do get a slightly different result:

% ~/ros2_ws/build/rclcpp/test/rclcpp/test_executors --gtest_filter=\*missing_event
Running main() from /Users/william/ros2_ws/install/src/gtest_vendor/src/gtest_main.cc
Note: Google Test filter = *missing_event
[==========] Running 4 tests from 4 test suites.
[----------] Global test environment set-up.
[----------] 1 test from TestExecutorsOnlyNode/SingleThreadedExecutor, where TypeParam = rclcpp::executors::SingleThreadedExecutor
[ RUN      ] TestExecutorsOnlyNode/SingleThreadedExecutor.missing_event
[       OK ] TestExecutorsOnlyNode/SingleThreadedExecutor.missing_event (118 ms)
[----------] 1 test from TestExecutorsOnlyNode/SingleThreadedExecutor (118 ms total)

[----------] 1 test from TestExecutorsOnlyNode/MultiThreadedExecutor, where TypeParam = rclcpp::executors::MultiThreadedExecutor
[ RUN      ] TestExecutorsOnlyNode/MultiThreadedExecutor.missing_event
[       OK ] TestExecutorsOnlyNode/MultiThreadedExecutor.missing_event (17 ms)
[----------] 1 test from TestExecutorsOnlyNode/MultiThreadedExecutor (17 ms total)

[----------] 1 test from TestExecutorsOnlyNode/StaticSingleThreadedExecutor, where TypeParam = rclcpp::executors::StaticSingleThreadedExecutor
[ RUN      ] TestExecutorsOnlyNode/StaticSingleThreadedExecutor.missing_event
/Users/william/ros2_ws/src/ros2/rclcpp/rclcpp/test/rclcpp/executors/test_executors.cpp:705: Failure
Expected equality of these values:
  1u
    Which is: 1
  my_waitable2->get_count()
    Which is: 0
[  FAILED  ] TestExecutorsOnlyNode/StaticSingleThreadedExecutor.missing_event, where TypeParam = rclcpp::executors::StaticSingleThreadedExecutor (47 ms)
[----------] 1 test from TestExecutorsOnlyNode/StaticSingleThreadedExecutor (47 ms total)

[----------] 1 test from TestExecutorsOnlyNode/EventsExecutor, where TypeParam = rclcpp::experimental::executors::EventsExecutor
[ RUN      ] TestExecutorsOnlyNode/EventsExecutor.missing_event
/Users/william/ros2_ws/src/ros2/rclcpp/rclcpp/test/rclcpp/executors/test_executors.cpp:696: Failure
Expected equality of these values:
  0u
    Which is: 0
  my_waitable2->get_count()
    Which is: 1
[  FAILED  ] TestExecutorsOnlyNode/EventsExecutor.missing_event, where TypeParam = rclcpp::experimental::executors::EventsExecutor (30 ms)
[----------] 1 test from TestExecutorsOnlyNode/EventsExecutor (30 ms total)

[----------] Global test environment tear-down
[==========] 4 tests from 4 test suites ran. (213 ms total)
[  PASSED  ] 2 tests.
[  FAILED  ] 2 tests, listed below:
[  FAILED  ] TestExecutorsOnlyNode/StaticSingleThreadedExecutor.missing_event, where TypeParam = rclcpp::executors::StaticSingleThreadedExecutor
[  FAILED  ] TestExecutorsOnlyNode/EventsExecutor.missing_event, where TypeParam = rclcpp::experimental::executors::EventsExecutor

 2 FAILED TESTS

It is still the events executor and the static single threaded executor which fail. Note also that they fail in different ways, i.e. the EventsExecutor is a bit too eager and executes the second waitable when your test assumes it should not, and the static single threaded executor never executes the second waitable.

I still need to figure out why that is, but at the same time, I think the assumptions in this test are a bit flawed. Specifically, I think that assuming a given spin_once() call will execute exactly the event you expect next is dangerous, and I don't think it is a condition we should try to enforce on the spin variants. Instead I think a variant of this test which uses futures and spins in a loop until they are complete, or until some timeout, is probably better. After all, we care that the events eventually get called and are not called twice, not that they happen in a specific order or number of spin_once calls.

I'll open a pr with the futures alternative soon.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the pr for using futures: cellumation#2

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that your experience too @jmachowinski?

Yes, I missed this during my testing, as I set a filter...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I had a quick look at the StaticSingleThreadedExecutor case; this looks like a lost wakeup to me.

@wjwwood This brings me back to a question I asked a while ago: whether a lost wakeup would actually be the expected behavior for this test. At some point you mentioned that an object using a guard condition must make sure that the signal does not get lost.

Copy link
Member

@wjwwood wjwwood Jan 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This brings me back to a question I asked a while ago, if a lost wakeup would actually be the expected behavior for this test. At some point you mentioned, that an object using a guard condition must make sure that the signal does not get lost.

Right, that's a good question. I think that the "TestWaitable" in this test file has fallen into the same issue we discussed elsewhere, which is: if a waitable is triggered but not handled, should that waitable re-trigger itself, or should the executor handle the retriggering? I believe it should probably be up to the waitable to ensure that behavior, leaving room for waitables for which it makes sense not to retrigger (that is to say, when the conditions that caused it to be triggered no longer apply, for example if data is no longer available in a subscription because it exceeded its lifetime QoS setting). So part of this test is testing "if you have two things ready, then wait, then execute one, then wait again, will you execute the second?". I think that's a valid test, but whether it passes or not comes down to how we ensure that the second waitable "stays ready" so that the second wait doesn't block. That could be the job of either the executor or the waitable, which is the point of the question you asked. The pr #2109 tried (perhaps naively) to fix this at the executor level, though only for some of the executors, and as we've been suggesting, perhaps that pr isn't correct.

Based on that, I set out to simplify the test further to avoid that particular issue, and test the "other" part of this test only, doing something like this:

  • create a mutually exclusive callback group
  • create a TestWaitable instance and add it to the callback group
  • create the executor and add the callback group
  • trigger the waitable
  • manually set the callback group's can_be_taken_from to false
  • spin until a future is complete, where the future is set when the waitable is executed, expecting a timeout
  • assert it timed out (i.e. the waitable was not executed)
  • manually set the callback group's can_be_taken_from to true
  • spin until a future is complete, where the future is set when the waitable is executed
  • assert the waitable was executed

This is essentially testing whether or not an executor is adhering to the callback group. Another, more contrived, version of this would involve two waitables in the callback group, and ensuring that one is being executed while spinning on the other, but that requires a multi-threaded executor (executing one waitable while spinning on the other implies at least two threads). So it might be "ok" for a single threaded executor to ignore the callback group's can_be_taken_from. Ideally they would not ignore this, even if in normal practice it should never be set while spinning, but it's an under-defined part of the interface. It will, however, become very important when/if we ever get a multi-threaded version of the events executor.


TYPED_TEST(TestExecutors, spinSome)
{
using ExecutorType = TypeParam;
Expand Down