Skip to content

Commit ade0d62

Browse files
Rui GraçaDuarte Fonseca
Rui Graça
authored and
Duarte Fonseca
committed
Integration of Suspend/Resume related fixes
Mutex and flag changes: Move endpoints state_ update to be after the shutdown_and_close as this function forces the state_ to be CLOSED. This could lead to re connections not happening; Set linger to the tcp and local_tcp server endpoint connections, as this is not longer done on suspend; Removed routing_stop_mutex_ (from rmc::stop, rmc::on_net_state_change, rmc::on_message) if_state_running_ and sd_route_set_ are now atomic; Removed state_mutex_ and state_ is now atomic. This was needed as it could cause a lock inversion with the sender_mutex_ and receiver_mutex_; state_mutex_ was being reused for several other collections, and so the following mutexes were added: pending_offers_mutex_, requests_mutex_, requests_to_debounce_mutex_, pending_event_registrations_mutex_, state_condition_mutex_, register_application_timer_mutex_; pending_sd_offers_mutex_ is now recursive_mutex because it is also needed in the rmi::set_routing_state, to resolve any offers that were left pending after resuming; Fix deadlock on rmc::on_net_state_change, which could lock sender_mutex_ and then call on_disconnect which would lock the same mutex; Functional changes: Added function remove_known_client() which erases Known_clients_. routing_manager_base::remove_local will now call remove_known_client to keep the list updated; rmi::set_routing_state will now initialize the service info of any remaining pending_sd_offers_. Before, services that were internally offered while the resume was not ready were not being offered externally; Added a new state RS_DELAYED_RESUME. This state will be set instead of RS_RESUMED if either the if_state_running_ or (when service discovery is enabled) sd_route_set_ are not set. On state RS_SUSPEND the endpoint_manager_impl::suspend will no longer be called, as this could lead to sockets getting suck in ACCEPT4 (after resuming) if the ecu suspended while vsomeip was stil closing sockets; On state RS_RESUME, in endpoint_manager_impl::resume only the client endpoints will be restarted. Restarting server endpoints resulted in a service discovery reboot detection being wrongly triggered; Refactor rmi::on_net_interface_or_route_state_changed, to both simplify the logic and integrated the new RS_DELAYED_RESUME state; Created new function rmi::is_external_routing_ready() to simplify checks on if_state_running_ and sd_route_set_.
1 parent 72d7492 commit ade0d62

18 files changed

+355
-383
lines changed

implementation/endpoints/src/endpoint_manager_base.cpp

+14-13
Original file line numberDiff line numberDiff line change
@@ -35,35 +35,36 @@ endpoint_manager_base::endpoint_manager_base(
3535
}
3636

3737
std::shared_ptr<endpoint> endpoint_manager_base::create_local(client_t _client) {
38-
std::lock_guard<std::mutex> its_lock(local_endpoint_mutex_);
38+
std::scoped_lock its_lock(local_endpoint_mutex_);
3939
return create_local_unlocked(_client);
4040
}
4141

4242
void endpoint_manager_base::remove_local(const client_t _client) {
43-
std::shared_ptr<endpoint> its_endpoint(find_local(_client));
43+
std::scoped_lock its_lock(local_endpoint_mutex_);
44+
VSOMEIP_INFO << "emb::" << __func__ << ": client " << std::hex << _client;
45+
std::shared_ptr<endpoint> its_endpoint(find_local_unlocked(_client));
4446
if (its_endpoint) {
4547
its_endpoint->register_error_handler(nullptr);
4648
its_endpoint->stop();
47-
VSOMEIP_INFO << "Client [" << std::hex << rm_->get_client() << "] is closing connection to ["
48-
<< std::hex << _client << "]" << " endpoint > " << its_endpoint;
49-
std::lock_guard<std::mutex> its_lock(local_endpoint_mutex_);
49+
VSOMEIP_INFO << "Client [" << std::hex << rm_->get_client()
50+
<< "] is closing connection to [" << std::hex << _client << "]"
51+
<< " endpoint > " << its_endpoint;
5052
local_endpoints_.erase(_client);
5153
}
5254
}
5355

5456
std::shared_ptr<endpoint> endpoint_manager_base::find_or_create_local(client_t _client) {
55-
std::shared_ptr<endpoint> its_endpoint {nullptr};
56-
{
57-
std::scoped_lock its_lock {local_endpoint_mutex_};
58-
its_endpoint = find_local_unlocked(_client);
59-
if (!its_endpoint) {
60-
its_endpoint = create_local_unlocked(_client);
61-
}
57+
std::scoped_lock its_lock {local_endpoint_mutex_};
58+
std::shared_ptr<endpoint> its_endpoint {find_local_unlocked(_client)};
59+
if (!its_endpoint) {
60+
VSOMEIP_INFO << "emb::" << __func__ << ": create_client " << std::hex << _client;
61+
its_endpoint = create_local_unlocked(_client);
6262
}
6363
if (its_endpoint) {
6464
its_endpoint->start();
6565
} else {
66-
VSOMEIP_ERROR << __func__ << ": couldn't find or create endpoint for client " << _client;
66+
VSOMEIP_ERROR << "emb::" << __func__ << ": couldn't find or create endpoint for client "
67+
<< std::hex << _client;
6768
}
6869
return its_endpoint;
6970
}

implementation/endpoints/src/endpoint_manager_impl.cpp

+5-118
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ std::shared_ptr<endpoint> endpoint_manager_impl::find_or_create_remote_client(
7171
}
7272
}
7373
if (start_endpoint && its_endpoint && configuration_->is_someip(_service, _instance)
74-
&& rm_->get_routing_state() != routing_state_e::RS_SUSPENDED) {
74+
&& !rm_->is_suspended()) {
7575
its_endpoint->start();
7676
}
7777
return its_endpoint;
@@ -97,7 +97,7 @@ void endpoint_manager_impl::find_or_create_remote_client(
9797
}
9898
}
9999
const bool is_someip {configuration_->is_someip(_service, _instance)};
100-
const bool is_suspended {rm_->get_routing_state() == routing_state_e::RS_SUSPENDED};
100+
const bool is_suspended {rm_->is_suspended()};
101101

102102
if (start_reliable_endpoint && its_reliable_endpoint && is_someip && !is_suspended) {
103103
its_reliable_endpoint->start();
@@ -289,7 +289,7 @@ endpoint_manager_impl::create_server_endpoint(uint16_t _port, bool _reliable, bo
289289

290290
if (its_server_endpoint) {
291291
server_endpoints_[_port][_reliable] = its_server_endpoint;
292-
if (rm_->get_routing_state() != routing_state_e::RS_SUSPENDED) {
292+
if (!rm_->is_suspended()) {
293293
its_server_endpoint->start();
294294
}
295295
} else {
@@ -1395,140 +1395,27 @@ bool endpoint_manager_impl::is_used_endpoint(endpoint* const _endpoint) const {
13951395
}
13961396

13971397
void endpoint_manager_impl::suspend() {
1398-
client_endpoints_t its_client_endpoints;
1399-
server_endpoints_t its_server_endpoints;
1400-
1401-
{
1402-
std::scoped_lock its_lock {endpoint_mutex_};
1403-
its_client_endpoints = client_endpoints_;
1404-
its_server_endpoints = server_endpoints_;
1405-
}
1406-
1407-
// stop client endpoints
1408-
std::set<std::shared_ptr<client_endpoint>> its_suspended_client_endpoints;
1409-
for (const auto& [its_address, ports] : its_client_endpoints) {
1410-
for (const auto& [its_port, protocols] : ports) {
1411-
for (const auto& [its_protocol, partitions] : protocols) {
1412-
for (const auto& [its_partition, its_endpoint] : partitions) {
1413-
its_endpoint->stop();
1414-
auto its_client_endpoint {
1415-
std::dynamic_pointer_cast<client_endpoint>(its_endpoint)};
1416-
if (its_client_endpoint) {
1417-
its_suspended_client_endpoints.insert(its_client_endpoint);
1418-
}
1419-
}
1420-
}
1421-
}
1422-
}
1423-
1424-
// start server endpoints
1425-
for (const auto& [its_port, protocols] : its_server_endpoints) {
1426-
for (const auto& [its_protocol, its_endpoint] : protocols) {
1427-
its_endpoint->stop();
1428-
}
1429-
}
1430-
// check that the clients are established again
1431-
size_t its_interval {MIN_ENDPOINT_WAIT_INTERVAL};
1432-
size_t its_sum {0};
1433-
const size_t its_max {SUM_ENDPOINT_WAIT_INTERVAL};
1434-
bool is_done;
1435-
do {
1436-
is_done = true;
1437-
std::this_thread::sleep_for(std::chrono::milliseconds(its_interval));
1438-
for (auto& its_endpoint : its_suspended_client_endpoints) {
1439-
is_done = is_done && its_endpoint->is_closed();
1440-
if (!is_done)
1441-
break;
1442-
}
1443-
if (its_interval < MAX_ENDPOINT_WAIT_INTERVAL) {
1444-
its_interval <<= 1;
1445-
}
1446-
its_sum += its_interval;
1447-
} while (!is_done && its_sum < its_max);
1448-
1449-
if (!is_done) {
1450-
for (const auto& its_endpoint : its_suspended_client_endpoints) {
1451-
if (!its_endpoint->is_closed()) {
1452-
boost::asio::ip::address its_address;
1453-
(void)its_endpoint->get_remote_address(its_address);
1454-
1455-
VSOMEIP_WARNING << "endpoint_manager_impl::" << __func__
1456-
<< ": Suspending client port [" << std::dec
1457-
<< its_endpoint->get_local_port() << "] --> ["
1458-
<< its_address.to_string() << ":" << its_endpoint->get_remote_port()
1459-
<< "] failed.";
1460-
}
1461-
}
1462-
}
1398+
// do nothing
14631399
}
14641400

14651401
void endpoint_manager_impl::resume() {
14661402
client_endpoints_t its_client_endpoints;
1467-
server_endpoints_t its_server_endpoints;
1468-
14691403
{
14701404
std::scoped_lock its_lock {endpoint_mutex_};
14711405
its_client_endpoints = client_endpoints_;
1472-
its_server_endpoints = server_endpoints_;
14731406
}
14741407

1475-
// start server endpoints
1476-
for (const auto& [its_port, protocols] : its_server_endpoints) {
1477-
for (const auto& [its_protocol, its_endpoint] : protocols) {
1478-
its_endpoint->restart();
1479-
}
1480-
}
1481-
1482-
// start client endpoints
1408+
// restart client endpoints
14831409
std::set<std::shared_ptr<client_endpoint>> its_resumed_client_endpoints;
14841410
for (const auto& [its_address, ports] : its_client_endpoints) {
14851411
for (const auto& [its_port, protocols] : ports) {
14861412
for (const auto& [its_protocol, partitions] : protocols) {
14871413
for (const auto& [its_partition, its_endpoint] : partitions) {
14881414
its_endpoint->restart();
1489-
auto its_client_endpoint {
1490-
std::dynamic_pointer_cast<client_endpoint>(its_endpoint)};
1491-
if (its_client_endpoint) {
1492-
its_resumed_client_endpoints.insert(its_client_endpoint);
1493-
}
14941415
}
14951416
}
14961417
}
14971418
}
1498-
1499-
// check that the clients are established again
1500-
size_t its_interval {MIN_ENDPOINT_WAIT_INTERVAL};
1501-
size_t its_sum {0};
1502-
const size_t its_max {SUM_ENDPOINT_WAIT_INTERVAL};
1503-
bool is_done;
1504-
do {
1505-
is_done = true;
1506-
std::this_thread::sleep_for(std::chrono::milliseconds(its_interval));
1507-
for (const auto& its_endpoint : its_resumed_client_endpoints) {
1508-
is_done = is_done && its_endpoint->is_established();
1509-
if (!is_done)
1510-
break;
1511-
}
1512-
if (its_interval < MAX_ENDPOINT_WAIT_INTERVAL) {
1513-
its_interval <<= 1;
1514-
}
1515-
its_sum += its_interval;
1516-
} while (!is_done && its_sum < its_max);
1517-
1518-
if (!is_done) {
1519-
for (const auto& its_endpoint : its_resumed_client_endpoints) {
1520-
if (!its_endpoint->is_established()) {
1521-
boost::asio::ip::address its_address;
1522-
(void)its_endpoint->get_remote_address(its_address);
1523-
1524-
VSOMEIP_WARNING << "endpoint_manager_impl::" << __func__
1525-
<< ": Resuming client port [" << std::dec
1526-
<< its_endpoint->get_local_port() << "] --> ["
1527-
<< its_address.to_string() << ":" << its_endpoint->get_remote_port()
1528-
<< "] failed.";
1529-
}
1530-
}
1531-
}
15321419
}
15331420

15341421
} // namespace vsomeip_v3

implementation/endpoints/src/local_tcp_client_endpoint_impl.cpp

+22-22
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ void local_tcp_client_endpoint_impl::restart(bool _force) {
4646
if (!_force && state_ == cei_state_e::CONNECTING) {
4747
return;
4848
}
49-
state_ = cei_state_e::CONNECTING;
5049
{
5150
std::lock_guard<std::recursive_mutex> its_lock(mutex_);
5251
sending_blocked_ = false;
@@ -57,6 +56,7 @@ void local_tcp_client_endpoint_impl::restart(bool _force) {
5756
std::lock_guard<std::mutex> its_lock(socket_mutex_);
5857
shutdown_and_close_socket_unlocked(true);
5958
}
59+
state_ = cei_state_e::CONNECTING;
6060
was_not_connected_ = true;
6161
reconnect_counter_ = 0;
6262
start_connect_timer();
@@ -119,45 +119,45 @@ void local_tcp_client_endpoint_impl::connect() {
119119
if (its_error) {
120120
VSOMEIP_WARNING << "ltcei::connect: couldn't disable "
121121
<< "Nagle algorithm: " << its_error.message()
122-
<< " remote:" << remote_.port()
123-
<< " endpoint > " << this << " state_ > " << static_cast<int>(state_.load());
122+
<< " remote: " << remote_.port() << " endpoint: " << this
123+
<< " state_: " << static_cast<int>(state_.load());
124124
}
125125
socket_->set_option(boost::asio::socket_base::keep_alive(true), its_error);
126126
if (its_error) {
127127
VSOMEIP_WARNING << "ltcei::connect: couldn't enable "
128-
<< "keep_alive: " << its_error.message()
129-
<< " remote:" << remote_.port()
130-
<< " endpoint > " << this << " state_ > " << static_cast<int>(state_.load());
128+
<< "keep_alive: " << its_error.message() << " remote:" << remote_.port()
129+
<< " endpoint > " << this << " state_ > "
130+
<< static_cast<int>(state_.load());
131131
}
132132
// Setting the TIME_WAIT to 0 seconds forces RST to always be sent in reponse to a FIN
133133
// Since this is endpoint for internal communication, setting the TIME_WAIT to 5 seconds
134134
// should be enough to ensure the ACK to the FIN arrives to the server endpoint.
135135
socket_->set_option(boost::asio::socket_base::linger(true, 5), its_error);
136136
if (its_error) {
137137
VSOMEIP_WARNING << "ltcei::connect: couldn't enable "
138-
<< "SO_LINGER: " << its_error.message()
139-
<< " remote:" << remote_.port()
140-
<< " endpoint > " << this << " state_ > " << static_cast<int>(state_.load());
138+
<< "SO_LINGER: " << its_error.message() << " remote:" << remote_.port()
139+
<< " endpoint > " << this << " state_ > "
140+
<< static_cast<int>(state_.load());
141141
}
142142
socket_->set_option(boost::asio::socket_base::reuse_address(true), its_error);
143143
if (its_error) {
144-
VSOMEIP_WARNING << "ltcei::" << __func__
145-
<< ": Cannot enable SO_REUSEADDR" << "(" << its_error.message() << ")"
146-
<< " endpoint > " << this << " state_ > " << static_cast<int>(state_.load());
144+
VSOMEIP_WARNING << "ltcei::" << __func__ << ": Cannot enable SO_REUSEADDR" << "("
145+
<< its_error.message() << ")"
146+
<< " endpoint > " << this << " state_ > "
147+
<< static_cast<int>(state_.load());
147148
}
148149
socket_->bind(local_, its_error);
149150
if (its_error) {
150-
VSOMEIP_WARNING << "ltcei::" << __func__
151-
<< ": Cannot bind to client port " << local_.port() << "("
152-
<< its_error.message() << ")"
153-
<< " endpoint > " << this << " state_ > " << static_cast<int>(state_.load());
151+
VSOMEIP_WARNING << "ltcei::" << __func__ << ": Cannot bind to client port "
152+
<< local_.port() << "(" << its_error.message() << ")"
153+
<< " endpoint > " << this << " state_ > "
154+
<< static_cast<int>(state_.load());
154155
try {
155-
strand_.post(
156-
std::bind(&client_endpoint_impl::connect_cbk, shared_from_this(),
157-
its_connect_error));
158-
} catch (const std::exception &e) {
159-
VSOMEIP_ERROR << "ltcei::connect: " << e.what()
160-
<< " endpoint > " << this << " state_ > " << static_cast<int>(state_.load());
156+
strand_.post(std::bind(&client_endpoint_impl::connect_cbk, shared_from_this(),
157+
its_connect_error));
158+
} catch (const std::exception& e) {
159+
VSOMEIP_ERROR << "ltcei::connect: " << e.what() << " endpoint > " << this
160+
<< " state_ > " << static_cast<int>(state_.load());
161161
}
162162
return;
163163
}

implementation/endpoints/src/local_tcp_server_endpoint_impl.cpp

+15-10
Original file line numberDiff line numberDiff line change
@@ -241,27 +241,34 @@ void local_tcp_server_endpoint_impl::accept_cbk(
241241
// Nagle algorithm off
242242
new_connection_socket.set_option(boost::asio::ip::tcp::no_delay(true), its_error);
243243
if (its_error) {
244-
VSOMEIP_WARNING << "ltsei::accept_cbk: couldn't disable "
244+
VSOMEIP_WARNING << "ltsei::" << __func__ << ": couldn't disable "
245245
<< "Nagle algorithm: " << its_error.message()
246246
<< " endpoint > " << this;
247247
}
248248
new_connection_socket.set_option(boost::asio::socket_base::keep_alive(true), its_error);
249249
if (its_error) {
250-
VSOMEIP_WARNING << "ltsei::accept_cbk: couldn't enable "
250+
VSOMEIP_WARNING << "ltsei::" << __func__ << ": couldn't enable "
251251
<< "keep_alive: " << its_error.message()
252252
<< " endpoint > " << this;
253253
}
254+
// Setting the TIME_WAIT to 0 seconds forces RST to always be sent in reponse to a FIN
255+
// Since this is endpoint for internal communication, setting the TIME_WAIT to 5 seconds
256+
// should be enough to ensure the ACK to the FIN arrives to the server endpoint.
257+
new_connection_socket.set_option(boost::asio::socket_base::linger(true, 5), its_error);
258+
if (its_error) {
259+
VSOMEIP_WARNING << "ltsei::" << __func__ << ": setting SO_LINGER failed ("
260+
<< its_error.message() << ") " << this;
261+
}
254262
}
255263
}
256264
if (_error != boost::asio::error::bad_descriptor
257265
&& _error != boost::asio::error::operation_aborted
258266
&& _error != boost::asio::error::no_descriptors) {
259267
start();
260268
} else if (_error == boost::asio::error::no_descriptors) {
261-
VSOMEIP_ERROR << "ltsei::accept_cbk: "
262-
<< _error.message() << " (" << std::dec << _error.value()
263-
<< ") Will try to accept again in 1000ms"
264-
<< " endpoint > " << this;
269+
VSOMEIP_ERROR << "ltsei::" << __func__ << ": " << _error.message() << " (" << std::dec
270+
<< _error.value() << ") Will try to accept again in 1000ms"
271+
<< " endpoint > " << this;
265272
auto its_timer =
266273
std::make_shared<boost::asio::steady_timer>(io_,
267274
std::chrono::milliseconds(1000));
@@ -723,10 +730,8 @@ void local_tcp_server_endpoint_impl::connection::receive_cbk(
723730
} while (recv_buffer_size_ > 0 && found_message);
724731
}
725732

726-
if (is_stopped_
727-
|| _error == boost::asio::error::eof
728-
|| _error == boost::asio::error::connection_reset
729-
|| is_error) {
733+
if (is_stopped_ || _error == boost::asio::error::eof
734+
|| _error == boost::asio::error::connection_reset || is_error) {
730735
shutdown_and_close();
731736
its_server->remove_connection(bound_client_);
732737
its_server->configuration_->get_policy_manager()->remove_client_to_sec_client_mapping(bound_client_);

implementation/endpoints/src/local_uds_client_endpoint_impl.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ void local_uds_client_endpoint_impl::restart(bool _force) {
4949
if (!_force && state_ == cei_state_e::CONNECTING) {
5050
return;
5151
}
52-
state_ = cei_state_e::CONNECTING;
5352
{
5453
std::lock_guard<std::recursive_mutex> its_lock(mutex_);
5554
sending_blocked_ = false;
@@ -60,6 +59,7 @@ void local_uds_client_endpoint_impl::restart(bool _force) {
6059
std::lock_guard<std::mutex> its_lock(socket_mutex_);
6160
shutdown_and_close_socket_unlocked(true);
6261
}
62+
state_ = cei_state_e::CONNECTING;
6363
was_not_connected_ = true;
6464
reconnect_counter_ = 0;
6565
start_connect_timer();

implementation/endpoints/src/local_uds_server_endpoint_impl.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -826,10 +826,8 @@ void local_uds_server_endpoint_impl::connection::receive_cbk(
826826
} while (recv_buffer_size_ > 0 && found_message);
827827
}
828828

829-
if (is_stopped_
830-
|| _error == boost::asio::error::eof
831-
|| _error == boost::asio::error::connection_reset
832-
|| is_error) {
829+
if (is_stopped_ || _error == boost::asio::error::eof
830+
|| _error == boost::asio::error::connection_reset || is_error) {
833831
shutdown_and_close();
834832
its_server->remove_connection(bound_client_);
835833
its_server->configuration_->get_policy_manager()->remove_client_to_sec_client_mapping(bound_client_);

0 commit comments

Comments
 (0)