Skip to content

Commit c64966f

Browse files
healthcheck segfault while retrying Whiteboard (#17836)
1 parent 83f4fbe commit c64966f

File tree

2 files changed

+62
-8
lines changed

2 files changed

+62
-8
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,20 +1159,28 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
11591159
auto nodeId = ev->Get()->NodeId;
11601160
switch (eventId) {
11611161
case TEvWhiteboard::EvSystemStateRequest:
1162-
NodeSystemState.erase(nodeId);
1163-
NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId);
1162+
if (!NodeSystemState[nodeId].IsDone()) {
1163+
NodeSystemState.erase(nodeId);
1164+
NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId, {-1});
1165+
}
11641166
break;
11651167
case TEvWhiteboard::EvVDiskStateRequest:
1166-
NodeVDiskState.erase(nodeId);
1167-
NodeVDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId);
1168+
if (!NodeVDiskState[nodeId].IsDone()) {
1169+
NodeVDiskState.erase(nodeId);
1170+
NodeVDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvVDiskStateRequest>(nodeId);
1171+
}
11681172
break;
11691173
case TEvWhiteboard::EvPDiskStateRequest:
1170-
NodePDiskState.erase(nodeId);
1171-
NodePDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId);
1174+
if (!NodePDiskState[nodeId].IsDone()) {
1175+
NodePDiskState.erase(nodeId);
1176+
NodePDiskState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvPDiskStateRequest>(nodeId);
1177+
}
11721178
break;
11731179
case TEvWhiteboard::EvBSGroupStateRequest:
1174-
NodeBSGroupState.erase(nodeId);
1175-
NodeBSGroupState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId);
1180+
if (!NodeBSGroupState[nodeId].IsDone()) {
1181+
NodeBSGroupState.erase(nodeId);
1182+
NodeBSGroupState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvBSGroupStateRequest>(nodeId);
1183+
}
11761184
break;
11771185
default:
11781186
RequestDone("unsupported event scheduled");

ydb/core/health_check/health_check_ut.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2416,5 +2416,51 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
24162416
}
24172417
UNIT_ASSERT(!pdiskIssueFoundInResult);
24182418
}
2419+
2420+
// Regression test: a TEvRetryNodeWhiteboard for the system-state request is injected
// right after a whiteboard system-state response has been forwarded to the self-check
// actor. The self-check must still finish and report GOOD.
Y_UNIT_TEST(TestSystemStateRetriesAfterReceivingResponse) {
2421+
TPortManager tp;
2422+
ui16 port = tp.GetPort(2134);
2423+
ui16 grpcPort = tp.GetPort(2135);
2424+
// Minimal cluster: one node plus one dynamic node, simulated (non-real) threads.
auto settings = TServerSettings(port)
2425+
.SetNodeCount(1)
2426+
.SetDynamicNodeCount(1)
2427+
.SetUseRealThreads(false)
2428+
.SetDomainName("Root");
2429+
TServer server(settings);
2430+
server.EnableGRpc(grpcPort);
2431+
TClient client(settings);
2432+
TTestActorRuntime& runtime = *server.GetRuntime();
2433+
2434+
// Edge actor that issues the self-check request and receives its result.
TActorId sender = runtime.AllocateEdgeActor();
2435+
TAutoPtr<IEventHandle> handle;
2436+
2437+
// Recipient of the first intercepted system-state response; set at most once.
std::optional<TActorId> targetActor;
2438+
auto observerFunc = [&](TAutoPtr<IEventHandle>& ev) {
2439+
switch (ev->GetTypeRewrite()) {
2440+
case TEvWhiteboard::EvSystemStateResponse: {
2441+
// Only responses carrying cookie 1 are intercepted; others are processed normally.
if (ev->Cookie == 1) {
2442+
if (!targetActor) {
2443+
targetActor = ev->Recipient;
2444+
// Hand the original response over manually (ownership is released from `ev`) ...
// NOTE(review): confirm the re-sent response is not itself filtered by this observer.
runtime.Send(ev.Release());
2445+
// ... and then send a retry event for the same request type to the same actor.
// NOTE(review): the first argument (1) is presumably the node id - confirm against
// the TEvRetryNodeWhiteboard constructor.
runtime.Send(new IEventHandle(
2446+
*targetActor,
2447+
sender,
2448+
new NHealthCheck::TEvPrivate::TEvRetryNodeWhiteboard(1, TEvWhiteboard::TEvSystemStateRequest::EventType)
2449+
));
2450+
2451+
}
2452+
// Every cookie-1 response is dropped from the normal path; the first one was
// already forwarded explicitly above.
return TTestActorRuntime::EEventAction::DROP;
2453+
}
2454+
break;
2455+
}
2456+
}
2457+
return TTestActorRuntime::EEventAction::PROCESS;
2458+
};
2459+
runtime.SetObserverFunc(observerFunc);
2460+
// Trigger the health check and wait for its result on the edge actor.
runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0));
2461+
2462+
auto result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
2463+
UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD);
2464+
}
24192465
}
24202466
}

0 commit comments

Comments
 (0)