Skip to content

Commit 157d3c3

Browse files
authored
do not propagate disk issues to group level when disk is operational (#16993)
1 parent 3df4531 commit 157d3c3

File tree

2 files changed

+6
-20
lines changed

2 files changed

+6
-20
lines changed

ydb/core/health_check/health_check.cpp

-10
Original file line numberDiff line numberDiff line change
@@ -2293,10 +2293,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
22932293
break;
22942294
}
22952295

2296-
context.ReportWithMaxChildStatus("VDisk have space issue",
2297-
ETags::VDiskState,
2298-
{ETags::PDiskSpace});
2299-
23002296
storageVDiskStatus.set_overall(context.GetOverallStatus());
23012297
}
23022298

@@ -2562,8 +2558,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
25622558
if (ErasureSpecies == NONE) {
25632559
if (FailedDisks > 0) {
25642560
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState});
2565-
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) {
2566-
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
25672561
}
25682562
} else if (ErasureSpecies == BLOCK_4_2) {
25692563
if (FailedDisks > 2) {
@@ -2576,8 +2570,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
25762570
} else {
25772571
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
25782572
}
2579-
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) {
2580-
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
25812573
}
25822574
} else if (ErasureSpecies == MIRROR_3_DC) {
25832575
if (FailedRealms.size() > 2 || (FailedRealms.size() == 2 && FailedRealms[0].second > 1 && FailedRealms[1].second > 1)) {
@@ -2590,8 +2582,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
25902582
} else {
25912583
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
25922584
}
2593-
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) {
2594-
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
25952585
}
25962586
}
25972587
}

ydb/core/health_check/health_check_ut.cpp

+6 -10
Original file line numberDiff line numberDiff line change
@@ -719,25 +719,21 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
719719
UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD);
720720
}
721721

722-
Y_UNIT_TEST(YellowGroupIssueOnYellowSpace) {
722+
Y_UNIT_TEST(OnlyDiskIssueOnSpaceIssues) {
723723
auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, NKikimrBlobStorage::READY}, false, 0.9);
724724
Cerr << result.ShortDebugString() << Endl;
725-
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1);
725+
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 0);
726726
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0);
727+
CheckHcResultHasIssuesWithStatus(result, "PDISK", Ydb::Monitoring::StatusFlag::YELLOW, 3, "");
727728
}
728729

729-
Y_UNIT_TEST(RedGroupIssueOnRedSpace) {
730-
auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, NKikimrBlobStorage::READY}, false, 0.95);
731-
Cerr << result.ShortDebugString() << Endl;
732-
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1);
733-
}
734-
735-
Y_UNIT_TEST(YellowIssueReadyVDisksOnFaultyPDisks) {
730+
Y_UNIT_TEST(OnlyDiskIssueOnFaultyPDisks) {
736731
auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, {NKikimrBlobStorage::READY, NKikimrBlobStorage::FAULTY}});
737732
Cerr << result.ShortDebugString() << Endl;
738-
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1);
733+
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 0);
739734
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::ORANGE, 0);
740735
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0);
736+
CheckHcResultHasIssuesWithStatus(result, "PDISK", Ydb::Monitoring::StatusFlag::RED, 3, "");
741737
}
742738

743739
/* HC currently infers group status on its own, so it's never unknown

0 commit comments

Comments (0)