Skip to content

Commit 9c83ef9

Browse files
xiaoxmengfacebook-github-bot
authored andcommitted
Fix server crash caused by memory pool abort race (facebookincubator#9408)
Summary: When memory arbitrator aborts a query, it waits for all the driver threads of the query to finish, and then abort the query operators by freeing their memory resources. The query operator or driver thread which is currently under memory arbitration is under suspended driver state which is not counted as running. We shouldn't abort such operator as it might be under memory processing within a component such as row container in hash build, and the component might be reset by operator abort. Even though we won't continue the query procesing but the excception throw might need to access the component state on the exception return path. This PR fixes this problem by avoiding abort an operator which driver is running and under suspension state. Pull Request resolved: facebookincubator#9408 Reviewed By: bikramSingh91, oerling Differential Revision: D55883111 Pulled By: xiaoxmeng fbshipit-source-id: c5fd68401bf82be92db2b9f34b8e8a7d1a7f13c2
1 parent fa177d3 commit 9c83ef9

File tree

5 files changed

+238
-398
lines changed

5 files changed

+238
-398
lines changed

velox/common/memory/tests/SharedArbitratorTest.cpp

-66
Original file line numberDiff line numberDiff line change
@@ -709,72 +709,6 @@ DEBUG_ONLY_TEST_F(
709709
waitForAllTasksToBeDeleted();
710710
}
711711

712-
DEBUG_ONLY_TEST_F(SharedArbitrationTest, raceBetweenMaybeReserveAndTaskAbort) {
713-
setupMemory(kMemoryCapacity, 0);
714-
const int numVectors = 10;
715-
std::vector<RowVectorPtr> vectors;
716-
for (int i = 0; i < numVectors; ++i) {
717-
vectors.push_back(makeRowVector(rowType_, fuzzerOpts_));
718-
}
719-
createDuckDbTable(vectors);
720-
721-
auto queryCtx = newQueryCtx(memoryManager_, executor_, kMemoryCapacity);
722-
ASSERT_EQ(queryCtx->pool()->capacity(), 0);
723-
724-
// Create a fake query to hold some memory to trigger memory arbitration.
725-
auto fakeQueryCtx = newQueryCtx(memoryManager_, executor_, kMemoryCapacity);
726-
auto fakeLeafPool = fakeQueryCtx->pool()->addLeafChild(
727-
"fakeLeaf", true, FakeMemoryReclaimer::create());
728-
TestAllocation fakeAllocation{
729-
fakeLeafPool.get(),
730-
fakeLeafPool->allocate(kMemoryCapacity / 3),
731-
kMemoryCapacity / 3};
732-
733-
std::unique_ptr<TestAllocation> injectAllocation;
734-
std::atomic<bool> injectAllocationOnce{true};
735-
SCOPED_TESTVALUE_SET(
736-
"facebook::velox::common::memory::MemoryPoolImpl::maybeReserve",
737-
std::function<void(memory::MemoryPool*)>([&](memory::MemoryPool* pool) {
738-
if (!injectAllocationOnce.exchange(false)) {
739-
return;
740-
}
741-
// The injection memory allocation (with the given size) makes sure that
742-
// maybeReserve fails and abort this query itself.
743-
const size_t injectAllocationSize =
744-
pool->freeBytes() + arbitrator_->stats().freeCapacityBytes;
745-
injectAllocation.reset(new TestAllocation{
746-
fakeLeafPool.get(),
747-
fakeLeafPool->allocate(injectAllocationSize),
748-
injectAllocationSize});
749-
}));
750-
751-
const int numDrivers = 1;
752-
const auto spillDirectory = exec::test::TempDirectoryPath::create();
753-
std::thread queryThread([&]() {
754-
VELOX_ASSERT_THROW(
755-
AssertQueryBuilder(duckDbQueryRunner_)
756-
.queryCtx(queryCtx)
757-
.spillDirectory(spillDirectory->path)
758-
.config(core::QueryConfig::kSpillEnabled, "true")
759-
.config(core::QueryConfig::kJoinSpillEnabled, "true")
760-
.config(core::QueryConfig::kSpillNumPartitionBits, "2")
761-
.maxDrivers(numDrivers)
762-
.plan(PlanBuilder()
763-
.values(vectors)
764-
.localPartition({"c0", "c1"})
765-
.singleAggregation({"c0", "c1"}, {"array_agg(c2)"})
766-
.localPartition(std::vector<std::string>{})
767-
.planNode())
768-
.copyResults(pool()),
769-
"Exceeded memory pool cap");
770-
});
771-
772-
queryThread.join();
773-
fakeAllocation.free();
774-
injectAllocation->free();
775-
waitForAllTasksToBeDeleted();
776-
}
777-
778712
DEBUG_ONLY_TEST_F(SharedArbitrationTest, asyncArbitratonFromNonDriverContext) {
779713
setupMemory(kMemoryCapacity, 0);
780714
const int numVectors = 10;

velox/exec/Operator.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -665,14 +665,20 @@ void Operator::MemoryReclaimer::abort(
665665
memory::MemoryPool* pool,
666666
const std::exception_ptr& /* error */) {
667667
std::shared_ptr<Driver> driver = ensureDriver();
668-
if (FOLLY_UNLIKELY(driver == nullptr)) {
668+
if (driver == nullptr) {
669669
return;
670670
}
671671
VELOX_CHECK_EQ(pool->name(), op_->pool()->name());
672672
VELOX_CHECK(
673673
!driver->state().isOnThread() || driver->state().isSuspended ||
674674
driver->state().isTerminated);
675675
VELOX_CHECK(driver->task()->isCancelled());
676+
if (driver->state().isOnThread() && driver->state().isSuspended) {
677+
// We can't abort an operator if it is running on a driver thread and
678+
// suspended for memory arbitration. Otherwise, it might cause random crash
679+
// when the driver thread throws after detects the aborted query.
680+
return;
681+
}
676682

677683
// Calls operator close to free up major memory usage.
678684
op_->close();

velox/exec/tests/AggregationTest.cpp

+35-82
Original file line numberDiff line numberDiff line change
@@ -2774,7 +2774,6 @@ void abortPool(memory::MemoryPool* pool) {
27742774
} // namespace
27752775

27762776
DEBUG_ONLY_TEST_F(AggregationTest, abortDuringOutputProcessing) {
2777-
constexpr int64_t kMaxBytes = 1LL << 30; // 1GB
27782777
auto rowType = ROW({"c0", "c1", "c2"}, {INTEGER(), INTEGER(), INTEGER()});
27792778
auto batches = makeVectors(rowType, 1000, 10);
27802779

@@ -2792,77 +2791,53 @@ DEBUG_ONLY_TEST_F(AggregationTest, abortDuringOutputProcessing) {
27922791

27932792
for (const auto& testData : testSettings) {
27942793
SCOPED_TRACE(testData.debugString());
2795-
auto queryCtx = std::make_shared<core::QueryCtx>(executor_.get());
2796-
queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool(
2797-
queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create()));
27982794
auto expectedResult =
27992795
AssertQueryBuilder(
28002796
PlanBuilder()
28012797
.values(batches)
28022798
.singleAggregation({"c0", "c1"}, {"array_agg(c2)"})
28032799
.planNode())
2804-
.queryCtx(queryCtx)
28052800
.copyResults(pool_.get());
28062801

2807-
folly::EventCount driverWait;
2808-
auto driverWaitKey = driverWait.prepareWait();
2809-
folly::EventCount testWait;
2810-
auto testWaitKey = testWait.prepareWait();
2811-
28122802
std::atomic_bool injectOnce{true};
2813-
Operator* op;
28142803
SCOPED_TESTVALUE_SET(
28152804
"facebook::velox::exec::Driver::runInternal::noMoreInput",
2816-
std::function<void(Operator*)>(([&](Operator* testOp) {
2817-
if (testOp->operatorType() != "Aggregation") {
2805+
std::function<void(Operator*)>(([&](Operator* op) {
2806+
if (op->operatorType() != "Aggregation") {
28182807
return;
28192808
}
2820-
op = testOp;
28212809
if (!injectOnce.exchange(false)) {
28222810
return;
28232811
}
28242812
auto* driver = op->testingOperatorCtx()->driver();
28252813
ASSERT_EQ(
28262814
driver->task()->enterSuspended(driver->state()),
28272815
StopReason::kNone);
2828-
testWait.notify();
2829-
driverWait.wait(driverWaitKey);
2816+
testData.abortFromRootMemoryPool ? abortPool(op->pool()->root())
2817+
: abortPool(op->pool());
2818+
// We can't directly reclaim memory from this hash build operator as
2819+
// its driver thread is running and in suspension state.
2820+
ASSERT_GT(op->pool()->root()->currentBytes(), 0);
28302821
ASSERT_EQ(
28312822
driver->task()->leaveSuspended(driver->state()),
28322823
StopReason::kAlreadyTerminated);
28332824
VELOX_MEM_POOL_ABORTED("Memory pool aborted");
28342825
})));
28352826

2836-
std::thread taskThread([&]() {
2837-
VELOX_ASSERT_THROW(
2838-
AssertQueryBuilder(
2839-
PlanBuilder()
2840-
.values(batches)
2841-
.singleAggregation({"c0", "c1"}, {"array_agg(c2)"})
2842-
.planNode())
2843-
.queryCtx(queryCtx)
2844-
.maxDrivers(testData.numDrivers)
2845-
.assertResults(expectedResult),
2846-
"");
2847-
});
2848-
2849-
testWait.wait(testWaitKey);
2850-
ASSERT_TRUE(op != nullptr);
2851-
auto task = op->testingOperatorCtx()->task();
2852-
testData.abortFromRootMemoryPool ? abortPool(queryCtx->pool())
2853-
: abortPool(op->pool());
2854-
ASSERT_TRUE(op->pool()->aborted());
2855-
ASSERT_TRUE(queryCtx->pool()->aborted());
2856-
ASSERT_EQ(queryCtx->pool()->currentBytes(), 0);
2857-
driverWait.notify();
2858-
taskThread.join();
2859-
task.reset();
2827+
VELOX_ASSERT_THROW(
2828+
AssertQueryBuilder(
2829+
PlanBuilder()
2830+
.values(batches)
2831+
.singleAggregation({"c0", "c1"}, {"array_agg(c2)"})
2832+
.planNode())
2833+
.maxDrivers(testData.numDrivers)
2834+
.assertResults(expectedResult),
2835+
"Memory pool manually aborted");
28602836
waitForAllTasksToBeDeleted();
28612837
}
28622838
}
28632839

28642840
DEBUG_ONLY_TEST_F(AggregationTest, abortDuringInputgProcessing) {
2865-
constexpr int64_t kMaxBytes = 1LL << 30; // 1GB
28662841
auto rowType = ROW({"c0", "c1", "c2"}, {INTEGER(), INTEGER(), INTEGER()});
28672842
auto batches = makeVectors(rowType, 1000, 10);
28682843

@@ -2880,72 +2855,50 @@ DEBUG_ONLY_TEST_F(AggregationTest, abortDuringInputgProcessing) {
28802855

28812856
for (const auto& testData : testSettings) {
28822857
SCOPED_TRACE(testData.debugString());
2883-
auto queryCtx = std::make_shared<core::QueryCtx>(executor_.get());
2884-
queryCtx->testingOverrideMemoryPool(memory::memoryManager()->addRootPool(
2885-
queryCtx->queryId(), kMaxBytes, memory::MemoryReclaimer::create()));
28862858
auto expectedResult =
28872859
AssertQueryBuilder(
28882860
PlanBuilder()
28892861
.values(batches)
28902862
.singleAggregation({"c0", "c1"}, {"array_agg(c2)"})
28912863
.planNode())
2892-
.queryCtx(queryCtx)
28932864
.copyResults(pool_.get());
28942865

2895-
folly::EventCount driverWait;
2896-
auto driverWaitKey = driverWait.prepareWait();
2897-
folly::EventCount testWait;
2898-
auto testWaitKey = testWait.prepareWait();
2899-
29002866
std::atomic_int numInputs{0};
2901-
Operator* op;
29022867
SCOPED_TESTVALUE_SET(
29032868
"facebook::velox::exec::Driver::runInternal::addInput",
2904-
std::function<void(Operator*)>(([&](Operator* testOp) {
2905-
if (testOp->operatorType() != "Aggregation") {
2869+
std::function<void(Operator*)>(([&](Operator* op) {
2870+
if (op->operatorType() != "Aggregation") {
29062871
return;
29072872
}
2908-
op = testOp;
2909-
++numInputs;
2910-
if (numInputs != 2) {
2873+
if (++numInputs != 2) {
29112874
return;
29122875
}
29132876
auto* driver = op->testingOperatorCtx()->driver();
29142877
ASSERT_EQ(
29152878
driver->task()->enterSuspended(driver->state()),
29162879
StopReason::kNone);
2917-
testWait.notify();
2918-
driverWait.wait(driverWaitKey);
2880+
testData.abortFromRootMemoryPool ? abortPool(op->pool()->root())
2881+
: abortPool(op->pool());
2882+
// We can't directly reclaim memory from this hash build operator as
2883+
// its driver thread is running and in suspension state.
2884+
ASSERT_GT(op->pool()->root()->currentBytes(), 0);
29192885
ASSERT_EQ(
29202886
driver->task()->leaveSuspended(driver->state()),
29212887
StopReason::kAlreadyTerminated);
2888+
ASSERT_TRUE(op->pool()->aborted());
2889+
ASSERT_TRUE(op->pool()->root()->aborted());
29222890
VELOX_MEM_POOL_ABORTED("Memory pool aborted");
29232891
})));
29242892

2925-
std::thread taskThread([&]() {
2926-
VELOX_ASSERT_THROW(
2927-
AssertQueryBuilder(
2928-
PlanBuilder()
2929-
.values(batches)
2930-
.singleAggregation({"c0", "c1"}, {"array_agg(c2)"})
2931-
.planNode())
2932-
.queryCtx(queryCtx)
2933-
.maxDrivers(testData.numDrivers)
2934-
.assertResults(expectedResult),
2935-
"");
2936-
});
2937-
2938-
testWait.wait(testWaitKey);
2939-
ASSERT_TRUE(op != nullptr);
2940-
auto task = op->testingOperatorCtx()->task();
2941-
testData.abortFromRootMemoryPool ? abortPool(queryCtx->pool())
2942-
: abortPool(op->pool());
2943-
ASSERT_TRUE(op->pool()->aborted());
2944-
ASSERT_TRUE(queryCtx->pool()->aborted());
2945-
ASSERT_EQ(queryCtx->pool()->currentBytes(), 0);
2946-
driverWait.notify();
2947-
taskThread.join();
2948-
task.reset();
2893+
VELOX_ASSERT_THROW(
2894+
AssertQueryBuilder(
2895+
PlanBuilder()
2896+
.values(batches)
2897+
.singleAggregation({"c0", "c1"}, {"array_agg(c2)"})
2898+
.planNode())
2899+
.maxDrivers(testData.numDrivers)
2900+
.assertResults(expectedResult),
2901+
"Memory pool manually aborted");
29492902
waitForAllTasksToBeDeleted();
29502903
}
29512904
}

0 commit comments

Comments
 (0)