Skip to content

Commit ddb9cc6

Browse files
committed
Fail query runner when nodes do not come up
1 parent 003d86a commit ddb9cc6

File tree

1 file changed

+44
-15
lines changed

1 file changed

+44
-15
lines changed

presto-tests/src/main/java/com/facebook/presto/tests/DistributedQueryRunner.java

Lines changed: 44 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@
7676
import java.util.Optional;
7777
import java.util.Set;
7878
import java.util.concurrent.ThreadLocalRandom;
79+
import java.util.concurrent.TimeoutException;
7980
import java.util.concurrent.atomic.AtomicReference;
8081
import java.util.concurrent.locks.Lock;
8182
import java.util.concurrent.locks.ReadWriteLock;
@@ -362,14 +363,15 @@ private DistributedQueryRunner(
362363
}
363364
prestoClients = prestoClientsBuilder.build();
364365

365-
long start = nanoTime();
366-
while (!allNodesGloballyVisible()) {
367-
Assertions.assertLessThan(nanosSince(start), new Duration(100, SECONDS));
368-
MILLISECONDS.sleep(10);
366+
try {
367+
waitForAllNodesGloballyVisible();
368+
}
369+
catch (TimeoutException e) {
370+
closer.close();
371+
throw e;
369372
}
370-
log.info("Announced servers in %s", nanosSince(start).convertToMostSuccinctTimeUnit());
371373

372-
start = nanoTime();
374+
long start = nanoTime();
373375
for (TestingPrestoServer server : servers) {
374376
server.getMetadata().registerBuiltInFunctions(AbstractTestQueries.CUSTOM_FUNCTIONS);
375377
}
@@ -517,22 +519,49 @@ else if (coordinatorSidecar) {
517519
return server;
518520
}
519521

520-
private boolean allNodesGloballyVisible()
522+
private void waitForAllNodesGloballyVisible()
523+
throws Exception
521524
{
522-
int expectedActiveNodesForRm = externalWorkers.size() + servers.size();
523-
int expectedActiveNodesForCoordinator = externalWorkers.size() + servers.size();
525+
long startTimeInMs = nanoTime();
526+
int expectedActiveNodes = externalWorkers.size() + servers.size();
527+
Duration timeout = new Duration(100, SECONDS);
524528

525-
for (TestingPrestoServer server : servers) {
529+
for (int serverIndex = 0; serverIndex < servers.size(); ) {
530+
TestingPrestoServer server = servers.get(serverIndex);
526531
AllNodes allNodes = server.refreshNodes();
527532
int activeNodeCount = allNodes.getActiveNodes().size();
528533

529-
if (!allNodes.getInactiveNodes().isEmpty() ||
530-
(server.isCoordinator() && activeNodeCount != expectedActiveNodesForCoordinator) ||
531-
(server.isResourceManager() && activeNodeCount != expectedActiveNodesForRm)) {
532-
return false;
534+
if (!allNodes.getInactiveNodes().isEmpty()) {
535+
throwTimeoutIfNotReady(
536+
startTimeInMs,
537+
timeout,
538+
format("Timed out waiting for all nodes to be globally visible. Inactive nodes: %s", allNodes.getInactiveNodes()));
539+
MILLISECONDS.sleep(10);
540+
serverIndex = 0;
541+
}
542+
else if ((server.isCoordinator() || server.isResourceManager()) && activeNodeCount != expectedActiveNodes) {
543+
throwTimeoutIfNotReady(
544+
startTimeInMs,
545+
timeout,
546+
format("Timed out waiting for all nodes to be globally visible. Node count: %s, expected: %s", activeNodeCount, expectedActiveNodes));
547+
MILLISECONDS.sleep(10);
548+
serverIndex = 0;
549+
}
550+
else {
551+
log.info("Server %s has %s active nodes", server.getBaseUrl(), activeNodeCount);
552+
serverIndex++;
533553
}
534554
}
535-
return true;
555+
556+
log.info("Announced servers in %s", nanosSince(startTimeInMs).convertToMostSuccinctTimeUnit());
557+
}
558+
559+
private static void throwTimeoutIfNotReady(long startTimeInMs, Duration timeout, String message)
560+
throws TimeoutException
561+
{
562+
if (nanosSince(startTimeInMs).compareTo(timeout) >= 0) {
563+
throw new TimeoutException(message);
564+
}
536565
}
537566

538567
public TestingPrestoClient getRandomClient()

0 commit comments

Comments
 (0)