Skip to content

Commit 84cc7b2

Browse files
committed
Convert worker-manager TCP server to an HTTP server that exposes queue depth
1 parent a246bac commit 84cc7b2

File tree

6 files changed

+109
-101
lines changed

6 files changed

+109
-101
lines changed

README.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ Instead of running `pnpm start:base`, you can alternatively use `pnpm start:all`
8282
| :4201 | `/seed` seed realm || 🚫 |
8383
| :4202 | `/test` host test realm, `/node-test` node test realm || 🚫 |
8484
| :4205 | `/test` realm for matrix client tests (playwright controlled) | 🚫 | 🚫 |
85-
| :4210 | Development Worker Manager (spins up 1 worker by default) || 🚫 |
86-
| :4211 | Test Worker Manager (spins up 1 worker by default) || 🚫 |
87-
| :4212 | Test Worker Manager for matrix client tests (playwright controlled - 1 worker) || 🚫 |
88-
| :4213 | Test Worker Manager for matrix client tests - base realm server (playwright controlled - 1 worker) || 🚫 |
85+
| :4210 | Worker Manager (spins up 1 worker by default in development) || 🚫 |
86+
| :4211 | Worker Manager (spins up 1 worker by default) || 🚫 |
87+
| :4212 | Worker Manager for matrix client tests (playwright controlled - 1 worker) | | 🚫 |
88+
| :4213 | Worker Manager for matrix client tests - base realm server (playwright controlled - 1 worker) || 🚫 |
8989
| :5001 | Mail user interface for viewing emails sent to local SMTP || 🚫 |
9090
| :5435 | Postgres DB || 🚫 |
9191
| :8008 | Matrix synapse server || 🚫 |
@@ -223,7 +223,7 @@ There is a ember-freestyle component explorer available to assist with developme
223223

224224
1. `cd packages/boxel-ui/test-app`
225225
2. `pnpm start`
226-
3. Visit http://localhost:4210/ in your browser
226+
3. Visit http://localhost:4220/ in your browser
227227

228228
## Boxel Motion Demo App
229229

packages/boxel-ui/test-app/.ember-cli.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ module.exports = {
1414
1515
Setting `disableAnalytics` to true will prevent any data from being sent.
1616
*/
17-
port: 4210,
17+
port: 4220,
1818
testPort: 7356,
1919
disableAnalytics: false,
2020
};

packages/realm-server/main.ts

+12-42
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import { NodeAdapter } from './node-realm';
1111
import yargs from 'yargs';
1212
import { RealmServer } from './server';
1313
import { resolve } from 'path';
14-
import { createConnection, type Socket } from 'net';
1514
import { makeFastBootIndexRunner } from './fastboot';
1615
import { shimExternals } from './lib/externals';
1716
import * as Sentry from '@sentry/node';
@@ -331,49 +330,20 @@ let autoMigrate = migrateDB || undefined;
331330
process.exit(-3);
332331
});
333332

334-
let workerReadyDeferred: Deferred<boolean> | undefined;
335333
async function waitForWorkerManager(port: number) {
336-
const workerManager = await new Promise<Socket>((r) => {
337-
let socket = createConnection({ port }, () => {
338-
log.info(`Connected to worker manager on port ${port}`);
339-
r(socket);
340-
});
341-
});
342-
343-
workerManager.on('data', (data) => {
344-
let res = data.toString();
345-
if (!workerReadyDeferred) {
346-
throw new Error(
347-
`received unsolicited message from worker manager on port ${port}`,
348-
);
334+
let isReady = false;
335+
let timeout = Date.now() + 30_000;
336+
do {
337+
let response = await fetch(`http://localhost:${port}/`);
338+
if (response.ok) {
339+
let json = await response.json();
340+
isReady = json.ready;
349341
}
350-
switch (res) {
351-
case 'ready':
352-
case 'not-ready':
353-
workerReadyDeferred.fulfill(res === 'ready' ? true : false);
354-
break;
355-
default:
356-
workerReadyDeferred.reject(
357-
`unexpected response from worker manager: ${res}`,
358-
);
359-
}
360-
});
361-
362-
try {
363-
let isReady = false;
364-
let timeout = Date.now() + 30_000;
365-
do {
366-
workerReadyDeferred = new Deferred();
367-
workerManager.write('ready?');
368-
isReady = await workerReadyDeferred.promise;
369-
} while (!isReady && Date.now() < timeout);
370-
if (!isReady) {
371-
throw new Error(
372-
`timed out trying to connect to worker manager on port ${port}`,
373-
);
374-
}
375-
} finally {
376-
workerManager.end();
342+
} while (!isReady && Date.now() < timeout);
343+
if (!isReady) {
344+
throw new Error(
345+
`timed out waiting for worker manager to be ready on port ${port}`,
346+
);
377347
}
378348
log.info('workers are ready');
379349
}

packages/realm-server/scripts/start-worker-production.sh

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
NODE_NO_WARNINGS=1 \
44
ts-node \
55
--transpileOnly worker-manager \
6+
--port=4210 \
67
--allPriorityCount="${WORKER_ALL_PRIORITY_COUNT:-1}" \
78
--highPriorityCount="${WORKER_HIGH_PRIORITY_COUNT:-0}" \
89
--matrixURL='https://matrix.boxel.ai' \

packages/realm-server/scripts/start-worker-staging.sh

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
NODE_NO_WARNINGS=1 \
44
ts-node \
55
--transpileOnly worker-manager \
6+
--port=4210 \
67
--allPriorityCount="${WORKER_ALL_PRIORITY_COUNT:-1}" \
78
--highPriorityCount="${WORKER_HIGH_PRIORITY_COUNT:-0}" \
89
--matrixURL='https://matrix-staging.stack.cards' \

packages/realm-server/worker-manager.ts

+89-53
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,19 @@ import {
44
logger,
55
userInitiatedPriority,
66
systemInitiatedPriority,
7+
query,
78
} from '@cardstack/runtime-common';
89
import yargs from 'yargs';
910
import * as Sentry from '@sentry/node';
10-
import { createServer } from 'net';
1111
import flattenDeep from 'lodash/flattenDeep';
1212
import { spawn } from 'child_process';
1313
import pluralize from 'pluralize';
14+
import Koa from 'koa';
15+
import Router from '@koa/router';
16+
import { ecsMetadata, fullRequestURL, livenessCheck } from './middleware';
17+
import { PgAdapter } from '@cardstack/postgres';
1418

15-
let log = logger('worker');
19+
let log = logger('worker-manager');
1620

1721
const REALM_SECRET_SEED = process.env.REALM_SECRET_SEED;
1822
if (!REALM_SECRET_SEED) {
@@ -34,8 +38,10 @@ let {
3438
.usage('Start worker manager')
3539
.options({
3640
port: {
37-
description: 'TCP port for worker to communicate readiness (for tests)',
41+
description:
42+
'HTTP port for worker manager to communicate readiness and status',
3843
type: 'number',
44+
demandOption: true,
3945
},
4046
highPriorityCount: {
4147
description:
@@ -75,63 +81,93 @@ let isExiting = false;
7581
process.on('SIGINT', () => (isExiting = true));
7682
process.on('SIGTERM', () => (isExiting = true));
7783

78-
if (port != null) {
79-
// in tests we start a simple TCP server to communicate to the realm when
80-
// the worker is ready to start processing jobs
81-
let server = createServer((socket) => {
82-
log.info(`realm connected to worker manager`);
83-
socket.on('data', (data) => {
84-
if (data.toString() === 'ready?') {
85-
socket.write(isReady ? 'ready' : 'not-ready');
86-
}
87-
});
88-
socket.on('close', (hadError) => {
89-
log.info(`realm has disconnected${hadError ? ' due to an error' : ''}.`);
90-
});
91-
socket.on('error', (err: any) => {
92-
console.error(`realm disconnected from worker manager: ${err.message}`);
93-
});
94-
});
95-
server.unref();
84+
let dbAdapter = new PgAdapter({});
9685

97-
server.listen(port, () => {
98-
log.info(`worker manager listening for realm on port ${port}`);
99-
});
86+
let webServer = new Koa<Koa.DefaultState, Koa.Context>();
87+
let router = new Router();
88+
router.head('/', livenessCheck);
89+
router.get('/', async (ctxt: Koa.Context, _next: Koa.Next) => {
90+
let result = {
91+
ready: isReady,
92+
} as Record<string, boolean | number>;
93+
if (isReady) {
94+
let [{ queue_depth }] = (await query(dbAdapter, [
95+
`SELECT COUNT(*) as queue_depth FROM jobs WHERE status='unfulfilled'`,
96+
])) as {
97+
queue_depth: string;
98+
}[];
99+
result = {
100+
...result,
101+
highPriorityWorkers: highPriorityCount,
102+
allPriorityWorkers: allPriorityCount,
103+
queueDepth: parseInt(queue_depth, 10),
104+
};
105+
}
106+
ctxt.set('Content-Type', 'application/json');
107+
ctxt.body = JSON.stringify(result);
108+
ctxt.status = isReady ? 200 : 503;
109+
});
110+
111+
webServer
112+
.use(router.routes())
113+
.use((ctxt: Koa.Context, next: Koa.Next) => {
114+
log.info(
115+
`<-- ${ctxt.method} ${ctxt.req.headers.accept} ${
116+
fullRequestURL(ctxt).href
117+
}`,
118+
);
100119

101-
const shutdown = () => {
102-
log.info(`Shutting down server for worker manager...`);
103-
server.close((err) => {
104-
if (err) {
105-
log.error(`Error while closing the server for worker manager:`, err);
106-
process.exit(1);
107-
}
108-
log.info(`Server closed for worker manager.`);
109-
process.exit(0);
120+
ctxt.res.on('finish', () => {
121+
log.info(
122+
`--> ${ctxt.method} ${ctxt.req.headers.accept} ${
123+
fullRequestURL(ctxt).href
124+
}: ${ctxt.status}`,
125+
);
126+
log.debug(JSON.stringify(ctxt.req.headers));
110127
});
111-
};
128+
return next();
129+
})
130+
.use(ecsMetadata);
112131

113-
process.on('SIGINT', shutdown);
114-
process.on('SIGTERM', shutdown);
115-
process.on('uncaughtException', (err) => {
116-
log.error(`Uncaught exception in worker manager:`, err);
117-
shutdown();
118-
});
132+
webServer.on('error', (err: any) => {
133+
console.error(`worker manager HTTP server error: ${err.message}`);
134+
});
119135

120-
process.on('message', (message) => {
121-
if (message === 'stop') {
122-
console.log(`stopping realm server on port ${port}...`);
123-
server.close(() => {
124-
console.log(`worker manager on port ${port} has stopped`);
125-
if (process.send) {
126-
process.send('stopped');
127-
}
128-
});
129-
} else if (message === 'kill') {
130-
console.log(`Ending worker manager process for ${port}...`);
131-
process.exit(0);
136+
let webServerInstance = webServer.listen(port);
137+
log.info(`worker manager HTTP listening on port ${port}`);
138+
139+
const shutdown = (onShutdown?: () => void) => {
140+
log.info(`Shutting down server for worker manager...`);
141+
webServerInstance.closeAllConnections();
142+
webServerInstance.close((err?: Error) => {
143+
if (err) {
144+
log.error(`Error while closing the server for worker manager HTTP:`, err);
145+
process.exit(1);
132146
}
147+
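dbAdapter.close(); // warning: close() is async and is not awaited here, so the process.exit(0) below may run before the adapter has finished closing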
148+
log.info(`worker manager HTTP on port ${port} has stopped.`);
149+
onShutdown?.();
150+
process.exit(0);
133151
});
134-
}
152+
};
153+
154+
process.on('SIGINT', shutdown);
155+
process.on('SIGTERM', shutdown);
156+
process.on('uncaughtException', (err) => {
157+
log.error(`Uncaught exception in worker manager:`, err);
158+
shutdown();
159+
});
160+
161+
process.on('message', (message) => {
162+
if (message === 'stop') {
163+
shutdown(() => {
164+
process.send?.('stopped');
165+
});
166+
} else if (message === 'kill') {
167+
console.log(`Ending worker manager process for ${port}...`);
168+
process.exit(0);
169+
}
170+
});
135171

136172
(async () => {
137173
log.info(

0 commit comments

Comments
 (0)