Skip to content

Commit

Permalink
better logging, another test, make maximumNetworkErrorRetryCount conf…
Browse files Browse the repository at this point in the history
…igurable
  • Loading branch information
toger5 committed Feb 27, 2025
1 parent 4a2d35b commit 631de6e
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 24 deletions.
25 changes: 22 additions & 3 deletions spec/unit/matrixrtc/MembershipManager.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -655,14 +655,33 @@ describe.each([
});
it("falls back to using pure state events when some error occurs while sending delayed events !FailsForLegacy", async () => {
const unrecoverableError = jest.fn();
(client._unstable_sendDelayedStateEvent as Mock<any>).mockRejectedValue(new HTTPError("unknown", 501));
(client._unstable_sendDelayedStateEvent as Mock<any>).mockRejectedValue(new HTTPError("unknown", 601));
const manager = new TestMembershipManager({}, room, client, () => undefined);
manager.join([focus], focusActive, unrecoverableError);
await jest.advanceTimersByTimeAsync(1);

await waitForMockCall(client.sendStateEvent);
expect(unrecoverableError).not.toHaveBeenCalledWith();
expect(client.sendStateEvent).toHaveBeenCalled();
});
it("retries before failing in case its a network error !FailsForLegacy", async () => {
const unrecoverableError = jest.fn();
(client._unstable_sendDelayedStateEvent as Mock<any>).mockRejectedValue(new HTTPError("unknown", 501));
const manager = new TestMembershipManager(
{ callMemberEventRetryDelayMinimum: 1000, maximumNetworkErrorRetryCount: 7 },
room,
client,
() => undefined,
);
manager.join([focus], focusActive, unrecoverableError);
for (let retries = 0; retries < 7; retries++) {
expect(client._unstable_sendDelayedStateEvent).toHaveBeenCalledTimes(retries + 1);
await jest.advanceTimersByTimeAsync(1000);
}
expect(unrecoverableError).toHaveBeenCalled();
expect(unrecoverableError.mock.lastCall![0].message).toMatch(
"The MembershipManager has to shut down because of the end condition: Error: Reached maximum",
);
expect(client.sendStateEvent).not.toHaveBeenCalled();
});
it("falls back to using pure state events when UnsupportedEndpointError encountered for delayed events !FailsForLegacy", async () => {
const unrecoverableError = jest.fn();
(client._unstable_sendDelayedStateEvent as Mock<any>).mockRejectedValue(
Expand Down
6 changes: 6 additions & 0 deletions src/matrixrtc/MatrixRTCSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,16 @@ export interface MembershipConfig {
* @deprecated It should be possible to make it stable without this.
*/
callMemberEventRetryJitter?: number;

/**
* The maximum number of retries that the manager will do for delayed event sending/updating and state event sending when a server rate limit has been hit.
*/
maximumRateLimitRetryCount?: number;

/**
* The maximum number of retries that the manager will do for delayed event sending/updating and state event sending when a network error occurs.
*/
maximumNetworkErrorRetryCount?: number;
}

export interface EncryptionConfig {
Expand Down
55 changes: 34 additions & 21 deletions src/matrixrtc/NewMembershipManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,9 @@ export class MembershipManager implements IMembershipManager {
this.scheduler.state.running = true;
this.scheduler
.startWithActions([{ ts: Date.now(), type: DirectMembershipManagerAction.Join }])
.catch((e) => onError?.(e));
.catch((e) => {
onError?.(e);
});
}
}

Expand Down Expand Up @@ -442,10 +444,10 @@ export class MembershipManager implements IMembershipManager {
private get maximumRateLimitRetryCount(): number {
return this.joinConfig?.maximumRateLimitRetryCount ?? 10;
}
private get maximumRetryCount(): number {
// TODO allow configuring this via `MembershipConfig`.
return 10;
private get maximumNetworkErrorRetryCount(): number {
return this.joinConfig?.maximumNetworkErrorRetryCount ?? 10;
}

// Scheduler:
private scheduler = new ActionScheduler(ActionScheduler.defaultState, this.membershipLoopHandler.bind(this));

Expand Down Expand Up @@ -482,26 +484,28 @@ export class MembershipManager implements IMembershipManager {
});
return;
}
if (this.retryOnNetworkError(e, type)) return;

// log and fall through
if (this.unsupportedDelayedEndpoint(e)) {
logger.info("Not using delayed event because the endpoint is not supported");
this.scheduler.addAction({
ts: Date.now(),
type: MembershipActionType.SendJoinEvent,
});
return;
} else {
logger.info("Not using delayed event because: " + e);
}

if (this.retryOnNetworkError(e, type)) return;

logger.info("Not using delayed event because: " + e.getMessage());
// On any other error we fall back to not using delayed events and send the join state event immediately
this.scheduler.addAction({
ts: Date.now(),
type: MembershipActionType.SendJoinEvent,
});
});
} else {
// Restart case with delayed id.
// This can happen if someone else (or another client) removes our own membership event.
// It will trigger `onRTCSessionMemberUpdate` queue `MembershipActionType.SendFirstDelayedEvent`.
// We might still have our delyed event from the previous participation and dependent on the serve this might not
// get automatically removed if the state changes. Hence It would remove our membership unexpectedly shortly after the rejoin.
//
// In this block we will try to cancel this delayed event before setting up a new one.

// Remove all running updates and restarts
this.scheduler.resetActions([]);
await this.client
Expand Down Expand Up @@ -702,7 +706,6 @@ export class MembershipManager implements IMembershipManager {
})
.catch((e) => {
if (this.rateLimitErrorHandler(e, "sendStateEvent", type)) return;
// TODO add timeout/netowrk error (or just the below)

// Event sending retry (different to rate limit retries)
if (this.retryOnNetworkError(e, type)) return;
Expand Down Expand Up @@ -832,7 +835,7 @@ export class MembershipManager implements IMembershipManager {
}

// Failiour
throw Error("Exceeded maximum retries for " + type + " attempts (client." + method + "): " + error.message);
throw Error("Exceeded maximum retries for " + type + " attempts (client." + method + "): " + (error as Error));
}

/**
Expand All @@ -846,30 +849,40 @@ export class MembershipManager implements IMembershipManager {
*/
private retryOnNetworkError(error: unknown, type: MembershipActionType): boolean {
// "Is a network error"-boundary
const retries = this.scheduler.state.networkErrorRetries.get(type) ?? 0;
const retryDurationString = this.callMemberEventRetryDelayMinimum / 1000 + "s";
const retryCounterString = "(" + retries + "/" + this.maximumNetworkErrorRetryCount + ")";
if (error instanceof Error && error.name === "AbortError") {
logger.warn("Network local timeout error while sending event, retrying in " + retryDurationString, error);
logger.warn(
"Network local timeout error while sending event, retrying in " +
retryDurationString +
" " +
retryCounterString,
error,
);
} else if (
error instanceof HTTPError &&
typeof error.httpStatus === "number" &&
error.httpStatus >= 500 &&
error.httpStatus < 600
) {
logger.warn("Network error while sending event, retrying in " + retryDurationString, error);
logger.warn(
"Network error while sending event, retrying in " + retryDurationString + " " + retryCounterString,
error,
);
} else {
return false;
}

// retry boundary
const retries = this.scheduler.state.networkErrorRetries.get(type) ?? 0;
if (retries < this.maximumRetryCount) {
if (retries < this.maximumNetworkErrorRetryCount) {
this.scheduler.state.networkErrorRetries.set(type, retries + 1);
this.scheduler.addAction({ ts: Date.now() + this.callMemberEventRetryDelayMinimum, type });
return true;
}

// Failiour
throw Error("Reached maximum (" + this.maximumRetryCount + ") retries cause by: " + error);
throw Error("Reached maximum (" + this.maximumNetworkErrorRetryCount + ") retries cause by: " + error);
}

/**
Expand Down

0 comments on commit 631de6e

Please sign in to comment.