Skip to content

Commit 20ff622

Browse files
committed
Capture reasoning content in Matrix events
1 parent 8b4e305 commit 20ff622

File tree

5 files changed

+191
-21
lines changed

5 files changed

+191
-21
lines changed

packages/ai-bot/lib/matrix.ts

+4
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ import * as Sentry from '@sentry/node';
55
import { CommandRequest } from '@cardstack/runtime-common/commands';
66
import {
77
APP_BOXEL_COMMAND_REQUESTS_KEY,
8+
APP_BOXEL_REASONING_CONTENT_KEY,
89
APP_BOXEL_MESSAGE_MSGTYPE,
910
} from '@cardstack/runtime-common/matrix-constants';
1011

@@ -57,6 +58,7 @@ export async function sendMessageEvent(
5758
eventIdToReplace: string | undefined,
5859
data: any = {},
5960
commandRequests: Partial<CommandRequest>[] = [],
61+
reasoning: string | undefined = undefined,
6062
) {
6163
log.debug('sending message', body);
6264
let contentObject: IContent = {
@@ -65,6 +67,7 @@ export async function sendMessageEvent(
6567
msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
6668
formatted_body: body,
6769
format: 'org.matrix.custom.html',
70+
[APP_BOXEL_REASONING_CONTENT_KEY]: reasoning,
6871
[APP_BOXEL_COMMAND_REQUESTS_KEY]: commandRequests,
6972
},
7073
...data,
@@ -75,6 +78,7 @@ export async function sendMessageEvent(
7578
msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
7679
formatted_body: body,
7780
format: 'org.matrix.custom.html',
81+
[APP_BOXEL_REASONING_CONTENT_KEY]: reasoning,
7882
[APP_BOXEL_COMMAND_REQUESTS_KEY]: commandRequests,
7983
};
8084
}

packages/ai-bot/lib/responder.ts

+18-4
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ export class Responder {
6363

6464
initialMessageSent = false;
6565
responseEventId: string | undefined;
66+
latestReasoning = '';
6667
latestContent = '';
6768
toolCalls: ChatCompletionSnapshot.Choice.Message.ToolCall[] = [];
6869
isStreamingFinished = false;
@@ -90,6 +91,7 @@ export class Responder {
9091
this.toolCalls.map((toolCall) =>
9192
this.toCommandRequest(toolCall as ChatCompletionMessageToolCall),
9293
),
94+
this.latestReasoning,
9395
);
9496
this.messagePromises.push(messagePromise);
9597
await messagePromise;
@@ -100,9 +102,11 @@ export class Responder {
100102
let initialMessage = await sendMessageEvent(
101103
this.client,
102104
this.roomId,
103-
thinkingMessage,
105+
'',
104106
undefined,
105107
{ isStreamingFinished: false },
108+
[],
109+
thinkingMessage,
106110
);
107111
this.responseEventId = initialMessage.event_id;
108112
this.initialMessageSent = true;
@@ -113,7 +117,7 @@ export class Responder {
113117
chunk: OpenAI.Chat.Completions.ChatCompletionChunk,
114118
snapshot: ChatCompletionSnapshot,
115119
) {
116-
const toolCallsSnapshot = snapshot.choices[0].message.tool_calls;
120+
const toolCallsSnapshot = snapshot.choices?.[0]?.message?.tool_calls;
117121
if (toolCallsSnapshot?.length) {
118122
let latestToolCallsJson = JSON.stringify(toolCallsSnapshot);
119123
if (this.toolCallsJson !== latestToolCallsJson) {
@@ -123,7 +127,7 @@ export class Responder {
123127
}
124128
}
125129

126-
let contentSnapshot = snapshot.choices[0].message.content;
130+
let contentSnapshot = snapshot.choices?.[0]?.message?.content;
127131
if (contentSnapshot?.length) {
128132
contentSnapshot = cleanContent(contentSnapshot);
129133
if (this.latestContent !== contentSnapshot) {
@@ -132,7 +136,17 @@ export class Responder {
132136
}
133137
}
134138

135-
if (snapshot.choices[0].finish_reason === 'stop') {
139+
// reasoning does not support snapshots, so we need to handle the delta
140+
let newReasoningContent = chunk.choices?.[0]?.delta?.reasoning;
141+
if (newReasoningContent?.length) {
142+
if (this.latestReasoning === thinkingMessage) {
143+
this.latestReasoning = '';
144+
}
145+
this.latestReasoning = this.latestReasoning + newReasoningContent;
146+
await this.sendMessageEventWithThrottling();
147+
}
148+
149+
if (snapshot.choices?.[0]?.finish_reason === 'stop') {
136150
if (!this.isStreamingFinished) {
137151
this.isStreamingFinished = true;
138152
await this.sendMessageEventWithThrottling();

packages/ai-bot/tests/responding-test.ts

+164-15
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,10 @@ import FakeTimers from '@sinonjs/fake-timers';
66
import { thinkingMessage } from '../constants';
77
import type { ChatCompletionSnapshot } from 'openai/lib/ChatCompletionStream';
88
import { CommandRequest } from '@cardstack/runtime-common/commands';
9-
import { APP_BOXEL_COMMAND_REQUESTS_KEY } from '@cardstack/runtime-common/matrix-constants';
9+
import {
10+
APP_BOXEL_REASONING_CONTENT_KEY,
11+
APP_BOXEL_COMMAND_REQUESTS_KEY,
12+
} from '@cardstack/runtime-common/matrix-constants';
1013

1114
class FakeMatrixClient implements MatrixClient {
1215
private eventId = 0;
@@ -78,6 +81,26 @@ function snapshotWithContent(content: string): ChatCompletionSnapshot {
7881
};
7982
}
8083

84+
/**
 * Test helper: builds a streaming chat-completion chunk whose only choice
 * carries the given text in `delta.reasoning`.
 *
 * `delta.reasoning` is the field the responder's `onChunk` reads
 * (`chunk.choices?.[0]?.delta?.reasoning`) to accumulate reasoning text.
 * Every other field is a fixed placeholder: `finish_reason` and `logprobs`
 * are null, `index` is 0, `id` is empty, `created` is 0 and `model` is 'llm'.
 *
 * @param reasoning - reasoning text fragment to place in the choice's delta
 * @returns a chunk object containing exactly one choice with that delta
 */
function chunkWithReasoning(
85+
reasoning: string,
86+
): OpenAI.Chat.Completions.ChatCompletionChunk {
87+
return {
88+
choices: [
89+
{
90+
delta: {
91+
reasoning: reasoning,
92+
},
93+
finish_reason: null,
94+
logprobs: null,
95+
index: 0,
96+
},
97+
],
98+
id: '',
99+
created: 0,
100+
model: 'llm',
101+
};
102+
}
103+
81104
function snapshotWithToolCall(
82105
commandRequest: Partial<CommandRequest>,
83106
): ChatCompletionSnapshot {
@@ -146,10 +169,11 @@ module('Responding', (hooks) => {
146169
'Message type should be app.boxel.message',
147170
);
148171
assert.equal(
149-
sentEvents[0].content.body,
172+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
150173
thinkingMessage,
151-
'Message body should match',
174+
'Reasoning content should be thinking message',
152175
);
176+
assert.equal(sentEvents[0].content.body, '', 'Body should be empty');
153177

154178
await responder.ensureThinkingMessageSent();
155179
sentEvents = fakeMatrixClient.getSentEvents();
@@ -171,16 +195,26 @@ module('Responding', (hooks) => {
171195
'Only the initial message and one content message should be sent',
172196
);
173197
assert.equal(
174-
sentEvents[0].content.body,
198+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
175199
thinkingMessage,
176-
'Just the thinking message sent',
200+
'Just the thinking message sent in reasoning',
201+
);
202+
assert.equal(
203+
sentEvents[0].content.body,
204+
'',
205+
'Initial body should be empty',
177206
);
178207

179208
assert.equal(
180209
sentEvents[1].content.body,
181210
'content 0',
182211
'The first new content message should be sent',
183212
);
213+
assert.equal(
214+
sentEvents[1].content[APP_BOXEL_REASONING_CONTENT_KEY],
215+
'',
216+
'No reasoning in content message',
217+
);
184218
assert.deepEqual(
185219
sentEvents[1].content['m.relates_to'],
186220
{
@@ -206,16 +240,26 @@ module('Responding', (hooks) => {
206240
'Only the initial message and one content message should be sent',
207241
);
208242
assert.equal(
209-
sentEvents[0].content.body,
243+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
210244
thinkingMessage,
211-
'Just the thinking message sent',
245+
'Just the thinking message sent in reasoning',
246+
);
247+
assert.equal(
248+
sentEvents[0].content.body,
249+
'',
250+
'Initial body should be empty',
212251
);
213252

214253
assert.equal(
215254
sentEvents[1].content.body,
216255
'content 0',
217256
'The first new content message should be sent',
218257
);
258+
assert.equal(
259+
sentEvents[1].content[APP_BOXEL_REASONING_CONTENT_KEY],
260+
'',
261+
'No reasoning in content message',
262+
);
219263
assert.deepEqual(
220264
sentEvents[1].content['m.relates_to'],
221265
{
@@ -241,6 +285,11 @@ module('Responding', (hooks) => {
241285
'content 9',
242286
'The last new content message should be sent',
243287
);
288+
assert.equal(
289+
sentEvents[2].content[APP_BOXEL_REASONING_CONTENT_KEY],
290+
'',
291+
'No reasoning in content message',
292+
);
244293
assert.deepEqual(
245294
sentEvents[2].content['m.relates_to'],
246295
{
@@ -282,9 +331,14 @@ module('Responding', (hooks) => {
282331
'Thinking message and tool call event should be sent',
283332
);
284333
assert.equal(
285-
sentEvents[0].content.body,
334+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
286335
thinkingMessage,
287-
'Thinking message should be sent first',
336+
'Thinking message should be sent first in reasoning',
337+
);
338+
assert.equal(
339+
sentEvents[0].content.body,
340+
'',
341+
'Initial body should be empty',
288342
);
289343
assert.deepEqual(
290344
sentEvents[1].content[APP_BOXEL_COMMAND_REQUESTS_KEY],
@@ -308,6 +362,11 @@ module('Responding', (hooks) => {
308362
'Tool call event should be sent with correct content',
309363
);
310364
assert.deepEqual(sentEvents[1].content.body, '', 'Body text is empty');
365+
assert.equal(
366+
sentEvents[1].content[APP_BOXEL_REASONING_CONTENT_KEY],
367+
'',
368+
'No reasoning in tool call message',
369+
);
311370
assert.deepEqual(
312371
sentEvents[1].content['m.relates_to'],
313372
{
@@ -362,9 +421,14 @@ module('Responding', (hooks) => {
362421
'Thinking message, and event with content, event with partial tool call, and event with full tool call should be sent',
363422
);
364423
assert.equal(
365-
sentEvents[0].content.body,
424+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
366425
thinkingMessage,
367-
'Thinking message should be sent first',
426+
'Thinking message should be sent first in reasoning',
427+
);
428+
assert.equal(
429+
sentEvents[0].content.body,
430+
'',
431+
'Initial body should be empty',
368432
);
369433
assert.deepEqual(
370434
sentEvents[2].content[APP_BOXEL_COMMAND_REQUESTS_KEY],
@@ -400,9 +464,9 @@ module('Responding', (hooks) => {
400464
'Tool call event should be sent with correct content',
401465
);
402466
assert.equal(
403-
sentEvents[0].content.body,
467+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
404468
thinkingMessage,
405-
'Thinking message should be sent first',
469+
'Thinking message should be sent first in reasoning',
406470
);
407471
assert.deepEqual(
408472
sentEvents[1].content['m.relates_to'],
@@ -500,9 +564,14 @@ module('Responding', (hooks) => {
500564
'Thinking message, and event with content, and event with two tool calls should be sent',
501565
);
502566
assert.equal(
503-
sentEvents[0].content.body,
567+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
504568
thinkingMessage,
505-
'Thinking message should be sent first',
569+
'Thinking message should be sent first in reasoning',
570+
);
571+
assert.equal(
572+
sentEvents[0].content.body,
573+
'',
574+
'Initial body should be empty',
506575
);
507576
assert.deepEqual(
508577
sentEvents[2].content[APP_BOXEL_COMMAND_REQUESTS_KEY],
@@ -547,4 +616,84 @@ module('Responding', (hooks) => {
547616
'The replacement event with the tool calls should replace the original message',
548617
);
549618
});
619+
620+
// End-to-end check of the three phases of a response:
//   1. the placeholder thinking message is sent in the reasoning key with an
//      empty body,
//   2. reasoning deltas are accumulated and sent while the body stays empty,
//   3. content updates set the body while the accumulated reasoning is kept.
// Every event after the first must be an m.replace of the initial event ('0').
test('Handles sequence of thinking -> reasoning -> content correctly', async () => {
621+
await responder.ensureThinkingMessageSent();
622+
623+
// Initial state - thinking message
624+
let sentEvents = fakeMatrixClient.getSentEvents();
625+
assert.equal(sentEvents.length, 1, 'Initial thinking message sent');
626+
assert.equal(
627+
sentEvents[0].content[APP_BOXEL_REASONING_CONTENT_KEY],
628+
thinkingMessage,
629+
'Initial thinking message in reasoning',
630+
);
631+
assert.equal(sentEvents[0].content.body, '', 'Initial body empty');
632+
633+
// First reasoning update
// The snapshot argument is an empty object: onChunk reads the snapshot only
// through optional chaining, so only the chunk's reasoning delta takes effect.
634+
await responder.onChunk(chunkWithReasoning('reasoning step 1'), {} as any);
635+
sentEvents = fakeMatrixClient.getSentEvents();
636+
assert.equal(sentEvents.length, 2, 'First reasoning update sent');
637+
assert.equal(
638+
sentEvents[1].content[APP_BOXEL_REASONING_CONTENT_KEY],
639+
'reasoning step 1',
640+
'First reasoning content',
641+
);
642+
assert.equal(sentEvents[1].content.body, '', 'Body still empty');
643+
644+
// Second reasoning update
// The delta is appended to the reasoning received so far, so the event is
// expected to carry the concatenated text rather than just ' and 2'.
645+
await responder.onChunk(chunkWithReasoning(' and 2'), {} as any);
646+
clock.tick(250); // Advance clock to trigger throttled update
647+
sentEvents = fakeMatrixClient.getSentEvents();
648+
assert.equal(sentEvents.length, 3, 'Second reasoning update sent');
649+
assert.equal(
650+
sentEvents[2].content[APP_BOXEL_REASONING_CONTENT_KEY],
651+
'reasoning step 1 and 2',
652+
'Second reasoning content',
653+
);
654+
assert.equal(sentEvents[2].content.body, '', 'Body still empty');
655+
656+
// First content update
// Here the chunk argument is the empty object; the reasoning delta lookup is
// optional-chained, so only the snapshot's content drives this update.
657+
await responder.onChunk({} as any, snapshotWithContent('content step 1'));
658+
sentEvents = fakeMatrixClient.getSentEvents();
659+
assert.equal(sentEvents.length, 4, 'First content update sent');
660+
assert.equal(
661+
sentEvents[3].content.body,
662+
'content step 1',
663+
'First content body',
664+
);
665+
assert.equal(
666+
sentEvents[3].content[APP_BOXEL_REASONING_CONTENT_KEY],
667+
'reasoning step 1 and 2',
668+
'Reasoning preserved with content update',
669+
);
670+
671+
// Second content update
672+
await responder.onChunk({} as any, snapshotWithContent('content step 2'));
673+
clock.tick(250); // Advance clock to trigger throttled update
674+
sentEvents = fakeMatrixClient.getSentEvents();
675+
assert.equal(sentEvents.length, 5, 'Second content update sent');
676+
assert.equal(
677+
sentEvents[4].content.body,
678+
'content step 2',
679+
'Second content body',
680+
);
681+
assert.equal(
682+
sentEvents[4].content[APP_BOXEL_REASONING_CONTENT_KEY],
683+
'reasoning step 1 and 2',
684+
'Reasoning still preserved',
685+
);
686+
687+
// Verify all updates replaced the original message
688+
for (let i = 1; i < sentEvents.length; i++) {
689+
assert.deepEqual(
690+
sentEvents[i].content['m.relates_to'],
691+
{
692+
rel_type: 'm.replace',
693+
event_id: '0',
694+
},
695+
`Update ${i} replaced original message`,
696+
);
697+
}
698+
});
550699
});

0 commit comments

Comments
 (0)