@@ -6,7 +6,10 @@ import FakeTimers from '@sinonjs/fake-timers';
6
6
import { thinkingMessage } from '../constants' ;
7
7
import type { ChatCompletionSnapshot } from 'openai/lib/ChatCompletionStream' ;
8
8
import { CommandRequest } from '@cardstack/runtime-common/commands' ;
9
- import { APP_BOXEL_COMMAND_REQUESTS_KEY } from '@cardstack/runtime-common/matrix-constants' ;
9
+ import {
10
+ APP_BOXEL_REASONING_CONTENT_KEY ,
11
+ APP_BOXEL_COMMAND_REQUESTS_KEY ,
12
+ } from '@cardstack/runtime-common/matrix-constants' ;
10
13
11
14
class FakeMatrixClient implements MatrixClient {
12
15
private eventId = 0 ;
@@ -78,6 +81,26 @@ function snapshotWithContent(content: string): ChatCompletionSnapshot {
78
81
} ;
79
82
}
80
83
84
/**
 * Test fixture: builds a streaming chunk that carries nothing but a piece of
 * reasoning text.
 *
 * The chunk has exactly one choice (index 0) whose delta holds only the
 * `reasoning` field; `finish_reason` and `logprobs` are null. The envelope
 * uses placeholder values: empty `id`, `created` of 0, and model `'llm'`.
 *
 * @param reasoning - Reasoning text to place in the choice's delta.
 * @returns The chunk object described above.
 */
function chunkWithReasoning(
  reasoning: string,
): OpenAI.Chat.Completions.ChatCompletionChunk {
  // Assemble from the inside out: delta -> choice -> chunk envelope.
  const delta = { reasoning };
  const onlyChoice = {
    delta,
    finish_reason: null,
    logprobs: null,
    index: 0,
  };
  return {
    choices: [onlyChoice],
    id: '',
    created: 0,
    model: 'llm',
  };
}
103
+
81
104
function snapshotWithToolCall (
82
105
commandRequest : Partial < CommandRequest > ,
83
106
) : ChatCompletionSnapshot {
@@ -146,10 +169,11 @@ module('Responding', (hooks) => {
146
169
'Message type should be app.boxel.message' ,
147
170
) ;
148
171
assert . equal (
149
- sentEvents [ 0 ] . content . body ,
172
+ sentEvents [ 0 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
150
173
thinkingMessage ,
151
- 'Message body should match ' ,
174
+ 'Reasoning content should be thinking message ' ,
152
175
) ;
176
+ assert . equal ( sentEvents [ 0 ] . content . body , '' , 'Body should be empty' ) ;
153
177
154
178
await responder . ensureThinkingMessageSent ( ) ;
155
179
sentEvents = fakeMatrixClient . getSentEvents ( ) ;
@@ -171,16 +195,26 @@ module('Responding', (hooks) => {
171
195
'Only the initial message and one content message should be sent' ,
172
196
) ;
173
197
assert . equal (
174
- sentEvents [ 0 ] . content . body ,
198
+ sentEvents [ 0 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
175
199
thinkingMessage ,
176
- 'Just the thinking message sent' ,
200
+ 'Just the thinking message sent in reasoning' ,
201
+ ) ;
202
+ assert . equal (
203
+ sentEvents [ 0 ] . content . body ,
204
+ '' ,
205
+ 'Initial body should be empty' ,
177
206
) ;
178
207
179
208
assert . equal (
180
209
sentEvents [ 1 ] . content . body ,
181
210
'content 0' ,
182
211
'The first new content message should be sent' ,
183
212
) ;
213
+ assert . equal (
214
+ sentEvents [ 1 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
215
+ '' ,
216
+ 'No reasoning in content message' ,
217
+ ) ;
184
218
assert . deepEqual (
185
219
sentEvents [ 1 ] . content [ 'm.relates_to' ] ,
186
220
{
@@ -206,16 +240,26 @@ module('Responding', (hooks) => {
206
240
'Only the initial message and one content message should be sent' ,
207
241
) ;
208
242
assert . equal (
209
- sentEvents [ 0 ] . content . body ,
243
+ sentEvents [ 0 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
210
244
thinkingMessage ,
211
- 'Just the thinking message sent' ,
245
+ 'Just the thinking message sent in reasoning' ,
246
+ ) ;
247
+ assert . equal (
248
+ sentEvents [ 0 ] . content . body ,
249
+ '' ,
250
+ 'Initial body should be empty' ,
212
251
) ;
213
252
214
253
assert . equal (
215
254
sentEvents [ 1 ] . content . body ,
216
255
'content 0' ,
217
256
'The first new content message should be sent' ,
218
257
) ;
258
+ assert . equal (
259
+ sentEvents [ 1 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
260
+ '' ,
261
+ 'No reasoning in content message' ,
262
+ ) ;
219
263
assert . deepEqual (
220
264
sentEvents [ 1 ] . content [ 'm.relates_to' ] ,
221
265
{
@@ -241,6 +285,11 @@ module('Responding', (hooks) => {
241
285
'content 9' ,
242
286
'The last new content message should be sent' ,
243
287
) ;
288
+ assert . equal (
289
+ sentEvents [ 2 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
290
+ '' ,
291
+ 'No reasoning in content message' ,
292
+ ) ;
244
293
assert . deepEqual (
245
294
sentEvents [ 2 ] . content [ 'm.relates_to' ] ,
246
295
{
@@ -282,9 +331,14 @@ module('Responding', (hooks) => {
282
331
'Thinking message and tool call event should be sent' ,
283
332
) ;
284
333
assert . equal (
285
- sentEvents [ 0 ] . content . body ,
334
+ sentEvents [ 0 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
286
335
thinkingMessage ,
287
- 'Thinking message should be sent first' ,
336
+ 'Thinking message should be sent first in reasoning' ,
337
+ ) ;
338
+ assert . equal (
339
+ sentEvents [ 0 ] . content . body ,
340
+ '' ,
341
+ 'Initial body should be empty' ,
288
342
) ;
289
343
assert . deepEqual (
290
344
sentEvents [ 1 ] . content [ APP_BOXEL_COMMAND_REQUESTS_KEY ] ,
@@ -308,6 +362,11 @@ module('Responding', (hooks) => {
308
362
'Tool call event should be sent with correct content' ,
309
363
) ;
310
364
assert . deepEqual ( sentEvents [ 1 ] . content . body , '' , 'Body text is empty' ) ;
365
+ assert . equal (
366
+ sentEvents [ 1 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
367
+ '' ,
368
+ 'No reasoning in tool call message' ,
369
+ ) ;
311
370
assert . deepEqual (
312
371
sentEvents [ 1 ] . content [ 'm.relates_to' ] ,
313
372
{
@@ -362,9 +421,14 @@ module('Responding', (hooks) => {
362
421
'Thinking message, and event with content, event with partial tool call, and event with full tool call should be sent' ,
363
422
) ;
364
423
assert . equal (
365
- sentEvents [ 0 ] . content . body ,
424
+ sentEvents [ 0 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
366
425
thinkingMessage ,
367
- 'Thinking message should be sent first' ,
426
+ 'Thinking message should be sent first in reasoning' ,
427
+ ) ;
428
+ assert . equal (
429
+ sentEvents [ 0 ] . content . body ,
430
+ '' ,
431
+ 'Initial body should be empty' ,
368
432
) ;
369
433
assert . deepEqual (
370
434
sentEvents [ 2 ] . content [ APP_BOXEL_COMMAND_REQUESTS_KEY ] ,
@@ -400,9 +464,9 @@ module('Responding', (hooks) => {
400
464
'Tool call event should be sent with correct content' ,
401
465
) ;
402
466
assert . equal (
403
- sentEvents [ 0 ] . content . body ,
467
+ sentEvents [ 0 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
404
468
thinkingMessage ,
405
- 'Thinking message should be sent first' ,
469
+ 'Thinking message should be sent first in reasoning ' ,
406
470
) ;
407
471
assert . deepEqual (
408
472
sentEvents [ 1 ] . content [ 'm.relates_to' ] ,
@@ -500,9 +564,14 @@ module('Responding', (hooks) => {
500
564
'Thinking message, and event with content, and event with two tool calls should be sent' ,
501
565
) ;
502
566
assert . equal (
503
- sentEvents [ 0 ] . content . body ,
567
+ sentEvents [ 0 ] . content [ APP_BOXEL_REASONING_CONTENT_KEY ] ,
504
568
thinkingMessage ,
505
- 'Thinking message should be sent first' ,
569
+ 'Thinking message should be sent first in reasoning' ,
570
+ ) ;
571
+ assert . equal (
572
+ sentEvents [ 0 ] . content . body ,
573
+ '' ,
574
+ 'Initial body should be empty' ,
506
575
) ;
507
576
assert . deepEqual (
508
577
sentEvents [ 2 ] . content [ APP_BOXEL_COMMAND_REQUESTS_KEY ] ,
@@ -547,4 +616,84 @@ module('Responding', (hooks) => {
547
616
'The replacement event with the tool calls should replace the original message' ,
548
617
) ;
549
618
} ) ;
619
+
620
test('Handles sequence of thinking -> reasoning -> content correctly', async () => {
  // Scenario: the placeholder thinking message goes out first, then two
  // reasoning chunks stream in, then two content snapshots. Each step must
  // produce exactly one more sent event, and the accumulated reasoning must
  // stay attached once body content starts arriving.
  await responder.ensureThinkingMessageSent();
  let sentEvents = fakeMatrixClient.getSentEvents();

  // Accessors read whatever `sentEvents` currently points at, so they stay
  // valid after each re-fetch below.
  const reasoningAt = (index: number) =>
    sentEvents[index].content[APP_BOXEL_REASONING_CONTENT_KEY];
  const bodyAt = (index: number) => sentEvents[index].content.body;

  // Senders build fresh placeholder arguments on every call.
  const streamReasoning = (text: string) =>
    responder.onChunk(chunkWithReasoning(text), {} as any);
  const streamContent = (text: string) =>
    responder.onChunk({} as any, snapshotWithContent(text));

  const accumulatedReasoning = 'reasoning step 1 and 2';

  // Step 0: only the thinking placeholder exists, carried in the reasoning
  // field with an empty body.
  assert.equal(sentEvents.length, 1, 'Initial thinking message sent');
  assert.equal(
    reasoningAt(0),
    thinkingMessage,
    'Initial thinking message in reasoning',
  );
  assert.equal(bodyAt(0), '', 'Initial body empty');

  // Step 1: first reasoning chunk. No clock advance here; the assertions
  // expect the event to be present as soon as onChunk resolves.
  await streamReasoning('reasoning step 1');
  sentEvents = fakeMatrixClient.getSentEvents();
  assert.equal(sentEvents.length, 2, 'First reasoning update sent');
  assert.equal(reasoningAt(1), 'reasoning step 1', 'First reasoning content');
  assert.equal(bodyAt(1), '', 'Body still empty');

  // Step 2: second reasoning chunk is appended to the first.
  await streamReasoning(' and 2');
  clock.tick(250); // Advance clock to trigger throttled update
  sentEvents = fakeMatrixClient.getSentEvents();
  assert.equal(sentEvents.length, 3, 'Second reasoning update sent');
  assert.equal(reasoningAt(2), accumulatedReasoning, 'Second reasoning content');
  assert.equal(bodyAt(2), '', 'Body still empty');

  // Step 3: first content snapshot; reasoning gathered so far must ride along.
  await streamContent('content step 1');
  sentEvents = fakeMatrixClient.getSentEvents();
  assert.equal(sentEvents.length, 4, 'First content update sent');
  assert.equal(bodyAt(3), 'content step 1', 'First content body');
  assert.equal(
    reasoningAt(3),
    accumulatedReasoning,
    'Reasoning preserved with content update',
  );

  // Step 4: second content snapshot replaces the body, reasoning unchanged.
  await streamContent('content step 2');
  clock.tick(250); // Advance clock to trigger throttled update
  sentEvents = fakeMatrixClient.getSentEvents();
  assert.equal(sentEvents.length, 5, 'Second content update sent');
  assert.equal(bodyAt(4), 'content step 2', 'Second content body');
  assert.equal(reasoningAt(4), accumulatedReasoning, 'Reasoning still preserved');

  // Every event after the first must be an m.replace edit of event '0'.
  sentEvents.slice(1).forEach((update, offset) => {
    assert.deepEqual(
      update.content['m.relates_to'],
      {
        rel_type: 'm.replace',
        event_id: '0',
      },
      `Update ${offset + 1} replaced original message`,
    );
  });
});
550
699
} ) ;
0 commit comments