26
26
static int accelerator_cuda_check_addr (const void * addr , int * dev_id , uint64_t * flags );
27
27
static int accelerator_cuda_create_stream (int dev_id , opal_accelerator_stream_t * * stream );
28
28
29
- static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event );
29
+ static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event , bool enable_ipc );
30
30
static int accelerator_cuda_record_event (int dev_id , opal_accelerator_event_t * event , opal_accelerator_stream_t * stream );
31
31
static int accelerator_cuda_query_event (int dev_id , opal_accelerator_event_t * event );
32
+ static int accelerator_cuda_wait_event (int dev_id , opal_accelerator_event_t * event , opal_accelerator_stream_t * stream );
32
33
33
34
static int accelerator_cuda_memcpy_async (int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
34
35
opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type );
@@ -44,10 +45,14 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
44
45
static bool accelerator_cuda_is_ipc_enabled (void );
45
46
static int accelerator_cuda_get_ipc_handle (int dev_id , void * dev_ptr ,
46
47
opal_accelerator_ipc_handle_t * handle );
48
+ static int accelerator_cuda_import_ipc_handle (int dev_id , uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
49
+ opal_accelerator_ipc_handle_t * handle );
47
50
static int accelerator_cuda_open_ipc_handle (int dev_id , opal_accelerator_ipc_handle_t * handle ,
48
51
void * * dev_ptr );
49
52
static int accelerator_cuda_get_ipc_event_handle (opal_accelerator_event_t * event ,
50
53
opal_accelerator_ipc_event_handle_t * handle );
54
+ static int accelerator_cuda_import_ipc_event_handle (uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
55
+ opal_accelerator_ipc_event_handle_t * handle );
51
56
static int accelerator_cuda_open_ipc_event_handle (opal_accelerator_ipc_event_handle_t * handle ,
52
57
opal_accelerator_event_t * event );
53
58
@@ -60,6 +65,8 @@ static int accelerator_cuda_device_can_access_peer( int *access, int dev1, int d
60
65
61
66
static int accelerator_cuda_get_buffer_id (int dev_id , const void * addr , opal_accelerator_buffer_id_t * buf_id );
62
67
68
/*
 * Map an accelerator stream to the value passed as the stream argument of
 * CUDA driver calls (used by cuEventRecord and cuMemcpyAsync in this file):
 *   - MCA_ACCELERATOR_STREAM_DEFAULT yields 0;
 *   - any other stream yields the CUstream stored behind its `stream` member.
 *
 * Every use of the argument is parenthesized so that expression arguments
 * (e.g. `streams + 1`) expand correctly. The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define GET_STREAM(_stream)                                   \
    ((MCA_ACCELERATOR_STREAM_DEFAULT == (_stream))            \
         ? 0                                                  \
         : *((CUstream *) (_stream)->stream))
69
+
63
70
opal_accelerator_base_module_t opal_accelerator_cuda_module =
64
71
{
65
72
accelerator_cuda_check_addr ,
@@ -69,6 +76,7 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
69
76
accelerator_cuda_create_event ,
70
77
accelerator_cuda_record_event ,
71
78
accelerator_cuda_query_event ,
79
+ accelerator_cuda_wait_event ,
72
80
73
81
accelerator_cuda_memcpy_async ,
74
82
accelerator_cuda_memcpy ,
@@ -79,8 +87,10 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
79
87
80
88
accelerator_cuda_is_ipc_enabled ,
81
89
accelerator_cuda_get_ipc_handle ,
90
+ accelerator_cuda_import_ipc_handle ,
82
91
accelerator_cuda_open_ipc_handle ,
83
92
accelerator_cuda_get_ipc_event_handle ,
93
+ accelerator_cuda_import_ipc_event_handle ,
84
94
accelerator_cuda_open_ipc_event_handle ,
85
95
86
96
accelerator_cuda_host_register ,
@@ -260,7 +270,8 @@ static void opal_accelerator_cuda_stream_destruct(opal_accelerator_cuda_stream_t
260
270
{
261
271
CUresult result ;
262
272
263
- if (NULL != stream -> base .stream ) {
273
+ if (MCA_ACCELERATOR_STREAM_DEFAULT != (opal_accelerator_stream_t * )stream &&
274
+ NULL != stream -> base .stream ) {
264
275
result = cuStreamDestroy (* (CUstream * )stream -> base .stream );
265
276
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
266
277
opal_show_help ("help-accelerator-cuda.txt" , "cuStreamDestroy failed" , true,
@@ -276,7 +287,7 @@ OBJ_CLASS_INSTANCE(
276
287
NULL ,
277
288
opal_accelerator_cuda_stream_destruct );
278
289
279
- static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event )
290
+ static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event , bool enable_ipc )
280
291
{
281
292
CUresult result ;
282
293
int delayed_init = opal_accelerator_cuda_delayed_init ();
@@ -294,7 +305,8 @@ static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **
294
305
OBJ_RELEASE (* event );
295
306
return OPAL_ERR_OUT_OF_RESOURCE ;
296
307
}
297
- result = cuEventCreate ((* event )-> event , CU_EVENT_DISABLE_TIMING );
308
+ result = cuEventCreate ((* event )-> event , enable_ipc ? CU_EVENT_DISABLE_TIMING |CU_EVENT_INTERPROCESS :
309
+ CU_EVENT_DISABLE_TIMING );
298
310
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
299
311
opal_show_help ("help-accelerator-cuda.txt" , "cuEventCreate failed" , true,
300
312
OPAL_PROC_MY_HOSTNAME , result );
@@ -328,11 +340,13 @@ static int accelerator_cuda_record_event(int dev_id, opal_accelerator_event_t *e
328
340
{
329
341
CUresult result ;
330
342
331
- if (NULL == stream || NULL == event ) {
343
+ if ((MCA_ACCELERATOR_STREAM_DEFAULT != stream &&
344
+ (NULL == stream || NULL == stream -> stream )) ||
345
+ NULL == event ) {
332
346
return OPAL_ERR_BAD_PARAM ;
333
347
}
334
348
335
- result = cuEventRecord (* (CUevent * )event -> event , * ( CUstream * ) stream -> stream );
349
+ result = cuEventRecord (* (CUevent * )event -> event , GET_STREAM ( stream ) );
336
350
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
337
351
opal_show_help ("help-accelerator-cuda.txt" , "cuEventRecord failed" , true,
338
352
OPAL_PROC_MY_HOSTNAME , result );
@@ -369,6 +383,10 @@ static int accelerator_cuda_query_event(int dev_id, opal_accelerator_event_t *ev
369
383
}
370
384
}
371
385
}
386
+ static int accelerator_cuda_wait_event (int dev_id , opal_accelerator_event_t * event , opal_accelerator_stream_t * stream )
387
+ {
388
+ return OPAL_ERR_NOT_IMPLEMENTED ;
389
+ }
372
390
373
391
static int accelerator_cuda_memcpy_async (int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
374
392
opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type )
@@ -380,11 +398,12 @@ static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *
380
398
return delayed_init ;
381
399
}
382
400
383
- if (NULL == stream || NULL == dest || NULL == src || size <= 0 ) {
401
+ if ((MCA_ACCELERATOR_STREAM_DEFAULT != stream && NULL == stream ) ||
402
+ NULL == dest || NULL == src || size <= 0 ) {
384
403
return OPAL_ERR_BAD_PARAM ;
385
404
}
386
405
387
- result = cuMemcpyAsync ((CUdeviceptr ) dest , (CUdeviceptr ) src , size , * ( CUstream * ) stream -> stream );
406
+ result = cuMemcpyAsync ((CUdeviceptr ) dest , (CUdeviceptr ) src , size , GET_STREAM ( stream ) );
388
407
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
389
408
opal_show_help ("help-accelerator-cuda.txt" , "cuMemcpyAsync failed" , true, dest , src ,
390
409
size , result );
@@ -547,6 +566,12 @@ static int accelerator_cuda_get_ipc_handle(int dev_id, void *dev_ptr,
547
566
return OPAL_ERR_NOT_IMPLEMENTED ;
548
567
}
549
568
569
+ static int accelerator_cuda_import_ipc_handle (int dev_id , uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
570
+ opal_accelerator_ipc_handle_t * handle )
571
+ {
572
+ return OPAL_ERR_NOT_IMPLEMENTED ;
573
+ }
574
+
550
575
static int accelerator_cuda_open_ipc_handle (int dev_id , opal_accelerator_ipc_handle_t * handle ,
551
576
void * * dev_ptr )
552
577
{
@@ -559,6 +584,12 @@ static int accelerator_cuda_get_ipc_event_handle(opal_accelerator_event_t *event
559
584
return OPAL_ERR_NOT_IMPLEMENTED ;
560
585
}
561
586
587
+ static int accelerator_cuda_import_ipc_event_handle (uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
588
+ opal_accelerator_ipc_event_handle_t * handle )
589
+ {
590
+ return OPAL_ERR_NOT_IMPLEMENTED ;
591
+ }
592
+
562
593
static int accelerator_cuda_open_ipc_event_handle (opal_accelerator_ipc_event_handle_t * handle ,
563
594
opal_accelerator_event_t * event )
564
595
{
0 commit comments