Skip to content

Commit c573b51

Browse files
committed
accelerator: update API
- extend the accelerator API by three more functions - add a new constant for default accelerator - minor bug fixes in the existing files/version - update the create_event interface - implementation of the new IPC functions in the rocm and null component - add stubs for new functions to cuda and ze components Signed-off-by: Edgar Gabriel <Edgar.Gabriel@amd.com>
1 parent 6911950 commit c573b51

File tree

9 files changed

+374
-45
lines changed

9 files changed

+374
-45
lines changed

ompi/mca/pml/ob1/pml_ob1_accelerator.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ int mca_pml_ob1_accelerator_init(void)
223223

224224
/* Create the events since they can be reused. */
225225
for (i = 0; i < accelerator_event_max; i++) {
226-
result = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_dtoh_array[i]);
226+
result = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_dtoh_array[i], false);
227227
if (OPAL_SUCCESS != result) {
228228
opal_output_verbose(1, mca_pml_ob1_output, "Accelerator create event failed.");
229229
rc = OPAL_ERROR;
@@ -256,7 +256,7 @@ int mca_pml_ob1_accelerator_init(void)
256256

257257
/* Create the events since they can be reused. */
258258
for (i = 0; i < accelerator_event_max; i++) {
259-
result = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_htod_array[i]);
259+
result = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_htod_array[i], false);
260260
if (OPAL_SUCCESS != result) {
261261
opal_output_verbose(1, mca_pml_ob1_output, "Accelerator create event failed.");
262262
rc = OPAL_ERROR;

opal/include/opal/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ headers += \
3030
opal/frameworks.h \
3131
opal/opal_portable_platform.h \
3232
opal/opal_portable_platform_real.h \
33-
opal/opal_cuda.h
33+
opal/opal_gpu.h
3434

3535
nodist_headers += \
3636
opal/version.h

opal/mca/accelerator/accelerator.h

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
* reserved.
55
* Copyright (c) Amazon.com, Inc. or its affiliates.
66
* All Rights reserved.
7+
* Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8+
*
79
* $COPYRIGHT$
810
*
911
* Additional copyrights may follow
@@ -110,19 +112,28 @@ struct opal_accelerator_stream_t {
110112
void *stream;
111113
};
112114
typedef struct opal_accelerator_stream_t opal_accelerator_stream_t;
115+
OBJ_CLASS_DECLARATION(opal_accelerator_stream_t);
116+
117+
/* Constant indicating the default/zero stream */
118+
/* Fully parenthesized so the cast stays a single operand when the macro is
 * used inside a larger expression (e.g. as the operand of sizeof or ->). */
#define MCA_ACCELERATOR_STREAM_DEFAULT ((opal_accelerator_stream_t *)0x00000002)
113119

114120
#define IPC_MAX_HANDLE_SIZE 64
115121
struct opal_accelerator_ipc_handle_t {
122+
opal_object_t super;
116123
size_t size;
117124
uint8_t handle[IPC_MAX_HANDLE_SIZE];
125+
void* dev_ptr;
118126
};
119127
typedef struct opal_accelerator_ipc_handle_t opal_accelerator_ipc_handle_t;
128+
OBJ_CLASS_DECLARATION(opal_accelerator_ipc_handle_t);
120129

121130
struct opal_accelerator_ipc_event_handle_t {
131+
opal_object_t super;
122132
size_t size;
123133
uint8_t handle[IPC_MAX_HANDLE_SIZE];
124134
};
125135
typedef struct opal_accelerator_ipc_event_handle_t opal_accelerator_ipc_event_handle_t;
136+
OBJ_CLASS_DECLARATION(opal_accelerator_ipc_event_handle_t);
126137

127138
struct opal_accelerator_pci_attr_t {
128139
uint16_t domain_id;
@@ -132,7 +143,6 @@ struct opal_accelerator_pci_attr_t {
132143
};
133144
typedef struct opal_accelerator_pci_attr_t opal_accelerator_pci_attr_t;
134145

135-
OBJ_CLASS_DECLARATION(opal_accelerator_stream_t);
136146

137147
struct opal_accelerator_event_t {
138148
opal_object_t super;
@@ -180,14 +190,15 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
180190
* corresponding stream. This function will allocate memory for the object.
181191
* To release the memory, call OBJ_RELEASE(*event);
182192
*
183-
* @param[IN] dev_id Associated device for the event or
193+
* @param[IN] dev_id Associated device for the event or
184194
* MCA_ACCELERATOR_NO_DEVICE_ID
185-
* @param[IN] event Event to create
195+
* @param[OUT] event Event to create
196+
* @param[IN] enable_ipc support inter-process tracking of the event
186197
*
187198
* @return OPAL_SUCCESS or error status on failure.
188199
*/
189200
typedef int (*opal_accelerator_base_module_create_event_fn_t)(
190-
int dev_id, opal_accelerator_event_t **event);
201+
int dev_id, opal_accelerator_event_t **event, bool enable_ipc);
191202

192203
/**
193204
* Records an event on a stream. An event recorded on the stream is
@@ -219,6 +230,19 @@ typedef int (*opal_accelerator_base_module_record_event_fn_t)(
219230
typedef int (*opal_accelerator_base_module_query_event_fn_t)(
220231
int dev_id, opal_accelerator_event_t *event);
221232

233+
/**
234+
* Make a stream wait on an event
235+
*
236+
* @param[IN] dev_id Associated device for the event or
237+
* MCA_ACCELERATOR_NO_DEVICE_ID
238+
* @param[IN] event Event to wait on
239+
* @param[IN] stream Stream to wait
240+
*
241+
* @return OPAL_SUCCESS or error status on failure
242+
*/
243+
typedef int (*opal_accelerator_base_module_wait_event_fn_t)(
244+
int dev_id, opal_accelerator_event_t *event, opal_accelerator_stream_t *stream);
245+
222246
/**
223247
* Copies memory asynchronously from src to dest. Memory of dest and src
224248
* may not overlap. Optionally can specify the transfer type to
@@ -342,8 +366,10 @@ typedef int (*opal_accelerator_base_module_get_address_range_fn_t)(
342366
*
343367
* opal_accelerator_base_module_get_ipc_handle_fn_t()
344368
* opal_accelerator_base_module_open_ipc_handle_fn_t()
369+
* opal_accelerator_base_module_import_ipc_handle_fn_t()
345370
* opal_accelerator_base_module_get_ipc_event_handle_fn_t()
346371
* opal_accelerator_base_module_open_ipc_event_handle_fn_t()
372+
* opal_accelerator_base_module_import_ipc_event_handle_fn_t()
347373
*
348374
* must be implemented.
349375
*
@@ -354,6 +380,8 @@ typedef bool (*opal_accelerator_base_module_is_ipc_enabled_fn_t)(void);
354380

355381
/**
356382
* Gets an IPC memory handle for an existing device memory allocation.
383+
* This interface assumes that the object has been declared statically,
384+
* hence one has to call OBJ_DESTRUCT(handle) on it.
357385
*
358386
* @param[IN] dev_id Associated device for the IPC memory handle or
359387
* MCA_ACCELERATOR_NO_DEVICE_ID
@@ -366,31 +394,67 @@ typedef bool (*opal_accelerator_base_module_is_ipc_enabled_fn_t)(void);
366394
typedef int (*opal_accelerator_base_module_get_ipc_handle_fn_t)(
367395
int dev_id, void *dev_ptr, opal_accelerator_ipc_handle_t *handle);
368396

397+
/**
398+
* Creates an opal_accelerator_ipc_handle object given the 64byte IPC handle,
399+
* which was created using module_get_ipc_handle_fn on another process.
400+
* This interface assumes that the object has been declared statically,
401+
* hence one has to call OBJ_DESTRUCT(handle) on it.
402+
*
403+
* @param[IN] dev_id Associated device for the IPC memory handle or
404+
* MCA_ACCELERATOR_NO_DEVICE_ID
405+
* @param[IN] ipc_handle 64 byte IPC handle transferred from another process
406+
* @param[OUT] handle Pointer to IPC handle object
407+
*
408+
* @return OPAL_SUCCESS or error status on failure
409+
*
410+
*/
411+
typedef int (*opal_accelerator_base_module_import_ipc_handle_fn_t)(
412+
int dev_id, uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE], opal_accelerator_ipc_handle_t *handle);
413+
369414
/**
370415
* Opens an IPC memory handle from another process and returns
371416
* a device pointer usable in the local process.
372417
*
373418
* @param[IN] dev_id Associated device for the IPC memory handle or
374419
* MCA_ACCELERATOR_NO_DEVICE_ID
375-
* @param[IN] handle IPC handle object from another process
420+
* @param[IN] handle IPC handle object created using module_import_ipc_handle_fn
376421
* @param[OUT] dev_ptr Returned device pointer
377422
*
378-
* @return OPAL_SUCCESS or error status on failure
423+
* @return OPAL_SUCCESS on success,
424+
* OPAL_ERR_WOULD_BLOCK if the memory region is already mapped
425+
* or error status on other failures
379426
*/
380427
typedef int (*opal_accelerator_base_module_open_ipc_handle_fn_t)(
381428
int dev_id, opal_accelerator_ipc_handle_t *handle, void **dev_ptr);
382429

383430
/**
384431
* Gets an IPC event handle for an event created by opal_accelerator_base_module_create_event_fn_t.
432+
* This interface assumes that the object has been declared statically,
433+
* hence one has to call OBJ_DESTRUCT(handle) on it.
385434
*
386-
* @param[IN] event Event created previously
387-
* @param[OUT] handle Pointer to IPC event handle object
435+
* @param[IN] event Event created previously
436+
* @param[OUT] handle Pointer to IPC event handle object
388437
*
389438
* @return OPAL_SUCCESS or error status on failure
390439
*/
391440
typedef int (*opal_accelerator_base_module_get_ipc_event_handle_fn_t)(
392441
opal_accelerator_event_t *event, opal_accelerator_ipc_event_handle_t *handle);
393442

443+
/**
444+
* Creates an opal_accelerator_ipc_event_handle object using the 64 byte IPC event handle
445+
* which was created using module_get_ipc_event_handle_fn on another process.
446+
* This interface assumes that the object has been declared statically,
447+
* hence one has to call OBJ_DESTRUCT(handle) on it.
448+
*
449+
* @param[IN] event Event created previously
450+
* @param[IN] ipc_handle 64 byte IPC handle object
451+
* @param[OUT] handle Pointer to IPC event handle object
452+
*
453+
* @return OPAL_SUCCESS or error status on failure
454+
*/
455+
typedef int (*opal_accelerator_base_module_import_ipc_event_handle_fn_t)(
456+
uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE], opal_accelerator_ipc_event_handle_t *handle);
457+
394458
/**
395459
* Opens an IPC event handle from another process opened by
396460
* opal_accelerator_base_module_get_ipc_event_handle_fn_t.
@@ -490,6 +554,7 @@ typedef struct {
490554
opal_accelerator_base_module_create_event_fn_t create_event;
491555
opal_accelerator_base_module_record_event_fn_t record_event;
492556
opal_accelerator_base_module_query_event_fn_t query_event;
557+
opal_accelerator_base_module_wait_event_fn_t wait_event;
493558

494559
opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async;
495560
opal_accelerator_base_module_memcpy_fn_t mem_copy;
@@ -501,8 +566,10 @@ typedef struct {
501566

502567
opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled;
503568
opal_accelerator_base_module_get_ipc_handle_fn_t get_ipc_handle;
569+
opal_accelerator_base_module_import_ipc_handle_fn_t import_ipc_handle;
504570
opal_accelerator_base_module_open_ipc_handle_fn_t open_ipc_handle;
505571
opal_accelerator_base_module_get_ipc_event_handle_fn_t get_ipc_event_handle;
572+
opal_accelerator_base_module_import_ipc_event_handle_fn_t import_ipc_event_handle;
506573
opal_accelerator_base_module_open_ipc_event_handle_fn_t open_ipc_event_handle;
507574

508575
opal_accelerator_base_module_host_register_fn_t host_register;

opal/mca/accelerator/base/accelerator_base_frame.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,19 @@ OBJ_CLASS_INSTANCE(
5757
NULL,
5858
NULL);
5959

60+
OBJ_CLASS_INSTANCE(
61+
opal_accelerator_ipc_handle_t,
62+
opal_object_t,
63+
NULL,
64+
NULL);
65+
66+
OBJ_CLASS_INSTANCE(
67+
opal_accelerator_ipc_event_handle_t,
68+
opal_object_t,
69+
NULL,
70+
NULL);
71+
72+
6073
MCA_BASE_FRAMEWORK_DECLARE(opal, accelerator, "OPAL Accelerator Framework",
6174
opal_accelerator_base_frame_register, opal_accelerator_base_frame_open,
6275
opal_accelerator_base_frame_close, mca_accelerator_base_static_components,

opal/mca/accelerator/cuda/accelerator_cuda.c

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@
2626
static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *flags);
2727
static int accelerator_cuda_create_stream(int dev_id, opal_accelerator_stream_t **stream);
2828

29-
static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **event);
29+
static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **event, bool enable_ipc);
3030
static int accelerator_cuda_record_event(int dev_id, opal_accelerator_event_t *event, opal_accelerator_stream_t *stream);
3131
static int accelerator_cuda_query_event(int dev_id, opal_accelerator_event_t *event);
32+
static int accelerator_cuda_wait_event(int dev_id, opal_accelerator_event_t *event, opal_accelerator_stream_t *stream);
3233

3334
static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
3435
opal_accelerator_stream_t *stream, opal_accelerator_transfer_type_t type);
@@ -44,10 +45,14 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
4445
static bool accelerator_cuda_is_ipc_enabled(void);
4546
static int accelerator_cuda_get_ipc_handle(int dev_id, void *dev_ptr,
4647
opal_accelerator_ipc_handle_t *handle);
48+
static int accelerator_cuda_import_ipc_handle(int dev_id, uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
49+
opal_accelerator_ipc_handle_t *handle);
4750
static int accelerator_cuda_open_ipc_handle(int dev_id, opal_accelerator_ipc_handle_t *handle,
4851
void **dev_ptr);
4952
static int accelerator_cuda_get_ipc_event_handle(opal_accelerator_event_t *event,
5053
opal_accelerator_ipc_event_handle_t *handle);
54+
static int accelerator_cuda_import_ipc_event_handle(uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
55+
opal_accelerator_ipc_event_handle_t *handle);
5156
static int accelerator_cuda_open_ipc_event_handle(opal_accelerator_ipc_event_handle_t *handle,
5257
opal_accelerator_event_t *event);
5358

@@ -60,6 +65,8 @@ static int accelerator_cuda_device_can_access_peer( int *access, int dev1, int d
6065

6166
static int accelerator_cuda_get_buffer_id(int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
6267

68+
/* Resolve an opal_accelerator_stream_t pointer to the stream value handed to
 * the CUDA driver calls: 0 for MCA_ACCELERATOR_STREAM_DEFAULT, otherwise the
 * CUstream stored behind _stream->stream. The argument is parenthesized so
 * that expression arguments expand correctly; it can still be evaluated more
 * than once, so it must not have side effects. */
#define GET_STREAM(_stream) \
    ((_stream) == MCA_ACCELERATOR_STREAM_DEFAULT ? 0 : *((CUstream *)(_stream)->stream))
69+
6370
opal_accelerator_base_module_t opal_accelerator_cuda_module =
6471
{
6572
accelerator_cuda_check_addr,
@@ -69,6 +76,7 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
6976
accelerator_cuda_create_event,
7077
accelerator_cuda_record_event,
7178
accelerator_cuda_query_event,
79+
accelerator_cuda_wait_event,
7280

7381
accelerator_cuda_memcpy_async,
7482
accelerator_cuda_memcpy,
@@ -79,8 +87,10 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
7987

8088
accelerator_cuda_is_ipc_enabled,
8189
accelerator_cuda_get_ipc_handle,
90+
accelerator_cuda_import_ipc_handle,
8291
accelerator_cuda_open_ipc_handle,
8392
accelerator_cuda_get_ipc_event_handle,
93+
accelerator_cuda_import_ipc_event_handle,
8494
accelerator_cuda_open_ipc_event_handle,
8595

8696
accelerator_cuda_host_register,
@@ -260,7 +270,8 @@ static void opal_accelerator_cuda_stream_destruct(opal_accelerator_cuda_stream_t
260270
{
261271
CUresult result;
262272

263-
if (NULL != stream->base.stream) {
273+
if (MCA_ACCELERATOR_STREAM_DEFAULT != (opal_accelerator_stream_t *)stream &&
274+
NULL != stream->base.stream) {
264275
result = cuStreamDestroy(*(CUstream *)stream->base.stream);
265276
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
266277
opal_show_help("help-accelerator-cuda.txt", "cuStreamDestroy failed", true,
@@ -276,7 +287,7 @@ OBJ_CLASS_INSTANCE(
276287
NULL,
277288
opal_accelerator_cuda_stream_destruct);
278289

279-
static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **event)
290+
static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **event, bool enable_ipc)
280291
{
281292
CUresult result;
282293
int delayed_init = opal_accelerator_cuda_delayed_init();
@@ -294,7 +305,8 @@ static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **
294305
OBJ_RELEASE(*event);
295306
return OPAL_ERR_OUT_OF_RESOURCE;
296307
}
297-
result = cuEventCreate((*event)->event, CU_EVENT_DISABLE_TIMING);
308+
result = cuEventCreate((*event)->event, enable_ipc ? CU_EVENT_DISABLE_TIMING|CU_EVENT_INTERPROCESS :
309+
CU_EVENT_DISABLE_TIMING);
298310
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
299311
opal_show_help("help-accelerator-cuda.txt", "cuEventCreate failed", true,
300312
OPAL_PROC_MY_HOSTNAME, result);
@@ -328,11 +340,13 @@ static int accelerator_cuda_record_event(int dev_id, opal_accelerator_event_t *e
328340
{
329341
CUresult result;
330342

331-
if (NULL == stream || NULL == event) {
343+
if ((MCA_ACCELERATOR_STREAM_DEFAULT != stream &&
344+
(NULL == stream || NULL == stream->stream)) ||
345+
NULL == event) {
332346
return OPAL_ERR_BAD_PARAM;
333347
}
334348

335-
result = cuEventRecord(*(CUevent *)event->event, *(CUstream *)stream->stream);
349+
result = cuEventRecord(*(CUevent *)event->event, GET_STREAM(stream));
336350
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
337351
opal_show_help("help-accelerator-cuda.txt", "cuEventRecord failed", true,
338352
OPAL_PROC_MY_HOSTNAME, result);
@@ -369,6 +383,10 @@ static int accelerator_cuda_query_event(int dev_id, opal_accelerator_event_t *ev
369383
}
370384
}
371385
}
386+
static int accelerator_cuda_wait_event(int dev_id, opal_accelerator_event_t *event, opal_accelerator_stream_t *stream)
387+
{
388+
return OPAL_ERR_NOT_IMPLEMENTED;
389+
}
372390

373391
static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
374392
opal_accelerator_stream_t *stream, opal_accelerator_transfer_type_t type)
@@ -380,11 +398,12 @@ static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *
380398
return delayed_init;
381399
}
382400

383-
if (NULL == stream || NULL == dest || NULL == src || size <= 0) {
401+
if ((MCA_ACCELERATOR_STREAM_DEFAULT != stream && NULL == stream) ||
402+
NULL == dest || NULL == src || size <= 0) {
384403
return OPAL_ERR_BAD_PARAM;
385404
}
386405

387-
result = cuMemcpyAsync((CUdeviceptr) dest, (CUdeviceptr) src, size, *(CUstream *)stream->stream);
406+
result = cuMemcpyAsync((CUdeviceptr) dest, (CUdeviceptr) src, size, GET_STREAM(stream));
388407
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
389408
opal_show_help("help-accelerator-cuda.txt", "cuMemcpyAsync failed", true, dest, src,
390409
size, result);
@@ -547,6 +566,12 @@ static int accelerator_cuda_get_ipc_handle(int dev_id, void *dev_ptr,
547566
return OPAL_ERR_NOT_IMPLEMENTED;
548567
}
549568

569+
static int accelerator_cuda_import_ipc_handle(int dev_id, uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
570+
opal_accelerator_ipc_handle_t *handle)
571+
{
572+
return OPAL_ERR_NOT_IMPLEMENTED;
573+
}
574+
550575
static int accelerator_cuda_open_ipc_handle(int dev_id, opal_accelerator_ipc_handle_t *handle,
551576
void **dev_ptr)
552577
{
@@ -559,6 +584,12 @@ static int accelerator_cuda_get_ipc_event_handle(opal_accelerator_event_t *event
559584
return OPAL_ERR_NOT_IMPLEMENTED;
560585
}
561586

587+
static int accelerator_cuda_import_ipc_event_handle(uint8_t ipc_handle[IPC_MAX_HANDLE_SIZE],
588+
opal_accelerator_ipc_event_handle_t *handle)
589+
{
590+
return OPAL_ERR_NOT_IMPLEMENTED;
591+
}
592+
562593
static int accelerator_cuda_open_ipc_event_handle(opal_accelerator_ipc_event_handle_t *handle,
563594
opal_accelerator_event_t *event)
564595
{

0 commit comments

Comments
 (0)