Skip to content

Commit 12b728e

Browse files
committed
rcache/gpusm and rcache/rgpusm: update components to use new API
update rcache/gpusm and rcache/rgpusm to use accelerator framework API. Work in progress. Signed-off-by: Edgar Gabriel <Edgar.Gabriel@amd.com>
1 parent c573b51 commit 12b728e

File tree

7 files changed

+121
-218
lines changed

7 files changed

+121
-218
lines changed

opal/include/opal/opal_cuda.h renamed to opal/include/opal/opal_gpu.h

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,36 +15,33 @@
1515
* reserved.
1616
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
1717
* All Rights reserved.
18+
* Copyright (c) 2023 Advanced Micro Devices, Inc.
19+
* All Rights reserved.
1820
* $COPYRIGHT$
1921
*
2022
* Additional copyrights may follow
2123
*
2224
* $HEADER$
2325
*
24-
* This file is intended only to carry shared types. If actual cuda
25-
* symbols are required, they need to be added to a new common cuda
26-
* component.
2726
*/
2827

29-
#ifndef OPAL_CUDA_H
30-
#define OPAL_CUDA_H
28+
#ifndef OPAL_GPU_H
29+
#define OPAL_GPU_H
3130
#include "opal/mca/rcache/rcache.h"
31+
#include "opal/mca/accelerator/accelerator.h"
3232

33-
#define MEMHANDLE_SIZE 8
34-
#define EVTHANDLE_SIZE 8
35-
36-
struct mca_opal_cuda_reg_data_t {
37-
uint64_t memHandle[MEMHANDLE_SIZE];
38-
uint64_t evtHandle[EVTHANDLE_SIZE];
39-
uint64_t event;
40-
opal_ptr_t memh_seg_addr;
41-
size_t memh_seg_len;
33+
struct mca_opal_gpu_reg_data_t {
34+
opal_accelerator_ipc_handle_t ipcHandle;
35+
opal_accelerator_ipc_event_handle_t ipcEventHandle;
36+
opal_accelerator_event_t *event;
37+
opal_ptr_t memh_seg_addr;
38+
size_t memh_seg_len;
4239
};
43-
typedef struct mca_opal_cuda_reg_data_t mca_opal_cuda_reg_data_t;
40+
typedef struct mca_opal_gpu_reg_data_t mca_opal_gpu_reg_data_t;
4441

45-
struct mca_opal_cuda_reg_t {
42+
struct mca_opal_gpu_reg_t {
4643
mca_rcache_base_registration_t base;
47-
mca_opal_cuda_reg_data_t data;
44+
mca_opal_gpu_reg_data_t data;
4845
};
49-
typedef struct mca_opal_cuda_reg_t mca_opal_cuda_reg_t;
50-
#endif /* OPAL_CUDA_H */
46+
typedef struct mca_opal_gpu_reg_t mca_opal_gpu_reg_t;
47+
#endif /* OPAL_GPU_H */

opal/mca/rcache/gpusm/configure.m4

Lines changed: 0 additions & 32 deletions
This file was deleted.

opal/mca/rcache/gpusm/rcache_gpusm.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,16 @@
3030
#include "opal_config.h"
3131
#include "opal/class/opal_list.h"
3232
#include "opal/mca/rcache/rcache.h"
33+
#include "opal/mca/accelerator/accelerator.h"
3334

3435
BEGIN_C_DECLS
36+
extern int mca_rcache_gpusm_use_sync_memops;
3537

36-
#define MEMHANDLE_SIZE 8
37-
#define EVTHANDLE_SIZE 8
3838
struct mca_rcache_gpusm_registration_t {
39-
mca_rcache_base_registration_t base;
40-
uint64_t memHandle[MEMHANDLE_SIZE]; /* CUipcMemHandle */
41-
uint64_t evtHandle[EVTHANDLE_SIZE]; /* CUipcEventHandle */
42-
uintptr_t event; /* CUevent */
39+
mca_rcache_base_registration_t base;
40+
opal_accelerator_ipc_handle_t memHandle;
41+
opal_accelerator_ipc_event_handle_t evtHandle;
42+
opal_accelerator_event_t *event;
4343
};
4444
typedef struct mca_rcache_gpusm_registration_t mca_rcache_gpusm_registration_t;
4545
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_rcache_gpusm_registration_t);

opal/mca/rcache/gpusm/rcache_gpusm_component.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ static int gpusm_open(void);
4141
static int gpusm_close(void);
4242
static int gpusm_register(void);
4343
static mca_rcache_base_module_t *gpusm_init(struct mca_rcache_base_resources_t *resources);
44+
int mca_rcache_gpusm_use_sync_memops=1;
4445

4546
mca_rcache_gpusm_component_t mca_rcache_gpusm_component = {{
4647
/* First, the mca_base_component_t struct containing meta
@@ -75,6 +76,14 @@ static int gpusm_open(void)
7576

7677
static int gpusm_register(void)
7778
{
79+
/* Whether to set the sync_memops attribute or use events for ensuring memory updates */
80+
mca_rcache_gpusm_use_sync_memops = 1;
81+
(void) mca_base_component_var_register(&mca_rcache_gpusm_component.super.rcache_version,
82+
"use_sync_memops", "Use sync_memops to ensure memory updates vs. events (default: yes)",
83+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
84+
OPAL_INFO_LVL_9,
85+
MCA_BASE_VAR_SCOPE_READONLY, &mca_rcache_gpusm_use_sync_memops);
86+
7887
return OPAL_SUCCESS;
7988
}
8089

opal/mca/rcache/gpusm/rcache_gpusm_module.c

Lines changed: 49 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
* reserved.
2020
*
2121
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
22+
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved
2223
* $COPYRIGHT$
2324
*
2425
* Additional copyrights may follow
@@ -41,28 +42,25 @@
4142
#include "opal_config.h"
4243
#include "opal/mca/rcache/base/base.h"
4344
#include "opal/mca/rcache/gpusm/rcache_gpusm.h"
44-
#include "opal/include/opal/opal_cuda.h"
45-
#include <cuda.h>
45+
#include "opal/include/opal/opal_gpu.h"
46+
#include "opal/mca/accelerator/base/base.h"
4647

4748
/**
4849
* Called when the registration free list is created. An event is created
4950
* for each entry.
5051
*/
5152
static void mca_rcache_gpusm_registration_constructor(mca_rcache_gpusm_registration_t *item)
5253
{
53-
uintptr_t *event = &item->event;
54-
void *handle = (void *) &item->evtHandle;
55-
CUresult result;
54+
int result;
5655

57-
result = cuEventCreate((CUevent *) event,
58-
CU_EVENT_INTERPROCESS | CU_EVENT_DISABLE_TIMING);
59-
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
60-
opal_output(0, "cuEventCreate failed\n");
56+
result = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &item->event, true);
57+
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
58+
opal_output(0, "create_ipc_event failed\n");
6159
}
6260

63-
result = cuIpcGetEventHandle((CUipcEventHandle *) handle, (CUevent) *event);
64-
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
65-
opal_output(0, "cuIpcGetEventHandle failed\n");
61+
result = opal_accelerator.get_ipc_event_handle(item->event, &item->evtHandle);
62+
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
63+
opal_output(0, "get_ipc_event_handle failed\n");
6664
}
6765
}
6866

@@ -71,13 +69,7 @@ static void mca_rcache_gpusm_registration_constructor(mca_rcache_gpusm_registrat
7169
*/
7270
static void mca_rcache_gpusm_registration_destructor(mca_rcache_gpusm_registration_t *item)
7371
{
74-
uintptr_t event = item->event;
75-
CUresult result;
76-
77-
result = cuEventDestroy((CUevent) event);
78-
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
79-
opal_output(0, "cuEventDestroy failed");
80-
}
72+
OBJ_RELEASE(item->event);
8173
}
8274

8375
OBJ_CLASS_INSTANCE(mca_rcache_gpusm_registration_t, mca_rcache_base_registration_t,
@@ -100,7 +92,7 @@ void mca_rcache_gpusm_module_init(mca_rcache_gpusm_module_t *rcache)
10092
/* Start with 0 entries in the free list since CUDA may not have
10193
* been initialized when this free list is created and there are
10294
* some CUDA-specific activities that need to be done. */
103-
opal_free_list_init(&rcache->reg_list, sizeof(struct mca_opal_cuda_reg_t),
95+
opal_free_list_init(&rcache->reg_list, sizeof(struct mca_opal_gpu_reg_t),
10496
opal_cache_line_size, OBJ_CLASS(mca_rcache_gpusm_registration_t), 0,
10597
opal_cache_line_size, 0, -1, 64, NULL, 0, NULL, NULL, NULL);
10698
}
@@ -122,67 +114,51 @@ int mca_rcache_gpusm_find(mca_rcache_base_module_t *rcache, void *addr, size_t s
122114
*/
123115
static int mca_rcache_gpusm_get_mem_handle(void *base, size_t size, mca_rcache_base_registration_t *newreg)
124116
{
125-
CUmemorytype memType;
126-
CUresult result;
127-
CUipcMemHandle *memHandle;
128-
CUdeviceptr pbase;
129-
size_t psize;
130-
131-
mca_opal_cuda_reg_t *cuda_reg = (mca_opal_cuda_reg_t *) newreg;
132-
memHandle = (CUipcMemHandle *) cuda_reg->data.memHandle;
133-
134-
/* We should only be there if this is a CUDA device pointer */
135-
result = cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
136-
(CUdeviceptr) base);
137-
assert(CUDA_SUCCESS == result);
138-
assert(CU_MEMORYTYPE_DEVICE == memType);
139-
140-
/* Get the memory handle so we can send it to the remote process. */
141-
result = cuIpcGetMemHandle(memHandle, (CUdeviceptr) base);
142-
143-
if (CUDA_SUCCESS != result) {
117+
mca_opal_gpu_reg_t *gpu_reg = (mca_opal_gpu_reg_t *) newreg;
118+
opal_accelerator_buffer_id_t buffer_id;
119+
uint64_t flags;
120+
int dev_id;
121+
int result;
122+
void* pbase;
123+
size_t psize;
124+
125+
result = opal_accelerator.check_addr(base, &dev_id, &flags);
126+
if (0 >= result) {
127+
// This is either an error or host memory. In either case we do not continue
144128
return OPAL_ERROR;
145129
}
146130

147-
/* Need to get the real base and size of the memory handle. This is
148-
* how the remote side saves the handles in a cache. */
149-
result = cuMemGetAddressRange(&pbase, &psize, (CUdeviceptr) base);
150-
if (CUDA_SUCCESS != result) {
131+
result = opal_accelerator.get_ipc_handle(dev_id, base, &gpu_reg->data.ipcHandle);
132+
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
133+
return OPAL_ERROR;
134+
}
135+
result = opal_accelerator.get_address_range (dev_id, base, &pbase, &psize);
136+
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
151137
return OPAL_ERROR;
152138
}
153139

154140
/* Store all the information in the registration */
155-
cuda_reg->base.base = (void *) pbase;
156-
cuda_reg->base.bound = (unsigned char *) pbase + psize - 1;
157-
cuda_reg->data.memh_seg_addr.pval = (void *) pbase;
158-
cuda_reg->data.memh_seg_len = psize;
159-
160-
#if OPAL_CUDA_SYNC_MEMOPS
161-
/* With CUDA 6.0, we can set an attribute on the memory pointer that will
162-
* ensure any synchronous copies are completed prior to any other access
163-
* of the memory region. This means we do not need to record an event
164-
* and send to the remote side.
165-
*/
166-
memType = 1; /* Just use this variable since we already have it */
167-
result = cuPointerSetAttribute(&memType, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
168-
(CUdeviceptr) base);
169-
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
170-
return OPAL_ERROR;
141+
gpu_reg->base.base = pbase;
142+
gpu_reg->base.bound = (unsigned char *) pbase + psize - 1;
143+
gpu_reg->data.memh_seg_addr.pval = (void *) pbase;
144+
gpu_reg->data.memh_seg_len = psize;
145+
146+
// The former OPAL_CUDA_SYNC_MEMOPS #ifdef is now an MCA runtime parameter
147+
if (mca_rcache_gpusm_use_sync_memops) {
148+
// Needs revisiting: get_buffer_id() also sets sync_memops;
149+
// we might want to separate that out into a separate function
150+
result = opal_accelerator.get_buffer_id(dev_id, base, &buffer_id);
151+
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
152+
return OPAL_ERROR;
153+
}
171154
}
172-
#else
173-
/* Need to record the event to ensure that any memcopies into the
174-
* device memory have completed. The event handle associated with
175-
* this event is sent to the remote process so that it will wait
176-
* on this event prior to copying data out of the device memory.
177-
* Note that this needs to be the NULL stream to make since it is
178-
* unknown what stream any copies into the device memory were done
179-
* with. */
180-
result = cuEventRecord((CUevent) cuda_reg->data.event, 0);
181-
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
182-
return OPAL_ERROR;
155+
else {
156+
result = opal_accelerator.record_event(dev_id, gpu_reg->data.event, 0);
157+
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
158+
return OPAL_ERROR;
159+
}
183160
}
184-
#endif /* OPAL_CUDA_SYNC_MEMOPS */
185-
161+
186162
return OPAL_SUCCESS;
187163
}
188164

@@ -225,7 +201,7 @@ int mca_rcache_gpusm_register(mca_rcache_base_module_t *rcache, void *addr, size
225201

226202
rc = mca_rcache_gpusm_get_mem_handle(base, size, gpusm_reg);
227203

228-
if (rc != OPAL_SUCCESS) {
204+
if (OPAL_SUCCESS != rc) {
229205
opal_free_list_return(&rcache_gpusm->reg_list, item);
230206
return rc;
231207
}
@@ -241,7 +217,6 @@ int mca_rcache_gpusm_register(mca_rcache_base_module_t *rcache, void *addr, size
241217
int mca_rcache_gpusm_deregister(struct mca_rcache_base_module_t *rcache,
242218
mca_rcache_base_registration_t *reg)
243219
{
244-
int rc;
245220
mca_rcache_gpusm_module_t *rcache_gpusm = (mca_rcache_gpusm_module_t *) rcache;
246221

247222
opal_free_list_return(&rcache_gpusm->reg_list, (opal_free_list_item_t *) reg);

opal/mca/rcache/rgpusm/configure.m4

Lines changed: 0 additions & 32 deletions
This file was deleted.

0 commit comments

Comments
 (0)