19
19
* reserved.
20
20
*
21
21
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
22
+ * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved
22
23
* $COPYRIGHT$
23
24
*
24
25
* Additional copyrights may follow
41
42
#include "opal_config.h"
42
43
#include "opal/mca/rcache/base/base.h"
43
44
#include "opal/mca/rcache/gpusm/rcache_gpusm.h"
44
- #include "opal/include/opal/opal_cuda .h"
45
- #include <cuda.h>
45
+ #include "opal/include/opal/opal_gpu .h"
46
+ #include "opal/mca/accelerator/base/base.h"
46
47
47
48
/**
48
49
* Called when the registration free list is created. An event is created
49
50
* for each entry.
50
51
*/
51
52
static void mca_rcache_gpusm_registration_constructor (mca_rcache_gpusm_registration_t * item )
52
53
{
53
- uintptr_t * event = & item -> event ;
54
- void * handle = (void * ) & item -> evtHandle ;
55
- CUresult result ;
54
+ int result ;
56
55
57
- result = cuEventCreate ((CUevent * ) event ,
58
- CU_EVENT_INTERPROCESS | CU_EVENT_DISABLE_TIMING );
59
- if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
60
- opal_output (0 , "cuEventCreate failed\n" );
56
+ result = opal_accelerator .create_event (MCA_ACCELERATOR_NO_DEVICE_ID , & item -> event , true);
57
+ if (OPAL_UNLIKELY (OPAL_SUCCESS != result )) {
58
+ opal_output (0 , "create_ipc_event failed\n" );
61
59
}
62
60
63
- result = cuIpcGetEventHandle (( CUipcEventHandle * ) handle , ( CUevent ) * event );
64
- if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
65
- opal_output (0 , "cuIpcGetEventHandle failed\n" );
61
+ result = opal_accelerator . get_ipc_event_handle ( item -> event , & item -> evtHandle );
62
+ if (OPAL_UNLIKELY (OPAL_SUCCESS != result )) {
63
+ opal_output (0 , "get_ipc_event_handle failed\n" );
66
64
}
67
65
}
68
66
@@ -71,13 +69,7 @@ static void mca_rcache_gpusm_registration_constructor(mca_rcache_gpusm_registrat
71
69
*/
72
70
static void mca_rcache_gpusm_registration_destructor(mca_rcache_gpusm_registration_t *item)
{
    /* Drop this registration's reference on its event.  There is nothing
     * to release when no event was ever attached (for example, if event
     * creation failed while the entry was being constructed), so guard
     * against handing a NULL object to OBJ_RELEASE. */
    if (NULL != item->event) {
        OBJ_RELEASE(item->event);
    }
}
82
74
83
75
OBJ_CLASS_INSTANCE (mca_rcache_gpusm_registration_t , mca_rcache_base_registration_t ,
@@ -100,7 +92,7 @@ void mca_rcache_gpusm_module_init(mca_rcache_gpusm_module_t *rcache)
100
92
/* Start with 0 entries in the free list since CUDA may not have
101
93
* been initialized when this free list is created and there are
102
94
* some CUDA specific activities that need to be done. */
103
- opal_free_list_init (& rcache -> reg_list , sizeof (struct mca_opal_cuda_reg_t ),
95
+ opal_free_list_init (& rcache -> reg_list , sizeof (struct mca_opal_gpu_reg_t ),
104
96
opal_cache_line_size , OBJ_CLASS (mca_rcache_gpusm_registration_t ), 0 ,
105
97
opal_cache_line_size , 0 , -1 , 64 , NULL , 0 , NULL , NULL , NULL );
106
98
}
@@ -122,67 +114,51 @@ int mca_rcache_gpusm_find(mca_rcache_base_module_t *rcache, void *addr, size_t s
122
114
*/
123
115
static int mca_rcache_gpusm_get_mem_handle(void *base, size_t size, mca_rcache_base_registration_t *newreg)
{
    /* Fills in 'newreg' for the device buffer at 'base': the IPC handle,
     * the address range reported for the buffer, and either the
     * sync-memops setup or a recorded event, depending on the
     * mca_rcache_gpusm_use_sync_memops setting.  'size' is not consulted
     * here.  Returns OPAL_SUCCESS, or OPAL_ERROR if any step fails. */
    mca_opal_gpu_reg_t *reg = (mca_opal_gpu_reg_t *) newreg;
    opal_accelerator_buffer_id_t buf_id;
    uint64_t addr_flags;
    void *range_start;
    size_t range_len;
    int device;
    int rc;

    /* A result of zero or less means an error or host memory; neither
     * case can be registered here. */
    rc = opal_accelerator.check_addr(base, &device, &addr_flags);
    if (rc <= 0) {
        return OPAL_ERROR;
    }

    /* The IPC handle is stored directly in the registration. */
    rc = opal_accelerator.get_ipc_handle(device, base, &reg->data.ipcHandle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return OPAL_ERROR;
    }

    rc = opal_accelerator.get_address_range(device, base, &range_start, &range_len);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return OPAL_ERROR;
    }

    /* Store all the information in the registration; 'bound' is the last
     * byte of the range, hence the -1. */
    reg->base.base = range_start;
    reg->base.bound = (unsigned char *) range_start + range_len - 1;
    reg->data.memh_seg_addr.pval = (void *) range_start;
    reg->data.memh_seg_len = range_len;

    if (mca_rcache_gpusm_use_sync_memops) {
        /* Runtime parameter replacing the former compile-time switch.
         * Per the original author's note, get_buffer_id also sets
         * sync_memops as a side effect; the returned id itself is not
         * used.  Splitting that out into its own call is still to be
         * revisited. */
        rc = opal_accelerator.get_buffer_id(device, base, &buf_id);
    } else {
        /* Otherwise record the registration's event. */
        rc = opal_accelerator.record_event(device, reg->data.event, 0);
    }

    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return OPAL_ERROR;
    }
    return OPAL_SUCCESS;
}
188
164
@@ -225,7 +201,7 @@ int mca_rcache_gpusm_register(mca_rcache_base_module_t *rcache, void *addr, size
225
201
226
202
rc = mca_rcache_gpusm_get_mem_handle (base , size , gpusm_reg );
227
203
228
- if (rc != OPAL_SUCCESS ) {
204
+ if (OPAL_SUCCESS != rc ) {
229
205
opal_free_list_return (& rcache_gpusm -> reg_list , item );
230
206
return rc ;
231
207
}
@@ -241,7 +217,6 @@ int mca_rcache_gpusm_register(mca_rcache_base_module_t *rcache, void *addr, size
241
217
int mca_rcache_gpusm_deregister (struct mca_rcache_base_module_t * rcache ,
242
218
mca_rcache_base_registration_t * reg )
243
219
{
244
- int rc ;
245
220
mca_rcache_gpusm_module_t * rcache_gpusm = (mca_rcache_gpusm_module_t * ) rcache ;
246
221
247
222
opal_free_list_return (& rcache_gpusm -> reg_list , (opal_free_list_item_t * ) reg );
0 commit comments