@@ -378,7 +378,7 @@ func (p *Plugin) createvdevFiles(current *corev1.Pod, ctr *corev1.Container, req
378
378
}
379
379
dirName := string (current .UID ) + "_" + ctr .Name + "_" + fmt .Sprint (devidx ) + "_" + fmt .Sprint (pipeid ) + "_" + fmt .Sprint (vdevidx ) + "_" + fmt .Sprint (coremsk1 ) + "_" + fmt .Sprint (coremsk2 )
380
380
cacheFileHostDirectory := fmt .Sprintf ("/usr/local/vgpu/dcu/%s" , dirName )
381
- err = createvdevFile (pcibusId , coremsk1 , coremsk2 , reqcores , mem , 0 , vdevidx , pipeid , cacheFileHostDirectory , "vdev0 .conf" )
381
+ err = createvdevFile (pcibusId , coremsk1 , coremsk2 , reqcores , mem , devidx , vdevidx , pipeid , cacheFileHostDirectory , fmt . Sprintf ( "vdev%d .conf", vdevidx ) )
382
382
if err != nil {
383
383
return "" , err
384
384
}
@@ -445,6 +445,11 @@ func (p *Plugin) Allocate(ctx context.Context, reqs *kubeletdevicepluginv1beta1.
445
445
nodelock .ReleaseNodeLock (nodename , NodeLockDCU )
446
446
return & kubeletdevicepluginv1beta1.AllocateResponse {}, err
447
447
}
448
+ drmCards , drmRenders , err := util .ListDcuDrmDevices ()
449
+ if err != nil {
450
+ util .PodAllocationFailed (nodename , current , NodeLockDCU )
451
+ return & kubeletdevicepluginv1beta1.AllocateResponse {}, err
452
+ }
448
453
for idx := range reqs .ContainerRequests {
449
454
currentCtr , devreq , err := util .GetNextDeviceRequest (util .HygonDCUDevice , * current )
450
455
klog .Infoln ("deviceAllocateFromAnnotation=" , devreq )
@@ -479,18 +484,33 @@ func (p *Plugin) Allocate(ctx context.Context, reqs *kubeletdevicepluginv1beta1.
479
484
car .Devices = append (car .Devices , dev )
480
485
481
486
for _ , val := range devreq {
482
- var id int
487
+ var devIdx = - 1
483
488
klog .Infof ("Allocating device ID: %s" , val .UUID )
484
- fmt .Sscanf (val .UUID , "DCU-%d" , & id )
489
+ succeedCount , err := fmt .Sscanf (val .UUID , "DCU-%d" , & devIdx )
490
+ if err != nil || succeedCount == 0 || devIdx == - 1 {
491
+ klog .Errorf ("Invalid request device uuid: %s" , val .UUID )
492
+ util .PodAllocationFailed (nodename , current , NodeLockDCU )
493
+ return & kubeletdevicepluginv1beta1.AllocateResponse {}, fmt .Errorf ("invalid request device uuid %s" , val .UUID )
494
+ }
495
+
496
+ if devIdx > len (drmCards ) || devIdx > len (drmRenders ) {
497
+ klog .Errorf ("Invalid device index: %d, all devices counts is: %d, all renders count is: %d" , devIdx , len (drmCards ), len (drmRenders ))
498
+ util .PodAllocationFailed (nodename , current , NodeLockDCU )
499
+ return & kubeletdevicepluginv1beta1.AllocateResponse {}, fmt .Errorf ("can not match dcu dri request %s. cards %d, renders %d" , val .UUID , len (drmCards ), len (drmRenders ))
500
+ }
485
501
486
- devpath := fmt .Sprintf ("/dev/dri/card%d" , id )
502
+ drmCardName := drmCards [devIdx ]
503
+ klog .Infof ("All dcu dri card devs: %v, mapped dri: %s" , drmCards , drmCardName )
504
+ devpath := fmt .Sprintf ("/dev/dri/%s" , drmCardName )
487
505
dev = new (kubeletdevicepluginv1beta1.DeviceSpec )
488
506
dev .HostPath = devpath
489
507
dev .ContainerPath = devpath
490
508
dev .Permissions = "rw"
491
509
car .Devices = append (car .Devices , dev )
492
510
493
- devpath = fmt .Sprintf ("/dev/dri/renderD%d" , (id + 128 ))
511
+ drmRenderName := drmRenders [devIdx ]
512
+ klog .Infof ("All dcu dri render devs: %v, mapped dri: %s" , drmRenders , drmRenderName )
513
+ devpath = fmt .Sprintf ("/dev/dri/%s" , drmRenderName )
494
514
dev = new (kubeletdevicepluginv1beta1.DeviceSpec )
495
515
dev .HostPath = devpath
496
516
dev .ContainerPath = devpath
0 commit comments