@@ -55,7 +55,7 @@ type Plugin struct {
55
55
Heartbeat chan bool
56
56
vidx []bool
57
57
pipeid [][]bool
58
- coremask []string
58
+ coremask [][] string
59
59
cardtype []string
60
60
count int
61
61
}
@@ -87,9 +87,11 @@ func (p *Plugin) Start() error {
87
87
for idx := range p .cardtype {
88
88
p .cardtype [idx ] = ""
89
89
}
90
- p .coremask = make ([]string , 16 )
90
+ p .coremask = make ([][] string , 16 )
91
91
for idx := range p .coremask {
92
- p .coremask [idx ] = ""
92
+ p .coremask [idx ] = make ([]string , 2 )
93
+ p .coremask [idx ][0 ] = ""
94
+ p .coremask [idx ][1 ] = ""
93
95
}
94
96
p .count = 0
95
97
@@ -136,7 +138,10 @@ func (p *Plugin) Start() error {
136
138
if index % 2 == 0 {
137
139
_ , err := fmt .Sscanf (val , "DCU[%d] : Card Series: %s\n " , & idx , & cardtype )
138
140
if err != nil {
139
- panic (err )
141
+ _ , err := fmt .Sscanf (val , "DCU[%d] : Card Series: DCU %s\n " , & idx , & cardtype )
142
+ if err != nil {
143
+ panic (err )
144
+ }
140
145
}
141
146
p .cardtype [idx ] = fmt .Sprintf ("%v-%v" , "DCU" , cardtype )
142
147
}
@@ -186,7 +191,8 @@ func (p *Plugin) Start() error {
186
191
}
187
192
fmt .Println ("collecting pcibus=" , p .pcibusid , "cores=" , p .totalcores )
188
193
for idx , val := range p .totalcores {
189
- p .coremask [idx ] = initCoreUsage (val )
194
+ p .coremask [idx ][0 ] = initCoreUsage (val )
195
+ p .coremask [idx ][1 ] = initCoreUsage (val )
190
196
}
191
197
go p .WatchAndRegister ()
192
198
return nil
@@ -295,7 +301,8 @@ func (p *Plugin) RefreshContainerDevices() error {
295
301
return err
296
302
}
297
303
for idx := range p .coremask {
298
- p .coremask [idx ] = initCoreUsage (p .totalcores [idx ])
304
+ p .coremask [idx ][0 ] = initCoreUsage (p .totalcores [idx ])
305
+ p .coremask [idx ][1 ] = initCoreUsage (p .totalcores [idx ])
299
306
}
300
307
301
308
for _ , f := range files {
@@ -312,7 +319,8 @@ func (p *Plugin) RefreshContainerDevices() error {
312
319
didx , _ = strconv .Atoi (tmpstr [2 ])
313
320
pid , _ = strconv .Atoi (tmpstr [3 ])
314
321
vdidx , _ = strconv .Atoi (tmpstr [4 ])
315
- p .coremask [didx ], _ = addCoreUsage (p .coremask [didx ], tmpstr [5 ])
322
+ p .coremask [didx ][0 ], _ = addCoreUsage (p .coremask [didx ][0 ], tmpstr [5 ])
323
+ p .coremask [didx ][1 ], _ = addCoreUsage (p .coremask [didx ][1 ], tmpstr [6 ])
316
324
p .vidx [vdidx ] = true
317
325
p.pipeid [didx ][pid ] = true
318
326
}
@@ -326,6 +334,7 @@ func (p *Plugin) RefreshContainerDevices() error {
326
334
p .vidx [vdidx ] = false
327
335
p.pipeid [didx ][pid ] = false
328
336
os .RemoveAll ("/usr/local/vgpu/dcu/" + f .Name ())
337
+ os .Remove (fmt .Sprintf ("/etc/vdev/vdev%d.conf" , vdidx ))
329
338
}
330
339
fmt .Println (f .Name ())
331
340
}
@@ -430,10 +439,14 @@ func getIndexFromUUID(uid string) int {
430
439
}
431
440
432
441
// Create virtual vdev directory and file
433
- func (p * Plugin ) createvdevFile (current * corev1.Pod , ctr * corev1.Container , req util.ContainerDevices ) (string , error ) {
434
- s := ""
442
+ func (p * Plugin ) createvdevFiles (current * corev1.Pod , ctr * corev1.Container , req util.ContainerDevices ) (string , error ) {
435
443
var devidx , pipeid , vdevidx int
436
- coremsk := ""
444
+ var pcibusId string
445
+ var reqcores , mem int32
446
+ var err error
447
+ coremsk1 := initCoreUsage (16 )
448
+ coremsk2 := initCoreUsage (16 )
449
+ reqtmp := 0
437
450
if len (req ) > 1 {
438
451
return "" , nil
439
452
}
@@ -442,44 +455,82 @@ func (p *Plugin) createvdevFile(current *corev1.Pod, ctr *corev1.Container, req
442
455
continue
443
456
}
444
457
idx := getIndexFromUUID (val .UUID )
445
- pcibusId := p .pcibusid [idx ]
446
- s = fmt .Sprintf ("PciBusId: %s\n " , pcibusId )
447
- reqcores := (val .Usedcores * int32 (p .totalcores [idx ])) / 100
448
- coremsk , _ = allocCoreUsage (p .coremask [idx ], int (reqcores ))
449
- s = s + fmt .Sprintf ("cu_mask: 0x%s\n " , coremsk )
450
- s = s + fmt .Sprintf ("cu_count: %d\n " , reqcores )
451
- s = s + fmt .Sprintf ("mem: %d MiB\n " , val .Usedmem )
452
- s = s + fmt .Sprintf ("device_id: %d\n " , 0 )
458
+ pcibusId = p .pcibusid [idx ]
459
+ reqcores = (val .Usedcores * int32 (p .totalcores [idx ])) / 100
460
+ coremsk1 , reqtmp , _ = allocCoreUsage (p .coremask [idx ][0 ], int (reqcores ))
461
+ if reqtmp > 0 {
462
+ coremsk2 , _ , _ = allocCoreUsage (p .coremask [idx ][1 ], reqtmp )
463
+ }
464
+ mem = val .Usedmem
453
465
devidx = idx
454
- vdevidx , err : = p .AllocateVidx ()
466
+ vdevidx , err = p .AllocateVidx ()
455
467
if err != nil {
456
468
return "" , err
457
469
}
458
- s = s + fmt .Sprintf ("vdev_id: %d\n " , vdevidx )
459
470
pipeid , err = p .AllocatePipeID (idx )
460
471
if err != nil {
461
472
return "" , err
462
473
}
463
- s = s + fmt .Sprintf ("pipe_id: %d\n " , pipeid )
464
- s = s + fmt .Sprintln ("enable: 1" )
465
474
}
466
- cacheFileHostDirectory := "/usr/local/vgpu/dcu/" + string (current .UID ) + "_" + ctr .Name + "_" + fmt .Sprint (devidx ) + "_" + fmt .Sprint (pipeid ) + "_" + fmt .Sprint (vdevidx ) + "_" + coremsk
467
- err := os .MkdirAll (cacheFileHostDirectory , 0777 )
475
+ dirName := string (current .UID ) + "_" + ctr .Name + "_" + fmt .Sprint (devidx ) + "_" + fmt .Sprint (pipeid ) + "_" + fmt .Sprint (vdevidx ) + "_" + fmt .Sprint (coremsk1 ) + "_" + fmt .Sprint (coremsk2 )
476
+ cacheFileHostDirectory := fmt .Sprintf ("/usr/local/vgpu/dcu/%s" , dirName )
477
+ err = createvdevFile (pcibusId , coremsk1 , coremsk2 , reqcores , mem , 0 , vdevidx , pipeid , cacheFileHostDirectory , "vdev0.conf" )
468
478
if err != nil {
469
479
return "" , err
470
480
}
471
- err = os .Chmod (cacheFileHostDirectory , 0777 )
481
+ // support dcu-exporter
482
+ err = createvdevFile (pcibusId , coremsk1 , coremsk2 , reqcores , mem , devidx , vdevidx , pipeid , "/etc/vdev/" , fmt .Sprintf ("vdev%d.conf" , vdevidx ))
472
483
if err != nil {
473
484
return "" , err
474
485
}
475
- klog . Infoln ( "s=" , s )
476
- err = os . WriteFile ( cacheFileHostDirectory + "/vdev0.conf" , [] byte ( s ), os . ModePerm )
486
+
487
+ coreUsage1 , err := addCoreUsage ( p . coremask [ devidx ][ 0 ], coremsk1 )
477
488
if err != nil {
478
489
return "" , err
479
490
}
491
+ p .coremask [devidx ][0 ] = coreUsage1
492
+
493
+ coreUsage2 , err := addCoreUsage (p .coremask [devidx ][1 ], coremsk2 )
494
+ if err != nil {
495
+ return "" , err
496
+ }
497
+ p .coremask [devidx ][1 ] = coreUsage2
498
+
480
499
return cacheFileHostDirectory , nil
481
500
}
482
501
502
+ func createvdevFile (pcibusId , coremsk1 , coremsk2 string , reqcores , mem int32 , deviceid , vdevidx , pipeid int , cacheFileHostDirectory , cacheFileName string ) error {
503
+ s := ""
504
+ s = fmt .Sprintf ("PciBusId: %s\n " , pcibusId )
505
+ s = s + fmt .Sprintf ("cu_mask: 0x%s\n " , coremsk1 )
506
+ s = s + fmt .Sprintf ("cu_mask: 0x%s\n " , coremsk2 )
507
+ s = s + fmt .Sprintf ("cu_count: %d\n " , reqcores )
508
+ s = s + fmt .Sprintf ("mem: %d MiB\n " , mem )
509
+ s = s + fmt .Sprintf ("device_id: %d\n " , deviceid )
510
+ s = s + fmt .Sprintf ("vdev_id: %d\n " , vdevidx )
511
+ s = s + fmt .Sprintf ("pipe_id: %d\n " , pipeid )
512
+ s = s + fmt .Sprintln ("enable: 1" )
513
+ klog .Infoln ("s=" , s )
514
+
515
+ _ , err := os .Stat (cacheFileHostDirectory )
516
+ if os .IsNotExist (err ) {
517
+ err := os .MkdirAll (cacheFileHostDirectory , 0777 )
518
+ if err != nil {
519
+ return err
520
+ }
521
+ err = os .Chmod (cacheFileHostDirectory , 0777 )
522
+ if err != nil {
523
+ return err
524
+ }
525
+ }
526
+
527
+ err = os .WriteFile (fmt .Sprintf ("%s/%s" , cacheFileHostDirectory , cacheFileName ), []byte (s ), os .ModePerm )
528
+ if err != nil {
529
+ return err
530
+ }
531
+ return nil
532
+ }
533
+
483
534
func (p * Plugin ) Allocate (ctx context.Context , reqs * kubeletdevicepluginv1beta1.AllocateRequest ) (* kubeletdevicepluginv1beta1.AllocateResponse , error ) {
484
535
var car kubeletdevicepluginv1beta1.ContainerAllocateResponse
485
536
var dev * kubeletdevicepluginv1beta1.DeviceSpec
@@ -544,7 +595,7 @@ func (p *Plugin) Allocate(ctx context.Context, reqs *kubeletdevicepluginv1beta1.
544
595
}
545
596
//Create vdev file
546
597
if len (devreq ) < 2 && devreq [0 ].Usedmem < int32 (p .totalmem [0 ]) {
547
- filename , err := p .createvdevFile (current , & currentCtr , devreq )
598
+ filename , err := p .createvdevFiles (current , & currentCtr , devreq )
548
599
if err != nil {
549
600
util .PodAllocationFailed (nodename , current , NodeLockDCU )
550
601
return & responses , err
0 commit comments