Skip to content

Commit af86d04

Browse files
authored
Merge pull request #3 from ouyangluwei163/master
support k100_ai
2 parents e6bbf64 + eb1aef9 commit af86d04

File tree

3 files changed

+99
-43
lines changed

3 files changed

+99
-43
lines changed

internal/pkg/dcu/corealloc.go

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,16 +19,18 @@ package dcu
1919
import (
2020
"fmt"
2121
"strconv"
22+
"strings"
2223
)
2324

2425
// initCoreUsage returns an all-zero core-usage mask: a string of "0"
// characters, one per hexadecimal digit of the mask.
//
// req is kept so existing call sites continue to compile, but it is not
// consulted: the mask is always maskHexDigits characters long regardless of
// the value passed in.
func initCoreUsage(req int) string {
	// Fixed width of the mask, in hexadecimal digits.
	const maskHexDigits = 16

	return strings.Repeat("0", maskHexDigits)
}
3335

3436
func addCoreUsage(tot string, c string) (string, error) {
@@ -68,26 +70,29 @@ func byteAlloc(b int, req int) (int, int) {
6870
remains--
6971
res = res + 1
7072
}
73+
if remains <= 0 {
74+
break
75+
}
7176
i++
7277
}
7378
return res, remains
7479
}
7580

76-
func allocCoreUsage(tot string, req int) (string, error) {
77-
i := 0
81+
func allocCoreUsage(tot string, req int) (string, int, error) {
82+
i := len(tot) - 1
7883
res := ""
7984
remains := req
8085
for {
8186
left := int64(0)
8287
alloc := 0
83-
if i < len(tot) && tot[i] != 0 {
88+
if i >= 0 {
8489
left, _ = strconv.ParseInt(string(tot[i]), 16, 0)
8590
alloc, remains = byteAlloc(int(left), remains)
86-
res = fmt.Sprintf("%s%x", res, alloc)
91+
res = fmt.Sprintf("%x%s", alloc, res)
8792
} else {
8893
break
8994
}
90-
i++
95+
i--
9196
}
92-
return res, nil
97+
return res, remains, nil
9398
}

internal/pkg/dcu/corealloc_test.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,10 @@ func TestAddCoreUsage(t *testing.T) {
4343

4444
func TestAllocCoreUsage(t *testing.T) {
4545
str1 := "50200fff4000000"
46-
res, _ := allocCoreUsage(str1, 16)
46+
res, _, _ := allocCoreUsage(str1, 16)
4747
t.Log("res=", res)
4848
assert.Equal(t, strings.Compare(res, "afdfe0000000000"), 0)
4949
str1 = "abcde000ad00012"
50-
res, _ = allocCoreUsage(str1, 32)
50+
res, _, _ = allocCoreUsage(str1, 32)
5151
t.Log("res=", res)
5252
}

internal/pkg/dcu/server.go

Lines changed: 79 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ type Plugin struct {
5555
Heartbeat chan bool
5656
vidx []bool
5757
pipeid [][]bool
58-
coremask []string
58+
coremask [][]string
5959
cardtype []string
6060
count int
6161
}
@@ -87,9 +87,11 @@ func (p *Plugin) Start() error {
8787
for idx := range p.cardtype {
8888
p.cardtype[idx] = ""
8989
}
90-
p.coremask = make([]string, 16)
90+
p.coremask = make([][]string, 16)
9191
for idx := range p.coremask {
92-
p.coremask[idx] = ""
92+
p.coremask[idx] = make([]string, 2)
93+
p.coremask[idx][0] = ""
94+
p.coremask[idx][1] = ""
9395
}
9496
p.count = 0
9597

@@ -136,7 +138,10 @@ func (p *Plugin) Start() error {
136138
if index%2 == 0 {
137139
_, err := fmt.Sscanf(val, "DCU[%d] : Card Series: %s\n", &idx, &cardtype)
138140
if err != nil {
139-
panic(err)
141+
_, err := fmt.Sscanf(val, "DCU[%d] : Card Series: DCU %s\n", &idx, &cardtype)
142+
if err != nil {
143+
panic(err)
144+
}
140145
}
141146
p.cardtype[idx] = fmt.Sprintf("%v-%v", "DCU", cardtype)
142147
}
@@ -186,7 +191,8 @@ func (p *Plugin) Start() error {
186191
}
187192
fmt.Println("collecting pcibus=", p.pcibusid, "cores=", p.totalcores)
188193
for idx, val := range p.totalcores {
189-
p.coremask[idx] = initCoreUsage(val)
194+
p.coremask[idx][0] = initCoreUsage(val)
195+
p.coremask[idx][1] = initCoreUsage(val)
190196
}
191197
go p.WatchAndRegister()
192198
return nil
@@ -295,7 +301,8 @@ func (p *Plugin) RefreshContainerDevices() error {
295301
return err
296302
}
297303
for idx := range p.coremask {
298-
p.coremask[idx] = initCoreUsage(p.totalcores[idx])
304+
p.coremask[idx][0] = initCoreUsage(p.totalcores[idx])
305+
p.coremask[idx][1] = initCoreUsage(p.totalcores[idx])
299306
}
300307

301308
for _, f := range files {
@@ -312,7 +319,8 @@ func (p *Plugin) RefreshContainerDevices() error {
312319
didx, _ = strconv.Atoi(tmpstr[2])
313320
pid, _ = strconv.Atoi(tmpstr[3])
314321
vdidx, _ = strconv.Atoi(tmpstr[4])
315-
p.coremask[didx], _ = addCoreUsage(p.coremask[didx], tmpstr[5])
322+
p.coremask[didx][0], _ = addCoreUsage(p.coremask[didx][0], tmpstr[5])
323+
p.coremask[didx][1], _ = addCoreUsage(p.coremask[didx][1], tmpstr[6])
316324
p.vidx[vdidx] = true
317325
p.pipeid[didx][pid] = true
318326
}
@@ -326,6 +334,7 @@ func (p *Plugin) RefreshContainerDevices() error {
326334
p.vidx[vdidx] = false
327335
p.pipeid[didx][pid] = false
328336
os.RemoveAll("/usr/local/vgpu/dcu/" + f.Name())
337+
os.Remove(fmt.Sprintf("/etc/vdev/vdev%d.conf", vdidx))
329338
}
330339
fmt.Println(f.Name())
331340
}
@@ -430,10 +439,14 @@ func getIndexFromUUID(uid string) int {
430439
}
431440

432441
// Create virtual vdev directory and file
433-
func (p *Plugin) createvdevFile(current *corev1.Pod, ctr *corev1.Container, req util.ContainerDevices) (string, error) {
434-
s := ""
442+
func (p *Plugin) createvdevFiles(current *corev1.Pod, ctr *corev1.Container, req util.ContainerDevices) (string, error) {
435443
var devidx, pipeid, vdevidx int
436-
coremsk := ""
444+
var pcibusId string
445+
var reqcores, mem int32
446+
var err error
447+
coremsk1 := initCoreUsage(16)
448+
coremsk2 := initCoreUsage(16)
449+
reqtmp := 0
437450
if len(req) > 1 {
438451
return "", nil
439452
}
@@ -442,44 +455,82 @@ func (p *Plugin) createvdevFile(current *corev1.Pod, ctr *corev1.Container, req
442455
continue
443456
}
444457
idx := getIndexFromUUID(val.UUID)
445-
pcibusId := p.pcibusid[idx]
446-
s = fmt.Sprintf("PciBusId: %s\n", pcibusId)
447-
reqcores := (val.Usedcores * int32(p.totalcores[idx])) / 100
448-
coremsk, _ = allocCoreUsage(p.coremask[idx], int(reqcores))
449-
s = s + fmt.Sprintf("cu_mask: 0x%s\n", coremsk)
450-
s = s + fmt.Sprintf("cu_count: %d\n", reqcores)
451-
s = s + fmt.Sprintf("mem: %d MiB\n", val.Usedmem)
452-
s = s + fmt.Sprintf("device_id: %d\n", 0)
458+
pcibusId = p.pcibusid[idx]
459+
reqcores = (val.Usedcores * int32(p.totalcores[idx])) / 100
460+
coremsk1, reqtmp, _ = allocCoreUsage(p.coremask[idx][0], int(reqcores))
461+
if reqtmp > 0 {
462+
coremsk2, _, _ = allocCoreUsage(p.coremask[idx][1], reqtmp)
463+
}
464+
mem = val.Usedmem
453465
devidx = idx
454-
vdevidx, err := p.AllocateVidx()
466+
vdevidx, err = p.AllocateVidx()
455467
if err != nil {
456468
return "", err
457469
}
458-
s = s + fmt.Sprintf("vdev_id: %d\n", vdevidx)
459470
pipeid, err = p.AllocatePipeID(idx)
460471
if err != nil {
461472
return "", err
462473
}
463-
s = s + fmt.Sprintf("pipe_id: %d\n", pipeid)
464-
s = s + fmt.Sprintln("enable: 1")
465474
}
466-
cacheFileHostDirectory := "/usr/local/vgpu/dcu/" + string(current.UID) + "_" + ctr.Name + "_" + fmt.Sprint(devidx) + "_" + fmt.Sprint(pipeid) + "_" + fmt.Sprint(vdevidx) + "_" + coremsk
467-
err := os.MkdirAll(cacheFileHostDirectory, 0777)
475+
dirName := string(current.UID) + "_" + ctr.Name + "_" + fmt.Sprint(devidx) + "_" + fmt.Sprint(pipeid) + "_" + fmt.Sprint(vdevidx) + "_" + fmt.Sprint(coremsk1) + "_" + fmt.Sprint(coremsk2)
476+
cacheFileHostDirectory := fmt.Sprintf("/usr/local/vgpu/dcu/%s", dirName)
477+
err = createvdevFile(pcibusId, coremsk1, coremsk2, reqcores, mem, 0, vdevidx, pipeid, cacheFileHostDirectory, "vdev0.conf")
468478
if err != nil {
469479
return "", err
470480
}
471-
err = os.Chmod(cacheFileHostDirectory, 0777)
481+
// support dcu-exporter
482+
err = createvdevFile(pcibusId, coremsk1, coremsk2, reqcores, mem, devidx, vdevidx, pipeid, "/etc/vdev/", fmt.Sprintf("vdev%d.conf", vdevidx))
472483
if err != nil {
473484
return "", err
474485
}
475-
klog.Infoln("s=", s)
476-
err = os.WriteFile(cacheFileHostDirectory+"/vdev0.conf", []byte(s), os.ModePerm)
486+
487+
coreUsage1, err := addCoreUsage(p.coremask[devidx][0], coremsk1)
477488
if err != nil {
478489
return "", err
479490
}
491+
p.coremask[devidx][0] = coreUsage1
492+
493+
coreUsage2, err := addCoreUsage(p.coremask[devidx][1], coremsk2)
494+
if err != nil {
495+
return "", err
496+
}
497+
p.coremask[devidx][1] = coreUsage2
498+
480499
return cacheFileHostDirectory, nil
481500
}
482501

502+
func createvdevFile(pcibusId, coremsk1, coremsk2 string, reqcores, mem int32, deviceid, vdevidx, pipeid int, cacheFileHostDirectory, cacheFileName string) error {
503+
s := ""
504+
s = fmt.Sprintf("PciBusId: %s\n", pcibusId)
505+
s = s + fmt.Sprintf("cu_mask: 0x%s\n", coremsk1)
506+
s = s + fmt.Sprintf("cu_mask: 0x%s\n", coremsk2)
507+
s = s + fmt.Sprintf("cu_count: %d\n", reqcores)
508+
s = s + fmt.Sprintf("mem: %d MiB\n", mem)
509+
s = s + fmt.Sprintf("device_id: %d\n", deviceid)
510+
s = s + fmt.Sprintf("vdev_id: %d\n", vdevidx)
511+
s = s + fmt.Sprintf("pipe_id: %d\n", pipeid)
512+
s = s + fmt.Sprintln("enable: 1")
513+
klog.Infoln("s=", s)
514+
515+
_, err := os.Stat(cacheFileHostDirectory)
516+
if os.IsNotExist(err) {
517+
err := os.MkdirAll(cacheFileHostDirectory, 0777)
518+
if err != nil {
519+
return err
520+
}
521+
err = os.Chmod(cacheFileHostDirectory, 0777)
522+
if err != nil {
523+
return err
524+
}
525+
}
526+
527+
err = os.WriteFile(fmt.Sprintf("%s/%s", cacheFileHostDirectory, cacheFileName), []byte(s), os.ModePerm)
528+
if err != nil {
529+
return err
530+
}
531+
return nil
532+
}
533+
483534
func (p *Plugin) Allocate(ctx context.Context, reqs *kubeletdevicepluginv1beta1.AllocateRequest) (*kubeletdevicepluginv1beta1.AllocateResponse, error) {
484535
var car kubeletdevicepluginv1beta1.ContainerAllocateResponse
485536
var dev *kubeletdevicepluginv1beta1.DeviceSpec
@@ -544,7 +595,7 @@ func (p *Plugin) Allocate(ctx context.Context, reqs *kubeletdevicepluginv1beta1.
544595
}
545596
//Create vdev file
546597
if len(devreq) < 2 && devreq[0].Usedmem < int32(p.totalmem[0]) {
547-
filename, err := p.createvdevFile(current, &currentCtr, devreq)
598+
filename, err := p.createvdevFiles(current, &currentCtr, devreq)
548599
if err != nil {
549600
util.PodAllocationFailed(nodename, current, NodeLockDCU)
550601
return &responses, err

0 commit comments

Comments
 (0)