
Commit c8bd412

drm/amdgpu: Make SDMA phase quantum configurable
Set a configurable SDMA phase quantum when enabling SDMA context switching. The default value significantly reduces SDMA latency in page table updates when user-mode SDMA queues have concurrent activity, compared to the initial HW setting.

Change-Id: Id99c52e893d0358374ea9a3fbc2181f0c60b1b42
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
1 parent d020404 commit c8bd412
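
For context: the new parameter is expressed in units of 1K GPU clock cycles, so the default of 32 corresponds to a phase quantum on the order of 32K clock cycles, and 0 leaves the hardware default untouched. Because the module parameter added below is registered with 0444 permissions, it can only be set at load time, for example as a modprobe option or via amdgpu.sdma_phase_quantum=<n> on the kernel command line.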

4 files changed: +67 −2 lines


drivers/gpu/drm/amd/amdgpu/amdgpu.h (+1)

@@ -90,6 +90,7 @@ extern int amdgpu_sched_hw_submission;
 extern int amdgpu_powerplay;
 extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
+extern unsigned amdgpu_sdma_phase_quantum;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	3000
 #define AMDGPU_MAX_USEC_TIMEOUT		100000	/* 100 ms */

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c (+4)

@@ -84,6 +84,7 @@ int amdgpu_sched_hw_submission = 2;
 int amdgpu_powerplay = -1;
 unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
+unsigned amdgpu_sdma_phase_quantum = 32;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -174,6 +175,9 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
 MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
 module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
 
+MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
+module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	/* Kaveri */

drivers/gpu/drm/amd/amdgpu/cik_sdma.c (+31 −1)

@@ -351,14 +351,44 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
  */
 static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-	u32 f32_cntl;
+	u32 f32_cntl, phase_quantum = 0;
 	int i;
 
+	if (amdgpu_sdma_phase_quantum) {
+		unsigned value = amdgpu_sdma_phase_quantum;
+		unsigned unit = 0;
+
+		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+			value = (value + 1) >> 1;
+			unit++;
+		}
+		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+			WARN_ONCE(1,
+			"clamping sdma_phase_quantum to %uK clock cycles\n",
+				  value << unit);
+		}
+		phase_quantum =
+			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+			unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+	}
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
 		if (enable) {
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					AUTO_CTXSW_ENABLE, 1);
+			if (amdgpu_sdma_phase_quantum) {
+				WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+				WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+			}
 		} else {
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					AUTO_CTXSW_ENABLE, 0);
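To make the VALUE/UNIT encoding in the hunk above easier to follow, here is a small user-space sketch of the same halve-and-shift scheme (the identical logic is duplicated in sdma_v3_0.c below). The 8-bit VALUE and 4-bit UNIT field widths are assumptions for illustration only; the driver derives the real limits from the SDMA0_PHASE0_QUANTUM mask/shift definitions.

/* Standalone sketch of the quantum encoding used above. Field widths
 * (8-bit VALUE, 4-bit UNIT) are assumed for illustration only; the
 * driver takes the real masks/shifts from the SDMA register headers.
 */
#include <stdio.h>

#define VALUE_MAX 0xffu	/* assumed width of the VALUE field */
#define UNIT_MAX  0xfu	/* assumed width of the UNIT field */

/* Encode a quantum (in 1K-cycle units) as VALUE * 2^UNIT, clamping
 * to the largest representable quantum if the request is too big.
 */
static unsigned int encode_phase_quantum(unsigned int quantum)
{
	unsigned int value = quantum;
	unsigned int unit = 0;

	/* Halve the value (rounding up) until it fits in the VALUE field. */
	while (value > VALUE_MAX) {
		value = (value + 1) >> 1;
		unit++;
	}
	if (unit > UNIT_MAX) {
		value = VALUE_MAX;
		unit = UNIT_MAX;
	}
	/* Effective quantum in 1K-cycle units. */
	return value << unit;
}

int main(void)
{
	/* Default of 32 fits directly: VALUE = 32, UNIT = 0. */
	printf("32  -> %uK cycles\n", encode_phase_quantum(32));
	/* 257 does not fit in 8 bits and rounds up to 258
	 * (VALUE = 129, UNIT = 1), showing the loss of precision.
	 */
	printf("257 -> %uK cycles\n", encode_phase_quantum(257));
	return 0;
}

Under these assumed field widths the sketch prints 32K and 258K cycles respectively, which is the effective quantum the driver-side loop would compute for the same requests.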

drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c (+31 −1)

@@ -576,16 +576,46 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
  */
 static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-	u32 f32_cntl;
+	u32 f32_cntl, phase_quantum = 0;
 	int i;
 
+	if (amdgpu_sdma_phase_quantum) {
+		unsigned value = amdgpu_sdma_phase_quantum;
+		unsigned unit = 0;
+
+		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+			value = (value + 1) >> 1;
+			unit++;
+		}
+		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+			WARN_ONCE(1,
+			"clamping sdma_phase_quantum to %uK clock cycles\n",
+				  value << unit);
+		}
+		phase_quantum =
+			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+			unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+	}
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
 		if (enable) {
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					AUTO_CTXSW_ENABLE, 1);
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					ATC_L1_ENABLE, 1);
+			if (amdgpu_sdma_phase_quantum) {
+				WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+				WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+			}
 		} else {
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					AUTO_CTXSW_ENABLE, 0);
