-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DWC2 DMA support #2576
DWC2 DMA support #2576
Changes from 4 commits
cde7223
394dc06
298f7f2
8765568
02ec486
eefca22
6d4e2f6
7867464
9bfb3e7
40b5517
86b4608
a124438
6a15e78
e483c6a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,18 +67,6 @@ | |
// Debug level for DWC2 | ||
#define DWC2_DEBUG 2 | ||
|
||
#ifndef dcache_clean | ||
#define dcache_clean(_addr, _size) | ||
#endif | ||
|
||
#ifndef dcache_invalidate | ||
#define dcache_invalidate(_addr, _size) | ||
#endif | ||
|
||
#ifndef dcache_clean_invalidate | ||
#define dcache_clean_invalidate(_addr, _size) | ||
#endif | ||
|
||
static TU_ATTR_ALIGNED(4) uint32_t _setup_packet[2]; | ||
|
||
typedef struct { | ||
|
@@ -98,6 +86,9 @@ static uint16_t ep0_pending[2]; // Index determines direction as t | |
// TX FIFO RAM allocation so far in words - RX FIFO size is readily available from dwc2->grxfsiz | ||
static uint16_t _allocated_fifo_words_tx; // TX FIFO size in words (IN EPs) | ||
|
||
// Number of IN endpoints active | ||
static uint8_t _allocated_ep_in_count; | ||
|
||
// SOF enabling flag - required for SOF to not get disabled in ISR when SOF was enabled by | ||
static bool _sof_en; | ||
|
||
|
@@ -165,6 +156,12 @@ static bool fifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) { | |
dwc2->grxfsiz = sz; | ||
} | ||
} else { | ||
// Check IN endpoints concurrently active limit | ||
if(_dwc2_controller->ep_in_count) { | ||
TU_ASSERT(_allocated_ep_in_count < _dwc2_controller->ep_in_count); | ||
_allocated_ep_in_count++; | ||
} | ||
|
||
// Note if The TXFELVL is configured as half empty. In order | ||
// to be able to write a packet at that point, the fifo must be twice the max_size. | ||
if ((dwc2->gahbcfg & GAHBCFG_TXFELVL) == 0) { | ||
|
@@ -186,6 +183,38 @@ static bool fifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) { | |
return true; | ||
} | ||
|
||
static inline bool dma_enabled(uint8_t rhport) | ||
{ | ||
// DMA doesn't support fifo transfer | ||
#ifdef TUD_AUDIO_PREFER_RING_BUFFER | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't want to depend on class driver but maybe it's the simplest way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CFG_TUD_DWC2_DMA must be enabled at compile time, audio driver/user should adapt accordingly. |
||
#if TUD_AUDIO_PREFER_RING_BUFFER | ||
return false; | ||
#endif | ||
#endif | ||
dwc2_regs_t* dwc2 = DWC2_REG(rhport); | ||
// Internal DMA only | ||
return (dwc2->ghwcfg2_bm.arch == 2); | ||
} | ||
|
||
static void dma_stpkt_rx(uint8_t rhport) | ||
{ | ||
dwc2_regs_t* dwc2 = DWC2_REG(rhport); | ||
|
||
if (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a) { | ||
if(dwc2->epout[0].doepctl & DOEPCTL_EPENA) | ||
return; | ||
} | ||
|
||
// Receive only 1 packet | ||
dwc2->epout[0].doeptsiz = 0; | ||
dwc2->epout[0].doeptsiz |= (1 << DOEPTSIZ_STUPCNT_Pos); | ||
dwc2->epout[0].doeptsiz |= 8; | ||
dwc2->epout[0].doeptsiz |= (1 << DOEPTSIZ_PKTCNT_Pos); | ||
dwc2->epout[0].doepdma = (uintptr_t)_setup_packet; | ||
|
||
dwc2->epout[0].doepctl |= DOEPCTL_EPENA | DOEPCTL_USBAEP; | ||
} | ||
|
||
static void edpt_activate(uint8_t rhport, tusb_desc_endpoint_t const * p_endpoint_desc) { | ||
dwc2_regs_t* dwc2 = DWC2_REG(rhport); | ||
uint8_t const epnum = tu_edpt_number(p_endpoint_desc->bEndpointAddress); | ||
|
@@ -272,6 +301,8 @@ static void bus_reset(uint8_t rhport) { | |
|
||
_sof_en = false; | ||
|
||
_allocated_ep_in_count = 1; | ||
|
||
// clear device address | ||
dwc2->dcfg &= ~DCFG_DAD_Msk; | ||
|
||
|
@@ -351,6 +382,12 @@ static void bus_reset(uint8_t rhport) { | |
// Setup the control endpoint 0 | ||
_allocated_fifo_words_tx = 16; | ||
|
||
// DMA needs extra space for processing | ||
if(dma_enabled(rhport)) { | ||
uint16_t reserved = _dwc2_controller[rhport].ep_fifo_size / 4- dwc2->ghwcfg3_bm.total_fifo_size; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Kernel use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, this is the max epinfo where scatt/gather dma is used. We can use gdfifocfg to dynamic change the epinfo base to reduce the DMA region for simple DMA mode. |
||
_allocated_fifo_words_tx += reserved; | ||
} | ||
|
||
// Control IN uses FIFO 0 with 64 bytes ( 16 32-bit word ) | ||
dwc2->dieptxf0 = (16 << DIEPTXF0_TX0FD_Pos) | (_dwc2_controller[rhport].ep_fifo_size / 4 - _allocated_fifo_words_tx); | ||
|
||
|
@@ -361,6 +398,10 @@ static void bus_reset(uint8_t rhport) { | |
|
||
dwc2->epout[0].doeptsiz |= (3 << DOEPTSIZ_STUPCNT_Pos); | ||
|
||
if(dma_enabled(rhport)) { | ||
dma_stpkt_rx(rhport); | ||
} | ||
|
||
dwc2->gintmsk |= GINTMSK_OEPINT | GINTMSK_IEPINT; | ||
} | ||
|
||
|
@@ -369,11 +410,11 @@ static void edpt_schedule_packets(uint8_t rhport, uint8_t const epnum, uint8_t c | |
(void) rhport; | ||
|
||
dwc2_regs_t* dwc2 = DWC2_REG(rhport); | ||
xfer_ctl_t* const xfer = XFER_CTL_BASE(epnum, dir); | ||
|
||
// EP0 is limited to one packet each xfer | ||
// We use multiple transaction of xfer->max_size length to get a whole transfer done | ||
if (epnum == 0) { | ||
xfer_ctl_t* const xfer = XFER_CTL_BASE(epnum, dir); | ||
total_bytes = tu_min16(ep0_pending[dir], xfer->max_size); | ||
ep0_pending[dir] -= total_bytes; | ||
} | ||
|
@@ -386,17 +427,31 @@ static void edpt_schedule_packets(uint8_t rhport, uint8_t const epnum, uint8_t c | |
epin[epnum].dieptsiz = (num_packets << DIEPTSIZ_PKTCNT_Pos) | | ||
((total_bytes << DIEPTSIZ_XFRSIZ_Pos) & DIEPTSIZ_XFRSIZ_Msk); | ||
|
||
epin[epnum].diepctl |= DIEPCTL_EPENA | DIEPCTL_CNAK; | ||
if(dma_enabled(rhport)) { | ||
epin[epnum].diepdma = (uintptr_t)xfer->buffer; | ||
|
||
// For ISO endpoint set correct odd/even bit for next frame. | ||
if ((epin[epnum].diepctl & DIEPCTL_EPTYP) == DIEPCTL_EPTYP_0 && (XFER_CTL_BASE(epnum, dir))->interval == 1) { | ||
// Take odd/even bit from frame counter. | ||
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos)); | ||
epin[epnum].diepctl |= (odd_frame_now ? DIEPCTL_SD0PID_SEVNFRM_Msk : DIEPCTL_SODDFRM_Msk); | ||
} | ||
// Enable fifo empty interrupt only if there are something to put in the fifo. | ||
if (total_bytes != 0) { | ||
dwc2->diepempmsk |= (1 << epnum); | ||
// For ISO endpoint set correct odd/even bit for next frame. | ||
if ((epin[epnum].diepctl & DIEPCTL_EPTYP) == DIEPCTL_EPTYP_0 && (XFER_CTL_BASE(epnum, dir))->interval == 1) { | ||
// Take odd/even bit from frame counter. | ||
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos)); | ||
epin[epnum].diepctl |= (odd_frame_now ? DIEPCTL_SD0PID_SEVNFRM_Msk : DIEPCTL_SODDFRM_Msk); | ||
} | ||
|
||
epin[epnum].diepctl |= DIEPCTL_EPENA | DIEPCTL_CNAK; | ||
} else { | ||
|
||
epin[epnum].diepctl |= DIEPCTL_EPENA | DIEPCTL_CNAK; | ||
|
||
// For ISO endpoint set correct odd/even bit for next frame. | ||
if ((epin[epnum].diepctl & DIEPCTL_EPTYP) == DIEPCTL_EPTYP_0 && (XFER_CTL_BASE(epnum, dir))->interval == 1) { | ||
// Take odd/even bit from frame counter. | ||
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos)); | ||
epin[epnum].diepctl |= (odd_frame_now ? DIEPCTL_SD0PID_SEVNFRM_Msk : DIEPCTL_SODDFRM_Msk); | ||
} | ||
// Enable fifo empty interrupt only if there are something to put in the fifo. | ||
if (total_bytes != 0) { | ||
dwc2->diepempmsk |= (1 << epnum); | ||
} | ||
} | ||
} else { | ||
dwc2_epout_t* epout = dwc2->epout; | ||
|
@@ -406,13 +461,18 @@ static void edpt_schedule_packets(uint8_t rhport, uint8_t const epnum, uint8_t c | |
epout[epnum].doeptsiz |= (num_packets << DOEPTSIZ_PKTCNT_Pos) | | ||
((total_bytes << DOEPTSIZ_XFRSIZ_Pos) & DOEPTSIZ_XFRSIZ_Msk); | ||
|
||
epout[epnum].doepctl |= DOEPCTL_EPENA | DOEPCTL_CNAK; | ||
if ((epout[epnum].doepctl & DOEPCTL_EPTYP) == DOEPCTL_EPTYP_0 && | ||
XFER_CTL_BASE(epnum, dir)->interval == 1) { | ||
// Take odd/even bit from frame counter. | ||
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos)); | ||
epout[epnum].doepctl |= (odd_frame_now ? DOEPCTL_SD0PID_SEVNFRM_Msk : DOEPCTL_SODDFRM_Msk); | ||
} | ||
|
||
if(dma_enabled(rhport)) { | ||
epout[epnum].doepdma = (uintptr_t)xfer->buffer; | ||
} | ||
|
||
epout[epnum].doepctl |= DOEPCTL_EPENA | DOEPCTL_CNAK; | ||
} | ||
} | ||
|
||
|
@@ -619,6 +679,11 @@ void dcd_init(uint8_t rhport) { | |
// Enable global interrupt | ||
dwc2->gahbcfg |= GAHBCFG_GINT; | ||
|
||
if (dma_enabled(rhport)) { | ||
dwc2->gahbcfg |= GAHBCFG_DMAEN | GAHBCFG_HBSTLEN_2; | ||
dwc2->gintmsk &=~GINTMSK_RXFLVLM; | ||
} | ||
|
||
// make sure we are in device mode | ||
// TU_ASSERT(!(dwc2->gintsts & GINTSTS_CMOD), ); | ||
|
||
|
@@ -706,6 +771,8 @@ void dcd_edpt_close_all(uint8_t rhport) { | |
dwc2_regs_t* dwc2 = DWC2_REG(rhport); | ||
uint8_t const ep_count = _dwc2_controller[rhport].ep_count; | ||
|
||
_allocated_ep_in_count = 1; | ||
|
||
// Disable non-control interrupt | ||
dwc2->daintmsk = (1 << DAINTMSK_OEPM_Pos) | (1 << DAINTMSK_IEPM_Pos); | ||
|
||
|
@@ -730,6 +797,12 @@ void dcd_edpt_close_all(uint8_t rhport) { | |
|
||
fifo_flush_tx(dwc2, 0x10); // all tx fifo | ||
fifo_flush_rx(dwc2); | ||
|
||
// DMA needs extra space for processing | ||
if(dma_enabled(rhport)) { | ||
uint16_t reserved = _dwc2_controller[rhport].ep_fifo_size / 4- dwc2->ghwcfg3_bm.total_fifo_size; | ||
_allocated_fifo_words_tx += reserved; | ||
} | ||
} | ||
|
||
bool dcd_edpt_iso_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t largest_packet_size) { | ||
|
@@ -809,6 +882,9 @@ void dcd_edpt_close(uint8_t rhport, uint8_t ep_addr) { | |
|
||
void dcd_edpt_stall(uint8_t rhport, uint8_t ep_addr) { | ||
edpt_disable(rhport, ep_addr, true); | ||
if((tu_edpt_number(ep_addr) == 0) && dma_enabled(rhport)) { | ||
dma_stpkt_rx(rhport); | ||
} | ||
} | ||
|
||
void dcd_edpt_clear_stall(uint8_t rhport, uint8_t ep_addr) { | ||
|
@@ -991,33 +1067,52 @@ static void handle_epout_irq(uint8_t rhport) { | |
|
||
uint32_t const doepint = epout->doepint; | ||
|
||
// SETUP packet Setup Phase done. | ||
if (doepint & DOEPINT_STUP) { | ||
uint32_t clear_flag = DOEPINT_STUP; | ||
|
||
// STPKTRX is only available for version from 3_00a | ||
if ((doepint & DOEPINT_STPKTRX) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) { | ||
clear_flag |= DOEPINT_STPKTRX; | ||
} | ||
|
||
epout->doepint = clear_flag; | ||
dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); | ||
} | ||
|
||
// OUT XFER complete | ||
if (epout->doepint & DOEPINT_XFRC) { | ||
epout->doepint = DOEPINT_XFRC; | ||
|
||
xfer_ctl_t* xfer = XFER_CTL_BASE(n, TUSB_DIR_OUT); | ||
|
||
// EP0 can only handle one packet | ||
if ((n == 0) && ep0_pending[TUSB_DIR_OUT]) { | ||
// Schedule another packet to be received. | ||
edpt_schedule_packets(rhport, n, TUSB_DIR_OUT, 1, ep0_pending[TUSB_DIR_OUT]); | ||
if (doepint & DOEPINT_STUP) { | ||
// STPKTRX is only available for version from 3_00a | ||
if ((doepint & DOEPINT_STPKTRX) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) { | ||
epout->doepint = DOEPINT_STPKTRX; | ||
} | ||
} else if (doepint & DOEPINT_OTEPSPR) { | ||
epout->doepint = DOEPINT_OTEPSPR; | ||
} else { | ||
dcd_event_xfer_complete(rhport, n, xfer->total_len, XFER_RESULT_SUCCESS, true); | ||
// EP0 can only handle one packet | ||
if ((n == 0) && ep0_pending[TUSB_DIR_OUT]) { | ||
// Schedule another packet to be received. | ||
edpt_schedule_packets(rhport, n, TUSB_DIR_OUT, 1, ep0_pending[TUSB_DIR_OUT]); | ||
} else { | ||
if(dma_enabled(rhport)) { | ||
// Fix packet length | ||
uint16_t remain = (epout->doeptsiz & DOEPTSIZ_XFRSIZ_Msk) >> DOEPTSIZ_XFRSIZ_Pos; | ||
xfer->total_len -= remain; | ||
// this is ZLP, so prepare EP0 for next setup | ||
if(n == 0 && xfer->total_len == 0) { | ||
dma_stpkt_rx(rhport); | ||
} | ||
} | ||
|
||
dcd_event_xfer_complete(rhport, n, xfer->total_len, XFER_RESULT_SUCCESS, true); | ||
} | ||
} | ||
} | ||
|
||
// SETUP packet Setup Phase done. | ||
if (doepint & DOEPINT_STUP) { | ||
epout->doepint = DOEPINT_STUP; | ||
if ((doepint & DOEPINT_STPKTRX) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) { | ||
epout->doepint = DOEPINT_STPKTRX; | ||
} | ||
if(dma_enabled(rhport) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) { | ||
dma_stpkt_rx(rhport); | ||
} | ||
|
||
dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -1042,6 +1137,9 @@ static void handle_epin_irq(uint8_t rhport) { | |
// Schedule another packet to be transmitted. | ||
edpt_schedule_packets(rhport, n, TUSB_DIR_IN, 1, ep0_pending[TUSB_DIR_IN]); | ||
} else { | ||
if((n == 0) && dma_enabled(rhport)) { | ||
dma_stpkt_rx(rhport); | ||
} | ||
dcd_event_xfer_complete(rhport, n | TUSB_DIR_IN_MASK, xfer->total_len, XFER_RESULT_SUCCESS, true); | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dcache code should be removed and add MPU suggestion to make usb ram non cacheable:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not entirely sure with this case, as H743 and probably most of stm32
m7h7 USB DMA controller don't have access to DTCMThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At least for stm32f7xx, those all seem to have the AHB-S port of the M7 connected directly to the bus matrix, so it is accessible by the USB DMA in this case. Though, that is only for DTCM, ITCM is still inaccessible by anything besides the CPU.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
right, I haven't check f7, maybe this is specific to h7 only