diff --git a/CHANGELOG.md b/CHANGELOG.md index 3923aeb..a81a682 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Added a "low pin count" variant of the f730 chip to the crate features: packages <144 pins don't include a high speed USB PHY - Added SPI2_SCK pin for stm32f769i-discovery - Fix mass-erase triggering in `flash` on smaller chips +- Remove the need for software u64 division in the clock setup code, shrinking code (#211) ## [v0.7.0] - 2022-06-05 diff --git a/src/rcc.rs b/src/rcc.rs index b7c5cf2..c765101 100644 --- a/src/rcc.rs +++ b/src/rcc.rs @@ -558,6 +558,43 @@ impl CFGR { self } + // We want to avoid dividing u64 values, because the Cortex-M7 CPU doesn't + // have hardware instructions for that, and the software divide that LLVM + // gives us is a relatively large amount of code. + // + // To do this, we operate in a fixed-point domain, and do a multiply by 1/x + // instead of dividing by x. We can calculate those 1/x values in a u32, if + // the fixed-point decimal place is chosen to be close enough to the LSB. + // + // But we also need to be able to represent the largest numerator, so we + // need enough bits to the left of the virtual decimal point. + // + // All of the chunks of code that do this are structured like: + // + // base_clk * n / m / p + // + // and they all have the same base_clk and n ranges (n up to 432, base_clk + // up to 50MHz). So base*plln can be as high as 21_600_000_000 (35 bits), + // and we budget 38 bits to represent it. + // + // (We could use just 37 bits in one of these cases, if we take into account + // that high values of base_clk preclude using high values of n. But the + // other case is checking the output, so we can't assume anything about the + // inputs there.) + // + // That leaves 26 bits right of the decimal place; BASE_CLK_SHIFT (below) adds 4, giving 30. + // + // Also note, we need to round the 1/x values, not truncate them.
So we + // shift left by one more bit, add one, and shift right by one. + const FIXED_POINT_LSHIFT: u32 = 31; + const FIXED_POINT_RSHIFT: u32 = 30; + + // We also drop 4 bits from the base_clk so that it and the fractional part + // (above) can fit into 64 bits. The max base_clk*n value needs 38 bits; + // shifting this out means it can fit into 34, with 30 (above) for the + // fractions. + const BASE_CLK_SHIFT: u32 = 4; + /// Output clock calculation fn calculate_clocks(&self) -> (Clocks, InternalRCCConfig) { let mut config = InternalRCCConfig::default(); @@ -568,21 +605,32 @@ impl CFGR { None => HSI_FREQUENCY, } .raw(), - ); + ) >> Self::BASE_CLK_SHIFT; - let mut sysclk = base_clk; + let mut sysclk = base_clk << Self::BASE_CLK_SHIFT; let mut pll48clk_valid = false; if self.use_pll { - sysclk = base_clk as u64 * self.plln as u64 - / self.pllm as u64 + // These initial divisions have to operate on u32 values to avoid + // the software division. Fortunately our 30 bit choice for the + // decimal place, and the fact that these are 1/N, means they are + // at most 2^30, so a u32 is fine.
+ let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1; + let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT) / match self.pllp { - PLLP::Div2 => 2, - PLLP::Div4 => 4, - PLLP::Div6 => 6, - PLLP::Div8 => 8, - }; + PLLP::Div2 => 2u32, + PLLP::Div4 => 4u32, + PLLP::Div6 => 6u32, + PLLP::Div8 => 8u32, + } + + 1) + >> 1; + sysclk = (((base_clk as u64 * self.plln as u64 * one_over_m as u64) + >> Self::FIXED_POINT_RSHIFT) + * one_over_p as u64) + >> Self::FIXED_POINT_RSHIFT + << Self::BASE_CLK_SHIFT; } // Check if pll48clk is valid @@ -590,23 +638,39 @@ impl CFGR { match pll48clk { PLL48CLK::Pllq => { pll48clk_valid = { - let pll48clk = base_clk as u64 * self.plln as u64 - / self.pllm as u64 - / self.pllq as u64; + let one_over_m = + ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1; + let one_over_q = + ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllq as u32) + 1) >> 1; + let pll48clk = (((base_clk as u64 * self.plln as u64 * one_over_m as u64) + >> Self::FIXED_POINT_RSHIFT) + * one_over_q as u64) + >> Self::FIXED_POINT_RSHIFT + << Self::BASE_CLK_SHIFT; (48_000_000 - 120_000..=48_000_000 + 120_000).contains(&pll48clk) } } PLL48CLK::Pllsai => { pll48clk_valid = { if self.use_pllsai { - let pll48clk = base_clk as u64 * self.pllsain as u64 - / self.pllm as u64 + // base_clk * pllsain has the same range as above + let one_over_m = + ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1; + let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT) / match self.pllsaip { - PLLSAIP::Div2 => 2, - PLLSAIP::Div4 => 4, - PLLSAIP::Div6 => 6, - PLLSAIP::Div8 => 8, - }; + PLLSAIP::Div2 => 2u32, + PLLSAIP::Div4 => 4u32, + PLLSAIP::Div6 => 6u32, + PLLSAIP::Div8 => 8u32, + } + + 1) + >> 1; + let pll48clk = + (((base_clk as u64 * self.pllsain as u64 * one_over_m as u64) + >> Self::FIXED_POINT_RSHIFT) + * one_over_p as u64) + >> Self::FIXED_POINT_RSHIFT + << Self::BASE_CLK_SHIFT; (48_000_000 - 120_000..=48_000_000 + 120_000).contains(&pll48clk) } else 
{ false @@ -801,7 +865,13 @@ impl CFGR { n = 432; continue; } - let f_vco_clock = (f_pll_clock_input as u64 * n as u64 / m as u64) as u32; + // See the comments around Self::FIXED_POINT_LSHIFT to see how this works. + let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (m as u32) + 1) >> 1; + let f_vco_clock = (((f_pll_clock_input as u64 >> Self::BASE_CLK_SHIFT) + * n as u64 + * one_over_m as u64) + >> Self::FIXED_POINT_RSHIFT + << Self::BASE_CLK_SHIFT) as u32; if f_vco_clock < 50_000_000 { m += 1; n = 432; @@ -857,15 +927,16 @@ impl CFGR { Some(hse) => hse.freq, None => HSI_FREQUENCY, } - .raw(); + .raw() + >> Self::BASE_CLK_SHIFT; let sysclk = if let Some(clk) = self.sysclk { clk } else { - base_clk + base_clk << Self::BASE_CLK_SHIFT }; - let p = if base_clk == sysclk { + let p = if base_clk << Self::BASE_CLK_SHIFT == sysclk { None } else { Some((sysclk - 1, sysclk + 1)) @@ -885,20 +956,29 @@ impl CFGR { // We check if (pllm, plln, pllp) allow to obtain the requested Sysclk, // so that we don't have to calculate them + let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1; + let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT) + / match self.pllp { + PLLP::Div2 => 2u32, + PLLP::Div4 => 4u32, + PLLP::Div6 => 6u32, + PLLP::Div8 => 8u32, + } + + 1) + >> 1; let p_ok = (sysclk as u64) - == (base_clk as u64 * self.plln as u64 - / self.pllm as u64 - / match self.pllp { - PLLP::Div2 => 2, - PLLP::Div4 => 4, - PLLP::Div6 => 6, - PLLP::Div8 => 8, - }); + == (((base_clk as u64 * self.plln as u64 * one_over_m as u64) + >> Self::FIXED_POINT_RSHIFT) + * one_over_p as u64) + >> Self::FIXED_POINT_RSHIFT + << Self::BASE_CLK_SHIFT; if p_ok && q.is_none() { return; } - if let Some((m, n, p, q)) = CFGR::calculate_mnpq(base_clk, FreqRequest { p, q }) { + if let Some((m, n, p, q)) = + CFGR::calculate_mnpq(base_clk << Self::BASE_CLK_SHIFT, FreqRequest { p, q }) + { self.pllm = m as u8; self.plln = n as u16; if let Some(p) = p {