From 971c7d3b3901e7610cba75cc0ddc4f9603632deb Mon Sep 17 00:00:00 2001 From: root Date: Tue, 7 Jan 2025 10:51:37 +0800 Subject: [PATCH 1/2] add igb_driver --- Cargo.lock | 14 + api/axfeat/Cargo.toml | 1 + igb_driver/Cargo.toml | 25 + igb_driver/src/constants.rs | 232 +++++++ igb_driver/src/descriptor.rs | 198 ++++++ igb_driver/src/hal.rs | 66 ++ igb_driver/src/igb.rs | 946 ++++++++++++++++++++++++++ igb_driver/src/interrupts.rs | 23 + igb_driver/src/lib.rs | 107 +++ igb_driver/src/memory.rs | 295 ++++++++ igb_driver/src/net_igb.rs | 257 +++++++ modules/axdriver/Cargo.toml | 4 +- modules/axdriver/build.rs | 2 +- modules/axdriver/src/drivers.rs | 48 ++ modules/axdriver/src/igb.rs | 41 ++ modules/axdriver/src/lib.rs | 3 + modules/axdriver/src/macros.rs | 5 + modules/axdriver/src/prelude.rs | 7 +- modules/axnet/src/smoltcp_impl/mod.rs | 2 +- scripts/make/qemu.mk | 3 +- ulib/axstd/Cargo.toml | 1 + 21 files changed, 2275 insertions(+), 5 deletions(-) create mode 100644 igb_driver/Cargo.toml create mode 100644 igb_driver/src/constants.rs create mode 100644 igb_driver/src/descriptor.rs create mode 100644 igb_driver/src/hal.rs create mode 100644 igb_driver/src/igb.rs create mode 100644 igb_driver/src/interrupts.rs create mode 100644 igb_driver/src/lib.rs create mode 100644 igb_driver/src/memory.rs create mode 100644 igb_driver/src/net_igb.rs create mode 100644 modules/axdriver/src/igb.rs diff --git a/Cargo.lock b/Cargo.lock index f713d8cdb4..dc640e9399 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -296,6 +296,7 @@ dependencies = [ "axdriver_virtio", "axhal", "cfg-if", + "igb_driver", "log", ] @@ -997,6 +998,19 @@ dependencies = [ "cc", ] +[[package]] +name = "igb_driver" +version = "0.1.0" +dependencies = [ + "axdriver_base", + "bit_field", + "core_detect", + "log", + "smoltcp", + "spin", + "volatile 0.3.0", +] + [[package]] name = "indexmap" version = "2.0.0" diff --git a/api/axfeat/Cargo.toml b/api/axfeat/Cargo.toml index ab86e937b5..5b2be4eff2 100644 --- a/api/axfeat/Cargo.toml +++ b/api/axfeat/Cargo.toml @@ -57,6 +57,7 @@ bus-pci = ["axdriver?/bus-pci"] driver-ramdisk = ["axdriver?/ramdisk", "axfs?/use-ramdisk"] driver-ixgbe = ["axdriver?/ixgbe"] driver-bcm2835-sdhci = ["axdriver?/bcm2835-sdhci"] +driver-igb = ["axdriver?/igb"] # Logging log-level-off = ["axlog/log-level-off"] diff --git a/igb_driver/Cargo.toml b/igb_driver/Cargo.toml new file mode 100644 index 0000000000..3dcb031f2c --- /dev/null +++ b/igb_driver/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "igb_driver" +version = "0.1.0" +edition = "2021" +authors = ["Song Zhiyong "] + +[dependencies] +log = "0.4" +bit_field = "0.10.2" +volatile = "0.3" +core_detect = "1.0.0" +spin = "0.9" +axdriver_base = { git = "https://github.com/arceos-org/axdriver_crates.git", tag = "v0.1.0" } + +[dependencies.smoltcp] +git = "https://github.com/rcore-os/smoltcp.git" +default-features = false +rev = "2ade274" +features = ["alloc", "log","medium-ethernet","proto-ipv4","socket-raw", "socket-icmp", "socket-udp", "socket-tcp", "socket-dns"] + + + +[features] +default = [] +irq = [] diff --git a/igb_driver/src/constants.rs b/igb_driver/src/constants.rs new file mode 100644 index 0000000000..348c313124 --- /dev/null +++ b/igb_driver/src/constants.rs @@ -0,0 +1,232 @@ +#![allow(dead_code)] +#![allow(non_snake_case)] +#![allow(non_camel_case_types)] +#![allow(non_upper_case_globals)] +#![allow(clippy::all)] +// list of all NIC registers and some structs +// copied and changed from the ixy C driver and DPDK + 
+/******************************************************************************* + +Copyright (c) 2001-2020, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ + +/* Vendor ID */ +pub const IGB_INTEL_VENDOR_ID: u32 = 0x8086; + +/* Device IDs */ +pub const IGB_DEV_ID_82576: u32 = 0x10C9; + +// unused/unsupported by ixy +pub fn IXGBE_BY_MAC(_hw: u32, _r: u32) -> u32 { + 0 +} + +/* General Registers */ +pub const IGB_CTRL: u32 = 0x00000; +pub const IGB_STATUS: u32 = 0x00008; +pub const IGB_CTRL_EXT: u32 = 0x00018; +pub const IGB_MDIC: u32 = 0x00020; + +pub const IGB_RCTL: u32 = 0x00000100; +pub const IGB_TCTL: u32 = 0x00000400; + +pub const IGB_TXDCTL_WTHRESH: u32 = 0x10000; +pub const IGB_TXDCTL_EN: u32 = 0x2000000; +pub const IGB_TCTL_EN: u32 = 0x2; + + +/* Interrupt Registers */ +pub const IGB_EICR: u32 = 0x01580; +pub const IGB_EICS: u32 = 0x01520; +pub const IGB_EIMS: u32 = 0x01524; +pub const IGB_EIMC: u32 = 0x01528; +pub const IGB_EIAC: u32 = 0x0152C; +pub const IGB_EIAM: u32 = 0x01530; + +pub const IGB_IRQ_CLEAR_MASK: u32 = 0xFFFFFFFF; + +/* Receive DMA Registers */ +pub fn IGB_RDBAL(i: u32) -> u32 { + if i == 0 { + 0x0C000 + } else if i <= 3{ + 0x0C040 + ((i - 1) * 0x40) + } else { + 0xC100 + (i - 4) * 0x40 + } +} +pub fn IGB_RDBAH(i: u32) -> u32 { + 0x0C004 + i * 0x40 +} +pub fn IGB_RDLEN(i: u32) -> u32 { + 0x0C008 + i * 0x40 +} +pub fn IGB_RDH(i: u32) -> u32 { + 0x0C010 + i * 0x40 +} +pub fn IGB_RDT(i: u32) -> u32 { + 0x0C018 + (i * 0x40) +} +pub fn IGB_RXDCTL(i: u32) -> u32 { + 0x0C028 + i * 0x40 +} + +/* + * Split and Replication Receive Control Registers + */ +pub fn IGB_SRRCTL(i: u32) -> u32 { + if i == 0 { + 0x0C00C + } else if i <= 3 { + 0x0C04C + (i - 1) * 0x40 + } else { + 0xC10C + ((i - 4) * 0x40) + } +} +/* + * Rx DCA Control Register: + * 00-15 : 0x02200 + n*4; + * 16-64 : 0x0100C + n*0x40; + * 64-127: 0x0D00C + (n-64)*0x40; + */ +pub fn IGB_DCA_RXCTRL(i: u32) -> u32 { + 0x0C014 + i * 0x40 +} +/* Transmit DMA registers */ +pub fn IGB_TDBAL(i: u32) -> u32 { + 0x0E000 + 
i * 0x40 +} /* 32 of them (0-31)*/ +pub fn IGB_TDBAH(i: u32) -> u32 { + 0x0E004 + i * 0x40 +} + +pub fn IGB_TDLEN(i: u32) -> u32 { + 0x0E008 + i * 0x40 +} + +pub fn IGB_TDH(i: u32) -> u32 { + 0x0E010 + i * 0x40 +} + +pub fn IGB_TDT(i: u32) -> u32 { + 0x0E018 + i * 0x40 +} + +pub fn IGB_TXDCTL(i: u32) -> u32 { + 0x0E028 + i * 0x40 +} + +pub fn IGB_TDWBAL(i: u32) -> u32 { + 0x0E038 + i * 0x40 +} + +pub fn IGB_TDWBAH(i: u32) -> u32 { + 0x0E03C + i * 0x40 +} + +pub const IGB_DMATXCTL: u32 = 0x03590; + +pub const IGB_TXPBSIZE: u32 = 0x3404; + +/*statistic register*/ +pub const IGB_GPRC: u32 = 0x04074; +pub const IGB_BPRC: u32 = 0x04078; +pub const IGB_MPRC: u32 = 0x0407C; +pub const IGB_GPTC: u32 = 0x04080; +pub const IGB_GORCL: u32 = 0x04088; +pub const IGB_GORCH: u32 = 0x0408C; +pub const IGB_GOTCL: u32 = 0x04090; +pub const IGB_GOTCH: u32 = 0x04094; + +/* CTRL Bit Masks */ +pub const IGB_CTRL_FD: u32 = 0x00000001; /* Full-Duplex */ +pub const IGB_CTRL_LNK_RST: u32 = 0x00000008; /* Link Reset. Resets everything. */ +pub const IGB_CTRL_SLU: u32 = 0x00000040; /* set link up. */ +pub const IGB_CTRL_SPEED: u32 = 0x00000300; /* SPEED. */ +pub const IGB_CTRL_RST: u32 = 0x04000000; /* Reset (SW) */ +pub const IGB_CTRL_RFCE: u32 = 0x08000000; /* Receive Flow Control Enable. */ +pub const IGB_CTRL_TFCE: u32 = 0x10000000; /* Transmit Flow Control Enable. */ +pub const IGB_CTRL_PHY_RST: u32 = 0x80000000; /* PHY Reset. */ +pub const IGB_CTRL_RST_MASK: u32 = IGB_CTRL_LNK_RST | IGB_CTRL_RST; + +/* STATUS register bit mask*/ +pub const IGB_STATUS_FD: u32 = 0x00000001; +pub const IGB_STATUS_LU: u32 = 0x00000002; +pub const IGB_STATUS_TXOFF: u32 = 0x00000010; + +/*RX control bit mask */ +pub const IGB_RCTL_EN: u32 = 0x00000002; + +pub const IGB_CTRL_EXT_NS_DIS: u32 = 0x00010000; /* No Snoop disable */ + +pub const IGB_TXPBSIZE_40KB: u32 = 0x28; /* 40KB Packet Buffer */ + +/* Packet buffer allocation strategies */ + +pub const PBA_STRATEGY_EQUAL: u32 = 0; /* Distribute PB space equally */ +pub const PBA_STRATEGY_WEIGHTED: u32 = 1; /* Weight front half of TCs */ + + +pub const IGB_SRRCTL_DESCTYPE_ADV_ONEBUF: u32 = 0x02000000; +pub const IGB_SRRCTL_DESCTYPE_MASK: u32 = 0x0E000000; + +pub const IGB_LINKS_UP: u32 = 0x2; +pub const IGB_LINKS_SPEED_82576: u32 = 0xC0; +pub const IGB_LINKS_SPEED_10_82576: u32 = 0x0; +pub const IGB_LINKS_SPEED_100_82576: u32 = 0x1; +pub const IGB_LINKS_SPEED_1000_82576: u32 = 0x2; + +pub fn IGB_RAL(i: u32) -> u32 { + if i <= 15 { + 0x05400 + i * 8 + } else { + 0x054E0 + (i - 16) * 8 + } +} + +pub fn IGB_RAH(i: u32) -> u32 { + if i <= 15 { + 0x05404 + i * 8 + } else { + 0x054E4 + (i - 16) * 8 + } +} + +pub const IGB_ADVTXD_STAT_DD: u32 = 0x1; +pub const IGB_SRRCTL_DROP_EN: u32 = 0x80000000; +pub const IGB_RXDCTL_ENABLE: u32 = 0x02000000; /* Ena specific Rx Queue */ + +pub const IGB_PHY_CTRL: u32 = 0; +pub const IGB_PHY_STATUS: u32 = 0; + +pub const IGB_PHY_AUTONE: u32 = 0x1000; +pub const IGB_PHY_RESTART: u32 = 0x0200; \ No newline at end of file diff --git a/igb_driver/src/descriptor.rs b/igb_driver/src/descriptor.rs new file mode 100644 index 0000000000..0425b4773f --- /dev/null +++ b/igb_driver/src/descriptor.rs @@ -0,0 +1,198 @@ +use bit_field::BitField; +use volatile::Volatile; + +// Transmit descriptor bits +/// Tx Command: End of Packet +pub const TX_CMD_EOP: u8 = 1 << 0; +/// Tx Command: Insert MAC FCS +pub const TX_CMD_IFCS: u8 = 1 << 1; +/// Tx Command: Report Status +pub const TX_CMD_RS: u8 = 1 << 3; +/// Tx Command: Descriptor Extension (Advanced format) +pub const 
TX_CMD_DEXT: u8 = 1 << 5; +/// Tx Command: VLAN Packet Enable +pub const TX_CMD_VLE: u8 = 1 << 6; +/// Tx Command: TCP/UDP Segmentation Enable +pub const TX_CMD_TSE: u8 = 1 << 7; +/// Tx Status: descriptor Done +pub const TX_STATUS_DD: u8 = 1 << 0; +/// Tx Descriptor Type: advanced +pub const TX_DTYP_ADV: u8 = 0x3 << 4; +/// Tx Descriptor paylen shift +/// The paylen is located at bit 46 in the upper 64 bits of the advanced Tx descriptor. +/// Since we have divided the upper 64 bits into 4 parts (u16,u8,u8,u32), +/// the paylen is then located at bit 14 of the upper 32 bits of the descriptor. +pub const TX_PAYLEN_SHIFT: u8 = 46 - 32; //(actual offset - offset of variable) + +// Receive descriptor bits +/// Rx Status: Descriptor Done +pub const RX_STATUS_DD: u8 = 1 << 0; +/// Rx Status: End of Packet +pub const RX_STATUS_EOP: u8 = 1 << 1; + +/// refer: [Theseus](https://github.com/theseus-os/Theseus/blob/theseus_main/kernel/intel_ethernet/src/descriptors.rs#L218-L219) +/// Advanced Receive Descriptor used in the Ixgbe driver. +/// It has 2 modes: Read and Write Back, both of which use the whole 128 bits. +/// There is one receive descriptor per receive buffer that can be converted between these 2 modes. +/// Read contains the addresses that the driver writes. +/// Write Back contains information the hardware writes on receiving a packet. +/// More information can be found in the 82599 datasheet. +pub(crate) struct AdvancedRxDescriptor { + /// Starting physical address of the receive bufffer for the packet. + pub packet_buffer_address: Volatile, + /// Starting physical address of the receive buffer for the header. + /// This field will only be used if header splitting is enabled. + pub header_buffer_address: Volatile, +} + +impl AdvancedRxDescriptor { + /// Initializes a receive descriptor by clearing its status + /// and setting the descriptor's physical address. + /// + /// # Arguments + /// * `packet_buffer_address`: starting physical address of the receive buffer. + pub fn init(&mut self) { + self.packet_buffer_address.write(0); + self.header_buffer_address.write(0); + } + + /// Updates the descriptor's physical address. + /// + /// # Arguments + /// * `packet_buffer_address`: starting physical address of the receive buffer. + pub fn set_packet_address(&mut self, packet_buffer_address: u64) { + self.packet_buffer_address.write(packet_buffer_address); + } + + /// Clears the status bits of the descriptor. + pub fn reset_status(&mut self) { + self.header_buffer_address.write(0); + } + + /// Returns true if the descriptor has a received packet copied to its buffer. + pub fn descriptor_done(&self) -> bool { + (self.get_ext_status() & RX_STATUS_DD as u64) == RX_STATUS_DD as u64 + } + + /// Returns true if the descriptor's packet buffer is the last in a frame. + pub fn end_of_packet(&self) -> bool { + (self.get_ext_status() & RX_STATUS_EOP as u64) == RX_STATUS_EOP as u64 + } + + /// The length of the packet in the descriptor's packet buffer. + pub fn length(&self) -> u64 { + self.get_pkt_len() + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// Returns the packet type that was used for the Receive Side Scaling hash function. + pub fn get_rss_type(&self) -> u64 { + self.packet_buffer_address.read().get_bits(0..3) + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// Returns the packet type as identified by the hardware. 
+ pub fn get_packet_type(&self) -> u64 { + self.packet_buffer_address.read().get_bits(4..16) + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// Returns the size of the packet header in bytes. + pub fn get_hdr_len(&self) -> u64 { + self.packet_buffer_address.read().get_bits(21..30) + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// Returns the Receive Side Scaling hash. + pub fn get_rss_hash(&self) -> u64 { + self.packet_buffer_address.read().get_bits(32..63) + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// Status information indicates whether a descriptor has been used + /// and whether the buffer is the last one for a packet + pub fn get_ext_status(&self) -> u64 { + self.header_buffer_address.read().get_bits(0..19) + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// Returns errors reported by hardware for different packet types + pub fn get_ext_error(&self) -> u64 { + self.header_buffer_address.read().get_bits(20..31) + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// Returns the number of bytes posted to the packet buffer + pub fn get_pkt_len(&self) -> u64 { + self.header_buffer_address.read().get_bits(32..47) + } + + /// Write Back mode function for the Advanced Receive Descriptor. + /// If the vlan header is stripped from the packet, then the 16 bits of the VLAN tag are posted here + pub fn get_vlan_tag(&self) -> u64 { + self.header_buffer_address.read().get_bits(48..63) + } +} + +/// Advanced Transmit Descriptor used by the `ixgbe` NIC driver. +/// +/// # Two usage modes +/// It has 2 modes: Read and Write Back, both of which use the whole 128 bits. +/// There is one transmit descriptor per transmit buffer; it can be converted between these 2 modes. +/// +/// Read contains the addresses that the driver writes. +/// Write Back contains information the hardware writes on receiving a packet. +/// +/// More information can be found in the 82599 datasheet. +#[repr(C)] +pub(crate) struct AdvancedTxDescriptor { + /// Starting physical address of the receive buffer for the packet. + pub packet_buffer_address: Volatile, + /// Length of data buffer + pub data_len: Volatile, + /// A multi-part field: + /// * `dtyp`: Descriptor Type, occupies bits `[7:4]`, + /// * `mac`: options to apply LinkSec and time stamp, occupies bits `[3:2]`. + pub dtyp_mac_rsv: Volatile, + /// Command bits + pub dcmd: Volatile, + /// A multi-part field: + /// * `paylen`: the size in bytes of the data buffer in host memory. + /// not including the fields that the hardware adds), occupies bits `[31:14]`. + /// * `popts`: options to offload checksum calculation, occupies bits `[13:8]`. + /// * `sta`: status of the descriptor (whether it's in use or not), occupies bits `[3:0]`. + pub paylen_popts_cc_idx_sta: Volatile, +} + +impl AdvancedTxDescriptor { + /// Initializes a transmit descriptor by clearing all of its values. + pub fn init(&mut self) { + self.packet_buffer_address.write(0); + self.paylen_popts_cc_idx_sta.write(0); + self.dcmd.write(0); + self.dtyp_mac_rsv.write(0); + self.data_len.write(0); + } + + /// Updates the transmit descriptor to send the packet. + /// We assume that one transmit descriptor will be used to send one packet. + /// + /// # Arguments + /// * `transmit_buffer_addr`: physical address of the transmit buffer. + /// * `transmit_buffer_length`: length of packet we want to send. 
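+    ///
+    /// A minimal usage sketch (`dma_addr` and `pkt_len` are illustrative
+    /// names; in this driver the real values come from a `MemPool`-backed
+    /// `Packet`):
+    /// ```ignore
+    /// // desc: &mut AdvancedTxDescriptor for a free ring slot
+    /// desc.send(dma_addr, pkt_len);  // hand the buffer to the NIC
+    /// desc.wait_for_packet_tx();     // busy-poll the DD status bit
+    /// ```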
+ pub fn send(&mut self, transmit_buffer_addr: u64, transmit_buffer_length: u16) { + self.packet_buffer_address.write(transmit_buffer_addr); + self.data_len.write(transmit_buffer_length); + self.dtyp_mac_rsv.write(TX_DTYP_ADV); + self.paylen_popts_cc_idx_sta + .write((transmit_buffer_length as u32) << TX_PAYLEN_SHIFT); + self.dcmd + .write(TX_CMD_DEXT | TX_CMD_RS | TX_CMD_IFCS | TX_CMD_EOP); + } + + /// Polls the Descriptor Done bit until the packet has been sent. + #[allow(clippy::while_immutable_condition)] + pub fn wait_for_packet_tx(&self) { + while (self.paylen_popts_cc_idx_sta.read() as u8 & TX_STATUS_DD) == 0 {} + } +} diff --git a/igb_driver/src/hal.rs b/igb_driver/src/hal.rs new file mode 100644 index 0000000000..9836240aaf --- /dev/null +++ b/igb_driver/src/hal.rs @@ -0,0 +1,66 @@ +use crate::memory::PhysAddr; +use core::ptr::NonNull; +use core::time::Duration; + +/// The interface which a particular hardware implementation must implement. +/// +/// # Safety +/// +/// Implementations of this trait must follow the "implementation safety" requirements documented +/// for each method. Callers must follow the safety requirements documented for the unsafe methods. +pub unsafe trait IgbHal +where + Self: Sized, +{ + /// Allocates and zeroes the given number of contiguous physical memory of DMA memory for Ixgbe NIC + /// use. + /// + /// Returns both the physical address which the device can use to access the memory, and a + /// pointer to the start of it which the driver can use to access it. + /// + /// # Implementation safety + /// + /// Implementations of this method must ensure that the `NonNull` returned is a + /// [_valid_](https://doc.rust-lang.org/std/ptr/index.html#safety) pointer, aligned to + /// 2, and won't alias any other allocations or references in the program until it + /// is deallocated by `dma_dealloc`. The pages must be zeroed. + fn dma_alloc(size: usize) -> (PhysAddr, NonNull); + + /// Deallocates the given contiguous physical DMA memory pages. + /// + /// # Safety + /// + /// The memory must have been allocated by `dma_alloc` on the same `Hal` implementation, and not + /// yet deallocated. `size` must be the same number passed to `dma_alloc` originally, and both + /// `paddr` and `vaddr` must be the values returned by `dma_alloc`. + unsafe fn dma_dealloc(paddr: PhysAddr, vaddr: NonNull, size: usize) -> i32; + + /// Converts a physical address used for MMIO to a virtual address which the driver can access. + /// + /// This is only used for MMIO addresses within BARs read from the device, for the PCI + /// transport. It may check that the address range up to the given size is within the region + /// expected for MMIO. + /// + /// # Implementation safety + /// + /// Implementations of this method must ensure that the `NonNull` returned is a + /// [_valid_](https://doc.rust-lang.org/std/ptr/index.html#safety) pointer, and won't alias any + /// other allocations or references in the program. + /// + /// # Safety + /// + /// The `paddr` and `size` must describe a valid MMIO region. The implementation may validate it + /// in some way (and panic if it is invalid) but is not guaranteed to. + unsafe fn mmio_phys_to_virt(paddr: PhysAddr, size: usize) -> NonNull; + + /// Converts a virtual address used by the driver to access MMIO to a physical address which the + /// device can use. + /// + /// # Safety + /// + /// + unsafe fn mmio_virt_to_phys(vaddr: NonNull, size: usize) -> PhysAddr; + + /// Wait until reaching the given deadline. 
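+    ///
+    /// A sketch of a typical implementation (assuming some platform
+    /// `busy_wait` helper; the only contract is that at least `duration`
+    /// elapses before returning):
+    /// ```ignore
+    /// fn wait_until(duration: Duration) -> Result<(), &'static str> {
+    ///     busy_wait(duration); // hypothetical platform helper
+    ///     Ok(())
+    /// }
+    /// ```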
+ fn wait_until(duration: Duration) -> Result<(), &'static str>; +} diff --git a/igb_driver/src/igb.rs b/igb_driver/src/igb.rs new file mode 100644 index 0000000000..bd22828cda --- /dev/null +++ b/igb_driver/src/igb.rs @@ -0,0 +1,946 @@ +use crate::descriptor::{AdvancedRxDescriptor, AdvancedTxDescriptor, RX_STATUS_DD, RX_STATUS_EOP}; +use crate::interrupts::Interrupts; +use crate::memory::{alloc_pkt, Dma, MemPool, Packet, PACKET_HEADROOM}; +use crate::NicDevice; +use crate::{constants::*, hal::IgbHal}; +use crate::{IgbError, IgbResult}; +use alloc::boxed::Box; +use alloc::sync::Arc; +use alloc::{collections::VecDeque, vec::Vec}; +use core::marker::PhantomData; +use core::ptr::NonNull; +use core::time::Duration; +use core::{mem, ptr}; +use smoltcp::wire::{EthernetFrame, PrettyPrinter}; + +const DRIVER_NAME: &str = "igb"; + +const MAX_QUEUES: u16 = 64; + +const PKT_BUF_ENTRY_SIZE: usize = 2048; +const MIN_MEMPOOL_SIZE: usize = 4096; + +// const NUM_RX_QUEUE_ENTRIES: usize = 1024; +// const NUM_TX_QUEUE_ENTRIES: usize = 1024; +const TX_CLEAN_BATCH: usize = 1; + +fn wrap_ring(index: usize, ring_size: usize) -> usize { + (index + 1) & (ring_size - 1) +} + +/// Ixgbe device. +pub struct IgbDevice { + addr: *mut u8, + len: usize, + num_rx_queues: u16, + num_tx_queues: u16, + rx_queues: Vec, + tx_queues: Vec, + interrupts: Interrupts, + _marker: PhantomData, +} + +struct IgbRxQueue { + descriptors: Box<[NonNull]>, + num_descriptors: usize, + pool: Arc, + bufs_in_use: Vec, + rx_index: usize, +} + +impl IgbRxQueue { + fn can_recv(&self) -> bool { + let rx_index = self.rx_index; + + let desc = unsafe { self.descriptors[rx_index].as_ref() }; + let status = desc.get_ext_status() as u8; + status & RX_STATUS_DD != 0 + } +} + +struct IgbTxQueue { + descriptors: Box<[NonNull]>, + num_descriptors: usize, + pool: Option>, + bufs_in_use: VecDeque, + clean_index: usize, + tx_index: usize, +} + +impl IgbTxQueue { + fn can_send(&self) -> bool { + let next_tx_index = wrap_ring(self.tx_index, self.num_descriptors); + next_tx_index != self.clean_index + } +} + +/// A packet buffer for ixgbe. +pub struct IgbNetBuf { + packet: Packet, +} + +impl IgbNetBuf { + /// Allocate a packet based on [`MemPool`]. + pub fn alloc(pool: &Arc, size: usize) -> IgbResult { + if let Some(pkt) = alloc_pkt(pool, size) { + Ok(Self { packet: pkt }) + } else { + Err(IgbError::NoMemory) + } + } + + /// Returns an unmutuable packet buffer. + pub fn packet(&self) -> &[u8] { + self.packet.as_bytes() + } + + /// Returns a mutuable packet buffer. + pub fn packet_mut(&mut self) -> &mut [u8] { + self.packet.as_mut_bytes() + } + + /// Returns the length of the packet. + pub fn packet_len(&self) -> usize { + self.packet.len + } + + /// Returns the entry of the packet. + pub fn pool_entry(&self) -> usize { + self.packet.pool_entry + } + + /// Construct a [`IxgbeNetBuf`] from specified pool entry and pool. + pub fn construct(pool_entry: usize, pool: &Arc, len: usize) -> IgbResult { + let pkt = unsafe { + Packet::new( + pool.get_virt_addr(pool_entry).add(PACKET_HEADROOM), + pool.get_phys_addr(pool_entry) + PACKET_HEADROOM, + len, + Arc::clone(pool), + pool_entry, + ) + }; + Ok(Self { packet: pkt }) + } +} + +impl NicDevice for IgbDevice { + fn get_driver_name(&self) -> &str { + DRIVER_NAME + } + + /// Returns the link speed of this device. 
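+    /// The value is in Mbit/s (10, 100 or 1000 for the 82576), or 0 if the
+    /// link is not up.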
+ fn get_link_speed(&self) -> u16 { + let speed = self.get_reg32(IGB_STATUS); + if (speed & IGB_LINKS_UP) == 0 { + return 0; + } + match speed & IGB_LINKS_SPEED_82576 { + IGB_LINKS_SPEED_10_82576 => 10, + IGB_LINKS_SPEED_100_82576 => 100, + IGB_LINKS_SPEED_1000_82576 => 1000, + _ => 1000, + } + } + + /// Returns the mac address of this device. + fn get_mac_addr(&self) -> [u8; 6] { + let low = self.get_reg32(IGB_RAL(0)); + let high = self.get_reg32(IGB_RAH(0)); + + [ + (low & 0xff) as u8, + (low >> 8 & 0xff) as u8, + (low >> 16 & 0xff) as u8, + (low >> 24) as u8, + (high & 0xff) as u8, + (high >> 8 & 0xff) as u8, + ] + } + + /// Resets the stats of this device. + fn reset_stats(&mut self) { + self.get_reg32(IGB_GPRC); + self.get_reg32(IGB_GPTC); + self.get_reg32(IGB_GORCL); + self.get_reg32(IGB_GORCH); + self.get_reg32(IGB_GOTCL); + self.get_reg32(IGB_GOTCH); + } + + fn recycle_tx_buffers(&mut self, queue_id: u16) -> IgbResult { + let queue = self + .tx_queues + .get_mut(queue_id as usize) + .ok_or(IgbError::InvalidQueue)?; + + let mut clean_index = queue.clean_index; + let cur_index = queue.tx_index; + // info!("s0:{}", IXGBE_ADVTXD_STAT_DD); + loop { + let mut cleanable = cur_index as i32 - clean_index as i32; + + if cleanable < 0 { + cleanable += queue.num_descriptors as i32; + } + + if cleanable < TX_CLEAN_BATCH as i32 { + break; + } + + let mut cleanup_to = clean_index + TX_CLEAN_BATCH - 1; + + if cleanup_to >= queue.num_descriptors { + cleanup_to -= queue.num_descriptors; + } + + let status = unsafe { + let descs = queue.descriptors[cleanup_to].as_mut(); + descs.paylen_popts_cc_idx_sta.read() + }; + //szy: DD=1 means DMA composed + // info!("s:{}", IXGBE_ADVTXD_STAT_DD); + if (status & IGB_ADVTXD_STAT_DD) != 0 { + if let Some(ref pool) = queue.pool { + if TX_CLEAN_BATCH >= queue.bufs_in_use.len() { + pool.free_stack + .borrow_mut() + .extend(queue.bufs_in_use.drain(..)) + } else { + pool.free_stack + .borrow_mut() + .extend(queue.bufs_in_use.drain(..TX_CLEAN_BATCH)) + } + } + + clean_index = wrap_ring(cleanup_to, queue.num_descriptors); + } else { + break; + } + } + + queue.clean_index = clean_index; + + Ok(()) + } + + fn receive_packets( + &mut self, + queue_id: u16, + packet_nums: usize, + mut f: F, + ) -> IgbResult + where + F: FnMut(IgbNetBuf), + { + let mut recv_nums = 0; + let queue = self + .rx_queues + .get_mut(queue_id as usize) + .ok_or(IgbError::InvalidQueue)?; + + // Can't receive, return [`IxgbeError::NotReady`] + if !queue.can_recv() { + return Err(IgbError::NotReady); + } + + let mut rx_index = queue.rx_index; + let mut last_rx_index = queue.rx_index; + + for _ in 0..packet_nums { + let desc = unsafe { queue.descriptors[rx_index].as_mut() }; + let status = desc.get_ext_status() as u8; + + if (status & RX_STATUS_DD) == 0 { + break; + } + + if (status & RX_STATUS_EOP) == 0 { + panic!("Increase buffer size or decrease MTU") + } + + let pool = &queue.pool; + + if let Some(buf) = pool.alloc_buf() { + let idx = mem::replace(&mut queue.bufs_in_use[rx_index], buf); + + let packet = unsafe { + Packet::new( + pool.get_virt_addr(idx), + pool.get_phys_addr(idx), + desc.length() as usize, + pool.clone(), + idx, + ) + }; + // Prefetch cache line for next packet. + #[cfg(target_arch = "x86_64")] + packet.prefrtch(crate::memory::Prefetch::Time0); + + let rx_buf = IgbNetBuf { packet }; + + // Call closure to avoid too many dynamic memory allocations, handle + // by caller. 
+ f(rx_buf); + recv_nums += 1; + + desc.set_packet_address(pool.get_phys_addr(queue.bufs_in_use[rx_index]) as u64); + desc.reset_status(); + + last_rx_index = rx_index; + rx_index = wrap_ring(rx_index, queue.num_descriptors); + } else { + error!("Ixgbe alloc buffer failed: No Memory!"); + break; + } + } + + if rx_index != last_rx_index { + self.set_reg32(IGB_RDT(u32::from(queue_id)), last_rx_index as u32); + self.rx_queues[queue_id as usize].rx_index = rx_index; + } + + Ok(recv_nums) + } + + /// Sends a [`TxBuffer`] to the network. If currently queue is full, returns an + /// error with type [`IxgbeError::QueueFull`]. + fn send(&mut self, queue_id: u16, tx_buf: IgbNetBuf) -> IgbResult { + let queue = self + .tx_queues + .get_mut(queue_id as usize) + .ok_or(IgbError::InvalidQueue)?; + + if !queue.can_send() { + warn!("Queue {} is full", queue_id); + return Err(IgbError::QueueFull); + } + + let cur_index = queue.tx_index; + + let packet = tx_buf.packet; + + trace!( + "[ixgbe-driver] SEND PACKET: {}", + PrettyPrinter::>::new("", &packet.as_bytes()) + ); + + if queue.pool.is_some() { + if !Arc::ptr_eq(queue.pool.as_ref().unwrap(), &packet.pool) { + queue.pool = Some(packet.pool.clone()); + } + } else { + queue.pool = Some(packet.pool.clone()); + } + + assert!( + Arc::ptr_eq(queue.pool.as_ref().unwrap(), &packet.pool), + "Distince memory pools for a single tx queue are not supported yet." + ); + + queue.tx_index = wrap_ring(queue.tx_index, queue.num_descriptors); + + trace!( + "TX phys_addr: {:#x}, virt_addr: {:#x}", + packet.get_phys_addr() as u64, + packet.get_virt_addr() as u64 + ); + + // update descriptor + let desc = unsafe { queue.descriptors[cur_index].as_mut() }; + desc.send(packet.get_phys_addr() as u64, packet.len() as u16); + + trace!( + "packet phys addr: {:#x}, len: {}", + packet.get_phys_addr(), + packet.len() + ); + + queue.bufs_in_use.push_back(packet.pool_entry); + mem::forget(packet); + + self.set_reg32( + IGB_TDT(u32::from(queue_id)), + self.tx_queues[queue_id as usize].tx_index as u32, + ); + + debug!("[Igb::send] SEND PACKET COMPLETE"); + Ok(()) + } + + /// Whether can receiver packet. + fn can_receive(&self, queue_id: u16) -> IgbResult { + let queue = self + .rx_queues + .get(queue_id as usize) + .ok_or(IgbError::InvalidQueue)?; + Ok(queue.can_recv()) + } + + /// Whether can send packet. + fn can_send(&self, queue_id: u16) -> IgbResult { + let queue = self + .tx_queues + .get(queue_id as usize) + .ok_or(IgbError::InvalidQueue)?; + Ok(queue.can_send()) + } +} + +impl IgbDevice { + /// Returns an initialized `IxgbeDevice` on success. + /// + /// # Panics + /// Panics if `num_rx_queues` or `num_tx_queues` exceeds `MAX_QUEUES`. 
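+    ///
+    /// # Example (sketch)
+    ///
+    /// `MyHal` is a hypothetical [`IgbHal`] implementation and `base`/`len`
+    /// describe an already-mapped PCI BAR:
+    /// ```ignore
+    /// let pool = MemPool::allocate::<MyHal>(4096, 2048)?;
+    /// let dev = IgbDevice::<MyHal, 256>::init(base, len, 1, 1, &pool)?;
+    /// ```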
+ pub fn init( + base: usize, + len: usize, + num_rx_queues: u16, + num_tx_queues: u16, + pool: &Arc, + ) -> IgbResult { + info!( + "Initializing igb device@base: {:#x}, len: {:#x}, num_rx_queues: {}, num_tx_queues: {}", + base, len, num_rx_queues, num_tx_queues + ); + // initialize RX and TX queue + let rx_queues = Vec::with_capacity(num_rx_queues as usize); + let tx_queues = Vec::with_capacity(num_tx_queues as usize); + + // let mut interrupts = Interrupts::default(); + // #[cfg(feature = "irq")] + // { + // interrupts.interrupts_enabled = true; + // interrupts.itr_rate = 0x028; + // } + + let interrupts = Interrupts::default(); + let mut dev = IgbDevice { + addr: base as *mut u8, + len, + num_rx_queues, + num_tx_queues, + rx_queues, + tx_queues, + interrupts, + _marker: PhantomData, + }; + + #[cfg(feature = "irq")] + { + for queue_id in 0..num_rx_queues { + dev.enable_msix_interrupt(queue_id); + } + } + + dev.reset_and_init(pool)?; + Ok(dev) + } + + /// Returns the number of receive queues. + pub fn num_rx_queues(&self) -> u16 { + self.num_rx_queues + } + + /// Returns the number of transmit queues. + pub fn num_tx_queues(&self) -> u16 { + self.num_tx_queues + } + + #[cfg(feature = "irq")] + /// Enable MSI interrupt for queue with `queue_id`. + pub fn enable_msi_interrupt(&self, queue_id: u16) { + // Step 1: The software driver associates between Tx and Rx interrupt causes and the EICR + // register by setting the IVAR[n] registers. + self.set_ivar(0, queue_id, 0); + + // Step 2: Program SRRCTL[n].RDMTS (per receive queue) if software uses the receive + // descriptor minimum threshold interrupt + // We don't use the minimum threshold interrupt + + // Step 3: All interrupts should be set to 0b (no auto clear in the EIAC register). Following an + // interrupt, software might read the EICR register to check for the interrupt causes. + self.set_reg32(IXGBE_EIAC, 0x0000_0000); + + // Step 4: Set the auto mask in the EIAM register according to the preferred mode of operation. + // In our case we prefer to not auto-mask the interrupts + + // Step 5: Set the interrupt throttling in EITR[n] and GPIE according to the preferred mode of operation. + self.set_reg32(IXGBE_EITR(u32::from(queue_id)), self.interrupts.itr_rate); + + // Step 6: Software clears EICR by writing all ones to clear old interrupt causes + self.clear_interrupts(); + + // Step 7: Software enables the required interrupt causes by setting the EIMS register + let mut mask: u32 = self.get_reg32(IXGBE_EIMS); + mask |= 1 << queue_id; + self.set_reg32(IXGBE_EIMS, mask); + debug!("Using MSI interrupts"); + } + + #[cfg(feature = "irq")] + /// Enable MSI-X interrupt for queue with `queue_id`. + pub fn enable_msix_interrupt(&self, queue_id: u16) { + // Step 1: The software driver associates between interrupt causes and MSI-X vectors and the + // throttling timers EITR[n] by programming the IVAR[n] and IVAR_MISC registers. + let mut gpie: u32 = self.get_reg32(IXGBE_GPIE); + gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_EIAME; + self.set_reg32(IXGBE_GPIE, gpie); + + // Set IVAR reg to enable interrupst for different queues. + self.set_ivar(0, queue_id, u32::from(queue_id)); + + // Step 2: Program SRRCTL[n].RDMTS (per receive queue) if software uses the receive + // descriptor minimum threshold interrupt + // We don't use the minimum threshold interrupt + + // Step 3: The EIAC[n] registers should be set to auto clear for transmit and receive interrupt + // causes (for best performance). 
The EIAC bits that control the other and TCP timer + // interrupt causes should be set to 0b (no auto clear). + self.set_reg32(IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE); + + // Step 4: Set the auto mask in the EIAM register according to the preferred mode of operation. + // In our case we prefer to not auto-mask the interrupts + + // Step 5: Set the interrupt throttling in EITR[n] and GPIE according to the preferred mode of operation. + // 0x000 (0us) => ... INT/s + // 0x008 (2us) => 488200 INT/s + // 0x010 (4us) => 244000 INT/s + // 0x028 (10us) => 97600 INT/s + // 0x0C8 (50us) => 20000 INT/s + // 0x190 (100us) => 9766 INT/s + // 0x320 (200us) => 4880 INT/s + // 0x4B0 (300us) => 3255 INT/s + // 0x640 (400us) => 2441 INT/s + // 0x7D0 (500us) => 2000 INT/s + // 0x960 (600us) => 1630 INT/s + // 0xAF0 (700us) => 1400 INT/s + // 0xC80 (800us) => 1220 INT/s + // 0xE10 (900us) => 1080 INT/s + // 0xFA7 (1000us) => 980 INT/s + // 0xFFF (1024us) => 950 INT/s + self.set_reg32(IXGBE_EITR(u32::from(queue_id)), self.interrupts.itr_rate); + + // Step 6: Software enables the required interrupt causes by setting the EIMS register + let mut mask: u32 = self.get_reg32(IXGBE_EIMS); + mask |= 1 << queue_id; + self.set_reg32(IXGBE_EIMS, mask); + debug!("Using MSIX interrupts"); + } +} + +// Private methods implementation +impl IgbDevice { + /// Resets and initializes the device. + fn reset_and_init(&mut self, pool: &Arc) -> IgbResult { + debug!("resetting device igb device"); + self.disable_interrupts(); + + self.set_flags32(IGB_CTRL, IGB_CTRL_RST); + self.wait_clear_reg32(IGB_CTRL, IGB_CTRL_RST); + self.disable_interrupts(); + + let mac = self.get_mac_addr(); + info!( + "mac address: {:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] + ); + + self.set_flags32(IGB_CTRL, IGB_CTRL_SLU); + self.set_phy_flags32(IGB_PHY_CTRL, IGB_PHY_AUTONE | IGB_PHY_RESTART | self.get_phy_flags32(0) as u32); + let _ = H::wait_until(Duration::from_millis(1000)); + debug!("phy status:{:x}, ctl:{:x}, {:x}", self.get_phy_flags32(1), self.get_phy_flags32(0), self.get_phy_flags32(4)); + + // reset-on-read registers, just read them once + self.reset_stats(); + + self.init_rx(pool)?; + + self.init_tx()?; + + for i in 0..self.num_rx_queues { + self.start_rx_queue(i)?; + } + + for i in 0..self.num_tx_queues { + self.start_tx_queue(i)?; + } + // enable promisc mode by default to make testing easier + // self.set_promisc(true); + debug!("CTRL:{:x}, STATUS:{:x}", self.get_reg32(IGB_CTRL), self.get_reg32(IGB_STATUS)); + // wait some time for the link to come up + self.set_flags32(IGB_CTRL, 1 << 28); + + info!("Success to initialize and reset Intel IGB NIC regs."); + + Ok(()) + } + + // sections 4.5.9 + /// Initializes the rx queues of this device. 
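+    ///
+    /// For each queue this selects advanced one-buffer descriptors in
+    /// `SRRCTL`, allocates a DMA descriptor ring, programs its address and
+    /// size into `RDBAL`/`RDBAH`/`RDLEN`, and leaves `RDH`/`RDT` at zero;
+    /// receive buffers are attached later in `start_rx_queue`.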
+ #[allow(clippy::needless_range_loop)] + fn init_rx(&mut self, pool: &Arc) -> IgbResult { + // disable rx while re-configuring it + // self.clear_flags32(IGB_RCTL, IGB_RCTL_EN); + + // configure queues, same for all queues + for i in 0..self.num_rx_queues { + info!("initializing rx queue {}", i); + // enable advanced rx descriptors + self.set_reg32( + IGB_SRRCTL(u32::from(i)), + (self.get_reg32(IGB_SRRCTL(u32::from(i))) & !IGB_SRRCTL_DESCTYPE_MASK) + | IGB_SRRCTL_DESCTYPE_ADV_ONEBUF, + ); + // let nic drop packets if no rx descriptor is available instead of buffering them + self.set_flags32(IGB_SRRCTL(u32::from(i)), IGB_SRRCTL_DROP_EN); + + assert_eq!(mem::size_of::(), 16); + // section 7.1.9 - setup descriptor ring + let ring_size_bytes = QS * mem::size_of::(); + let dma: Dma = Dma::allocate(ring_size_bytes, true)?; + + // initialize to 0xff to prevent rogue memory accesses on premature dma activation + let mut descriptors: [NonNull; QS] = [NonNull::dangling(); QS]; + + unsafe { + for desc_id in 0..QS { + descriptors[desc_id] = NonNull::new(dma.virt.add(desc_id)).unwrap(); + descriptors[desc_id].as_mut().init(); + } + } + + self.set_reg32( + IGB_RDBAL(u32::from(i)), + (dma.phys as u64 & 0xffff_ffff) as u32, + ); + self.set_reg32(IGB_RDBAH(u32::from(i)), (dma.phys as u64 >> 32) as u32); + self.set_reg32(IGB_RDLEN(u32::from(i)), ring_size_bytes as u32); + + info!("rx ring {} phys addr: {:#x}", i, dma.phys); + info!("rx ring {} virt addr: {:p}", i, dma.virt); + + // set ring to empty at start + self.set_reg32(IGB_RDH(u32::from(i)), 0); + self.set_reg32(IGB_RDT(u32::from(i)), 0); + + let rx_queue = IgbRxQueue { + descriptors: Box::new(descriptors), + pool: Arc::clone(pool), + num_descriptors: QS, + rx_index: 0, + bufs_in_use: Vec::with_capacity(QS), + }; + + self.rx_queues.push(rx_queue); + } + + // last sentence of section 4.6.7 - set some magic bits + // self.set_flags32(IGB_CTRL_EXT, IGB_CTRL_EXT_NS_DIS); + + // probably a broken feature, this flag is initialized with 1 but has to be set to 0 + // for i in 0..self.num_rx_queues { + // self.clear_flags32(IGB_DCA_RXCTRL(u32::from(i)), 1 << 12); + // } + + // start rx + // self.set_flags32(IGB_RCTL, IGB_RCTL_EN); + + Ok(()) + } + + // section 4.6.8 + /// Initializes the tx queues of this device. 
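+    ///
+    /// Mirrors `init_rx`: sets the packet-buffer size, allocates a DMA
+    /// descriptor ring per queue, programs `TDBAL`/`TDBAH`/`TDLEN`, and sets
+    /// the `WTHRESH` write-back threshold in `TXDCTL`; the queue itself is
+    /// enabled later in `start_tx_queue`.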
+ #[allow(clippy::needless_range_loop)] + fn init_tx(&mut self) -> IgbResult { + + //default buffer size allocations + self.set_reg32(IGB_TXPBSIZE, IGB_TXPBSIZE_40KB); + + // configure queues + for i in 0..self.num_tx_queues { + info!("initializing tx queue {}", i); + // section 7.1.9 - setup descriptor ring + assert_eq!(mem::size_of::(), 16); + let ring_size_bytes = QS * mem::size_of::(); + + let dma: Dma = Dma::allocate(ring_size_bytes, true)?; + + let mut descriptors: [NonNull; QS] = [NonNull::dangling(); QS]; + + unsafe { + for desc_id in 0..QS { + descriptors[desc_id] = NonNull::new(dma.virt.add(desc_id)).unwrap(); + descriptors[desc_id].as_mut().init(); + } + } + + self.set_reg32( + IGB_TDBAL(u32::from(i)), + (dma.phys as u64 & 0xffff_ffff) as u32, + ); + self.set_reg32(IGB_TDBAH(u32::from(i)), (dma.phys as u64 >> 32) as u32); + self.set_reg32(IGB_TDLEN(u32::from(i)), ring_size_bytes as u32); + + trace!("tx ring {} phys addr: {:#x}", i, dma.phys); + trace!("tx ring {} virt addr: {:p}", i, dma.virt); + + self.set_reg32(IGB_TXDCTL(u32::from(i)), 0); + self.set_flags32(IGB_TXDCTL(u32::from(i)), IGB_TXDCTL_WTHRESH); + // self.set_flags32(IGB_TXDCTL(u32::from(i)), IGB_TXDCTL_EN); + + let tx_queue = IgbTxQueue { + descriptors: Box::new(descriptors), + bufs_in_use: VecDeque::with_capacity(QS), + pool: None, + num_descriptors: QS, + clean_index: 0, + tx_index: 0, + }; + + self.tx_queues.push(tx_queue); + } + + // final step: enable + // self.set_flags32(IGB_TCTL, IGB_TCTL_EN); + + Ok(()) + } + + /// Sets the rx queues` descriptors and enables the queues. + fn start_rx_queue(&mut self, queue_id: u16) -> IgbResult { + debug!("starting rx queue {}", queue_id); + + let queue = &mut self.rx_queues[queue_id as usize]; + + if queue.num_descriptors & (queue.num_descriptors - 1) != 0 { + // return Err("number of queue entries must be a power of 2".into()); + return Err(IgbError::QueueNotAligned); + } + + for i in 0..queue.num_descriptors { + let pool = &queue.pool; + + let id = match pool.alloc_buf() { + Some(x) => x, + None => return Err(IgbError::NoMemory), + }; + + unsafe { + let desc = queue.descriptors[i].as_mut(); + desc.set_packet_address(pool.get_phys_addr(id) as u64); + desc.reset_status(); + } + + // we need to remember which descriptor entry belongs to which mempool entry + queue.bufs_in_use.push(id); + } + + let queue = &self.rx_queues[queue_id as usize]; + + // enable queue and wait if necessary + self.set_flags32(IGB_RXDCTL(u32::from(queue_id)), IGB_RXDCTL_ENABLE); + self.wait_set_reg32(IGB_RXDCTL(u32::from(queue_id)), IGB_RXDCTL_ENABLE); + + // rx queue starts out full + self.set_reg32(IGB_RDH(u32::from(queue_id)), 0); + + // was set to 0 before in the init function + self.set_reg32( + IGB_RDT(u32::from(queue_id)), + (queue.num_descriptors - 1) as u32, + ); + // self.set_flags32(IGB_RCTL, 0x8038); + // self.set_flags32(IGB_RCTL, 1 << 3); + // self.set_flags32(IGB_RCTL, 1 << 4); + // self.set_flags32(IGB_RCTL, 1 << 5); + self.set_flags32(IGB_RCTL, 1 << 15); + self.set_flags32(IGB_RCTL, IGB_RCTL_EN); + Ok(()) + } + + /// Enables the tx queues. 
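+    ///
+    /// Checks that the ring size is a power of two, zeroes `TDH`/`TDT`, sets
+    /// the enable bit in `TXDCTL` (waiting for it to latch), and finally sets
+    /// `TCTL.EN` to start the transmit unit.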
+ fn start_tx_queue(&mut self, queue_id: u16) -> IgbResult { + debug!("starting tx queue {}", queue_id); + + let queue = &mut self.tx_queues[queue_id as usize]; + + if queue.num_descriptors & (queue.num_descriptors - 1) != 0 { + return Err(IgbError::QueueNotAligned); + } + + // tx queue starts out empty + self.set_reg32(IGB_TDH(u32::from(queue_id)), 0); + self.set_reg32(IGB_TDT(u32::from(queue_id)), 0); + + // enable queue and wait if necessary + self.set_flags32(IGB_TXDCTL(u32::from(queue_id)), IGB_TXDCTL_EN); + self.wait_set_reg32(IGB_TXDCTL(u32::from(queue_id)), IGB_TXDCTL_EN); + + self.set_flags32(IGB_TCTL, IGB_TCTL_EN); + + Ok(()) + } + + // see section 4.5.7 + /// Initializes the link of this device. + fn init_link(&self) { + // link auto-configuration register should already be set correctly, we're resetting it anyway + let mut current = self.get_reg32(IGB_CTRL); + current |= IGB_CTRL_SLU; + // info!("current:{:x}", current);s + self.set_reg32(IGB_CTRL, current); + } + + /// Disable all interrupts for all queues. + fn disable_interrupts(&self) { + // Clear interrupt mask to stop from interrupts being generated + self.set_reg32(IGB_EIMS, 0x0000_0000); + self.clear_interrupts(); + } + + /// Disable interrupt for queue with `queue_id`. + fn disable_interrupt(&self, queue_id: u16) { + // Clear interrupt mask to stop from interrupts being generated + let mut mask: u32 = self.get_reg32(IGB_EIMS); + mask &= !(1 << queue_id); + self.set_reg32(IGB_EIMS, mask); + self.clear_interrupt(queue_id); + debug!("Using polling"); + } + + /// Clear interrupt for queue with `queue_id`. + fn clear_interrupt(&self, queue_id: u16) { + // Clear interrupt mask + self.set_reg32(IGB_EIMC, 1 << queue_id); + self.get_reg32(IGB_EICR); + } + + /// Clear all interrupt masks for all queues. + fn clear_interrupts(&self) { + // Clear interrupt mask + self.set_reg32(IGB_EIMC, IGB_IRQ_CLEAR_MASK); + self.get_reg32(IGB_EICR); + } + + /// Waits for the link to come up. + fn wait_for_link(&self) { + #[cfg(target_arch = "x86_64")] + { + info!("waiting for link"); + let _ = H::wait_until(Duration::from_secs(1)); + let mut speed = self.get_link_speed(); + while speed == 0 { + let _ = H::wait_until(Duration::from_millis(100)); + speed = self.get_link_speed(); + } + info!("link speed is {} Mbit/s", self.get_link_speed()); + } + } + + // Enables or disables promisc mode of this device. + fn set_promisc(&self, enabled: bool) { + if enabled { + info!("enabling promisc mode"); + // self.set_flags32(IXGBE_FCTRL, IXGBE_FCTRL_MPE | IXGBE_FCTRL_UPE); + } else { + info!("disabling promisc mode"); + // self.clear_flags32(IXGBE_FCTRL, IXGBE_FCTRL_MPE | IXGBE_FCTRL_UPE); + } + } + + /// Returns the register at `self.addr` + `reg`. + /// + /// # Panics + /// + /// Panics if `self.addr` + `reg` does not belong to the mapped memory of the pci device. + fn get_reg32(&self, reg: u32) -> u32 { + assert!(reg as usize <= self.len - 4, "memory access out of bounds"); + + unsafe { ptr::read_volatile((self.addr as usize + reg as usize) as *mut u32) } + } + + /// Sets the register at `self.addr` + `reg` to `value`. + /// + /// # Panics + /// + /// Panics if `self.addr` + `reg` does not belong to the mapped memory of the pci device. + fn set_reg32(&self, reg: u32, value: u32) { + assert!(reg as usize <= self.len - 4, "memory access out of bounds"); + + unsafe { + ptr::write_volatile((self.addr as usize + reg as usize) as *mut u32, value); + } + } + + /// Sets the `flags` at `self.addr` + `reg`. 
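+    /// This is a read-modify-write OR; e.g.
+    /// `self.set_flags32(IGB_CTRL, IGB_CTRL_SLU)` sets the "set link up" bit
+    /// without disturbing the other `CTRL` bits.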
+ fn set_flags32(&self, reg: u32, flags: u32) { + self.set_reg32(reg, self.get_reg32(reg) | flags); + } + + /// Clears the `flags` at `self.addr` + `reg`. + fn clear_flags32(&self, reg: u32, flags: u32) { + self.set_reg32(reg, self.get_reg32(reg) & !flags); + } + + fn set_phy_flags32(&self, offset: u32, flags: u32) { + self.set_reg32(IGB_MDIC, offset << 16 | 1 << 21 | 1 << 26 | flags); + loop { + if (self.get_reg32(IGB_MDIC) & (1 << 28)) != 0 { + break; + } + } + } + + fn get_phy_flags32(&self, offset: u32) -> u16 { + let mut mdic:u32; + self.set_reg32(IGB_MDIC, offset << 16 | 1 << 21 | 1 << 27); + loop { + mdic = self.get_reg32(IGB_MDIC); + if (mdic & (1 << 28)) != 0 { + break; + } + } + (mdic & 0xffff) as u16 + } + + /// Waits for `self.addr` + `reg` to clear `value`. + fn wait_clear_reg32(&self, reg: u32, value: u32) { + loop { + let current = self.get_reg32(reg); + if (current & value) == 0 { + break; + } + // `thread::sleep(Duration::from_millis(100));` + // let _ = H::wait_ms(100); + let _ = H::wait_until(Duration::from_millis(100)); + } + } + + /// Waits for `self.addr` + `reg` to set `value`. + fn wait_set_reg32(&self, reg: u32, value: u32) { + loop { + let current = self.get_reg32(reg); + if (current & value) == value { + break; + } + let _ = H::wait_until(Duration::from_millis(100)); + } + } + + // Maps interrupt causes to vectors by specifying the `direction` (0 for Rx, 1 for Tx),, + // the `queue` ID and the corresponding `misx_vector`. + // fn set_ivar(&self, direction: u32, queue: u16, mut msix_vector: u32) { + // let mut ivar: u32; + // // let index: u32; + // msix_vector |= IXGBE_IVAR_ALLOC_VAL; + // let index = 16 * (u32::from(queue) & 1) + 8 * direction; + // ivar = self.get_reg32(IXGBE_IVAR(u32::from(queue) >> 1)); + // ivar &= !(0xFF << index); + // ivar |= msix_vector << index; + // self.set_reg32(IXGBE_IVAR(u32::from(queue) >> 1), ivar); + // } +} + +unsafe impl Sync for IgbDevice {} +unsafe impl Send for IgbDevice {} diff --git a/igb_driver/src/interrupts.rs b/igb_driver/src/interrupts.rs new file mode 100644 index 0000000000..dd8b706cb6 --- /dev/null +++ b/igb_driver/src/interrupts.rs @@ -0,0 +1,23 @@ +use alloc::vec::Vec; + +/// The number of msi-x vectors this device can have. +/// It can be set from PCI space, but we took the value from the data sheet. +pub const IXGBE_MAX_MSIX_VECTORS: usize = 64; + +#[derive(Default)] +pub struct Interrupts { + pub interrupts_enabled: bool, // Interrupts for this device enabled? + pub itr_rate: u32, // Interrupt Throttling Rate + pub interrupt_type: u64, // MSI or MSIX + pub timeout_ms: i16, // Interrupt timeout in ms (-1 to disable timeout) + pub queues: Vec, // Interrupt settings per queue +} + +pub struct InterruptsQueue { + pub interrupt_enabled: bool, // Interrupt for this queue enabled? +} + +pub enum InterruptType { + Msi, + Msix, +} diff --git a/igb_driver/src/lib.rs b/igb_driver/src/lib.rs new file mode 100644 index 0000000000..eb5fd046af --- /dev/null +++ b/igb_driver/src/lib.rs @@ -0,0 +1,107 @@ +//! Intel IGB NIC Driver Implementation. + +#![no_std] +#![deny(warnings)] +#![deny(missing_docs)] +#![allow(dead_code)] + + +mod constants; +mod descriptor; +mod hal; +mod interrupts; +mod igb; +mod memory; + +// mod net_buf; +pub mod net_igb; + +extern crate alloc; +#[macro_use] +extern crate log; + +pub use hal::IgbHal; +pub use igb::{IgbDevice, IgbNetBuf}; + +pub use memory::{alloc_pkt, MemPool, PhysAddr}; + +/// Vendor ID for Intel.... 
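+/// Used together with [`INTEL_82576`] to match the NIC during PCI bus probing.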
+pub const INTEL_VEND: u16 = 0x8086;
+
+/// Device ID for the 82576, used to identify the device from the PCI space.
+pub const INTEL_82576: u16 = 0x10C9;
+
+#[derive(Debug)]
+/// Error type for igb functions.
+pub enum IgbError {
+    /// Queue size is not aligned.
+    QueueNotAligned,
+    /// There are not enough descriptors available in the queue, try again later.
+    QueueFull,
+    /// No memory available.
+    NoMemory,
+    /// Allocated page not aligned.
+    PageNotAligned,
+    /// The device is not ready.
+    NotReady,
+    /// Invalid `queue_id`.
+    InvalidQueue,
+}
+
+/// Result type for igb functions.
+pub type IgbResult<T = ()> = Result<T, IgbError>;
+
+/// Used for implementing an ixy-style device driver, like ixgbe or virtio.
+pub trait NicDevice<H: IgbHal> {
+    /// Returns the driver's name.
+    fn get_driver_name(&self) -> &str;
+
+    /// Returns the layer 2 address of this device.
+    fn get_mac_addr(&self) -> [u8; 6];
+
+    /// Resets the network card's stats registers.
+    fn reset_stats(&mut self);
+
+    /// Returns the network card's link speed.
+    fn get_link_speed(&self) -> u16;
+
+    /// Polls the transmit queue for sent packets and frees their buffers.
+    fn recycle_tx_buffers(&mut self, queue_id: u16) -> IgbResult;
+
+    /// Receives at most `packet_nums` packets from the network. If there is
+    /// currently no data, returns an error of type [`IgbError::NotReady`];
+    /// otherwise returns the number of received packets. The closure `f` is
+    /// called on each packet to avoid extra dynamic memory allocations.
+    fn receive_packets<F>(&mut self, queue_id: u16, packet_nums: usize, f: F) -> IgbResult<usize>
+    where
+        F: FnMut(IgbNetBuf);
+
+    /// Sends an [`IgbNetBuf`] to the network. If the queue is currently full,
+    /// returns an error of type [`IgbError::QueueFull`].
+    fn send(&mut self, queue_id: u16, tx_buf: IgbNetBuf) -> IgbResult;
+
+    /// Whether the device can receive a packet.
+    fn can_receive(&self, queue_id: u16) -> IgbResult<bool>;
+
+    /// Whether the device can send a packet.
+    fn can_send(&self, queue_id: u16) -> IgbResult<bool>;
+}
+
+/// Holds network card stats about sent and received packets.
+#[allow(missing_docs)]
+#[derive(Default, Copy, Clone)]
+pub struct DeviceStats {
+    pub rx_pkts: u64,
+    pub tx_pkts: u64,
+    pub rx_bytes: u64,
+    pub tx_bytes: u64,
+}
+
+impl core::fmt::Display for DeviceStats {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(
+            f,
+            "rx_pkts: {}, tx_pkts: {}, rx_bytes: {}, tx_bytes: {}",
+            self.rx_pkts, self.tx_pkts, self.rx_bytes, self.tx_bytes
+        )
+    }
+}
diff --git a/igb_driver/src/memory.rs b/igb_driver/src/memory.rs
new file mode 100644
index 0000000000..f2104964f7
--- /dev/null
+++ b/igb_driver/src/memory.rs
@@ -0,0 +1,295 @@
+use core::fmt::Debug;
+use core::ops::{Deref, DerefMut};
+use core::ptr::NonNull;
+use core::{cell::RefCell, marker::PhantomData};
+
+use crate::hal::IgbHal;
+use crate::{IgbError, IgbResult};
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+use alloc::{fmt, slice};
+
+/// Physical address.
+pub type PhysAddr = usize;
+/// Virtual address.
+pub type VirtAddr = usize;
+
+const HUGE_PAGE_BITS: u32 = 21;
+const HUGE_PAGE_SIZE: usize = 1 << HUGE_PAGE_BITS;
+
+// This differs from upstream ixy, as our packet metadata is stored outside of
+// the actual packet data, which results in a different alignment requirement.
+pub const PACKET_HEADROOM: usize = 32;
+
+/// A memory pool to cache and accelerate buffer allocation.
+pub struct MemPool {
+    base_addr: *mut u8,
+    num_entries: usize,
+    entry_size: usize,
+    phys_addr: Vec<usize>,
+    pub(crate) free_stack: RefCell<Vec<usize>>,
+}
+
+impl MemPool {
+    /// Allocates a new `MemPool`.
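+    ///
+    /// `entries` is the number of fixed-size buffers in the pool and `size`
+    /// is the per-buffer size in bytes (`0` falls back to 2048); the entry
+    /// size must evenly divide the 2 MiB huge-page size.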
+ /// + /// # Panics + /// + /// Panics if `size` is not a divisor of the page size. + pub fn allocate(entries: usize, size: usize) -> IgbResult> { + let entry_size = match size { + 0 => 2048, + x => x, + }; + + if HUGE_PAGE_SIZE % entry_size != 0 { + error!("entry size must be a divisor of the page size"); + return Err(IgbError::PageNotAligned); + } + + let dma = Dma::::allocate(entries * entry_size, false)?; + let mut phys_addr = Vec::with_capacity(entries); + + for i in 0..entries { + phys_addr.push(unsafe { + H::mmio_virt_to_phys( + NonNull::new(dma.virt.add(i * entry_size)).unwrap(), + entry_size, + ) + }) + } + + let pool = MemPool { + base_addr: dma.virt, + num_entries: entries, + entry_size, + phys_addr, + free_stack: RefCell::new(Vec::with_capacity(entries)), + }; + + let pool = Arc::new(pool); + pool.free_stack.borrow_mut().extend(0..entries); + + Ok(pool) + } + + /// Returns the position of a free buffer in the memory pool, or [`None`] if the pool is empty. + pub(crate) fn alloc_buf(&self) -> Option { + self.free_stack.borrow_mut().pop() + } + + /// Marks a buffer in the memory pool as free. + pub(crate) fn free_buf(&self, id: usize) { + assert!( + id < self.num_entries, + "buffer outside of memory pool, id: {}", + id + ); + + let mut free_stack = self.free_stack.borrow_mut(); + if free_stack.iter().any(|&x| x == id) { + panic!("free buf: buffer already free"); + } + + free_stack.push(id); + } + + /// Return entry size. + pub fn entry_size(&self) -> usize { + self.entry_size + } + + /// Returns the virtual address of a buffer from the memory pool. + pub(crate) fn get_virt_addr(&self, id: usize) -> *mut u8 { + assert!( + id < self.num_entries, + "buffer outside of memory pool, id: {}", + id + ); + + unsafe { self.base_addr.add(id * self.entry_size) } + } + + /// Returns the physical address of a buffer from the memory pool. + pub fn get_phys_addr(&self, id: usize) -> usize { + self.phys_addr[id] + } +} + +pub struct Dma { + pub virt: *mut T, + pub phys: usize, + _marker: PhantomData, +} + +impl Dma { + pub fn allocate(size: usize, _require_contiguous: bool) -> IgbResult> { + // let size = if size % HUGE_PAGE_SIZE != 0 { + // ((size >> HUGE_PAGE_BITS) + 1) << HUGE_PAGE_BITS + // } else { + // size + // }; + // let size = if size < 0x1000 { 0x1000 } else { size }; + // let (pa, va) = H::dma_alloc(size / 0x1000, crate::BufferDirection::Both); + let (pa, va) = H::dma_alloc(size); + info!( + "allocated DMA memory @pa: {:#x}, va: {:#x}, size: {:#x}", + pa, + va.as_ptr() as usize, + size + ); + Ok(Dma:: { + virt: va.as_ptr() as *mut T, + phys: pa, + _marker: PhantomData, + }) + } +} + +pub struct Packet { + pub(crate) addr_virt: NonNull, + pub(crate) addr_phys: usize, + pub(crate) len: usize, + pub(crate) pool: Arc, + pub(crate) pool_entry: usize, +} + +impl Clone for Packet { + fn clone(&self) -> Self { + let mut p = alloc_pkt(&self.pool, self.len).expect("no buffer available"); + p.clone_from_slice(self); + + p + } +} + +impl Deref for Packet { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.addr_virt.as_ptr(), self.len) } + } +} + +impl DerefMut for Packet { + fn deref_mut(&mut self) -> &mut [u8] { + unsafe { slice::from_raw_parts_mut(self.addr_virt.as_ptr(), self.len) } + } +} + +impl Debug for Packet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (**self).fmt(f) + } +} + +impl Drop for Packet { + fn drop(&mut self) { + self.pool.free_buf(self.pool_entry); + } +} + +impl Packet { + /// Returns a new `Packet`. 
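+    ///
+    /// # Safety
+    ///
+    /// `addr_virt` must be non-null and valid for reads and writes of `len`
+    /// bytes, and `addr_phys`/`pool_entry` must refer to that same buffer in
+    /// `pool`.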
+ pub(crate) unsafe fn new( + addr_virt: *mut u8, + addr_phys: usize, + len: usize, + pool: Arc, + pool_entry: usize, + ) -> Packet { + Packet { + addr_virt: NonNull::new_unchecked(addr_virt), + addr_phys, + len, + pool, + pool_entry, + } + } + /// Returns the virtual address of the packet. + pub fn get_virt_addr(&self) -> *mut u8 { + self.addr_virt.as_ptr() + } + + /// Returns the physical address of the packet. + pub fn get_phys_addr(&self) -> usize { + self.addr_phys + } + + /// Returns all data in the buffer, not including header. + pub fn as_bytes(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.addr_virt.as_ptr(), self.len) } + } + + /// Returns all data in the buffer with the mutuable reference, + /// not including header. + pub fn as_mut_bytes(&mut self) -> &mut [u8] { + unsafe { slice::from_raw_parts_mut(self.addr_virt.as_ptr(), self.len) } + } + + /// Returns a mutable slice to the headroom of the pakcet. + /// + /// The `len` parameter controls how much of the headroom is returned. + /// + /// # Panics + /// + /// Panics if `len` is greater than [`PACKET_HEADROOM`] + pub fn headroom_mut(&mut self, len: usize) -> &mut [u8] { + assert!(len <= PACKET_HEADROOM); + unsafe { slice::from_raw_parts_mut(self.addr_virt.as_ptr().sub(len), len) } + } + + #[cfg(target_arch = "x86_64")] + #[inline(always)] + pub(crate) fn prefrtch(&self, hint: Prefetch) { + if core_detect::is_x86_feature_detected!("sse") { + let addr = self.get_virt_addr() as *const _; + unsafe { + use core::arch::x86_64; + match hint { + Prefetch::Time0 => x86_64::_mm_prefetch(addr, x86_64::_MM_HINT_T0), + Prefetch::Time1 => x86_64::_mm_prefetch(addr, x86_64::_MM_HINT_T1), + Prefetch::Time2 => x86_64::_mm_prefetch(addr, x86_64::_MM_HINT_T2), + Prefetch::NonTemporal => x86_64::_mm_prefetch(addr, x86_64::_MM_HINT_NTA), + } + } + } + } +} + +/// Returns a free packet from the `pool`, or [`None`] if the requested packet size exceeds the +/// maximum size for that pool or if the pool is empty. +pub fn alloc_pkt(pool: &Arc, size: usize) -> Option { + if size > pool.entry_size - PACKET_HEADROOM { + return None; + } + + pool.alloc_buf().map(|id| unsafe { + Packet::new( + pool.get_virt_addr(id).add(PACKET_HEADROOM), + pool.get_phys_addr(id) + PACKET_HEADROOM, + size, + Arc::clone(pool), + id, + ) + }) +} + +/// Common representation for prefetch strategies. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum Prefetch { + /// Corresponds to _MM_HINT_T0 on x86 sse. + Time0, + + /// Corresponds to _MM_HINT_T1 on x86 sse. + Time1, + + /// Corresponds to _MM_HINT_T2 on x86 sse. + Time2, + + /// Corresponds to _MM_HINT_NTA on x86 sse. + NonTemporal, +} + +unsafe impl Sync for MemPool {} +unsafe impl Send for MemPool {} diff --git a/igb_driver/src/net_igb.rs b/igb_driver/src/net_igb.rs new file mode 100644 index 0000000000..5aabc874a7 --- /dev/null +++ b/igb_driver/src/net_igb.rs @@ -0,0 +1,257 @@ +//! Common traits and types for network device (NIC) drivers. + +// #![no_std] +// #![feature(const_mut_refs)] +// #![feature(const_slice_from_raw_parts_mut)] + +use core::convert::From; +use core::{mem::ManuallyDrop, ptr::NonNull}; +use alloc::{collections::VecDeque, sync::Arc}; +use axdriver_base::{BaseDriverOps, DevError, DevResult, DeviceType}; +use crate::{IgbDevice, IgbError, IgbNetBuf, MemPool, NicDevice}; +// pub use crate::{IgbHal, PhysAddr, INTEL_82576, INTEL_VEND}; +pub use crate::IgbHal; + + +// pub use crate::net_buf::{NetBuf, NetBufBox, NetBufPool}; + +/// The ethernet address of the NIC (MAC address). 
diff --git a/igb_driver/src/net_igb.rs b/igb_driver/src/net_igb.rs
new file mode 100644
index 0000000000..5aabc874a7
--- /dev/null
+++ b/igb_driver/src/net_igb.rs
@@ -0,0 +1,257 @@
+//! Common traits and types for network device (NIC) drivers.
+
+use core::convert::From;
+use core::{mem::ManuallyDrop, ptr::NonNull};
+use alloc::{collections::VecDeque, sync::Arc};
+use axdriver_base::{BaseDriverOps, DevError, DevResult, DeviceType};
+use crate::{IgbDevice, IgbError, IgbNetBuf, MemPool, NicDevice};
+pub use crate::IgbHal;
+
+/// The ethernet address of the NIC (MAC address).
+pub struct EthernetAddress(pub [u8; 6]);
+
+/// Operations that a network device (NIC) driver must implement.
+pub trait NetDriverOps: BaseDriverOps {
+    /// The ethernet address of the NIC.
+    fn mac_address(&self) -> EthernetAddress;
+
+    /// Whether the NIC can transmit packets.
+    fn can_transmit(&self) -> bool;
+
+    /// Whether the NIC can receive packets.
+    fn can_receive(&self) -> bool;
+
+    /// Size of the receive queue.
+    fn rx_queue_size(&self) -> usize;
+
+    /// Size of the transmit queue.
+    fn tx_queue_size(&self) -> usize;
+
+    /// Gives back the `rx_buf` to the receive queue for later receiving.
+    ///
+    /// `rx_buf` should be the same as the one returned by
+    /// [`NetDriverOps::receive`].
+    fn recycle_rx_buffer(&mut self, rx_buf: NetBufPtr) -> DevResult;
+
+    /// Polls the transmit queue and gives back the buffers used by previous
+    /// transmissions; returns [`DevResult`].
+    fn recycle_tx_buffers(&mut self) -> DevResult;
+
+    /// Transmits a packet in the buffer to the network, without blocking;
+    /// returns [`DevResult`].
+    fn transmit(&mut self, tx_buf: NetBufPtr) -> DevResult;
+
+    /// Receives a packet from the network and returns its buffer,
+    /// removing it from the receive queue.
+    ///
+    /// Before receiving, the driver should have already populated some buffers
+    /// in the receive queue via [`NetDriverOps::recycle_rx_buffer`].
+    ///
+    /// If there are currently no incoming packets, returns an error of type
+    /// [`DevError::Again`].
+    fn receive(&mut self) -> DevResult<NetBufPtr>;
+
+    /// Allocates a memory buffer of the specified size for network
+    /// transmission; returns [`DevResult`].
+    fn alloc_tx_buffer(&mut self, size: usize) -> DevResult<NetBufPtr>;
+}
+
+/// A raw buffer struct for network device.
+pub struct NetBufPtr {
+    // The raw pointer of the original object.
+    raw_ptr: NonNull<u8>,
+    // The pointer to the net buffer.
+    buf_ptr: NonNull<u8>,
+    len: usize,
+}
+
+impl NetBufPtr {
+    /// Creates a new [`NetBufPtr`].
+    pub fn new(raw_ptr: NonNull<u8>, buf_ptr: NonNull<u8>, len: usize) -> Self {
+        Self {
+            raw_ptr,
+            buf_ptr,
+            len,
+        }
+    }
+
+    /// Returns the raw pointer of the original object.
+    pub fn raw_ptr<T>(&self) -> *mut T {
+        self.raw_ptr.as_ptr() as *mut T
+    }
+
+    /// Returns the buffer length.
+    pub fn packet_len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns the buffer as a `&[u8]`.
+    pub fn packet(&self) -> &[u8] {
+        unsafe { core::slice::from_raw_parts(self.buf_ptr.as_ptr() as *const u8, self.len) }
+    }
+
+    /// Returns the buffer as a `&mut [u8]`.
+    pub fn packet_mut(&mut self) -> &mut [u8] {
+        unsafe { core::slice::from_raw_parts_mut(self.buf_ptr.as_ptr(), self.len) }
+    }
+}
+
+const RECV_BATCH_SIZE: usize = 64;
+const RX_BUFFER_SIZE: usize = 1024;
+const MEM_POOL: usize = 4096; // number of buffers in the memory pool
+const MEM_POOL_ENTRY_SIZE: usize = 2048; // bytes per buffer
+
+/// The igb NIC device driver.
+///
+/// `QS` is the igb queue size, `QN` is the igb queue number.
+pub struct IgbNic<H: IgbHal, const QS: usize, const QN: u16> {
+    inner: IgbDevice<H, QS>,
+    mem_pool: Arc<MemPool>,
+    rx_buffer_queue: VecDeque<NetBufPtr>,
+}
+
+unsafe impl<H: IgbHal, const QS: usize, const QN: u16> Sync for IgbNic<H, QS, QN> {}
+unsafe impl<H: IgbHal, const QS: usize, const QN: u16> Send for IgbNic<H, QS, QN> {}
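// A sketch of the RX/TX housekeeping order the trait above is designed for;
// it works against any `NetDriverOps` implementation, and `handle_frame` is
// a hypothetical caller-supplied packet handler.
fn poll_once<D: NetDriverOps>(dev: &mut D, handle_frame: &mut dyn FnMut(&[u8])) -> DevResult {
    dev.recycle_tx_buffers()?; // reclaim descriptors of completed sends first
    loop {
        match dev.receive() {
            Ok(buf) => {
                handle_frame(buf.packet());
                dev.recycle_rx_buffer(buf)?; // hand the buffer back to the RX ring
            }
            Err(DevError::Again) => return Ok(()), // no packet pending
            Err(e) => return Err(e),
        }
    }
}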
+impl<H: IgbHal, const QS: usize, const QN: u16> IgbNic<H, QS, QN> {
+    /// Creates an igb NIC instance and initializes it, or returns an error
+    /// if any step fails.
+    pub fn init(base: usize, len: usize) -> DevResult<Self> {
+        let mem_pool = MemPool::allocate::<H>(MEM_POOL, MEM_POOL_ENTRY_SIZE)
+            .map_err(|_| DevError::NoMemory)?;
+        let inner = IgbDevice::<H, QS>::init(base, len, QN, QN, &mem_pool).map_err(|err| {
+            log::error!("Failed to initialize igb device: {:?}", err);
+            DevError::BadState
+        })?;
+
+        let rx_buffer_queue = VecDeque::with_capacity(RX_BUFFER_SIZE);
+        Ok(Self {
+            inner,
+            mem_pool,
+            rx_buffer_queue,
+        })
+    }
+}
+
+impl<H: IgbHal, const QS: usize, const QN: u16> BaseDriverOps for IgbNic<H, QS, QN> {
+    fn device_name(&self) -> &str {
+        self.inner.get_driver_name()
+    }
+
+    fn device_type(&self) -> DeviceType {
+        DeviceType::Net
+    }
+}
+
+impl<H: IgbHal, const QS: usize, const QN: u16> NetDriverOps for IgbNic<H, QS, QN> {
+    fn mac_address(&self) -> EthernetAddress {
+        EthernetAddress(self.inner.get_mac_addr())
+    }
+
+    fn rx_queue_size(&self) -> usize {
+        QS
+    }
+
+    fn tx_queue_size(&self) -> usize {
+        QS
+    }
+
+    fn can_receive(&self) -> bool {
+        !self.rx_buffer_queue.is_empty() || self.inner.can_receive(0).unwrap()
+    }
+
+    fn can_transmit(&self) -> bool {
+        // Ask the device whether queue 0 has a free TX descriptor.
+        self.inner.can_send(0).unwrap()
+    }
+
+    fn recycle_rx_buffer(&mut self, rx_buf: NetBufPtr) -> DevResult {
+        let rx_buf = igb_ptr_to_buf(rx_buf, &self.mem_pool)?;
+        drop(rx_buf);
+        Ok(())
+    }
+
+    fn recycle_tx_buffers(&mut self) -> DevResult {
+        self.inner
+            .recycle_tx_buffers(0)
+            .map_err(|_| DevError::BadState)?;
+        Ok(())
+    }
+
+    fn receive(&mut self) -> DevResult<NetBufPtr> {
+        if !self.can_receive() {
+            return Err(DevError::Again);
+        }
+        if !self.rx_buffer_queue.is_empty() {
+            // The RX buffer queue already holds received packets.
+            Ok(self.rx_buffer_queue.pop_front().unwrap())
+        } else {
+            let f = |rx_buf| {
+                let rx_buf = NetBufPtr::from(rx_buf);
+                self.rx_buffer_queue.push_back(rx_buf);
+            };
+
+            // The RX buffer queue is empty; receive a batch from the igb NIC.
+            match self.inner.receive_packets(0, RECV_BATCH_SIZE, f) {
+                Ok(recv_nums) => {
+                    if recv_nums == 0 {
+                        // `can_receive` reported pending packets, so this is unreachable.
+                        panic!("Error: no packets received.")
+                    } else {
+                        Ok(self.rx_buffer_queue.pop_front().unwrap())
+                    }
+                }
+                Err(e) => match e {
+                    IgbError::NotReady => Err(DevError::Again),
+                    _ => Err(DevError::BadState),
+                },
+            }
+        }
+    }
+
+    fn transmit(&mut self, tx_buf: NetBufPtr) -> DevResult {
+        let tx_buf = igb_ptr_to_buf(tx_buf, &self.mem_pool)?;
+        match self.inner.send(0, tx_buf) {
+            Ok(_) => Ok(()),
+            Err(err) => match err {
+                IgbError::QueueFull => Err(DevError::Again),
+                _ => panic!("Unexpected err: {:?}", err),
+            },
+        }
+    }
+
+    fn alloc_tx_buffer(&mut self, size: usize) -> DevResult<NetBufPtr> {
+        let tx_buf = IgbNetBuf::alloc(&self.mem_pool, size).map_err(|_| DevError::NoMemory)?;
+        Ok(NetBufPtr::from(tx_buf))
+    }
+}
+
+impl From<IgbNetBuf> for NetBufPtr {
+    fn from(buf: IgbNetBuf) -> Self {
+        // Wrap in `ManuallyDrop` so the buffer is not freed back to the pool here.
+        let mut buf = ManuallyDrop::new(buf);
+        // `raw_ptr` carries the pool entry index, `buf_ptr` the packet data pointer,
+        // and `len` the packet length, avoiding extra dynamic memory allocation.
+        let buf_ptr = buf.packet_mut().as_mut_ptr();
+        Self::new(
+            NonNull::new(buf.pool_entry() as *mut u8).unwrap(),
+            NonNull::new(buf_ptr).unwrap(),
+            buf.packet_len(),
+        )
+    }
+}
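// A sketch of the TX round trip, assuming an already-initialized `nic`.
// The pool-entry index rides inside `NetBufPtr::raw_ptr`, so rebuilding the
// `IgbNetBuf` on the other side needs no allocation.
fn send_one<H: IgbHal, const QS: usize, const QN: u16>(
    nic: &mut IgbNic<H, QS, QN>,
    frame: &[u8],
) -> DevResult {
    let mut buf = nic.alloc_tx_buffer(frame.len())?; // IgbNetBuf -> NetBufPtr
    buf.packet_mut().copy_from_slice(frame);         // fill the payload
    nic.transmit(buf)                                // NetBufPtr -> IgbNetBuf -> TX ring
}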
+// Converts a `NetBufPtr` back into an `IgbNetBuf`.
+fn igb_ptr_to_buf(ptr: NetBufPtr, pool: &Arc<MemPool>) -> DevResult<IgbNetBuf> {
+    IgbNetBuf::construct(ptr.raw_ptr.as_ptr() as usize, pool, ptr.len)
+        .map_err(|_| DevError::BadState)
+}
diff --git a/modules/axdriver/Cargo.toml b/modules/axdriver/Cargo.toml
index 117105a672..a0f98151b3 100644
--- a/modules/axdriver/Cargo.toml
+++ b/modules/axdriver/Cargo.toml
@@ -27,6 +27,7 @@ virtio-gpu = ["display", "virtio", "axdriver_virtio/gpu"]
 ramdisk = ["block", "axdriver_block/ramdisk"]
 bcm2835-sdhci = ["block", "axdriver_block/bcm2835-sdhci"]
 ixgbe = ["net", "axdriver_net/ixgbe", "dep:axalloc", "dep:axhal", "dep:axdma"]
+igb = ["net", "dep:axalloc", "dep:axhal", "dep:axdma", "dep:igb_driver"]
 # more devices example: e1000 = ["net", "axdriver_net/e1000"]
 
 default = ["bus-pci"]
@@ -43,4 +44,5 @@ axdriver_virtio = { git = "https://github.com/arceos-org/axdriver_crates.git", t
 axalloc = { workspace = true, optional = true }
 axhal = { workspace = true, optional = true }
 axconfig = { workspace = true, optional = true }
-axdma = { workspace = true, optional = true }
\ No newline at end of file
+axdma = { workspace = true, optional = true }
+igb_driver = { path = "../../igb_driver", optional = true }
\ No newline at end of file
diff --git a/modules/axdriver/build.rs b/modules/axdriver/build.rs
index 8d5fd0710a..4d259fc013 100644
--- a/modules/axdriver/build.rs
+++ b/modules/axdriver/build.rs
@@ -1,4 +1,4 @@
-const NET_DEV_FEATURES: &[&str] = &["ixgbe", "virtio-net"];
+const NET_DEV_FEATURES: &[&str] = &["ixgbe", "igb", "virtio-net"];
 const BLOCK_DEV_FEATURES: &[&str] = &["ramdisk", "bcm2835-sdhci", "virtio-blk"];
 const DISPLAY_DEV_FEATURES: &[&str] = &["virtio-gpu"];
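The next hunk gates the probe code behind `#[cfg(net_dev = "igb")]`. That cfg does not come from Cargo directly: the build script maps each enabled device feature onto a `net_dev` cfg that `drivers.rs` can match on. A hedged sketch of the mechanism; the real modules/axdriver/build.rs is more general, and this only illustrates the idea:

// Map enabled Cargo features to `net_dev = "..."` cfgs. Cargo exposes an
// enabled feature `igb` to build scripts as the env var CARGO_FEATURE_IGB.
fn emit_net_dev_cfgs() {
    const NET_DEV_FEATURES: &[&str] = &["ixgbe", "igb", "virtio-net"];
    for feat in NET_DEV_FEATURES {
        let env_key = format!("CARGO_FEATURE_{}", feat.replace('-', "_").to_uppercase());
        if std::env::var(env_key).is_ok() {
            println!("cargo:rustc-cfg=net_dev=\"{}\"", feat);
        }
    }
}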
diff --git a/modules/axdriver/src/drivers.rs b/modules/axdriver/src/drivers.rs
index 4c9d467d5c..ae5fd958d6 100644
--- a/modules/axdriver/src/drivers.rs
+++ b/modules/axdriver/src/drivers.rs
@@ -128,3 +128,51 @@ cfg_if::cfg_if! {
         }
     }
 }
+
+cfg_if::cfg_if! {
+    if #[cfg(net_dev = "igb")] {
+        use crate::igb::IgbHalImpl;
+        use axhal::mem::phys_to_virt;
+        pub struct IgbDriver;
+        register_net_driver!(IgbDriver, igb_driver::net_igb::IgbNic<IgbHalImpl, 1024, 1>);
+        impl DriverProbe for IgbDriver {
+            #[cfg(bus = "pci")]
+            fn probe_pci(
+                root: &mut axdriver_pci::PciRoot,
+                bdf: axdriver_pci::DeviceFunction,
+                dev_info: &axdriver_pci::DeviceFunctionInfo,
+            ) -> Option<AxDeviceEnum> {
+                use igb_driver::{INTEL_82576, INTEL_VEND, net_igb::IgbNic};
+                // Match the 82576 as well as 0x1533, the Intel I210.
+                if dev_info.vendor_id == INTEL_VEND &&
+                    (dev_info.device_id == 0x1533 || dev_info.device_id == INTEL_82576) {
+                    info!("igb PCI device found at {:?}", bdf);
+                    // Initialize the device. The queue number and size can be
+                    // changed according to the requirements of the igb init function.
+                    const QN: u16 = 1;
+                    const QS: usize = 1024;
+                    let bar_info = root.bar_info(bdf, 0).unwrap();
+                    match bar_info {
+                        axdriver_pci::BarInfo::Memory {
+                            address,
+                            size,
+                            ..
+                        } => {
+                            let igb_nic = IgbNic::<IgbHalImpl, QS, QN>::init(
+                                phys_to_virt((address as usize).into()).into(),
+                                size as usize
+                            )
+                            .expect("failed to initialize igb device");
+                            return Some(AxDeviceEnum::from_net(igb_nic));
+                        }
+                        axdriver_pci::BarInfo::IO { .. } => {
+                            error!("igb: BAR0 is of I/O type");
+                            return None;
+                        }
+                    }
+                }
+                None
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/axdriver/src/igb.rs b/modules/axdriver/src/igb.rs
new file mode 100644
index 0000000000..54d2cb9c75
--- /dev/null
+++ b/modules/axdriver/src/igb.rs
@@ -0,0 +1,41 @@
+use axdma::{alloc_coherent, dealloc_coherent, BusAddr, DMAInfo};
+use axhal::mem::{phys_to_virt, virt_to_phys};
+use core::{alloc::Layout, ptr::NonNull};
+use igb_driver::{IgbHal, PhysAddr as IgbPhysAddr};
+
+pub struct IgbHalImpl;
+
+unsafe impl IgbHal for IgbHalImpl {
+    fn dma_alloc(size: usize) -> (IgbPhysAddr, NonNull<u8>) {
+        let layout = Layout::from_size_align(size, 8).unwrap();
+        match unsafe { alloc_coherent(layout) } {
+            Ok(dma_info) => (dma_info.bus_addr.as_u64() as usize, dma_info.cpu_addr),
+            Err(_) => (0, NonNull::dangling()),
+        }
+    }
+
+    unsafe fn dma_dealloc(paddr: IgbPhysAddr, vaddr: NonNull<u8>, size: usize) -> i32 {
+        let layout = Layout::from_size_align(size, 8).unwrap();
+        let dma_info = DMAInfo {
+            cpu_addr: vaddr,
+            bus_addr: BusAddr::from(paddr as u64),
+        };
+        unsafe { dealloc_coherent(dma_info, layout) };
+        0
+    }
+
+    unsafe fn mmio_phys_to_virt(paddr: IgbPhysAddr, _size: usize) -> NonNull<u8> {
+        NonNull::new(phys_to_virt(paddr.into()).as_mut_ptr()).unwrap()
+    }
+
+    unsafe fn mmio_virt_to_phys(vaddr: NonNull<u8>, _size: usize) -> IgbPhysAddr {
+        virt_to_phys((vaddr.as_ptr() as usize).into()).into()
+    }
+
+    fn wait_until(duration: core::time::Duration) -> Result<(), &'static str> {
+        axhal::time::busy_wait_until(duration);
+        Ok(())
+    }
+}
\ No newline at end of file
diff --git a/modules/axdriver/src/lib.rs b/modules/axdriver/src/lib.rs
index 659d95cdef..5fc3a9e4a6 100644
--- a/modules/axdriver/src/lib.rs
+++ b/modules/axdriver/src/lib.rs
@@ -78,6 +78,9 @@ mod virtio;
 #[cfg(feature = "ixgbe")]
 mod ixgbe;
 
+#[cfg(feature = "igb")]
+mod igb;
+
 pub mod prelude;
 
 #[allow(unused_imports)]
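`IgbHalImpl` above pins down the `IgbHal` surface: coherent DMA allocation, MMIO address translation, and a busy-wait timer. For exercising driver logic off-hardware, the same trait can be backed by plain heap allocations. A sketch, assuming `PhysAddr` is `usize` (as the impl above suggests) and a hosted `std` environment; the identity "physical" mapping is only valid off-hardware:

use core::{alloc::Layout, ptr::NonNull};
use igb_driver::IgbHal;

pub struct MockHal;

unsafe impl IgbHal for MockHal {
    fn dma_alloc(size: usize) -> (usize, NonNull<u8>) {
        let layout = Layout::from_size_align(size, 8).unwrap();
        // Zeroed heap memory stands in for a coherent DMA region.
        let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
        let va = NonNull::new(ptr).expect("allocation failed");
        (va.as_ptr() as usize, va) // pretend pa == va (host-only!)
    }

    unsafe fn dma_dealloc(_paddr: usize, vaddr: NonNull<u8>, size: usize) -> i32 {
        let layout = Layout::from_size_align(size, 8).unwrap();
        unsafe { std::alloc::dealloc(vaddr.as_ptr(), layout) };
        0
    }

    unsafe fn mmio_phys_to_virt(paddr: usize, _size: usize) -> NonNull<u8> {
        NonNull::new(paddr as *mut u8).unwrap()
    }

    unsafe fn mmio_virt_to_phys(vaddr: NonNull<u8>, _size: usize) -> usize {
        vaddr.as_ptr() as usize
    }

    fn wait_until(duration: core::time::Duration) -> Result<(), &'static str> {
        std::thread::sleep(duration); // host stand-in for axhal's busy-wait
        Ok(())
    }
}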
diff --git a/modules/axdriver/src/macros.rs b/modules/axdriver/src/macros.rs
index b90e813e70..142a4ff65d 100644
--- a/modules/axdriver/src/macros.rs
+++ b/modules/axdriver/src/macros.rs
@@ -64,5 +64,10 @@ macro_rules! for_each_drivers {
             type $drv_type = crate::drivers::IxgbeDriver;
             $code
         }
+        #[cfg(net_dev = "igb")]
+        {
+            type $drv_type = crate::drivers::IgbDriver;
+            $code
+        }
     }};
 }
diff --git a/modules/axdriver/src/prelude.rs b/modules/axdriver/src/prelude.rs
index 11e960feeb..f244a98c9d 100644
--- a/modules/axdriver/src/prelude.rs
+++ b/modules/axdriver/src/prelude.rs
@@ -7,4 +7,8 @@ pub use {crate::structs::AxBlockDevice, axdriver_block::BlockDriverOps};
 #[cfg(feature = "display")]
 pub use {crate::structs::AxDisplayDevice, axdriver_display::DisplayDriverOps};
 #[cfg(feature = "net")]
-pub use {crate::structs::AxNetDevice, axdriver_net::NetDriverOps};
+#[cfg(not(feature = "igb"))]
+pub use {crate::structs::AxNetDevice, axdriver_net::{NetDriverOps, NetBufPtr}};
+
+#[cfg(feature = "igb")]
+pub use {crate::structs::AxNetDevice, igb_driver::net_igb::{NetDriverOps, NetBufPtr}};
diff --git a/modules/axnet/src/smoltcp_impl/mod.rs b/modules/axnet/src/smoltcp_impl/mod.rs
index fdfa5bed53..7ecc171db3 100644
--- a/modules/axnet/src/smoltcp_impl/mod.rs
+++ b/modules/axnet/src/smoltcp_impl/mod.rs
@@ -10,7 +10,7 @@ use core::cell::RefCell;
 use core::ops::DerefMut;
 
 use axdriver::prelude::*;
-use axdriver_net::{DevError, NetBufPtr};
+// use axdriver_net::{DevError, NetBufPtr}; // now re-exported via the prelude
 use axhal::time::{NANOS_PER_MICROS, wall_time_nanos};
 use axsync::Mutex;
 use lazyinit::LazyInit;
diff --git a/scripts/make/qemu.mk b/scripts/make/qemu.mk
index b7b8db6cb1..72c23cab5d 100644
--- a/scripts/make/qemu.mk
+++ b/scripts/make/qemu.mk
@@ -44,7 +44,8 @@ qemu_args-$(BLK) += \
   -drive id=disk0,if=none,format=raw,file=$(DISK_IMG)
 
 qemu_args-$(NET) += \
-  -device virtio-net-$(vdev-suffix),netdev=net0
+  -device igb,netdev=net0
+  #-device virtio-net-$(vdev-suffix),netdev=net0
 
 ifeq ($(NET_DEV), user)
 qemu_args-$(NET) += -netdev user,id=net0,hostfwd=tcp::5555-:5555,hostfwd=udp::5555-:5555
diff --git a/ulib/axstd/Cargo.toml b/ulib/axstd/Cargo.toml
index 4adb9125b0..d3aecdce60 100644
--- a/ulib/axstd/Cargo.toml
+++ b/ulib/axstd/Cargo.toml
@@ -65,6 +65,7 @@ bus-pci = ["axfeat/bus-pci"]
 driver-ramdisk = ["axfeat/driver-ramdisk"]
 driver-ixgbe = ["axfeat/driver-ixgbe"]
 driver-bcm2835-sdhci = ["axfeat/driver-bcm2835-sdhci"]
+driver-igb = ["axfeat/driver-igb"]
 
 # Logging
 log-level-off = ["axfeat/log-level-off"]

From 4b50f321128151985ad08fecbc0f4d046d67e508 Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 7 Jan 2025 14:31:38 +0800
Subject: [PATCH 2/2] config x86 oslab

---
 configs/platforms/x86_64-pc-oslab.toml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/configs/platforms/x86_64-pc-oslab.toml b/configs/platforms/x86_64-pc-oslab.toml
index dc95b4c090..017ac8d6b3 100644
--- a/configs/platforms/x86_64-pc-oslab.toml
+++ b/configs/platforms/x86_64-pc-oslab.toml
@@ -38,13 +38,14 @@ mmio-regions = [
     [0xfec0_0000, 0x1000],      # IO APIC
     [0xfed0_0000, 0x1000],      # HPET
     [0xfee0_0000, 0x1000],      # Local APIC
-    [0xf000_0000, 0x0800_0000], # PCI config space
+    [0xf800_0000, 0x0800_0000], # PCI config space
     [0xfcd8_0000, 0x0008_0000], # Ixgbe BAR0
+    [0xdc20_0000, 0x0010_0000], # Igb BAR0
 ]                               # [(uint, uint)]
 # VirtIO MMIO regions with format (`base_paddr`, `size`).
 virtio-mmio-regions = []        # [(uint, uint)]
 # Base physical address of the PCIe ECAM space (should read from ACPI 'MCFG' table).
-pci-ecam-base = 0xf000_0000     # uint
+pci-ecam-base = 0xf800_0000     # uint
 # End PCI bus number.
 pci-bus-end = 0x7f              # uint
 # PCI device memory ranges (not used on x86).