From b013d09683e7ab8b7330d1a14552408fbaa8eca1 Mon Sep 17 00:00:00 2001 From: Han Date: Wed, 29 Nov 2017 15:17:14 -0500 Subject: [PATCH] feat(baremetal NIC): initial push for baremetal NIC Only works on Intel 82599 family based NICs --- src/IOBuf.h | 3 +- src/native/GeneralPurposeAllocator.h | 20 + src/native/Ixgbe.h | 387 +++++ src/native/IxgbeDriver.cc | 2001 ++++++++++++++++++++++++++ src/native/IxgbeDriver.h | 473 ++++++ src/native/Main.cc | 15 + src/native/Msr.h | 11 + src/native/Net.cc | 5 +- src/native/Net.h | 18 +- src/native/NetIcmp.cc | 16 +- src/native/NetIp.cc | 29 +- src/native/NetTcp.cc | 42 +- src/native/NetUdp.cc | 23 +- src/native/Pci.cc | 59 +- src/native/Pci.h | 5 + src/native/VirtioNet.cc | 2 +- src/native/config.cmake | 1 + src/native/config.h.in | 1 + 18 files changed, 3074 insertions(+), 37 deletions(-) create mode 100644 src/native/Ixgbe.h create mode 100644 src/native/IxgbeDriver.cc create mode 100644 src/native/IxgbeDriver.h diff --git a/src/IOBuf.h b/src/IOBuf.h index a405027b..c430f1a3 100644 --- a/src/IOBuf.h +++ b/src/IOBuf.h @@ -7,10 +7,10 @@ #include #include +#include #include #include #include -#include #include @@ -64,6 +64,7 @@ class IOBuf { } void TrimEnd(size_t amount) { length_ -= amount; } + void SetLength(size_t amount) { length_ = amount; } bool IsChained() const { return Next() != this; } diff --git a/src/native/GeneralPurposeAllocator.h b/src/native/GeneralPurposeAllocator.h index 12c91d97..b51869c6 100644 --- a/src/native/GeneralPurposeAllocator.h +++ b/src/native/GeneralPurposeAllocator.h @@ -16,6 +16,26 @@ namespace ebbrt { +// handler used in Pci.cc code to handle faults on multicores when mapping +// device +class MulticorePciFaultHandler : public ebbrt::VMemAllocator::PageFaultHandler { + ebbrt::Pfn vpage_; + ebbrt::Pfn ppage_; + size_t size_; + + public: + void SetMap(ebbrt::Pfn va, ebbrt::Pfn pa, size_t s) { + vpage_ = va; + ppage_ = pa; + size_ = s; + } + + void HandleFault(ebbrt::idt::ExceptionFrame* ef, + 
uintptr_t faulted_address) override { + ebbrt::vmem::MapMemory(vpage_, ppage_, size_); + } +}; + // page fault handler for mapping in physical pages // to virtual pages on all cores class LargeRegionFaultHandler : public ebbrt::VMemAllocator::PageFaultHandler { diff --git a/src/native/Ixgbe.h b/src/native/Ixgbe.h new file mode 100644 index 00000000..1a966ec1 --- /dev/null +++ b/src/native/Ixgbe.h @@ -0,0 +1,387 @@ +#ifndef BAREMETAL_SRC_INCLUDE_EBBRT_IXGBE_H_ +#define BAREMETAL_SRC_INCLUDE_EBBRT_IXGBE_H_ + +// from https://github.com/cisco-open-source/ethtool/ixgbe.c + +/* Register Bit Masks */ +#define IXGBE_FCTRL_SBP 0x00000002 +#define IXGBE_FCTRL_MPE 0x00000100 +#define IXGBE_FCTRL_UPE 0x00000200 +#define IXGBE_FCTRL_BAM 0x00000400 +#define IXGBE_FCTRL_PMCF 0x00001000 +#define IXGBE_FCTRL_DPF 0x00002000 +#define IXGBE_FCTRL_RPFCE 0x00004000 +#define IXGBE_FCTRL_RFCE 0x00008000 +#define IXGBE_VLNCTRL_VET 0x0000FFFF +#define IXGBE_VLNCTRL_CFI 0x10000000 +#define IXGBE_VLNCTRL_CFIEN 0x20000000 +#define IXGBE_VLNCTRL_VFE 0x40000000 +#define IXGBE_VLNCTRL_VME 0x80000000 +#define IXGBE_LINKS_UP 0x40000000 +#define IXGBE_LINKS_SPEED 0x20000000 +#define IXGBE_SRRCTL_BSIZEPKT_MASK 0x0000007F +#define IXGBE_HLREG0_TXCRCEN 0x00000001 +#define IXGBE_HLREG0_RXCRCSTRP 0x00000002 +#define IXGBE_HLREG0_JUMBOEN 0x00000004 +#define IXGBE_HLREG0_TXPADEN 0x00000400 +#define IXGBE_HLREG0_LPBK 0x00008000 +#define IXGBE_RMCS_TFCE_802_3X 0x00000008 +#define IXGBE_RMCS_TFCE_PRIORITY 0x00000010 +#define IXGBE_FCCFG_TFCE_802_3X 0x00000008 +#define IXGBE_FCCFG_TFCE_PRIORITY 0x00000010 +#define IXGBE_MFLCN_PMCF 0x00000001 /* Pass MAC Control Frames */ +#define IXGBE_MFLCN_DPF 0x00000002 /* Discard Pause Frame */ +#define IXGBE_MFLCN_RPFCE 0x00000004 /* Receive Priority FC Enable */ +#define IXGBE_MFLCN_RFCE 0x00000008 /* Receive FC Enable */ + +enum l4_type { l4_type_udp = 0, l4_type_tcp, l4_type_sctp, l4_type_rsv }; + +#define ETHHDR_LEN 14 +#define IPHDR_LEN 20 +#define UDPHDR_LEN 8 + 
+#define RXFLAG_IPCS (1 << 0) +#define RXFLAG_IPCS_VALID (1 << 1) +#define RXFLAG_L4CS (1 << 2) +#define RXFLAG_L4CS_VALID (1 << 3) + +/*********************** + * RX + * Descriptors + **********************/ +// 7.1.5 Legacy Receive Descriptor, Table 7 - 11 +typedef union { + + uint64_t raw[2]; + + struct { + uint64_t buffer_address; + + union { + uint64_t word2_raw; + + struct { + uint64_t length : 16; + uint64_t fragment_checksum : 16; + + // uint64_t status : 8; + uint64_t dd : 1; + uint64_t eop : 1; + uint64_t rsvd1 : 1; + uint64_t vp : 1; + uint64_t udpcs : 1; + uint64_t l4cs : 1; + uint64_t ipcs : 1; + uint64_t pif : 1; + + // uint64_t errors : 8; + uint64_t rxe : 1; + uint64_t rsvd2 : 1; + uint64_t rsvd3 : 1; + uint64_t rsvd4 : 1; + uint64_t rsvd5 : 1; + uint64_t rsvd6 : 1; + uint64_t tcpe : 1; + uint64_t ipe : 1; + + uint64_t vlan_tag : 16; + }; // struct + + }; // union + + } __attribute__((packed)); // struct + +} rdesc_legacy_t; // typedef union + +// 7.1.6.1 Advanced Receive Descriptors Read Format +typedef union { + uint64_t raw[2]; + + struct { + uint64_t packet_buffer; + uint64_t header_buffer; + } __attribute__((packed)); // struct +} rdesc_adv_rf_t; + +// 7.1.6.2 Advanced Receive Descriptors — Write-Back Format +typedef union { + uint64_t raw[2]; + struct { + union { + uint32_t raw32_1; + struct { + uint32_t rss_type : 4; + + // packet type + uint32_t pt_ipv4 : 1; + uint32_t pt_ipv4e : 1; + uint32_t pt_ipv6 : 1; + uint32_t pt_ipv6e : 1; + uint32_t pt_tcp : 1; + uint32_t pt_udp : 1; + uint32_t pt_sctp : 1; + uint32_t pt_nfs : 1; + uint32_t pt_isesp : 1; + uint32_t pt_isah : 1; + uint32_t pt_linksec : 1; + uint32_t pt_l2packet : 1; + uint32_t pt_rsvd : 1; + + uint32_t rsccnt : 4; + uint32_t hdr_len : 10; + uint32_t sph : 1; + }; + }; // union raw32_1 + + union { + uint32_t raw32_2; + uint32_t rss_hash; + uint32_t fragment_checksum; + uint32_t rtt; + uint32_t fcoe_param; + uint32_t flow_directors_filters_id; // may need more, page 317 + }; // union 
raw32_2 + + union { + uint32_t raw32_3; + + struct { + // extended status + uint32_t dd : 1; + uint32_t eop : 1; + uint32_t flm : 1; + uint32_t vp : 1; + + // fcstat - 2 bits + uint32_t udpcs : 1; + uint32_t l4i : 1; + + uint32_t ipcs : 1; + uint32_t pif : 1; + uint32_t rsvd_1 : 1; + uint32_t vext : 1; + uint32_t udpv : 1; + uint32_t llint : 1; + uint32_t rsvd_2 : 4; + uint32_t ts : 1; + uint32_t secp : 1; + uint32_t lb : 1; + uint32_t rsvd_3 : 1; + + // extended error + uint32_t fdierr : 3; + uint32_t hbo : 1; + uint32_t rsvd : 3; + uint32_t secerr : 2; + uint32_t rxe : 1; + uint32_t l4e : 1; + uint32_t ipe : 1; + }; // status_last_descriptor; + + struct { + // extended status + uint32_t dd2 : 1; + uint32_t eop2 : 1; + uint32_t rsvd_4 : 2; + uint32_t next_descriptor_ptr : 16; + + // extended error + uint32_t error : 12; + }; // status_non_last_descriptor; + }; // union raw32_3 + + union { + uint32_t raw32_4; + struct { + uint32_t pkt_len : 16; + uint32_t vlan_tag : 16; + }; + }; // union raw32_4 + + } __attribute__((packed)); // struct +} rdesc_adv_wb_t; + +/*********************** + * TX + * Descriptors + **********************/ +// 7.2.3.2.2 Legacy Transmit Descriptor Format +typedef union { + uint64_t raw[2]; + + struct { + uint64_t buffer_address; + + union { + uint64_t word2_raw; + + struct { + uint64_t length : 16; + uint64_t cso : 8; + + // cmd + uint64_t eop : 1; + uint64_t ifcs : 1; + uint64_t ic : 1; + uint64_t rs : 1; + uint64_t rsvd_1 : 1; + uint64_t dext : 1; + uint64_t vle : 1; + uint64_t rsvd_2 : 1; + + // sta + uint64_t dd : 1; + uint64_t rsvd_3 : 3; + + uint64_t rsvd_4 : 4; + uint64_t css : 8; + uint64_t vlan : 16; + }; + }; + + } __attribute__((packed)); +} tdesc_legacy_t; + +// 7.2.3.2.3 Advanced Transmit Context Descriptor +typedef union { + uint64_t raw[2]; + + struct { + union { + uint64_t raw_1; + + struct { + uint64_t iplen : 9; + uint64_t maclen : 7; + uint64_t vlan : 16; + uint64_t ipsec_sa_index : 10; + uint64_t fcoef : 6; + uint64_t 
rsvd_1 : 16; + }; + }; + + union { + uint64_t raw_2; + + struct { + uint64_t ipsec_esp_len : 9; + + // tucmd + uint64_t snap : 1; + uint64_t ipv4 : 1; + uint64_t l4t : 2; // l4 packet type + uint64_t ipsec_type : 1; + uint64_t encyption : 1; + uint64_t fcoe : 1; + uint64_t rsvd_2 : 4; + + uint64_t dytp : 4; + uint64_t rsvd_3 : 5; + uint64_t dext : 1; + + uint64_t bcntlen : 6; + uint64_t idx : 1; + uint64_t rsvd_4 : 3; + uint64_t l4len : 8; + uint64_t mss : 16; + }; + }; + + } __attribute__((packed)); + +} tdesc_advance_ctxt_wb_t; + +// 7.2.3.2.4 Advanced Transmit Data Descriptor - Read Format +typedef union { + uint64_t raw[2]; + + struct { + uint64_t address; + + union { + uint64_t raw2; + struct { + uint64_t dtalen : 16; + uint64_t rsvd_1 : 2; + + // mac + uint64_t mac_ilsec : 1; + uint64_t mac_1588 : 1; + + uint64_t dtyp : 4; + + // dcmd + uint64_t eop : 1; + uint64_t ifcs : 1; + uint64_t rsvd_2 : 1; + uint64_t rs : 1; + uint64_t rsvd_3 : 1; + uint64_t dext : 1; + uint64_t vle : 1; + uint64_t tse : 1; + + // status + uint64_t dd : 1; + uint64_t rsvd_4 : 3; + + // idx + uint64_t idx : 3; + // uint64_t rsvd_5 : 2; + + uint64_t cc : 1; + + // popts + uint64_t ixsm : 1; + uint64_t txsm : 1; + uint64_t ipsec : 1; + uint64_t rsvd_6 : 3; + + uint64_t paylen : 18; + }; + }; + }; + +} tdesc_advance_tx_rf_t; + +// Advanced Transmit Data Descriptor - Write-back Format +typedef union { + uint64_t raw[2]; + + struct { + uint64_t rsvd_1; + + union { + uint64_t raw2; + + struct { + uint64_t rsvd_2 : 32; + + // status + uint64_t dd : 1; + uint64_t rsvd_3 : 3; + + uint64_t rsvd_4 : 28; + }; + }; + }; + +} tdesc_advance_tx_wbf_t; + +struct VirtioNetHeader { + static const constexpr uint8_t kNeedsCsum = 1; + static const constexpr uint8_t kGsoNone = 0; + static const constexpr uint8_t kGsoTcpv4 = 1; + static const constexpr uint8_t kGsoUdp = 3; + static const constexpr uint8_t kGsoTcpv6 = 4; + static const constexpr uint8_t kGsoEvn = 0x80; + + uint8_t flags; + uint8_t gso_type; + 
uint16_t hdr_len; + uint16_t gso_size; + uint16_t csum_start; + uint16_t csum_offset; + uint16_t num_buffers; +}; + +#endif // BAREMETAL_SRC_INCLUDE_EBBRT_IXGBE_H_ diff --git a/src/native/IxgbeDriver.cc b/src/native/IxgbeDriver.cc new file mode 100644 index 00000000..ab28293b --- /dev/null +++ b/src/native/IxgbeDriver.cc @@ -0,0 +1,2001 @@ +// Copyright Boston University SESA Group 2013 - 2018. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#include "IxgbeDriver.h" + +#include "../Align.h" +#include "../StaticIOBuf.h" +#include "../UniqueIOBuf.h" +#include "Clock.h" +#include "Debug.h" +#include "EventManager.h" +#include "Fls.h" +#include "Ixgbe.h" +#include "Net.h" +#include "Pfn.h" + +#include +#include +#include + +void ebbrt::IxgbeDriver::Create(pci::Device& dev) { + auto ixgbe_dev = new IxgbeDriver(dev); + + // physical device bringup + ixgbe_dev->Init(); + + ixgbe_dev->ebb_ = + IxgbeDriverRep::Create(ixgbe_dev, ebb_allocator->AllocateLocal()); + + // initialize per core rx and tx queues + for (size_t i = 0; i < Cpu::Count(); i++) { + ixgbe_dev->SetupMultiQueue(i); + } + + ixgbe_dev->FinishSetup(); + + // TODO remove? 
+ ebbrt::clock::SleepMilli(200); + ebbrt::kprintf("intel 82599 card initialzed\n"); +} + +const ebbrt::EthernetAddress& ebbrt::IxgbeDriver::GetMacAddress() { + return mac_addr_; +} + +void ebbrt::IxgbeDriver::Send(std::unique_ptr buf, PacketInfo pinfo) { + ebb_->Send(std::move(buf), std::move(pinfo)); +} + +void ebbrt::IxgbeDriver::Run() { ebb_->Run(); } + +// After packet transmission, need to mark bit in +// tx queue so that it can be used again +// TX_HEAD_WB does it automatically +void ebbrt::IxgbeDriverRep::ReclaimTx() { +#ifndef TX_HEAD_WB + size_t head = ixgmq_.tx_head_; + size_t tail = ixgmq_.tx_tail_; + tdesc_advance_tx_wbf_t* actx; + + // go through all descriptors owned by HW + while (head != tail) { + actx = reinterpret_cast(&(ixgmq_.tx_ring_[head])); + + // if context + if (ixgmq_.tx_isctx_[head]) { + head = (head + 1) % ixgmq_.tx_size_; + } + // if non eop + else if (!(actx->dd)) { + head = (head + 1) % ixgmq_.tx_size_; + } + // eop + else if (actx->dd) { + head = (head + 1) % ixgmq_.tx_size_; + ixgmq_.tx_head_ = head; + } + } +#endif +} + +// every TX requires a context struct before +void ebbrt::IxgbeDriverRep::AddContext(uint8_t idx, uint8_t maclen, + uint16_t iplen, uint8_t l4len, + enum l4_type l4type) { + + tdesc_advance_ctxt_wb_t* actx; + + auto tail = ixgmq_.tx_tail_; + + // context buffer already allocated, need to zero + actx = reinterpret_cast(&(ixgmq_.tx_ring_[tail])); + + actx->raw_1 = 0x0; + actx->raw_2 = 0x0; + + memset(actx, 0, sizeof(tdesc_advance_ctxt_wb_t)); + ixgmq_.tx_isctx_[tail] = true; + + // refer to 82599 datasheet for these settings + actx->dytp = 0b0010; + actx->dext = 1; + actx->idx = idx; + actx->maclen = maclen; + actx->iplen = iplen; + + actx->ipv4 = 1; + actx->l4len = 0; // ignored when TSE not set + actx->l4t = l4type; + + // need to increment tail + ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; + ixgmq_.tx_tail_ = (tail + 1) % ixgmq_.tx_size_; +} + +// Add a new packet to be transmitted +void 
ebbrt::IxgbeDriverRep::AddTx(const uint8_t* pa, uint64_t len, + uint64_t totallen, bool first, bool last, + uint8_t ctx, bool ip_cksum, + bool tcpudp_cksum) { + tdesc_advance_tx_rf_t* actx; + + auto tail = ixgmq_.tx_tail_; + actx = reinterpret_cast(&(ixgmq_.tx_ring_[tail])); + + ixgmq_.tx_isctx_[tail] = false; + + actx->raw[0] = 0x0; + actx->raw[1] = 0x0; + + // pa is physical address of where send buffer exists + actx->address = reinterpret_cast(pa); + actx->dtalen = len; + if (first) { + actx->paylen = totallen; + } + + // type is advanced + actx->dtyp = 0b0011; + actx->dext = 1; + + // rs bit should only be set when eop is set + if (last) { + actx->rs = 1; + } else { + actx->rs = 0; + } + + // checksum + actx->ifcs = 1; + + // set last packet bit + if (last) { + actx->eop = 1; + } else { + actx->eop = 0; + } + + // TODO enable ip checksum + if (ctx != -1) { + actx->idx = ctx; + actx->cc = 1; + actx->ixsm = ip_cksum; // no ip checksum + actx->txsm = tcpudp_cksum; // udp or tcp checksum offload + } + + ixgmq_.tx_last_tail_ = ixgmq_.tx_tail_; + ixgmq_.tx_tail_ = (tail + 1) % ixgmq_.tx_size_; +} + +void ebbrt::IxgbeDriverRep::Send(std::unique_ptr buf, PacketInfo pinfo) { + auto dp = buf->GetDataPointer(); + auto len = buf->ComputeChainDataLength(); + auto count = buf->CountChainElements(); + bool ip_cksum = false; + bool tcpudp_cksum = false; + + ebbrt::kbugon(len >= 0xA0 * 1000, + "%s packet len bigger than max ether length\n", __FUNCTION__); + +// TODO threshold for triggering reclaim tx buffers +#ifndef TX_HEAD_WB + size_t free_desc = + IxgbeDriver::NTXDESCS - + (std::abs(static_cast(ixgmq_.tx_tail_ - ixgmq_.tx_head_))); + // free descripts must have enough for count in chained iobufs + if (free_desc < (count + 1)) { + // reclaim buffers + ReclaimTx(); + + free_desc = IxgbeDriver::NTXDESCS - + (std::abs(static_cast(ixgmq_.tx_tail_ - ixgmq_.tx_head_))); + // not enough descriptors got freed + if (free_desc < (count + 1)) { + return; + } + } +#endif + + if 
(pinfo.flags & PacketInfo::kNeedsIpCsum) { + ip_cksum = true; + } + + // NEED CHECKSUM + if (pinfo.flags & PacketInfo::kNeedsCsum) { + tcpudp_cksum = true; + + // check datasheet for numbers + if (pinfo.csum_offset == 6) { + AddContext(0, ETHHDR_LEN, IPHDR_LEN, 0, l4_type_udp); + } else if (pinfo.csum_offset == 16) { + AddContext(0, ETHHDR_LEN, IPHDR_LEN, 0, l4_type_tcp); + } else { + ebbrt::kabort("%s unknown packet type checksum\n", __FUNCTION__); + } + + // if buffer is chained + if (buf->IsChained()) { + size_t counter = 0; + for (auto& buf_it : *buf) { + counter++; + + // first buffer + if (counter == 1) { + AddTx(buf_it.Data(), reinterpret_cast(buf_it.Length()), len, + true, false, 0, ip_cksum, tcpudp_cksum); + } else { + // last buffer + if (counter == count) { + AddTx(buf_it.Data(), reinterpret_cast(buf_it.Length()), + len, false, true, 0, ip_cksum, tcpudp_cksum); + } else { + AddTx(buf_it.Data(), reinterpret_cast(buf_it.Length()), + len, false, false, 0, ip_cksum, tcpudp_cksum); + } + } + } + } + // not chained + else { + AddTx(buf->Data(), len, len, true, true, 0, ip_cksum, tcpudp_cksum); + } + } else { + // NO CHECKSUM FLAG SET + // if buffer is chained + if (buf->IsChained()) { + size_t counter = 0; + for (auto& buf_it : *buf) { + counter++; + + // first buffer + if (counter == 1) { + AddTx(buf_it.Data(), reinterpret_cast(buf_it.Length()), len, + true, false, 0, ip_cksum, tcpudp_cksum); + } else { + // last buffer + if (counter == count) { + AddTx(buf_it.Data(), reinterpret_cast(buf_it.Length()), + len, false, true, 0, ip_cksum, tcpudp_cksum); + } else { + AddTx(buf_it.Data(), reinterpret_cast(buf_it.Length()), + len, false, false, 0, ip_cksum, tcpudp_cksum); + } + } + } + } + // not chained + else { + AddTx(buf->Data(), len, len, true, true, 0, ip_cksum, tcpudp_cksum); + } + } + + // bump tx_tail + // indicates position beyond last descriptor hw + WriteTdt_1(Cpu::GetMine(), ixgmq_.tx_tail_); +} + +void ebbrt::IxgbeDriver::WriteRxctrl(uint32_t m) { + // 
Disable RXCTRL - 8.2.3.8.10 + bar0_.Write32(0x03000, m); +} + +void ebbrt::IxgbeDriver::WriteDmatxctl(uint32_t m) { + uint32_t reg; + + reg = bar0_.Read32(0x04A80); + ebbrt::kprintf("0x04A80: DMATXCTL 0x%08X - reset to 0x%08X\n", reg, reg & m); + + // DMATXCTL - 8.2.3.9.2 + bar0_.Write32(0x04A80, reg & m); +} +void ebbrt::IxgbeDriver::WriteDmatxctl_te(uint32_t m) { + auto reg = bar0_.Read32(0x04A80); + bar0_.Write32(0x04A80, reg | m); +} + +// 8.2.3.5.18 - General Purpose Interrupt Enable — GPIE (0x00898; RW) +void ebbrt::IxgbeDriver::WriteGpie(uint32_t m) { + auto reg = bar0_.Read32(0x00898); + bar0_.Write32(0x00898, reg | m); +} + +// 8.2.3.5.1 Extended Interrupt Cause Register- EICR (0x00800; RW1C) +void ebbrt::IxgbeDriver::ReadEicr() { + /* Note + * The EICR is also cleared on read if GPIE.OCD bit is cleared. When the + * GPIE.OCD bit is set, then only bits 16...29 are cleared on read. + */ + // 8.2.3.5.18 General Purpose Interrupt Enable — GPIE (0x00898;RW) + uint32_t reg; + reg = bar0_.Read32(0x00898); + ebbrt::kbugon((reg & 0x20), "GPIE.OCD not cleared\n"); + + reg = bar0_.Read32(0x00800); + ebbrt::kprintf("First Read - 0x00800: EICR 0x%08X, ", reg); + + reg = bar0_.Read32(0x00800); + ebbrt::kprintf("Second Read - EICR 0x%08X\n", reg); +} +void ebbrt::IxgbeDriver::WriteEicr(uint32_t m) { + auto reg = bar0_.Read32(0x00800); + bar0_.Write32(0x00800, reg | m); +} + +// 8.2.3.5.3 Extended Interrupt Mask Set/Read Register- EIMS (0x00880; RWS) +uint32_t ebbrt::IxgbeDriver::ReadEims() { return bar0_.Read32(0x00880); } +void ebbrt::IxgbeDriver::WriteEims(uint32_t m) { bar0_.Write32(0x00880, m); } + +// 8.2.3.5.4 Extended Interrupt Mask Clear Register- EIMC (0x00888; WO) +void ebbrt::IxgbeDriver::WriteEimc(uint32_t m) { bar0_.Write32(0x00888, m); } + +// 8.2.3.5.5 Extended Interrupt Auto Clear Register — EIAC (0x00810; RW) +void ebbrt::IxgbeDriver::WriteEiac(uint32_t m) { + auto reg = bar0_.Read32(0x00810); + bar0_.Write32(0x00810, reg | m); +} + +// 8.2.3.5.8 
Extended Interrupt Mask Set/Read Registers — EIMS[n] (0x00AA0 + +// 4*(n-1), n=1...2; RWS) +void ebbrt::IxgbeDriver::WriteEimsn(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00AA0 + 4 * n); + bar0_.Write32(0x00AA0 + 4 * n, reg | m); +} + +// 8.2.3.5.12 +// Extended Interrupt Throttle Registers — EITR[n] +// (0x00820 + 4*n, n=0...23 and 0x012300 + 4*(n-24), +// n=24...128; RW) +void ebbrt::IxgbeDriver::WriteEitr(uint32_t n, uint32_t m) { + ebbrt::kbugon(n > 128, "%s error\n", __FUNCTION__); + + if (n < 24) { + bar0_.Write32(0x00820 + 4 * n, m); + } else { + bar0_.Write32(0x012300 + 4 * (n - 24), m); + } +} + +// 8.2.3.9.10 Transmit Descriptor Control — TXDCTL[n] (0x06028+0x40*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WriteTxdctl(uint32_t n, uint32_t m) { + bar0_.Write32(0x06028 + (0x40 * n), m); +} +uint8_t ebbrt::IxgbeDriver::ReadTxdctl_enable(uint32_t n) { + auto reg = bar0_.Read32(0x06028 + 0x40 * n); + return (reg >> 25) & 0x1; +} + +// 8.2.3.8.6 Receive Descriptor Control — RXDCTL[n] (0x01028 + +// 0x40*n, n=0...63 and 0x0D028 + 0x40*(n-64), n=64...127; RW) +void ebbrt::IxgbeDriver::WriteRxdctl_1(uint32_t n, uint32_t m) { + bar0_.Write32(0x01028 + (0x40 * n), m); +} +void ebbrt::IxgbeDriver::WriteRxdctl_1_enable(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x01028 + (0x40 * n)); + bar0_.Write32(0x01028 + (0x40 * n), reg | m); +} + +uint8_t ebbrt::IxgbeDriver::ReadRxdctl_1_enable(uint32_t n) { + auto reg = bar0_.Read32(0x01028 + (0x40 * n)); + return (reg >> 25) & 0x1; +} + +void ebbrt::IxgbeDriver::WriteRxdctl_2(uint32_t n, uint32_t m) { + bar0_.Write32(0x0D028 + (0x40 * n), m); +} + +// 8.2.3.27.14 PF VM L2 Control Register — PFVML2FLT[n] (0x0F000 + 4*n, +// n=0...63; RW) +void ebbrt::IxgbeDriver::WritePfvml2flt(uint32_t n, uint32_t m) { + bar0_.Write32(0x0F000 + 4 * n, m); +} + +// 8.2.3.9.14 Manageability Transmit TC Mapping — MNGTXMAP (0x0CD10; RW) +void ebbrt::IxgbeDriver::WriteMngtxmap(uint32_t m) { + bar0_.Write32(0x0CD10, m); +} + 
+// 8.2.3.1.1 Device Control Register — CTRL (0x00000 / 0x00004;RW) +void ebbrt::IxgbeDriver::WriteCtrl(uint32_t m) { bar0_.Write32(0x0, m); } +void ebbrt::IxgbeDriver::ReadCtrl() { + uint32_t reg; + reg = bar0_.Read32(0x0); + ebbrt::kprintf("%s = 0x%X\n", __FUNCTION__, reg); +} + +// 8.2.3.1.3 Extended Device Control Register — CTRL_EXT (0x00018; RW) +void ebbrt::IxgbeDriver::WriteCtrlExt(uint32_t m) { + auto reg = bar0_.Read32(0x00018); + bar0_.Write32(0x00018, reg | m); +} + +// 8.2.3.7.1 Filter Control Register — FCTRL (0x05080; RW) +void ebbrt::IxgbeDriver::WriteFctrl(uint32_t m) { bar0_.Write32(0x05080, m); } + +// 8.2.3.24.9 Flexible Host Filter Table Registers — FHFT (0x09000 — 0x093FC and +// 0x09800 — 0x099FC; RW) +void ebbrt::IxgbeDriver::WriteFhft_1(uint32_t n, uint32_t m) { + bar0_.Write32(0x09000, m); +} +void ebbrt::IxgbeDriver::WriteFhft_2(uint32_t n, uint32_t m) { + bar0_.Write32(0x09800, m); +} + +// 8.2.3.1.2 Device Status Register — STATUS (0x00008; RO) +bool ebbrt::IxgbeDriver::ReadStatusPcieMes() { + auto reg = bar0_.Read32(0x8); + return !(reg & 0x80000); +} +uint8_t ebbrt::IxgbeDriver::ReadStatusLanId() { + auto reg = bar0_.Read32(0x8); + return (reg >> 2) & 0x3; +} + +// 8.2.3.3.2 Flow Control Transmit Timer Value n — FCTTVn (0x03200 + 4*n, +// n=0...3; RW) +void ebbrt::IxgbeDriver::WriteFcttv(uint32_t n, uint32_t m) { + bar0_.Write32(0x03200 + (4 * n), m); +} + +// 8.2.3.3.3 Flow Control Receive Threshold Low — FCRTL[n] (0x03220 + 4*n, +// n=0...7; RW) +void ebbrt::IxgbeDriver::WriteFcrtl(uint32_t n, uint32_t m) { + bar0_.Write32(0x03220 + (4 * n), m); +} + +// 8.2.3.3.4 Flow Control Receive Threshold High — FCRTH[n] (0x03260 + 4*n, +// n=0...7; RW) +void ebbrt::IxgbeDriver::WriteFcrth(uint32_t n, uint32_t m) { + bar0_.Write32(0x03260 + (4 * n), m); +} + +// 8.2.3.3.5 Flow Control Refresh Threshold Value — FCRTV (0x032A0; RW) +void ebbrt::IxgbeDriver::WriteFcrtv(uint32_t m) { bar0_.Write32(0x032A0, m); } + +// 8.2.3.3.7 Flow Control 
Configuration — FCCFG (0x03D00; RW) +void ebbrt::IxgbeDriver::WriteFccfg(uint32_t m) { bar0_.Write32(0x03D00, m); } + +// 8.2.3.2.2 EEPROM Read Register — EERD (0x10014; RW) +void ebbrt::IxgbeDriver::WriteEerd(uint32_t m) { bar0_.Write32(0x10014, m); } +bool ebbrt::IxgbeDriver::ReadEerdDone() { + auto reg = bar0_.Read32(0x10014); + return !!(reg & 0x2); // return true when Read Done = 1 +} + +uint16_t ebbrt::IxgbeDriver::ReadEerdData() { + auto reg = bar0_.Read32(0x10014); + return (reg >> 16) & 0xFFFF; +} + +uint16_t ebbrt::IxgbeDriver::ReadEeprom(uint16_t offset) { + WriteEerd(offset << 2 | 1); + // TODO: Timeout + while (ReadEerdDone() == 0) + ; + return ReadEerdData(); +} + +// 8.2.3.22.32 - Core Analog Configuration Register — CoreCTL (0x014F00; RW) +void ebbrt::IxgbeDriver::WriteCorectl(uint16_t m) { + bar0_.Write32(0x014F00, 0x0 | m); +} + +// 8.2.3.22.19 Auto Negotiation Control Register — AUTOC (0x042A0; RW) +void ebbrt::IxgbeDriver::WriteAutoc(uint32_t m) { + auto reg = bar0_.Read32(0x042A0); + bar0_.Write32(0x042A0, reg | m); +} +uint8_t ebbrt::IxgbeDriver::ReadAutocRestartAn() { + auto reg = bar0_.Read32(0x042A0); + return (reg >> 12) & 0x1; +} + +// 8.2.3.22.23 Auto Negotiation Link Partner Link Control Word 1 Register — +// ANLP1 (0x042B0; RO) +uint8_t ebbrt::IxgbeDriver::ReadAnlp1() { + auto reg = bar0_.Read32(0x042B0); + return (reg >> 16) & 0xFF; +} + +// 8.2.3.2.1 EEPROM/Flash Control Register — EEC (0x10010; RW) +uint8_t ebbrt::IxgbeDriver::ReadEecAutoRd() { + auto reg = bar0_.Read32(0x10010); + return (reg >> 9) & 0xFF; +} + +// 8.2.3.7.7 Multicast Table Array — MTA[n] (0x05200 + 4*n, n=0...127; RW) +void ebbrt::IxgbeDriver::WriteMta(uint32_t n, uint32_t m) { + bar0_.Write32(0x05200 + (4 * n), m); +} + +// 8.2.3.7.11 VLAN Filter Table Array — VFTA[n] (0x0A000 + 4*n,n=0...127; RW) +void ebbrt::IxgbeDriver::WriteVfta(uint32_t n, uint32_t m) { + bar0_.Write32(0x0A000 + (4 * n), m); +} + +// 8.2.3.27.15 PF VM VLAN Pool Filter — PFVLVF[n] (0x0F100 + 
4*n, n=0...63; RW) +void ebbrt::IxgbeDriver::WritePfvlvf(uint32_t n, uint32_t m) { + bar0_.Write32(0x0F100 + 4 * n, m); +} + +// 8.2.3.27.16 PF VM VLAN Pool Filter Bitmap — PFVLVFB[n] (0x0F200 + 4*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WritePfvlvfb(uint32_t n, uint32_t m) { + bar0_.Write32(0x0F200 + 4 * n, m); +} + +// 8.2.3.7.23 Rx Filter ECC Err Insertion 0 — RXFECCERR0 (0x051B8; RW) +void ebbrt::IxgbeDriver::WriteRxfeccerr0(uint32_t m) { + auto reg = bar0_.Read32(0x051B8); + bar0_.Write32(0x051B8, reg | m); +} + +// Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if +// 1360 +// so it returns the offsets to the phy init sequence block. +// also based on +// http://lxr.free-electrons.com/source/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c?v=3.14#L1395 +// https://github.com/freebsd/freebsd/blob/386ddae58459341ec567604707805814a2128a57/sys/dev/ixgbe/ixgbe_82599.c#L173 +void ebbrt::IxgbeDriver::PhyInit() { + + uint16_t list_offset; + uint16_t data_offset = 0x0; + uint16_t data_value; + uint16_t sfp_id; + uint16_t sfp_type = 0x4; /* SPF_DA_CORE1 */ + + /* IXGBE_PHY_INIT_OFFSET_NL */ + list_offset = ReadEeprom(0x002B); + + if ((list_offset == 0x0) || (list_offset == 0xFFFF)) { + return; + } + + /* Shift offset to first ID word */ + list_offset++; + + sfp_id = ReadEeprom(list_offset); + + while (sfp_id != 0xFFFF) { + if (sfp_id == sfp_type) { + list_offset++; + data_offset = ReadEeprom(list_offset); + if ((data_offset == 0x0) || (data_offset == 0xFFFF)) { + ebbrt::kprintf("sfp init failed\n"); + return; + } else { + break; + } + } else { + list_offset += 2; + sfp_id = ReadEeprom(list_offset); + } + list_offset++; + } + + if (sfp_id == 0xFFFF) { + ebbrt::kprintf("sfp init failed\n"); + return; + } + + ebbrt::kprintf("data offset -> 0x%x\n", data_offset); + + SwfwLockPhy(); + + data_value = ReadEeprom(++data_offset); + while (data_value != 0xFFFF) { + ebbrt::kprintf("data_value -> 0x%x\n", data_value); + WriteCorectl(data_value); + 
data_value = ReadEeprom(++data_offset); + } + SwfwUnlockPhy(); + + ebbrt::clock::SleepMilli(20); + + WriteAutoc(0x0 << 13 | 0x1 << 12); + while (ReadAnlp1() != 0) + ; // TODO: timeout + + WriteAutoc(0x3 << 13 | 0x1 << 12); + while (ReadAutocRestartAn() != 0) + ; // TODO: timeout + + ebbrt::kprintf("PHY init done\n"); +} + +// 8.2.3.7.8 Receive Address Low — RAL[n] (0x0A200 + 8*n, n=0...127; RW) +uint32_t ebbrt::IxgbeDriver::ReadRal(uint32_t n) { + auto reg = bar0_.Read32(0x0A200 + 8 * n); + return reg; +} +void ebbrt::IxgbeDriver::WriteRal(uint32_t n, uint32_t m) { + bar0_.Write32(0x0A200 + (8 * n), m); +} + +// 8.2.3.7.9 Receive Address High — RAH[n] (0x0A204 + 8*n, n=0...127; RW) +uint16_t ebbrt::IxgbeDriver::ReadRah(uint32_t n) { + auto reg = bar0_.Read32(0x0A204 + 8 * n); + return (reg)&0xFFFF; +} +uint8_t ebbrt::IxgbeDriver::ReadRahAv(uint32_t n) { + return (bar0_.Read32(0x0A204 + 8 * n) >> 31) & 0xFF; +} +void ebbrt::IxgbeDriver::WriteRah(uint32_t n, uint32_t m) { + bar0_.Write32(0x0A204 + (8 * n), m); +} + +// 8.2.3.7.10 MAC Pool Select Array — MPSAR[n] (0x0A600 + 4*n, n=0...255; RW) +void ebbrt::IxgbeDriver::WriteMpsar(uint32_t n, uint32_t m) { + bar0_.Write32(0x0A600 + 4 * n, m); +} + +// 8.2.3.7.19 Five tuple Queue Filter — FTQF[n] (0x0E600 + 4*n,n=0...127; RW) +void ebbrt::IxgbeDriver::WriteFtqf(uint32_t n, uint32_t m) { + bar0_.Write32(0x0E600 + 4 * n, m); +} + +// 8.2.3.7.16 Source Address Queue Filter — SAQF[n] (0x0E000 + 4*n, n=0...127; +// RW) +void ebbrt::IxgbeDriver::WriteSaqf(uint32_t n, uint32_t m) { + bar0_.Write32(0x0E000 + 4 * n, m); +} + +// 8.2.3.7.17 Destination Address Queue Filter — DAQF[n] (0x0E200 + 4*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WriteDaqf(uint32_t n, uint32_t m) { + bar0_.Write32(0x0E200 + 4 * n, m); +} + +// 8.2.3.7.18 Source Destination Port Queue Filter — SDPQF[n] (0x0E400 + 4*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WriteSdpqf(uint32_t n, uint32_t m) { + bar0_.Write32(0x0E400 + 4 * n, m); +} + +// 
8.2.3.27.17 PF Unicast Table Array — PFUTA[n] (0x0F400 + 4*n, n=0...127; RW) +void ebbrt::IxgbeDriver::WritePfuta(uint32_t n, uint32_t m) { + bar0_.Write32(0x0F400 + 4 * n, m); +} + +// 8.2.3.7.3 Multicast Control Register — MCSTCTRL (0x05090; RW) +void ebbrt::IxgbeDriver::WriteMcstctrl(uint32_t m) { + auto reg = bar0_.Read32(0x05090); + bar0_.Write32(0x05090, reg & m); +} + +// 8.2.3.10.13 DCB Transmit Descriptor Plane Queue Select — RTTDQSEL (0x04904; +// RW) +void ebbrt::IxgbeDriver::WriteRttdqsel(uint32_t m) { + auto reg = bar0_.Read32(0x04904); + bar0_.Write32(0x04904, reg | m); +} + +// 8.2.3.10.14 DCB Transmit Descriptor Plane T1 Config — RTTDT1C (0x04908; RW) +void ebbrt::IxgbeDriver::WriteRttdt1c(uint32_t m) { bar0_.Write32(0x04908, m); } + +// 8.2.3.10.16 DCB Transmit Rate-Scheduler Config — RTTBCNRC (0x04984; RW) +void ebbrt::IxgbeDriver::WriteRttbcnrc(uint32_t m) { + bar0_.Write32(0x04984, m); +} + +// 8.2.3.10.9 DCB Transmit Descriptor Plane T2 Config - RTTDT2C[n] (0x04910 + +// 4*n, n=0...7; RW) DMA-Tx +void ebbrt::IxgbeDriver::WriteRttdt2c(uint32_t n, uint32_t m) { + bar0_.Write32(0x04910 + 4 * n, m); +} + +// 8.2.3.10.10 DCB Transmit Packet Plane T2 Config — RTTPT2C[n] (0x0CD20 + 4*n, +// n=0...7; RW) +void ebbrt::IxgbeDriver::WriteRttpt2c(uint32_t n, uint32_t m) { + bar0_.Write32(0x0CD20 + 4 * n, m); +} + +// 8.2.3.10.6 DCB Receive Packet Plane T4 Config — RTRPT4C[n] (0x02140 + 4*n, +// n=0...7; RW) +void ebbrt::IxgbeDriver::WriteRtrpt4c(uint32_t n, uint32_t m) { + bar0_.Write32(0x02140 + 4 * n, m); +} + +// 8.2.3.10.1 DCB Receive Packet Plane Control and Status — RTRPCS (0x02430; RW) +void ebbrt::IxgbeDriver::WriteRtrpcs(uint32_t m) { bar0_.Write32(0x02430, m); } + +// 8.2.3.11.2 Tx DCA Control Registers — DCA_TXCTRL[n] (0x0600C + 0x40*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WriteDcaTxctrlTxdescWbro(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0600C + 0x40 * n); + bar0_.Write32(0x0600C + 0x40 * n, reg & m); +} + +// 8.2.3.11.1 Rx 
DCA Control Register — DCA_RXCTRL[n] (0x0100C + 0x40*n, +// n=0...63 and 0x0D00C + 0x40*(n-64), +// n=64...127 / 0x02200 + 4*n, [n=0...15]; RW) +void ebbrt::IxgbeDriver::WriteDcaRxctrl_1(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0100C + 0x40 * n); + bar0_.Write32(0x0100C + 0x40 * n, reg & m); +} + +// void ebbrt::IxgbeDriver::WriteDcaRxctrl_1_RxdataWrro(uint32_t n, uint32_t m); +void ebbrt::IxgbeDriver::WriteDcaRxctrl_2(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0D00C + 0x40 * n); + bar0_.Write32(0x0D00C + 0x40 * n, reg & m); +} + +// 8.2.3.7.5 Receive Checksum Control — RXCSUM (0x05000; RW) +void ebbrt::IxgbeDriver::WriteRxcsum(uint32_t m) { + auto reg = bar0_.Read32(0x05000); + bar0_.Write32(0x05000, reg | m); +} + +// 8.2.3.8.13 RSC Control — RSCCTL[n] (0x0102C + 0x40*n, n=0...63 +// and 0x0D02C + 0x40*(n-64), n=64...127; RW) +void ebbrt::IxgbeDriver::WriteRscctl(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0102C + 0x40 * n); + bar0_.Write32(0x0102C + 0x40 * n, reg | m); +} + +// 8.2.3.7.4 Packet Split Receive Type Register — PSRTYPE[n] +// (0x0EA00 + 4*n, n=0...63 / 0x05480 + 4*n, n=0...15; RW) +void ebbrt::IxgbeDriver::WritePsrtype(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0EA00 + 0x40 * n); + bar0_.Write32(0x0EA00 + 0x40 * n, reg | m); +} + +void ebbrt::IxgbeDriver::WritePsrtypeZero(uint32_t n) { + bar0_.Write32(0x0EA00 + 0x40 * n, 0x0); +} + +// 8.2.3.7.15 Redirection Table — RETA[n] (0x0EB00 + 4*n, n=0...31/ 0x05C00 + +// 4*n, n=0...31; RW) +void ebbrt::IxgbeDriver::WriteReta(uint32_t n, uint32_t m) { + bar0_.Write32(0x0EB00 + 4 * n, m); +} + +// 8.2.3.7.6 Receive Filter Control Register — RFCTL (0x05008; RW) +void ebbrt::IxgbeDriver::WriteRfctl(uint32_t m) { bar0_.Write32(0x05008, m); } + +// 8.2.3.9.16 Tx Packet Buffer Threshold — +// TXPBTHRESH (0x04950 +0x4*n, n=0...7; RW) +void ebbrt::IxgbeDriver::WriteTxpbthresh(uint32_t n, uint32_t m) { + bar0_.Write32(0x04950 + 0x4 * n, m); +} + +// 8.2.3.7.12 Multiple 
Receive Queues Command Register- MRQC (0x0EC80 / 0x05818; +// RW) +void ebbrt::IxgbeDriver::WriteMrqc(uint32_t m) { + auto reg = bar0_.Read32(0x0EC80); + bar0_.Write32(0x0EC80, reg | m); +} + +// 8.2.3.9.15 Multiple Transmit Queues Command Register — MTQC (0x08120; RW) +void ebbrt::IxgbeDriver::WriteMtqc(uint32_t m) { bar0_.Write32(0x08120, m); } + +// 8.2.3.27.1 VT Control Register — PFVTCTL (0x051B0; RW) +void ebbrt::IxgbeDriver::WritePfvtctl(uint32_t m) { bar0_.Write32(0x051B0, m); } + +// 8.2.3.10.4 DCB Receive User Priority to Traffic Class — RTRUP2TC (0x03020; +// RW) +void ebbrt::IxgbeDriver::WriteRtrup2tc(uint32_t m) { + bar0_.Write32(0x03020, m); +} + +// 8.2.3.10.5 DCB Transmit User Priority to Traffic Class — RTTUP2TC (0x0C800; +// RW) +void ebbrt::IxgbeDriver::WriteRttup2tc(uint32_t m) { + bar0_.Write32(0x0C800, m); +} + +// 8.2.3.9.1 DMA Tx TCP Max Allow Size Requests — DTXMXSZRQ (0x08100; RW) +void ebbrt::IxgbeDriver::WriteDtxmxszrq(uint32_t m) { + auto reg = bar0_.Read32(0x08100); + bar0_.Write32(0x08100, reg | m); +} + +// 8.2.3.27.9 PF PF Queue Drop Enable Register — PFQDE (0x02F04; RW) +void ebbrt::IxgbeDriver::WritePfqde(uint32_t m) { bar0_.Write32(0x02F04, m); } + +// 8.2.3.22.34 MAC Flow Control Register — MFLCN (0x04294; RW) +void ebbrt::IxgbeDriver::WriteMflcn(uint32_t m) { + auto reg = bar0_.Read32(0x04294); + bar0_.Write32(0x04294, reg | m); +} + +// 8.2.3.3.7 Flow Control Configuration — FCCFG (0x03D00; RW) +/*void ebbrt::IxgbeDriver::WriteFccfg(uint32_t m) { + auto reg = bar0_.Read32(0x03D00); + bar0_.Write32(0x03D00, reg | m); + }*/ + +// void ebbrt::IxgbeDriver::WriteDcaRxctrl_2_RxdataWrro(uint32_t n, uint32_t m); + +// 8.2.3.4.9 - Software Semaphore Register — SWSM (0x10140; RW) +bool ebbrt::IxgbeDriver::SwsmSmbiRead() { + return !!(bar0_.Read32(0x10140) & 0x1); +} +bool ebbrt::IxgbeDriver::SwsmSwesmbiRead() { + return !(bar0_.Read32(0x10140) & 0x2); +} +void ebbrt::IxgbeDriver::SwsmSwesmbiSet() { + auto reg = bar0_.Read32(0x10140); + 
ebbrt::kprintf("%s: reg before: 0x%08X, reg after: 0x%08X\n", __FUNCTION__, + reg, reg | 0x2); + bar0_.Write32(0x10140, reg | 0x2); +} +void ebbrt::IxgbeDriver::SwsmSmbiClear() { + auto reg = bar0_.Read32(0x10140); + ebbrt::kprintf("%s: reg before: 0x%08X, reg after: 0x%08X\n", __FUNCTION__, + reg, reg & 0xFFFFFFFE); + bar0_.Write32(0x10140, reg & 0xFFFFFFFE); +} +void ebbrt::IxgbeDriver::SwsmSwesmbiClear() { + auto reg = bar0_.Read32(0x10140); + ebbrt::kprintf("%s: reg before: 0x%08X, reg after: 0x%08X\n", __FUNCTION__, + reg, reg & 0xFFFFFFFD); + bar0_.Write32(0x10140, reg & 0xFFFFFFFD); +} + +// 8.2.3.22.20 Link Status Register — LINKS (0x042A4; RO) +bool ebbrt::IxgbeDriver::ReadLinksLinkUp() { + auto reg = bar0_.Read32(0x042A4); + return ((reg >> 30) & 0x1) == 1; +} + +// 8.2.3.4.11 Software-Firmware Synchronization - SW_FW_SYNC (0x10160; RW) +uint32_t ebbrt::IxgbeDriver::ReadSwfwSyncSmBits(uint32_t m) { + auto reg = bar0_.Read32(0x10160); + return (reg & m) & 0x3FF; // masking bits 9:0 +} +void ebbrt::IxgbeDriver::WriteSwfwSyncSmBits(uint32_t m) { + auto reg = bar0_.Read32(0x10160); + bar0_.Write32(0x10160, reg | m); +} +void ebbrt::IxgbeDriver::WriteSwfwSyncSmBits2(uint32_t m) { + auto reg = bar0_.Read32(0x10160); + bar0_.Write32(0x10160, reg & m); +} + +// 8.2.3.11.1 Rx DCA Control Register — DCA_RXCTRL[n] (0x0100C + 0x40*n, +// n=0...63 and 0x0D00C + 0x40*(n-64), // n=0...63 and 0x0D00C + 0x40*(n-64), +// n=64...127 / 0x02200 + 4*n, [n=0...15]; RW) // n=64...127 / 0x02200 + 4*n, +// [n=0...15]; RW) +void ebbrt::IxgbeDriver::WriteDcaRxctrl(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0100C + 0x40 * n); + bar0_.Write32(0x0100C + 0x40 * n, reg | m); +} +void ebbrt::IxgbeDriver::WriteDcaRxctrlClear(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0100C + 0x40 * n); + bar0_.Write32(0x0100C + 0x40 * n, reg & m); +} + +// 8.2.3.11.4 DCA Control Register — DCA_CTRL (0x11074; RW) +void ebbrt::IxgbeDriver::WriteDcaCtrl(uint32_t m) { + auto reg = 
bar0_.Read32(0x11074); + bar0_.Write32(0x11074, reg | m); +} + +// 8.2.3.11.2 Tx DCA Control Registers — DCA_TXCTRL[n] (0x0600C + 0x40*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WriteDcaTxctrl(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x0600C + 0x40 * n); + bar0_.Write32(0x0600C + 0x40 * n, reg | m); +} + +// 8.2.3.8.1 Receive Descriptor Base Address Low — RDBAL[n] (0x01000 + 0x40*n, +// n=0...63 and 0x0D000 + 0x40*(n-64), n=64...127; RW) +void ebbrt::IxgbeDriver::WriteRdbal_1(uint32_t n, uint32_t m) { + bar0_.Write32(0x01000 + 0x40 * n, m); +} +void ebbrt::IxgbeDriver::WriteRdbal_2(uint32_t n, uint32_t m) { + bar0_.Write32(0x0D000 + 0x40 * n, m); +} + +// 8.2.3.8.2 Receive Descriptor Base Address High — RDBAH[n] (0x01004 + 0x40*n, +// n=0...63 and 0x0D004 + 0x40*(n-64), n=64...127; RW) +void ebbrt::IxgbeDriver::WriteRdbah_1(uint32_t n, uint32_t m) { + bar0_.Write32(0x01004 + 0x40 * n, m); +} +void ebbrt::IxgbeDriver::WriteRdbah_2(uint32_t n, uint32_t m) { + bar0_.Write32(0x0D004 + 0x40 * n, m); +} + +// 8.2.3.9.5 Transmit Descriptor Base Address Low — TDBAL[n] (0x06000+0x40*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WriteTdbal(uint32_t n, uint32_t m) { + bar0_.Write32(0x06000 + 0x40 * n, m); +} + +// 8.2.3.9.6 Transmit Descriptor Base Address High — TDBAH[n] (0x06004+0x40*n, +// n=0...127; RW) +void ebbrt::IxgbeDriver::WriteTdbah(uint32_t n, uint32_t m) { + bar0_.Write32(0x06004 + 0x40 * n, m); +} + +// 8.2.3.9.7 Transmit Descriptor Length — TDLEN[n] (0x06008+0x40*n, n=0...127; +// RW) +void ebbrt::IxgbeDriver::WriteTdlen(uint32_t n, uint32_t m) { + bar0_.Write32(0x06008 + 0x40 * n, m); +} + +// 8.2.3.9.8 Transmit Descriptor Head — TDH[n] (0x06010+0x40*n, n=0...127; RO) +void ebbrt::IxgbeDriver::WriteTdh(uint32_t n, uint32_t m) { + bar0_.Write32(0x06010 + 0x40 * n, m); +} +uint16_t ebbrt::IxgbeDriver::ReadTdh(uint32_t n) { + auto reg = bar0_.Read32(0x06010 + 0x40 * n); + return reg & 0xFFFF; +} + +// 8.2.3.9.11 Tx Descriptor Completion Write 
Back Address Low — +// TDWBAL[n] (0x06038+0x40*n, n=0...127; RW) +void ebbrt::IxgbeDriver::WriteTdwbal(uint32_t n, uint32_t m) { + bar0_.Write32(0x06038 + 0x40 * n, m); +} +// 8.2.3.9.12 Tx Descriptor Completion Write Back Address High — +// TDWBAH[n] (0x0603C+0x40*n, n=0...127; RW) +void ebbrt::IxgbeDriver::WriteTdwbah(uint32_t n, uint32_t m) { + bar0_.Write32(0x0603C + 0x40 * n, m); +} + +// 8.2.3.9.9 Transmit Descriptor Tail — TDT[n] (0x06018+0x40*n, n=0...127; RW) +void ebbrt::IxgbeDriver::WriteTdt(uint32_t n, uint32_t m) { + bar0_.Write32(0x06018 + 0x40 * n, m); +} + +// 8.2.3.8.3 Receive Descriptor Length — RDLEN[n] (0x01008 + 0x40*n, n=0...63 +// and 0x0D008 + 0x40*(n-64), n=64...127; RW) +void ebbrt::IxgbeDriver::WriteRdlen_1(uint32_t n, uint32_t m) { + bar0_.Write32(0x01008 + 0x40 * n, m); +} +void ebbrt::IxgbeDriver::WriteRdlen_2(uint32_t n, uint32_t m) { + bar0_.Write32(0x0D008 + 0x40 * n, m); +} + +// 8.2.3.8.7 Split Receive Control Registers — SRRCTL[n] (0x01014 + 0x40*n, +// n=0...63 and 0x0D014 + 0x40*(n-64), n=64...127 / 0x02100 + 4*n, [n=0...15]; +// RW) +void ebbrt::IxgbeDriver::WriteSrrctl_1(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x01014 + 0x40 * n); + bar0_.Write32(0x01014 + 0x40 * n, reg | m); +} +void ebbrt::IxgbeDriver::WriteSrrctlZero(uint32_t n) { + bar0_.Write32(0x01014 + 0x40 * n, 0x0); +} + +// 8.2.3.8.12 RSC Data Buffer Control Register — RSCDBU (0x03028; RW) +void ebbrt::IxgbeDriver::WriteRscdbu(uint32_t m) { + auto reg = bar0_.Read32(0x03028); + bar0_.Write32(0x03028, reg | m); +} + +void ebbrt::IxgbeDriver::WriteSrrctl_1_desctype(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x01014 + 0x40 * n); + bar0_.Write32(0x01014 + 0x40 * n, reg & m); +} + +// 8.2.3.8.8 Receive DMA Control Register — RDRXCTL (0x02F00; RW) +void ebbrt::IxgbeDriver::WriteRdrxctl(uint32_t m) { + auto reg = bar0_.Read32(0x02F00); + bar0_.Write32(0x02F00, reg | m); +} + +void ebbrt::IxgbeDriver::WriteRdrxctlRSCFRSTSIZE(uint32_t m) { + auto reg = 
bar0_.Read32(0x02F00); + bar0_.Write32(0x02F00, reg & m); +} + +uint8_t ebbrt::IxgbeDriver::ReadRdrxctlDmaidone() { + auto reg = bar0_.Read32(0x02F00); + return (reg >> 3) & 0x1; +} + +// 8.2.3.8.9 Receive Packet Buffer Size — RXPBSIZE[n] (0x03C00 + 4*n, n=0...7; +// RW) +void ebbrt::IxgbeDriver::WriteRxpbsize(uint32_t n, uint32_t m) { + bar0_.Write32(0x03C00 + 4 * n, m); +} + +// 8.2.3.9.13 Transmit Packet Buffer Size — TXPBSIZE[n] (0x0CC00 + 0x4*n, +// n=0...7; RW) +void ebbrt::IxgbeDriver::WriteTxpbsize(uint32_t n, uint32_t m) { + bar0_.Write32(0x0CC00 + 0x4 * n, m); +} + +// 8.2.3.9.16 Tx Packet Buffer Threshold — TXPBTHRESH (0x04950+0x4*n, n=0...7; +// RW) +void ebbrt::IxgbeDriver::WriteTxpbThresh(uint32_t n, uint32_t m) { + bar0_.Write32(0x04950 + 0x4 * n, m); +} + +// 8.2.3.22.8 MAC Core Control 0 Register — HLREG0 (0x04240; RW) +void ebbrt::IxgbeDriver::WriteHlreg0(uint32_t m) { + auto reg = bar0_.Read32(0x04240); + bar0_.Write32(0x04240, reg | m); +} + +// 8.2.3.8.5 Receive Descriptor Tail — RDT[n] (0x01018 + 0x40*n, n=0...63 and +// 0x0D018 + 0x40*(n-64), n=64...127; RW) +void ebbrt::IxgbeDriver::WriteRdt_1(uint32_t n, uint32_t m) { + bar0_.Write32(0x01018 + 0x40 * n, m); +} +void ebbrt::IxgbeDriver::WriteRdt_2(uint32_t n, uint32_t m) { + bar0_.Write32(0x0D018 + 0x40 * n, m); +} + +// 8.2.3.8.4 Receive Descriptor Head — RDH[n] (0x01010 + 0x40*n, n=0...63 and +// 0x0D010 + 0x40*(n-64), n=64...127; RO) +void ebbrt::IxgbeDriver::WriteRdh_1(uint32_t n, uint32_t m) { + bar0_.Write32(0x01010 + 0x40 * n, m); +} +void ebbrt::IxgbeDriverRep::WriteRdh_1(uint32_t n, uint32_t m) { + root_.bar0_.Write32(0x01010 + 0x40 * n, m); +} + +uint16_t ebbrt::IxgbeDriver::ReadRdh_1(uint32_t n) { + auto reg = bar0_.Read32(0x01010 + 0x40 * n); + return reg & 0xFFFF; +} + +uint16_t ebbrt::IxgbeDriver::ReadRdt_1(uint32_t n) { + auto reg = bar0_.Read32(0x01018 + 0x40 * n); + return reg & 0xFFFF; +} + +void ebbrt::IxgbeDriver::SwfwSemRelease() { + SwsmSwesmbiClear(); + 
SwsmSmbiClear(); + ebbrt::kprintf("%s\n", __FUNCTION__); +} + +// 8.2.3.5.16 Interrupt Vector Allocation Registers — IVAR[n] (0x00900 + 4*n, +// n=0...63; RW) +void ebbrt::IxgbeDriver::WriteIvarAlloc0(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} +void ebbrt::IxgbeDriver::WriteIvarAllocval0(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} + +void ebbrt::IxgbeDriver::WriteIvarAlloc1(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} +void ebbrt::IxgbeDriver::WriteIvarAllocval1(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} + +void ebbrt::IxgbeDriver::WriteIvarAlloc2(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} +void ebbrt::IxgbeDriver::WriteIvarAllocval2(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} + +void ebbrt::IxgbeDriver::WriteIvarAlloc3(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} +void ebbrt::IxgbeDriver::WriteIvarAllocval3(uint32_t n, uint32_t m) { + auto reg = bar0_.Read32(0x00900 + 4 * n); + bar0_.Write32(0x00900 + 4 * n, reg | m); +} + +// 8.2.3.10.2 DCB Transmit Descriptor Plane Control and Status — RTTDCS +// (0x04900; RW) DMA-Tx +void ebbrt::IxgbeDriver::WriteRttdcs(uint32_t m) { + auto reg = bar0_.Read32(0x04900); + bar0_.Write32(0x04900, reg | m); +} +void ebbrt::IxgbeDriver::WriteRttdcsArbdisEn(uint32_t m) { + auto reg = bar0_.Read32(0x04900); + bar0_.Write32(0x04900, reg & m); +} + +// 8.2.3.10.3 DCB Transmit Packet Plane Control and Status- RTTPCS (0x0CD00; RW) +void ebbrt::IxgbeDriver::WriteRttpcs(uint32_t m) { bar0_.Write32(0x0CD00, m); } + +// 8.2.3.12.5 Security Rx 
Control — SECRXCTRL (0x08D00; RW) +void ebbrt::IxgbeDriver::WriteSecrxctrl_Rx_Dis(uint32_t m) { + auto reg = bar0_.Read32(0x08D00); + if (m) { + bar0_.Write32(0x08D00, reg | m); + } else { + bar0_.Write32(0x08D00, reg & ~(0x1 << 1)); + } +} + +// 8.2.3.12.6 Security Rx Status — SECRXSTAT (0x08D04; RO) +uint8_t ebbrt::IxgbeDriver::ReadSecrxstat_Sr_Rdy() { + auto reg = bar0_.Read32(0x08D04); + return reg & 0x1; +} + +// 8.2.3.23.59 Total Packets Received — TPR (0x040D0; RC) +uint32_t ebbrt::IxgbeDriver::ReadTpr() { + auto reg = bar0_.Read32(0x040D0); + ebbrt::kprintf("%s %d\n", __FUNCTION__, reg); + return reg; +} + +// 8.2.3.23.26 Good Packets Received Count — GPRC (0x04074; RO) +uint32_t ebbrt::IxgbeDriver::ReadGprc() { + auto reg = bar0_.Read32(0x04074); + ebbrt::kprintf("%s %d\n", __FUNCTION__, reg); + return reg; +} + +bool ebbrt::IxgbeDriver::SwfwSemAcquire() { + // polls SWSM.SMBI until 0b is read or timeout + // TODO: timeout after 10 ms + while (SwsmSmbiRead()) + ; + + // writes 1b to SWSM.SWESMBI bit + SwsmSwesmbiSet(); + + // polls SWSM.SWESMBI bit until read as 1b + // TODO: timeout of 3 secs + while (SwsmSwesmbiRead()) + ; + + return true; +} + +// 10.5.4 Software and Firmware Synchronization +bool ebbrt::IxgbeDriver::SwfwLockPhy() { + bool good = false; + +again: + if (!SwfwSemAcquire()) { + ebbrt::kabort("SwfwSemAcquire failed\n"); + } else { + ebbrt::kprintf("SWSM Sem acquired\n"); + } + + if ((ReadStatusLanId() == 0) && (ReadSwfwSyncSmBits(0x2) == 0) // SW_PHY_SM0 + && (ReadSwfwSyncSmBits(0x40) == 0)) // FW_PHY_SM0 + { + WriteSwfwSyncSmBits(0x2); // SW_PHY_SM0 + ebbrt::kprintf("SW_PHY_SMO written\n"); + good = true; + } else if ((ReadSwfwSyncSmBits(0x4) == 0) // SW_PHY_SM1 + && (ReadSwfwSyncSmBits(0x80) == 0)) // FW_PHY_SM1 + { + WriteSwfwSyncSmBits(0x4); // SW_PHY_SM1 + ebbrt::kprintf("SW_PHY_SM1 written\n"); + good = true; + } + + SwfwSemRelease(); + + if (!good) { + ebbrt::kprintf("%s: failed, trying again\n", __FUNCTION__); + 
ebbrt::clock::SleepMilli(20); + goto again; + } + + return true; +} +void ebbrt::IxgbeDriver::SwfwUnlockPhy() { + if (!SwfwSemAcquire()) { + ebbrt::kabort("SwfwSemAcquire failed\n"); + } else { + ebbrt::kprintf("SWSM Sem acquired\n"); + } + + if (ReadStatusLanId() == 0) { + WriteSwfwSyncSmBits2(~0x2); // SW_PHY_SM0 + } else { + WriteSwfwSyncSmBits2(~0x4); // SW_PHY_SM1 + } + + SwfwSemRelease(); + + ebbrt::clock::SleepMilli(10); +} + +void ebbrt::IxgbeDriver::StopDevice() { + ebbrt::kprintf("%s ", __PRETTY_FUNCTION__); + + // disable rx + WriteRxctrl(0x0); + + // disable tx + WriteDmatxctl(0xFFFFFFFE); + + // disable interrupts + WriteEimc(0x7FFFFFFF); + ReadEicr(); + + // disable each rx and tx queue + for (auto i = 0; i < 128; i++) { + // Bit 26, transmit software flush + WriteTxdctl(i, 0x04000000); + + if (i < 64) { + WriteRxdctl_1(i, 0x0); + } else { + WriteRxdctl_2(i - 64, 0x0); + } + } + + // from arrakis + ebbrt::clock::SleepMilli(2); + + // Master disable procedure + WriteCtrl(0x4); // PCIe Master Disable + while (ReadStatusPcieMes() != 1) + ; + ebbrt::kprintf("Ixgbe 82599 stop done\n"); +} + +void ebbrt::IxgbeDriver::GlobalReset() { + ebbrt::kprintf("%s ", __PRETTY_FUNCTION__); + + WriteCtrl(0x8); // Link Reset + WriteCtrl(0x4000000); // Device Reset + + // Note: To ensure that a global device reset has fully completed and that the + // 82599 responds to subsequent accesses, programmers must wait + // before approximately 1 ms after setting attempting to check + // if the bit has cleared or to access (read or write) any other device + // register. 
+ ebbrt::clock::SleepMilli(2); + ReadCtrl(); +} + +/** + * ixgbe_init_hw_generic - Generic hardware initialization + * @hw: pointer to hardware structure + * + * Initialize the hardware by resetting the hardware, filling the bus info + * structure and media type, clears all on chip counters, initializes receive + * address registers, multicast table, VLAN filter table, calls routine to set + * up link and flow control settings, and leaves transmit and receive units + * disabled and uninitialized + **/ +void ebbrt::IxgbeDriver::Init() { + uint64_t d_mac; + + ebbrt::kprintf("%s ", __PRETTY_FUNCTION__); + bar0_.Map(); // allocate virtual memory + ebbrt::clock::SleepMilli(200); + ebbrt::kprintf("Sleep 200 ms\n"); + + StopDevice(); + GlobalReset(); + ebbrt::clock::SleepMilli(50); + GlobalReset(); + ebbrt::clock::SleepMilli(250); + + // disable interrupts + WriteEimc(0x7FFFFFFF); + ReadEicr(); + + // Let firmware know we have taken over + WriteCtrlExt(0x1 << 28); // DRV_LOAD + + // No snoop disable from FreeBSD ?? 
+ WriteCtrlExt(0x1 << 16); // NS_DIS + + // Initialize flow-control registers + for (auto i = 0; i < 8; i++) { + if (i < 4) { + WriteFcttv(i, 0x0); + } + WriteFcrtl(i, 0x0); + WriteFcrth(i, 0x0); + } + + WriteFcrtv(0x0); + WriteFccfg(0x0); + + // Initialize Phy + PhyInit(); + + // Wait for EEPROM auto read + while (ReadEecAutoRd() == 0) { + }; // TODO: Timeout + ebbrt::kprintf("EEPROM auto read done\n"); + + ebbrt::clock::SleepMilli(200); + d_mac = ReadRal(0) | ((uint64_t)ReadRah(0) << 32); + // ebbrt::kprintf("mac %p valid = %x\n", d_mac, ReadRahAv(0)); + for (auto i = 0; i < 6; i++) { + mac_addr_[i] = (d_mac >> (i * 8)) & 0xFF; + } + ebbrt::kprintf( + "Mac Address: %02X:%02X:%02X:%02X:%02X:%02X\n", + static_cast(mac_addr_[0]), static_cast(mac_addr_[1]), + static_cast(mac_addr_[2]), static_cast(mac_addr_[3]), + static_cast(mac_addr_[4]), static_cast(mac_addr_[5])); + + // Wait for DMA initialization + while (ReadRdrxctlDmaidone() == 0) { + }; // TODO: Timeout + + // Wait for link to come up + while (!ReadLinksLinkUp()) { + }; // TODO: timeout + ebbrt::kprintf("Link is up\n"); + ebbrt::clock::SleepMilli(50); + + // clears on read + WriteEicr(0xFFFFFFFF); + + /* setup msix */ + // switch to msix mode + WriteGpie(0x1 << 4); // Multiple_MSIX + WriteGpie(0x1 << 31); // PBA_support + WriteGpie(0x1 << 5); // OCD + + // TODO: Set up management interrupt handler + + // Enable auto masking of interrupt + WriteGpie(0x1 << 30); // EIAME + +#ifdef RSC_EN + // TODO: RSC delay value, just a guess at (1 + 1) * 4us = 8 us + // Recommended value based on 7.3.2.1.1 + WriteGpie(0x1 << 11); +#endif + + /* FreeBSD: + * ixgbe_common.c - s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw) + * Places the MAC address in receive address register 0 and clears the rest + * of the receive address registers. Clears the multicast table. Assumes + * the receiver is in reset when the routine is called. + */ + // Initialize RX filters + + /* Zero out the other receive addresses. 
*/ + for (auto i = 1; i < 128; i++) { + WriteRal(i, 0x0); + WriteRah(i, 0x0); + } + + // clear mta + for (auto i = 0; i < 128; i++) { + WriteMta(i, 0x0); + } + + // No init uta tables? + + // set vlan filter table + for (auto i = 0; i < 128; i++) { + WriteVfta(i, 0x0); + } + + for (auto i = 0; i < 64; i++) { + // WritePfvlvf(i, 0x1 << 31); // VI_En bit 31 + WritePfvlvf(i, 0x0); + WritePfvlvfb(i, 0x0); + // WritePsrtypeZero(0x0); + } + + // PF Unicast Table Array + for (auto i = 0; i < 128; i++) { + WritePfuta(i, 0x0); + } + + // not sure why initing these tables? + for (auto i = 0; i < 128; i++) { + WriteFhft_1(i, 0x0); + if (i < 64) { + WriteFhft_2(i, 0x0); + } + } + + // enable ECC Reporting TODO - causes interrupts to be broken?? + // WriteRxfeccerr0(0x1 << 9); + + /**** Initialize RX filters ****/ + // FreeBSD if_ix.c - ixgbe_initialize_receive_units - Enable broadcast accept + WriteFctrl(0x1 << 10); // Set BAM = 1 + + // TODO VLNCTRL + WriteMcstctrl(0x0); + +#ifndef RSC_EN + WriteRxcsum(0x1 << 12); // IP payload checksum enable +#endif +// TODO RQTC + +#ifdef RSC_EN + WriteRfctl(0x0); +#else + WriteRfctl(0x1 << 5); +#endif + + for (auto i = 0; i < 256; i++) { + WriteMpsar(i, 0x0); + } + + // TODO RSSRK + + for (auto i = 0; i < 32; i++) { + WriteReta(i, 0x0); + } + + for (auto i = 0; i < 128; i++) { + WriteFtqf(i, 0x0); + WriteSaqf(i, 0x0); + WriteDaqf(i, 0x0); + WriteSdpqf(i, 0x0); + } + + // TODO SYNQF + // TODO ETQF + // TODO ETQS + + // Make sure RX CRC strip enabled in HLREG0 and RDRXCTL + WriteRdrxctlRSCFRSTSIZE(~(0x1F << 17)); // s/w set to 0 + WriteRdrxctl(0x1 << 1); // CRCStrip + WriteHlreg0(0x1 << 1); // CRCStrip + WriteRdrxctl(0x1 << 25); // RSCACKC s/w set to 1 + WriteRdrxctl(0x1 << 26); // FCOE_WRFIX s/w set to 1 + // TODO RSCDBU + + /***** END RX FILTER *****/ + + // Configure buffers etc. according to specification + // Section 4.6.11.3.4 (no DCB, no virtualization) + + /* Transmit Init: Set RTTDCS.ARBDIS to 1b. 
+ * Program DTXMXSZRQ, TXPBSIZE, TXPBTHRESH, MTQC, and MNGTXMAP, according + * to the DCB and virtualization modes (see Section 4.6.11.3). + * Clear RTTDCS.ARBDIS to 0b. + */ + WriteRttdcs(0x1 << 6); + WriteDtxmxszrq(0xFFF); + WriteTxpbsize(0, 0xA0 << 10); + WriteTxpbThresh(0, 0xA0); + for (auto i = 1; i < 8; i++) { + WriteTxpbsize(i, 0x0); + WriteTxpbThresh(i, 0x0); + } + WriteMtqc(0x0); + WriteMngtxmap(0x0); + WriteRttdcsArbdisEn(~(0x1 << 6)); + + /* Receive Init: Program RXPBSIZE, MRQC, PFQDE, RTRUP2TC, MFLCN.RPFCE, + * and MFLCN.RFCE according to the DCB and virtualization modes + */ + WriteRxpbsize(0, 0x200 << 10); + for (auto i = 1; i < 8; i++) { + WriteRxpbsize(i, 0x0); + } + WriteMrqc(0x0); + WritePfqde(0x0); + WriteRtrup2tc(0x0); + WriteMflcn(0x0 << 2); + WriteMflcn(0x1 << 3); + // end DCB off, VT off + + // TODO Enable Jumbo Packets + + // disable relaxed ordering + for (auto i = 0; i < 128; i++) { + WriteDcaTxctrlTxdescWbro(i, ~(0x1 << 11)); // Txdesc_Wbro + + if (i < 64) { + WriteDcaRxctrl_1( + i, ~(0x1 << 15)); // Rx split header relax order enable, bit 15 + WriteDcaRxctrl_1( + i, ~(0x1 << 13)); // Rx data Write Relax Order Enable, bit 13 + } else { + WriteDcaRxctrl_2( + i - 64, ~(0x1 << 15)); // Rx split header relax order enable, bit 15 + WriteDcaRxctrl_2( + i - 64, ~(0x1 << 13)); // Rx data Write Relax Order Enable, bit 13 + } + } + +#ifdef DCA_ENABLE + // DCA_MODE = DCA 1.0 + WriteDcaCtrl(0x1 << 1); +#endif +} + +void ebbrt::IxgbeDriver::FinishSetup() { + // No snoop disable from FreeBSD ?? 
+ WriteCtrlExt(0x1 << 16); // NS_DIS + for (size_t i = 0; i < Cpu::Count(); i++) { + WriteDcaRxctrlClear(i, ~(0x1 << 12)); // clear bit 12 + } + WriteEims(0xFFFF); +} + +// initializes per core rx/tx queues and interrupts +void ebbrt::IxgbeDriver::SetupMultiQueue(uint32_t i) { + if (!rcv_vector) { + rcv_vector = + event_manager->AllocateVector([this]() { ebb_->ReceivePoll(); }); + } + + // allocate memory for descriptor rings + ixgmq[i].reset(new e10Kq(i, Cpu::GetMyNode())); + + // not going to set up receive descripts greater than 63 + ebbrt::kbugon(i >= 64, "can't set up descriptors greater than 63\n"); + + // update register RDBAL, RDBAH with receive descriptor base address + WriteRdbal_1(i, ixgmq[i]->rxaddr_ & 0xFFFFFFFF); + WriteRdbah_1(i, (ixgmq[i]->rxaddr_ >> 32) & 0xFFFFFFFF); + + // set to number of bytes allocated for receive descriptor ring + WriteRdlen_1(i, ixgmq[i]->rx_size_bytes_); + + // program srrctl register + WriteSrrctlZero(i); + WriteSrrctl_1(i, RXBUFSZ / 1024); // bsizepacket + WriteSrrctl_1(i, (128 / 64) << 8); // bsizeheader + +// TODO headsplit adv +#ifdef RSC_EN + WriteSrrctl_1(i, 0x1 << 25); // desctype adv +#else + // legacy is default?? + WriteSrrctl_1(i, ~(0x7 << 25)); // desctype legacy +#endif + + WriteSrrctl_1(i, 0x1 << 28); // Drop_En + +#ifdef RSC_EN + // RSC set up + WriteRscctl(i, 0x3 << 2); // MAXDESC + WriteRscctl(i, 0x1); // RSCEN + WritePsrtypeZero(i); + WritePsrtype(i, 0x1 << 4); // Split received TCP packets after TCP header. +#endif + + // Set head and tail pointers + WriteRdt_1(i, 0x0); + WriteRdh_1(i, 0x0); + + // Set Enable bit in receive queue + WriteRxdctl_1_enable(i, 0x1 << 25); + // TODO: Timeout + while (ReadRxdctl_1_enable(i) == 0) + ; + + // setup RX interrupts for queue i + dev_.SetMsixEntry(i, rcv_vector, ebbrt::Cpu::GetByIndex(i)->apic_id()); + + // don't set up interrupts for tx since we have head writeback?? 
+ auto qn = i / 2; // put into correct IVAR + + if ((i % 2) == 0) { // check if 2xN or 2xN + 1 + WriteIvarAlloc0(qn, i); // rx interrupt allocation corresponds to index i * + // 2 in MSI-X table + WriteIvarAllocval0(qn, 0x1 << 7); + } else { + WriteIvarAlloc2(qn, i << 16); + WriteIvarAllocval2(qn, 0x1 << 23); + } + + // must be greater than rsc delay + // WriteEitr(i, 0x80 << 3); // 7 * 2us = 14 us + WriteEitr(i, 0x7 << 3); // 16 * 2us = 32 us + + // 7.3.1.4 - Note that there are no EIAC(1)...EIAC(2) registers. + // The hardware setting for interrupts 16...63 is always auto clear. + if (i < 16) { + // enable auto clear + WriteEiac(0x1 << i); + } + + // enable interrupt + WriteEimsn(i / 32, (0x1 << (i % 32))); + + // make sure interupt is cleared + if (i < 16) { + WriteEicr(0x1 << i); + } + + // Enable RX + // disable RX_DIS + WriteSecrxctrl_Rx_Dis(0x1 << 1); + // TODO Timeout + while (ReadSecrxstat_Sr_Rdy() == 0) + ; + WriteRxctrl(0x1); + // enable RX_DIS + WriteSecrxctrl_Rx_Dis(0x0 << 1); + + // add buffer to each descriptor + for (size_t j = 0; j < NRXDESCS - 1; j++) { + auto rxphys = + reinterpret_cast((ixgmq[i]->circ_buffer_[j])->MutData()); + auto tail = ixgmq[i]->rx_tail_; + +// update buffer address for descriptor +#ifdef RSC_EN + rdesc_adv_rf_t* tmp; + tmp = reinterpret_cast(&(ixgmq[i]->rx_ring_[tail])); + + tmp->packet_buffer = rxphys; + // TODO only use this if enabling header splitting? 
+ tmp->header_buffer = 0; +#else + ixgmq[i]->rx_ring_[tail].buffer_address = rxphys; +#endif + + ixgmq[i]->rx_tail_ = (tail + 1) % ixgmq[i]->rx_size_; + } + + // bump tail pts via register rdt to enable descriptor fetching by setting to + // length of ring minus one + WriteRdt_1(i, ixgmq[i]->rx_tail_); + +#ifdef DCA_ENABLE + auto myapic = ebbrt::Cpu::GetByIndex(i)->apic_id(); + + WriteDcaRxctrl(i, 0x1 << 5); // Descriptor DCA EN + WriteDcaRxctrl(i, 0x1 << 6); // Rx Header DCA EN + WriteDcaRxctrl(i, 0x1 << 7); // Payload DCA EN + + WriteDcaRxctrl(i, myapic << 24); // CPUID = apic id + + WriteDcaTxctrl(i, 0x1 << 5); // DCA Enable + WriteDcaTxctrl(i, myapic << 24); // CPUID = apic id +#endif + + // program base address registers + WriteTdbal(i, ixgmq[i]->txaddr_ & 0xFFFFFFFF); + WriteTdbah(i, (ixgmq[i]->txaddr_ >> 32) & 0xFFFFFFFF); + + // length must also be 128 byte aligned + WriteTdlen(i, ixgmq[i]->tx_size_bytes_); + +#ifdef TX_HEAD_WB + WriteTdwbal(i, (ixgmq[i]->txhwbaddr_ & 0xFFFFFFFF) | 0x1); + WriteTdwbah(i, (ixgmq[i]->txhwbaddr_ >> 32) & 0xFFFFFFFF); +#endif + + // enable transmit path + WriteDmatxctl_te(0x1); + + // transmit queue enable + WriteTxdctl(i, 0x1 << 25); + + // poll until set, TODO: Timeout + while (ReadTxdctl_enable(i) == 0) + ; + + // TODO: set up dca txctrl FreeBSD? 
+ // clear TXdescWBROen + WriteDcaTxctrlTxdescWbro(i, ~(0x1 << 11)); +} + +// after packet received, need to make sure device can reuse +void ebbrt::IxgbeDriverRep::ReclaimRx() { + for (size_t i = 0; i < ixgmq_.rsc_chain_.size(); i++) { + // bump tail ptr + ixgmq_.rx_tail_ = (ixgmq_.rx_tail_ + 1) % ixgmq_.rx_size_; + auto n = ixgmq_.rsc_chain_[i].first; + + // reset buffer + ixgmq_.rx_ring_[n].raw[0] = 0; + ixgmq_.rx_ring_[n].raw[1] = 0; + // allocate new rx buffer + ixgmq_.circ_buffer_[n] = std::move(MakeUniqueIOBuf(IxgbeDriver::RXBUFSZ)); + auto rxphys = + reinterpret_cast((ixgmq_.circ_buffer_[n])->MutData()); + // update buffer with new adder + ixgmq_.rx_ring_[n].buffer_address = rxphys; + } +} + +// keep check for new packets to receive +// may wait for RSC to be done +uint32_t ebbrt::IxgbeDriverRep::GetRxBuf(uint32_t* len, uint64_t* bAddr, + uint64_t* rxflag, bool* process_rsc, + uint32_t* rnt) { +#ifdef RSC_EN + rdesc_adv_wb_t* tmp; + tmp = reinterpret_cast(&(ixgmq_.rx_ring_[ixgmq_.rx_head_])); + + // if rx packet not ready + if (!(tmp->dd)) { + return 1; + } + + auto rsccnt = tmp->rsccnt; + + // not RSC, handled normally + if (rsccnt == 0 && tmp->eop) { + *len = tmp->pkt_len; + + /* set rx flags */ + // TCP/UDP checksum + if (tmp->l4i) { + *rxflag |= RXFLAG_L4CS; + if (!(tmp->l4e)) { + *rxflag |= RXFLAG_L4CS_VALID; + } + } + + // Ipv4 checksum + if (tmp->ipcs) { + *rxflag |= RXFLAG_IPCS; + if (!(tmp->ipe)) { + *rxflag |= RXFLAG_IPCS_VALID; + } + } + + // reset descriptor + ixgmq_.rx_ring_[ixgmq_.rx_head_].raw[0] = 0; + ixgmq_.rx_ring_[ixgmq_.rx_head_].raw[1] = 0; + + // bump head ptr + ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; + + return 0; + } + // not sure what case this is, no context started, eop is set but rsccnt > 0 + else if (rsccnt > 0 && tmp->eop && !(ixgmq_.rsc_used)) { + kbugon(tmp->next_descriptor_ptr > ixgmq_.rx_size_, + "RSC: NEXTP > RX_SIZE\n"); + + *len = tmp->pkt_len; + + /* set rx flags */ + // TCP/UDP checksum + if 
(tmp->l4i) { + *rxflag |= RXFLAG_L4CS; + if (!(tmp->l4e)) { + *rxflag |= RXFLAG_L4CS_VALID; + } + } + + // Ipv4 checksum + if (tmp->ipcs) { + *rxflag |= RXFLAG_IPCS; + if (!(tmp->ipe)) { + *rxflag |= RXFLAG_IPCS_VALID; + } + } + + // reset descriptor + ixgmq_.rx_ring_[ixgmq_.rx_head_].raw[0] = 0; + ixgmq_.rx_ring_[ixgmq_.rx_head_].raw[1] = 0; + + // bump head ptr + ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; + + return 0; + } + // START NEW RSC CONTEXT + else if (rsccnt > 0 && !(tmp->eop) && !(ixgmq_.rsc_used)) { + kbugon(tmp->next_descriptor_ptr > ixgmq_.rx_size_, + "RSC: NEXTP > RX_SIZE\n"); + + ixgmq_.rsc_used = true; + ixgmq_.rsc_chain_.clear(); + ixgmq_.rsc_chain_.emplace_back( + std::make_pair(ixgmq_.rx_head_, static_cast(tmp->pkt_len))); + // bump head ptr + ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; + + return 1; + } + // APPEND TO EXISTING RSC CONTEXT + else if (rsccnt > 0 && !(tmp->eop) && ixgmq_.rsc_used) { + kbugon(tmp->next_descriptor_ptr > ixgmq_.rx_size_, + "RSC: NEXTP > RX_SIZE\n"); + + ixgmq_.rsc_chain_.emplace_back( + std::make_pair(ixgmq_.rx_head_, static_cast(tmp->pkt_len))); + + // bump head ptr + ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; + + return 1; + } + // LAST RSC CONTEXT + else if (rsccnt > 0 && tmp->eop && ixgmq_.rsc_used) { + ixgmq_.rsc_used = false; + + /* set rx flags */ + // TCP/UDP checksum + if (tmp->l4i) { + *rxflag |= RXFLAG_L4CS; + if (!(tmp->l4e)) { + *rxflag |= RXFLAG_L4CS_VALID; + } + } + + // Ipv4 checksum + if (tmp->ipcs) { + *rxflag |= RXFLAG_IPCS; + if (!(tmp->ipe)) { + *rxflag |= RXFLAG_IPCS_VALID; + } + } + + ixgmq_.rsc_chain_.emplace_back( + std::make_pair(ixgmq_.rx_head_, static_cast(tmp->pkt_len))); + + // bump head ptr + ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; + + *process_rsc = true; + + return 0; + } else { + // shouldn't hit here + ebbrt::kabort("%s Not sure what state\n", __FUNCTION__); + } + +#else + // no RSC so just get one packet at a time + 
int c = static_cast(Cpu::GetMine()); + rdesc_legacy_t tmp; + tmp = ixgmq_.rx_ring_[ixgmq_.rx_head_]; + + if (tmp.dd && tmp.eop) { + *len = tmp.length; + + /* set rx flags */ + // TCP/UDP checksum + if (tmp.l4cs) { + *rxflag |= RXFLAG_L4CS; + if (!(tmp.tcpe)) { + *rxflag |= RXFLAG_L4CS_VALID; + } + } + + // Ipv4 checksum + if (tmp.ipcs) { + *rxflag |= RXFLAG_IPCS; + if (!(tmp.ipe)) { + *rxflag |= RXFLAG_IPCS_VALID; + } + } + + // reset descriptor + ixgmq_.rx_ring_[ixgmq_.rx_head_].raw[0] = 0; + ixgmq_.rx_ring_[ixgmq_.rx_head_].raw[1] = 0; + + // bump head ptr + ixgmq_.rx_head_ = (ixgmq_.rx_head_ + 1) % ixgmq_.rx_size_; + + return 0; + } +#endif + + return 1; +} + +void ebbrt::IxgbeDriverRep::ReceivePoll() { + uint32_t len; + uint64_t bAddr; + uint64_t rxflag; + bool process_rsc; + uint32_t count; + uint32_t rnt; + static bool ret = false; + process_rsc = false; + +retry: + rxflag = 0; + count = 0; + rnt = 0; + + // get address of buffer with data + while (GetRxBuf(&len, &bAddr, &rxflag, &process_rsc, &rnt) == 0) { + // hit last rsc context, start to process all buffers + if (process_rsc) { + ret = true; + process_rsc = false; + count++; + + auto n = ixgmq_.rsc_chain_[0].first; + auto rsclen = 0; + + // TODO hack - need to set actual length of data else there'll be 0's + // attached + ixgmq_.circ_buffer_[n]->SetLength(ixgmq_.rsc_chain_[0].second); + + rsclen += ixgmq_.rsc_chain_[0].second; + + // TODO - maybe find better way to rewrite this + auto b = std::move(ixgmq_.circ_buffer_[n]); + + for (size_t x = 1; x < ixgmq_.rsc_chain_.size(); x++) { + count++; + + auto n = ixgmq_.rsc_chain_[x].first; + // TODO hack - need to set actual length of data + ixgmq_.circ_buffer_[n]->SetLength(ixgmq_.rsc_chain_[x].second); + rsclen += ixgmq_.rsc_chain_[x].second; + b->PrependChain(std::move(ixgmq_.circ_buffer_[n])); + } + + ReclaimRx(); + + root_.itf_.Receive(std::move(b), rxflag); + } else { + // done with buffer addr above, now to reuse it + auto tail = ixgmq_.rx_tail_; + + // 
bump tail ptr + ixgmq_.rx_tail_ = (tail + 1) % ixgmq_.rx_size_; + + count++; + + if (count > 0) { + auto tail = ixgmq_.rx_tail_; + + // TODO hack - need to set actual length of data otherwise it'll send + // leftover 0's + ixgmq_.circ_buffer_[tail]->SetLength(len); + + // TODO hack - need to reallocate IOBuf after its been moved to Receive + auto b = std::move(ixgmq_.circ_buffer_[tail]); + + ixgmq_.circ_buffer_[tail] = + std::move(MakeUniqueIOBuf(IxgbeDriver::RXBUFSZ)); + auto rxphys = + reinterpret_cast((ixgmq_.circ_buffer_[tail])->MutData()); + + ixgmq_.rx_ring_[tail].buffer_address = rxphys; + + root_.itf_.Receive(std::move(b), rxflag); + } + } + } + + // TODO: Update tail register here or above? + if (count > 0) { + // update reg + WriteRdt_1(Cpu::GetMine(), ixgmq_.rx_tail_); + } + + // keep looping back once we see start of rsc context + if (likely(ret)) { + goto retry; + } +} + +ebbrt::IxgbeDriverRep::IxgbeDriverRep(const IxgbeDriver& root) + : root_(root), ixgq_(root_.GetQueue()), + ixgmq_(root.GetMultiQueue(Cpu::GetMine())), + receive_callback_([this]() { ReceivePoll(); }) { + this->ReceivePoll(); +} + +uint16_t ebbrt::IxgbeDriverRep::ReadRdh_1(uint32_t n) { + auto reg = root_.bar0_.Read32(0x01010 + 0x40 * n); + return reg & 0xFFFF; +} +uint16_t ebbrt::IxgbeDriverRep::ReadRdt_1(uint32_t n) { + auto reg = root_.bar0_.Read32(0x01018 + 0x40 * n); + return reg & 0xFFFF; +} + +void ebbrt::IxgbeDriverRep::WriteRdt_1(uint32_t n, uint32_t m) { + root_.bar0_.Write32(0x01018 + 0x40 * n, m); +} + +void ebbrt::IxgbeDriverRep::Run() { + while (1) { + ReceivePoll(); + } +} +void ebbrt::IxgbeDriverRep::WriteTdt_1(uint32_t n, uint32_t m) { + root_.bar0_.Write32(0x06018 + 0x40 * n, m); +} + +// 8.2.3.5.9 Extended Interrupt Mask Clear Registers — EIMC[n] +// (0x00AB0 + 4*(n-1), n=1...2; WO) +void ebbrt::IxgbeDriverRep::WriteEimcn(uint32_t n, uint32_t m) { + auto reg = root_.bar0_.Read32(0x00AB0 + 4 * n); + root_.bar0_.Write32(0x00AB0 + 4 * n, reg | m); +} diff --git 
a/src/native/IxgbeDriver.h b/src/native/IxgbeDriver.h new file mode 100644 index 00000000..46670a2d --- /dev/null +++ b/src/native/IxgbeDriver.h @@ -0,0 +1,473 @@ +// Copyright Boston University SESA Group 2013 - 2017. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#ifndef BAREMETAL_SRC_INCLUDE_EBBRT_IXGBE_DRIVER_H_ +#define BAREMETAL_SRC_INCLUDE_EBBRT_IXGBE_DRIVER_H_ + +#include "../Align.h" +#include "../MulticoreEbb.h" +#include "../SpinLock.h" +#include "../StaticIOBuf.h" +#include "../UniqueIOBuf.h" +#include "Debug.h" +#include "Fls.h" +#include "Ixgbe.h" +#include "Net.h" +#include "PageAllocator.h" +#include "Pci.h" +#include "Pfn.h" +#include "SlabAllocator.h" + +// Receive Side Coalescing (RSC) enabled +#define RSC_EN +// Direct Cache Access (DCA) enabled +#define DCA_ENABLE +// Transmit Header Writeback enabled +#define TX_HEAD_WB + +namespace ebbrt { + +// Per-core receive and transmit queue +typedef struct { + rdesc_legacy_t* rx_ring; + size_t rx_head; + size_t rx_tail; + size_t rx_size; + + tdesc_legacy_t* tx_ring; + uint32_t* tx_head; + size_t tx_tail; + size_t tx_last_tail; + size_t tx_size; + bool* tx_isctx; + + // buffers holding packet data + std::vector> circ_buffer; +} e10k_queue_t; + +class IxgbeDriverRep; + +class IxgbeDriver : public EthernetDevice { + public: + explicit IxgbeDriver(pci::Device& dev) + : itf_(network_manager->NewInterface(*this)), dev_(dev), + bar0_(dev.GetBar(0)) { + dev_.SetBusMaster(true); + + // set up interrupts, polling won't work after this + auto msix = dev_.MsixEnable(); + kbugon(!msix, "Ixgbe without msix is unsupported\n"); + + // each core gets a queue struct + ixgmq.resize(Cpu::Count()); + } + + static void Create(pci::Device& dev); + static bool Probe(pci::Device& dev) { + if (dev.GetVendorId() == kIxgbeVendorId && + dev.GetDeviceId() == kIxgbeDeviceId && dev.GetFunc() == 0) { + 
IxgbeDriver::Create(dev); + return true; + } + return false; + } + + void Run(); + void Send(std::unique_ptr buf, PacketInfo pinfo) override; + const EthernetAddress& GetMacAddress() override; + + protected: + static const constexpr uint16_t kIxgbeVendorId = 0x8086; + static const constexpr uint16_t kIxgbeDeviceId = 0x10F8; // 0x10FB; + + /* FreeBSD: + * RxDescriptors Valid Range: 64-4096 Default Value: 256 This value is the + * number of receive descriptors allocated for each RX queue. Increasing this + * value allows the driver to buffer more incoming packets. Each descriptor + * is 16 bytes. A receive buffer is also allocated for each descriptor. + * + * Note: with 8 rings and a dual port card, it is possible to bump up + * against the system mbuf pool limit, you can tune nmbclusters + * to adjust for this. + */ + static const constexpr uint32_t NTXDESCS = 256; + static const constexpr uint32_t NRXDESCS = 256; + // static const constexpr uint32_t NTXDESCS = 4096; + // static const constexpr uint32_t NRXDESCS = 4096; + static const constexpr uint32_t RXBUFSZ = 4096; + // static const constexpr uint32_t RXBUFSZ = 16384; + + // Class with per core queue data structures + class e10Kq { + public: + e10Kq(size_t idx, Nid nid) + : rx_head_(0), rx_tail_(0), rx_size_(NRXDESCS), tx_tail_(0), + tx_last_tail_(0), tx_size_(NTXDESCS), idx_(idx), rxflag_(0), + rsc_used(false), hanc{0} { + + circ_buffer_.reserve(NRXDESCS); + for (uint32_t k = 0; k < NRXDESCS; k++) { + circ_buffer_.emplace_back(MakeUniqueIOBuf(RXBUFSZ, true)); + } + + // rsc_chain_ is a map between receive descriptor number and + // packet len, need packet len to extract out + // packet data else code will read redundant + // zeros if packet len does not use full buffer + // TODO: should be optimized + rsc_chain_.reserve(NRXDESCS); + + // RX ring buffer allocation + auto sz = align::Up(sizeof(rdesc_legacy_t) * NRXDESCS, 4096); + auto order = Fls(sz - 1) - pmem::kPageShift + 1; + auto page = 
page_allocator->Alloc(order, nid); + kbugon(page == Pfn::None(), "ixgbe: page allocation failed in %s", + __FUNCTION__); + auto addr = reinterpret_cast(page.ToAddr()); + memset(addr, 0, sz); + rx_ring_ = static_cast(addr); + + // TX ring buffer allocation + sz = align::Up(sizeof(tdesc_legacy_t) * NTXDESCS, 4096); + order = Fls(sz - 1) - pmem::kPageShift + 1; + page = page_allocator->Alloc(order, nid); + kbugon(page == Pfn::None(), "ixgbe: page allocation failed in %s", + __FUNCTION__); + addr = reinterpret_cast(page.ToAddr()); + memset(addr, 0, sz); + tx_ring_ = static_cast(addr); + + // TX adv context buffer allocation + sz = align::Up(sizeof(bool) * NTXDESCS, 4096); + order = Fls(sz - 1) - pmem::kPageShift + 1; + page = page_allocator->Alloc(order, nid); + kbugon(page == Pfn::None(), "ixgbe: page allocation failed in %s", + __FUNCTION__); + addr = reinterpret_cast(page.ToAddr()); + memset(addr, 0, sz); + tx_isctx_ = static_cast(addr); + +#ifdef TX_HEAD_WB + // TODO: not sure how much exactly to allocate for head wb addr + tx_head_ = (uint32_t*)malloc(4 * sizeof(uint32_t)); + memset(tx_head_, 0, 4 * sizeof(uint32_t)); + txhwbaddr_ = reinterpret_cast(tx_head_); + // txhwbaddr must be 4-byte (dword) aligned: the low two address bits must be zero + ebbrt::kbugon((txhwbaddr_ & 0x3) != 0, "txhwbaddr not byte aligned\n"); + kassert((txhwbaddr_ & 0x3) == 0); +#else + tx_head_ = 0; +#endif + + // get starting address, need to write to device registers + rxaddr_ = reinterpret_cast(rx_ring_); + txaddr_ = reinterpret_cast(tx_ring_); + rx_size_bytes_ = sizeof(rdesc_legacy_t) * NRXDESCS; + tx_size_bytes_ = sizeof(tdesc_legacy_t) * NTXDESCS; + + // must be 128 byte aligned + ebbrt::kbugon((rxaddr_ & 0x7F) != 0, "rx_addr_ not 128 byte aligned\n"); + ebbrt::kbugon((txaddr_ & 0x7F) != 0, "tx_addr_ not 128 byte aligned\n"); + ebbrt::kbugon((rx_size_bytes_ & 0x7F) != 0, + "rx_size_bytes_ not 128 byte aligned\n"); + ebbrt::kbugon((tx_size_bytes_ & 0x7F) != 0, + "tx_size_bytes_ not 128 byte aligned\n"); + } + + size_t rx_head_; + 
size_t rx_tail_; + size_t rx_size_; + size_t tx_tail_; + size_t tx_last_tail_; + size_t tx_size_; + size_t idx_; + size_t rx_size_bytes_; + size_t tx_size_bytes_; + uint64_t rxaddr_; + uint64_t txaddr_; + uint64_t txhwbaddr_; + uint64_t rxflag_; + + std::vector> circ_buffer_; + std::vector> rsc_chain_; + + rdesc_legacy_t* rx_ring_; + tdesc_legacy_t* tx_ring_; + bool* tx_isctx_; + bool rsc_used; + int hanc; +#ifdef TX_HEAD_WB + uint32_t* tx_head_; +#else + size_t tx_head_; +#endif + }; + + private: + EbbRef ebb_; + NetworkManager::Interface& itf_; + EthernetAddress mac_addr_; + + void Init(); + void PhyInit(); + void StopDevice(); + void GlobalReset(); + void SetupMultiQueue(uint32_t i); + void FinishSetup(); + + // device register writing code below + bool SwsmSmbiRead(); + void SwsmSmbiClear(); + + void SwsmSwesmbiSet(); + bool SwsmSwesmbiRead(); + void SwsmSwesmbiClear(); + + uint32_t ReadSwfwSyncSmBits(uint32_t m); + void WriteSwfwSyncSmBits(uint32_t m); + void WriteSwfwSyncSmBits2(uint32_t m); + + bool SwfwLockPhy(); + void SwfwUnlockPhy(); + bool SwfwSemAcquire(); + void SwfwSemRelease(); + + void WriteRxctrl(uint32_t m); + void WriteDmatxctl(uint32_t m); + void WriteDmatxctl_te(uint32_t m); + + void WriteEimc(uint32_t m); + void WriteEitr(uint32_t n, uint32_t m); + + void WriteTxdctl(uint32_t n, uint32_t m); + + void WriteRxdctl_1(uint32_t n, uint32_t m); + void WriteRxdctl_1_enable(uint32_t n, uint32_t m); + + void WriteRxdctl_2(uint32_t n, uint32_t m); + void WriteCtrl(uint32_t m); + void WriteCtrlExt(uint32_t m); + void WriteFcttv(uint32_t n, uint32_t m); + void WriteFcrtl(uint32_t n, uint32_t m); + void WriteFcrth(uint32_t n, uint32_t m); + void WriteFcrtv(uint32_t m); + void WriteFccfg(uint32_t m); + void WriteEerd(uint32_t m); + + void WriteCorectl(uint16_t m); + + void WriteAutoc(uint32_t m); + + void WriteEicr(uint32_t m); + void WriteGpie(uint32_t m); + + void WriteEims(uint32_t m); + + void WriteRal(uint32_t n, uint32_t m); + void WriteRah(uint32_t 
n, uint32_t m); + + void WriteMta(uint32_t n, uint32_t m); + void WriteVfta(uint32_t n, uint32_t m); + void WritePfvlvf(uint32_t n, uint32_t m); + void WritePfvlvfb(uint32_t n, uint32_t m); + void WriteMpsar(uint32_t n, uint32_t m); + void WriteFtqf(uint32_t n, uint32_t m); + void WriteSaqf(uint32_t n, uint32_t m); + void WriteDaqf(uint32_t n, uint32_t m); + void WriteSdpqf(uint32_t n, uint32_t m); + + void WriteFctrl(uint32_t m); + void WriteFhft_1(uint32_t n, uint32_t m); + void WriteFhft_2(uint32_t n, uint32_t m); + + void WritePfuta(uint32_t n, uint32_t m); + void WriteMcstctrl(uint32_t m); + + void WriteRttdqsel(uint32_t m); + void WriteRttbcnrc(uint32_t m); + + void WriteDcaTxctrlTxdescWbro(uint32_t n, uint32_t m); + void WriteDcaTxctrl(uint32_t n, uint32_t m); + void WriteDcaRxctrl(uint32_t n, uint32_t m); + void WriteDcaRxctrlClear(uint32_t n, uint32_t m); + void WriteDcaRxctrl_1(uint32_t n, uint32_t m); + void WriteDcaRxctrl_2(uint32_t n, uint32_t m); + void WriteDcaCtrl(uint32_t m); + + void WriteRdbal_1(uint32_t n, uint32_t m); + void WriteRdbal_2(uint32_t n, uint32_t m); + + void WriteRdbah_1(uint32_t n, uint32_t m); + void WriteRdbah_2(uint32_t n, uint32_t m); + + void WriteRdlen_1(uint32_t n, uint32_t m); + void WriteRdlen_2(uint32_t n, uint32_t m); + + void WriteSrrctl_1(uint32_t n, uint32_t m); + void WriteSrrctlZero(uint32_t n); + void WriteSrrctl_1_desctype(uint32_t n, uint32_t m); + void WriteRscdbu(uint32_t m); + + void WriteRdt_1(uint32_t n, uint32_t m); + void WriteRdh_1(uint32_t n, uint32_t m); + void WriteRdt_2(uint32_t n, uint32_t m); + + void WriteIvarAlloc0(uint32_t n, uint32_t m); + void WriteIvarAllocval0(uint32_t n, uint32_t m); + void WriteIvarAlloc1(uint32_t n, uint32_t m); + void WriteIvarAllocval1(uint32_t n, uint32_t m); + void WriteIvarAlloc2(uint32_t n, uint32_t m); + void WriteIvarAllocval2(uint32_t n, uint32_t m); + void WriteIvarAlloc3(uint32_t n, uint32_t m); + void WriteIvarAllocval3(uint32_t n, uint32_t m); + + void 
WriteSecrxctrl_Rx_Dis(uint32_t m); + + void WriteTdbal(uint32_t n, uint32_t m); + void WriteTdbah(uint32_t n, uint32_t m); + void WriteTdlen(uint32_t n, uint32_t m); + + void WriteTdh(uint32_t n, uint32_t m); + void WriteTdt(uint32_t n, uint32_t m); + + void WriteTdwbal(uint32_t n, uint32_t m); + void WriteTdwbah(uint32_t n, uint32_t m); + + void WriteHlreg0(uint32_t m); + void WriteRdrxctl(uint32_t m); + void WriteRdrxctlRSCFRSTSIZE(uint32_t m); + + void WriteEiac(uint32_t m); + void WriteEimsn(uint32_t n, uint32_t m); + + void WriteRfctl(uint32_t m); + + void WriteRscctl(uint32_t n, uint32_t m); + void WritePsrtype(uint32_t n, uint32_t m); + + void WriteRxcsum(uint32_t m); + void WriteTxpbthresh(uint32_t n, uint32_t m); + void WriteMrqc(uint32_t m); + void WriteDtxmxszrq(uint32_t m); + void WriteMflcn(uint32_t m); + void WriteReta(uint32_t n, uint32_t m); + + void WritePsrtypeZero(uint32_t n); + + void WriteRttdcs(uint32_t m); + void WriteRttdcsArbdisEn(uint32_t m); + void WriteRxpbsize(uint32_t n, uint32_t m); + void WriteTxpbsize(uint32_t n, uint32_t m); + void WriteTxpbThresh(uint32_t n, uint32_t m); + void WriteMtqc(uint32_t m); + void WritePfvtctl(uint32_t m); + void WriteRtrup2tc(uint32_t m); + void WriteRttup2tc(uint32_t m); + void WritePfqde(uint32_t m); + void WriteRttdt1c(uint32_t m); + void WriteRttdt2c(uint32_t n, uint32_t m); + void WriteRttpt2c(uint32_t n, uint32_t m); + void WriteRtrpt4c(uint32_t n, uint32_t m); + void WriteRttpcs(uint32_t m); + void WriteRtrpcs(uint32_t m); + void WritePfvml2flt(uint32_t n, uint32_t m); + + void WriteMngtxmap(uint32_t m); + + void WriteRxfeccerr0(uint32_t m); + + uint8_t ReadRdrxctlDmaidone(); + + void ReadEicr(); + bool ReadStatusPcieMes(); + uint8_t ReadStatusLanId(); + void ReadCtrl(); + bool ReadEerdDone(); + uint16_t ReadEerdData(); + uint16_t ReadEeprom(uint16_t offset); + uint8_t ReadAnlp1(); + uint8_t ReadAutocRestartAn(); + uint8_t ReadEecAutoRd(); + uint32_t ReadEims(); + + uint32_t ReadRal(uint32_t n); 
+ uint16_t ReadRah(uint32_t n); + uint8_t ReadRahAv(uint32_t n); + + uint8_t ReadRxdctl_1_enable(uint32_t n); + uint8_t ReadSecrxstat_Sr_Rdy(); + + uint8_t ReadTxdctl_enable(uint32_t n); + + uint16_t ReadRdh_1(uint32_t n); + uint16_t ReadTdh(uint32_t n); + uint16_t ReadRdt_1(uint32_t n); + + // some statistics + uint32_t ReadTpr(); + uint32_t ReadGprc(); + bool ReadLinksLinkUp(); + + // Process packet functions + void ProcessPacket(uint32_t n); + uint32_t GetRxBuf(uint32_t* len, uint64_t* bAddr); + void SendPacket(uint32_t n); + + e10k_queue_t& GetQueue() const { return *ixgq; } + + e10Kq& GetMultiQueue(size_t index) const { return *ixgmq[index]; } + + pci::Device& dev_; + pci::Bar& bar0_; + + struct IxgbeRegs { + volatile uint32_t kIxgbeCtrl; + volatile uint32_t kIxgbeCtrlBak; + volatile uint32_t kIxgbeStatus; + }; + + e10k_queue_t* ixgq; + uint8_t rcv_vector{0}; + + std::vector> ixgmq; + + friend class IxgbeDriverRep; +}; // class IxgbeDriver + +class IxgbeDriverRep : public MulticoreEbb { + public: + explicit IxgbeDriverRep(const IxgbeDriver& root); + void Run(); + void ReceivePoll(); + void ReclaimTx(); + void ReclaimRx(); + void Send(std::unique_ptr buf, PacketInfo pinfo); + void AddContext(uint8_t idx, uint8_t maclen, uint16_t iplen, uint8_t l4len, + enum l4_type l4type); + void AddTx(const uint8_t* pa, uint64_t len, uint64_t totallen, bool first, + bool last, uint8_t ctx, bool ip_cksum, bool tcpudp_cksum); + + private: + uint16_t ReadRdh_1(uint32_t n); + uint16_t ReadRdt_1(uint32_t n); + void WriteRdt_1(uint32_t n, uint32_t m); + void WriteRdh_1(uint32_t n, uint32_t m); + // uint16_t ReadRdt_1(uint32_t n); + // uint16_t ReadRdh_1(uint32_t n); + void WriteTdt_1(uint32_t n, uint32_t m); + void WriteEimcn(uint32_t n, uint32_t m); + uint32_t GetRxBuf(uint32_t* len, uint64_t* bAddr, uint64_t* rxflag, + bool* process_rsc, uint32_t* rnt); + + const IxgbeDriver& root_; + e10k_queue_t& ixgq_; + IxgbeDriver::e10Kq& ixgmq_; + + EventManager::IdleCallback 
receive_callback_; + +}; // class IxgbeDriverRep + +} // namespace ebbrt + +#endif // BAREMETAL_SRC_INCLUDE_EBBRT_IXGBE_DRIVER_H_ diff --git a/src/native/Main.cc b/src/native/Main.cc index de40afc9..4ac2ed6e 100644 --- a/src/native/Main.cc +++ b/src/native/Main.cc @@ -47,7 +47,11 @@ #include "Trans.h" #include "VMem.h" #include "VMemAllocator.h" +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ +#include "IxgbeDriver.h" +#else #include "VirtioNet.h" +#endif namespace { bool started_once = false; @@ -146,18 +150,29 @@ ebbrt::Main(multiboot::Information* mbi) { Timer::Init(); smp::Init(); event_manager->ReceiveToken(); + #ifdef __EBBRT_ENABLE_NETWORKING__ NetworkManager::Init(); pci::Init(); + +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + pci::RegisterProbe(IxgbeDriver::Probe); +#else pci::RegisterProbe(VirtioNetDriver::Probe); +#endif + pci::LoadDrivers(); network_manager->StartDhcp().Then([](Future fut) { fut.Get(); // Dhcp completed #ifdef __EBBRT_ENABLE_DISTRIBUTED_RUNTIME__ +// Currently not supported in BMNIC since we don't pass arguments +// via grub +#ifndef __EBBRT_ENABLE_BAREMETAL_NIC__ Messenger::Init(); runtime::Init(); #endif +#endif #endif // run global ctors for (unsigned i = 0; i < (end_ctors - start_ctors); ++i) { diff --git a/src/native/Msr.h b/src/native/Msr.h index 4e3b7ba6..9adc0699 100644 --- a/src/native/Msr.h +++ b/src/native/Msr.h @@ -30,6 +30,17 @@ inline uint64_t Read(uint32_t index) { inline void Write(uint32_t index, uint64_t data) { uint32_t low = data; uint32_t high = data >> 32; + +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + // TODO - correct fix is here? 
+ // GP fault happens when writing a 1 to bit 2 (the third bit) of kX2apicDcr, + // which is a reserved bit; the value of that bit is moved up to bit 3 instead + // only happens in baremetal, VM prob virtualized this issue + if ((((data >> 2) & 0x1) == 1) && index == kX2apicDcr) { + low = (data & 0x3) | ((data & 0x4) << 1); + high = 0x0; + } +#endif asm volatile("wrmsr" : : "c"(index), "a"(low), "d"(high)); } } // namespace msr diff --git a/src/native/Net.cc b/src/native/Net.cc index 581cec54..d884b7e0 100644 --- a/src/native/Net.cc +++ b/src/native/Net.cc @@ -12,7 +12,8 @@ ebbrt::NetworkManager::NewInterface(EthernetDevice& ether_dev) { return *interface_; } -void ebbrt::NetworkManager::Interface::Receive(std::unique_ptr buf) { +void ebbrt::NetworkManager::Interface::Receive(std::unique_ptr buf, + uint64_t rxflag) { auto packet_len = buf->ComputeChainDataLength(); // Drop packets that are too small @@ -26,7 +27,7 @@ void ebbrt::NetworkManager::Interface::Receive(std::unique_ptr buf) { switch (ntohs(eth_header.type)) { case kEthTypeIp: { - ReceiveIp(eth_header, std::move(buf)); + ReceiveIp(eth_header, std::move(buf), rxflag); break; } case kEthTypeArp: { diff --git a/src/native/Net.h b/src/native/Net.h index d7b575cb..6ecf742d 100644 --- a/src/native/Net.h +++ b/src/native/Net.h @@ -25,9 +25,16 @@ #include "RcuTable.h" #include "SharedPoolAllocator.h" +// IP and L4 checksum offload bits +#define RXFLAG_IPCS (1 << 0) +#define RXFLAG_IPCS_VALID (1 << 1) +#define RXFLAG_L4CS (1 << 2) +#define RXFLAG_L4CS_VALID (1 << 3) + namespace ebbrt { struct PacketInfo { static const constexpr uint8_t kNeedsCsum = 1; + static const constexpr uint8_t kNeedsIpCsum = 2; static const constexpr uint8_t kGsoNone = 0; static const constexpr uint8_t kGsoTcpv4 = 1; static const constexpr uint8_t kGsoUdp = 3; @@ -230,7 +237,7 @@ class NetworkManager : public StaticSharedEbb { explicit Interface(EthernetDevice& ether_dev) : address_(nullptr), ether_dev_(ether_dev) {} - void Receive(std::unique_ptr buf); + void Receive(std::unique_ptr buf, uint64_t rxflag); 
void Send(std::unique_ptr buf, PacketInfo pinfo = PacketInfo()); void SendUdp(UdpPcb& pcb, Ipv4Address addr, uint16_t port, std::unique_ptr buf); @@ -260,11 +267,14 @@ class NetworkManager : public StaticSharedEbb { }; void ReceiveArp(EthernetHeader& eh, std::unique_ptr buf); - void ReceiveIp(EthernetHeader& eh, std::unique_ptr buf); + void ReceiveIp(EthernetHeader& eh, std::unique_ptr buf, + uint64_t rxflag = 0); void ReceiveIcmp(EthernetHeader& eh, Ipv4Header& ih, std::unique_ptr buf); - void ReceiveUdp(Ipv4Header& ih, std::unique_ptr buf); - void ReceiveTcp(const Ipv4Header& ih, std::unique_ptr buf); + void ReceiveUdp(Ipv4Header& ih, std::unique_ptr buf, + uint64_t rxflag = 0); + void ReceiveTcp(const Ipv4Header& ih, std::unique_ptr buf, + uint64_t rxflag = 0); void ReceiveDhcp(Ipv4Address from_addr, uint16_t from_port, std::unique_ptr buf); void EthArpSend(uint16_t proto, const Ipv4Header& ih, diff --git a/src/native/NetIcmp.cc b/src/native/NetIcmp.cc index e5c06153..6ecfde0d 100644 --- a/src/native/NetIcmp.cc +++ b/src/native/NetIcmp.cc @@ -19,9 +19,11 @@ void ebbrt::NetworkManager::Interface::ReceiveIcmp( auto dp = buf->GetMutDataPointer(); auto& icmp_header = dp.Get(); - // checksum +#ifndef __EBBRT_ENABLE_BAREMETAL_NIC__ + // software checksum if (IpCsum(*buf)) return; +#endif // if echo_request, send reply if (icmp_header.type == kIcmpEchoRequest) { @@ -43,9 +45,19 @@ void ebbrt::NetworkManager::Interface::ReceiveIcmp( ip_header.ttl = kIpDefaultTtl; ip_header.chksum = 0; + + PacketInfo pinfo; + pinfo.flags = 0; + +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + // hardware ip checksum offload + pinfo.flags |= PacketInfo::kNeedsIpCsum; +#else ip_header.chksum = ip_header.ComputeChecksum(); +#endif buf->Retreat(ip_header.HeaderLength()); - EthArpSend(kEthTypeIp, ip_header, std::move(buf)); + + EthArpSend(kEthTypeIp, ip_header, std::move(buf), pinfo); } } diff --git a/src/native/NetIp.cc b/src/native/NetIp.cc index 683311f3..339b613b 100644 --- a/src/native/NetIp.cc 
+++ b/src/native/NetIp.cc @@ -28,8 +28,9 @@ bool ebbrt::NetworkManager::Interface::ItfAddress::isLocalNetwork( } // Receive an Ipv4 packet -void ebbrt::NetworkManager::Interface::ReceiveIp( - EthernetHeader& eth_header, std::unique_ptr buf) { +void ebbrt::NetworkManager::Interface::ReceiveIp(EthernetHeader& eth_header, + std::unique_ptr buf, + uint64_t rxflag) { auto packet_len = buf->ComputeChainDataLength(); if (unlikely(packet_len < sizeof(Ipv4Header))) @@ -51,8 +52,21 @@ void ebbrt::NetworkManager::Interface::ReceiveIp( buf->TrimEnd(packet_len - tot_len); +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + // baremetal checksum offload + if (unlikely((rxflag & RXFLAG_IPCS) == 0)) { + ebbrt::kprintf("%s RXFLAG_IPCS failed\n", __FUNCTION__); + return; + } + + if (unlikely((rxflag & RXFLAG_IPCS_VALID) == 0)) { + ebbrt::kprintf("%s RXFLAG_IPCS_VALID failed\n", __FUNCTION__); + return; + } +#else if (unlikely(ip_header.ComputeChecksum() != 0)) return; +#endif auto addr = Address(); // Unless the protocol is UDP or we have an address on this interface and the @@ -79,11 +93,11 @@ void ebbrt::NetworkManager::Interface::ReceiveIp( break; } case kIpProtoUDP: { - ReceiveUdp(ip_header, std::move(buf)); + ReceiveUdp(ip_header, std::move(buf), rxflag); break; } case kIpProtoTCP: { - ReceiveTcp(ip_header, std::move(buf)); + ReceiveTcp(ip_header, std::move(buf), rxflag); break; } } @@ -115,9 +129,14 @@ void ebbrt::NetworkManager::Interface::SendIp(std::unique_ptr buf, ih.chksum = 0; ih.src = src; ih.dst = dst; - ih.chksum = ih.ComputeChecksum(); +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + // baremetal ip checksum offload + pinfo.flags |= PacketInfo::kNeedsIpCsum; +#else + ih.chksum = ih.ComputeChecksum(); kassert(ih.ComputeChecksum() == 0); +#endif pinfo.csum_start += sizeof(Ipv4Header); pinfo.hdr_len += sizeof(Ipv4Header); diff --git a/src/native/NetTcp.cc b/src/native/NetTcp.cc index e792f34a..a66d2eac 100644 --- a/src/native/NetTcp.cc +++ b/src/native/NetTcp.cc @@ -124,11 +124,11 @@ 
uint16_t ebbrt::NetworkManager::TcpPcb::Connect(Ipv4Address address, auto& tcp_header = dp.Get(); auto opts = reinterpret_cast((&tcp_header) + 1); *opts = htonl(0x02040000 | (1460 & 0xFFFF)); - auto nop_ptr = reinterpret_cast(opts+1); - nop_ptr[0] = 0x1; // NOP - nop_ptr[1] = 0x3; // WS type - nop_ptr[2] = 0x3; // opt length - nop_ptr[3] = kWindowShift; // shift value + auto nop_ptr = reinterpret_cast(opts + 1); + nop_ptr[0] = 0x1; // NOP + nop_ptr[1] = 0x3; // WS type + nop_ptr[2] = 0x3; // opt length + nop_ptr[3] = kWindowShift; // shift value entry_->EnqueueSegment(tcp_header, std::move(new_buf), kTcpSyn, optlen); auto now = ebbrt::clock::Wall::Now(); @@ -194,8 +194,9 @@ ebbrt::Ipv4Address ebbrt::NetworkManager::TcpPcb::GetRemoteAddress() { } // Receive a TCP packet on an interface -void ebbrt::NetworkManager::Interface::ReceiveTcp( - const Ipv4Header& ih, std::unique_ptr buf) { +void ebbrt::NetworkManager::Interface::ReceiveTcp(const Ipv4Header& ih, + std::unique_ptr buf, + uint64_t rxflag) { auto packet_len = buf->ComputeChainDataLength(); // Ensure we have a header @@ -210,10 +211,21 @@ void ebbrt::NetworkManager::Interface::ReceiveTcp( if (unlikely(addr->isBroadcast(ih.dst) || ih.dst.isMulticast())) return; - // XXX: Check if rxcsum is supported - // if (unlikely(IpPseudoCsum(*buf, ih.proto, ih.src, ih.dst))) - // return; +// XXX: Check if rxcsum is supported +// if (unlikely(IpPseudoCsum(*buf, ih.proto, ih.src, ih.dst))) +// return; +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + if (unlikely((rxflag & RXFLAG_L4CS) == 0)) { + ebbrt::kprintf("%s RXFLAG_L4CS failed\n", __FUNCTION__); + return; + } + + if (unlikely((rxflag & RXFLAG_L4CS_VALID) == 0)) { + ebbrt::kprintf("%s RXFLAG_L4CS_VALID failed\n", __FUNCTION__); + return; + } +#endif auto hdr_len = tcp_header.HdrLen(); if (unlikely(hdr_len < sizeof(TcpHeader) || hdr_len > packet_len)) return; @@ -427,11 +439,11 @@ void ebbrt::NetworkManager::ListeningTcpEntry::Input( auto& tcp_header = dp.Get(); auto opts = reinterpret_cast((&tcp_header) + 
1); *opts = htonl(0x02040000 | (1460 & 0xFFFF)); - auto nop_ptr = reinterpret_cast(opts+1); - nop_ptr[0] = 0x1; // NOP - nop_ptr[1] = 0x3; // WS type - nop_ptr[2] = 0x3; // opt length - nop_ptr[3] = kWindowShift; // shift value + auto nop_ptr = reinterpret_cast(opts + 1); + nop_ptr[0] = 0x1; // NOP + nop_ptr[1] = 0x3; // WS type + nop_ptr[2] = 0x3; // opt length + nop_ptr[3] = kWindowShift; // shift value entry->EnqueueSegment(tcp_header, std::move(new_buf), kTcpSyn | kTcpAck, optlen); diff --git a/src/native/NetUdp.cc b/src/native/NetUdp.cc index 7da5fdc4..992ee21f 100644 --- a/src/native/NetUdp.cc +++ b/src/native/NetUdp.cc @@ -57,8 +57,9 @@ void ebbrt::NetworkManager::UdpPcb::Receive( } // Receive UDP packet on an interface -void ebbrt::NetworkManager::Interface::ReceiveUdp( - Ipv4Header& ip_header, std::unique_ptr buf) { +void ebbrt::NetworkManager::Interface::ReceiveUdp(Ipv4Header& ip_header, + std::unique_ptr buf, + uint64_t rxflag) { auto packet_len = buf->ComputeChainDataLength(); // Ensure we have a header @@ -75,10 +76,20 @@ void ebbrt::NetworkManager::Interface::ReceiveUdp( // trim any excess off the packet buf->TrimEnd(packet_len - ntohs(udp_header.length)); - // XXX: Check if rxcsum supported - // if (udp_header.checksum && - // IpPseudoCsum(*buf, ip_header.proto, ip_header.src, ip_header.dst)) - // return; +// XXX: Check if rxcsum supported +// if (udp_header.checksum && +// IpPseudoCsum(*buf, ip_header.proto, ip_header.src, ip_header.dst)) +// return; +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + if (unlikely((rxflag & RXFLAG_L4CS) == 0)) { + ebbrt::kprintf("%s RXFLAG_L4CS failed\n", __FUNCTION__); + return; + } + if (unlikely((rxflag & RXFLAG_L4CS_VALID) == 0)) { + ebbrt::kprintf("%s RXFLAG_L4CS_VALID failed\n", __FUNCTION__); + return; + } +#endif auto entry = network_manager->udp_pcbs_.find(ntohs(udp_header.dst_port)); diff --git a/src/native/Pci.cc b/src/native/Pci.cc index cdd53dae..2a740f7e 100644 --- a/src/native/Pci.cc +++ b/src/native/Pci.cc @@ -9,6 +9,7 @@ #include 
"../Align.h" #include "../ExplicitlyConstructed.h" #include "Debug.h" +#include "GeneralPurposeAllocator.h" #include "Io.h" #include "VMem.h" #include "VMemAllocator.h" @@ -34,7 +35,11 @@ uint8_t PciRead8(uint8_t bus, uint8_t device, uint8_t func, uint8_t offset) { } uint16_t PciRead16(uint8_t bus, uint8_t device, uint8_t func, uint8_t offset) { PciSetAddr(bus, device, func, offset); +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + return ebbrt::io::In16(kPciDataPort + (offset & 2)); +#else return ebbrt::io::In16(kPciDataPort); +#endif } uint32_t PciRead32(uint8_t bus, uint8_t device, uint8_t func, uint8_t offset) { @@ -45,7 +50,12 @@ uint32_t PciRead32(uint8_t bus, uint8_t device, uint8_t func, uint8_t offset) { void PciWrite16(uint8_t bus, uint8_t device, uint8_t func, uint8_t offset, uint16_t val) { PciSetAddr(bus, device, func, offset); + +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + ebbrt::io::Out16(kPciDataPort + (offset & 2), val); +#else ebbrt::io::Out16(kPciDataPort, val); +#endif } void PciWrite32(uint8_t bus, uint8_t device, uint8_t func, uint8_t offset, @@ -71,8 +81,12 @@ void EnumerateBus(uint8_t bus) { if (dev) continue; + dev.DumpAddress(); + dev.DumpInfo(); + if (dev.IsBridge()) { - ebbrt::kabort("Secondary bus unsupported!\n"); + // ebbrt::kabort("Secondary bus unsupported!\n"); + continue; } else { devices->emplace_back(bus, device, func); } @@ -101,6 +115,11 @@ void ebbrt::pci::Init() { devices.construct(); driver_probes.construct(); EnumerateAllBuses(); +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + // TODO - Kludge to identify where NIC sits in device tree, should incorporate + // Dan's pull request for enumerating bridges + EnumerateBus(0x1); +#endif } void ebbrt::pci::RegisterProbe(std::function probe) { @@ -149,6 +168,18 @@ uint16_t ebbrt::pci::Function::GetCommand() const { return Read16(kCommandAddr); } +uint8_t ebbrt::pci::Function::GetClassCode() const { + return Read8(kClassCodeAddr); +} + +uint8_t ebbrt::pci::Function::GetFunc() const { return func_; } + 
+uint8_t ebbrt::pci::Function::GetSubclass() const { + return Read8(kSubclassAddr); +} + +uint8_t ebbrt::pci::Function::GetProgIf() const { return Read8(kProgIfAddr); } + uint8_t ebbrt::pci::Function::GetHeaderType() const { return Read8(kHeaderTypeAddr) & ~kHeaderMultifuncMask; } @@ -187,6 +218,11 @@ void ebbrt::pci::Function::DumpAddress() const { kprintf("%u:%u:%u\n", bus_, device_, func_); } +void ebbrt::pci::Function::DumpInfo() const { + kprintf("Vendor ID: 0x%x ", GetVendorId()); + kprintf("Device ID: 0x%x\n", GetDeviceId()); +} + ebbrt::pci::Bar::Bar(pci::Device& dev, uint32_t bar_val, uint8_t idx) : vaddr_(nullptr), is_64_(false), prefetchable_(false) { mmio_ = !(bar_val & kIoSpaceFlag); @@ -226,6 +262,8 @@ ebbrt::pci::Bar::~Bar() { kbugon(vaddr_ != nullptr, "pci::Bar: Need to free mapped region\n"); } +void* ebbrt::pci::Bar::GetVaddr() { return vaddr_; } + bool ebbrt::pci::Bar::Is64() const { return is_64_; } void ebbrt::pci::Bar::Map() { @@ -233,10 +271,21 @@ void ebbrt::pci::Bar::Map() { return; auto npages = align::Up(size_, pmem::kPageSize) >> pmem::kPageShift; + +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + auto pf = std::make_unique(); + auto& ref = *pf; + auto page = vmem_allocator->Alloc(npages, std::move(pf)); + vaddr_ = reinterpret_cast(page.ToAddr()); + kbugon(page == Pfn::None(), "Failed to allocate virtual pages for mmio\n"); + vmem::MapMemory(page, Pfn::Down(addr_), size_); + ref.SetMap(page, Pfn::Down(addr_), size_); +#else auto page = vmem_allocator->Alloc(npages); vaddr_ = reinterpret_cast(page.ToAddr()); kbugon(page == Pfn::None(), "Failed to allocate virtual pages for mmio\n"); vmem::MapMemory(page, Pfn::Down(addr_), size_); +#endif } uint8_t ebbrt::pci::Bar::Read8(size_t offset) { @@ -415,7 +464,15 @@ void ebbrt::pci::Device::SetMsixEntry(size_t entry, uint8_t vector, uint8_t dest) { auto& msix_bar = GetBar(msix_bar_idx_); auto offset = msix_table_offset_ + entry * kMsixTableEntrySize; + +#ifdef __EBBRT_ENABLE_BAREMETAL_NIC__ + // more 
precise + msix_bar.Write32(offset + kMsixTableEntryAddrLow, 0xFEE00000 | dest << 12); + msix_bar.Write32(offset + kMsixTableEntryAddrHigh, 0x0); +#else msix_bar.Write32(offset + kMsixTableEntryAddr, 0xFEE00000 | dest << 12); +#endif + msix_bar.Write32(offset + kMsixTableEntryData, vector); MsixUnmaskEntry(entry); } diff --git a/src/native/Pci.h b/src/native/Pci.h index 15bc1dce..3d40af31 100644 --- a/src/native/Pci.h +++ b/src/native/Pci.h @@ -30,6 +30,7 @@ class Function { uint8_t GetLatencyTimer() const; uint8_t GetHeaderType() const; uint8_t GetBist() const; + uint8_t GetFunc() const; operator bool() const; bool IsMultifunc() const; @@ -40,6 +41,7 @@ class Function { void DisableInt(); void DumpAddress() const; + void DumpInfo() const; protected: static const constexpr uint8_t kVendorIdAddr = 0x00; @@ -87,6 +89,7 @@ class Bar { void Write8(size_t offset, uint8_t val); void Write16(size_t offset, uint16_t val); void Write32(size_t offset, uint32_t val); + void* GetVaddr(); private: static const constexpr uint32_t kIoSpaceFlag = 0x1; @@ -166,6 +169,8 @@ class Device : public Function { static const constexpr size_t kMsixTableEntryAddr = 0; static const constexpr size_t kMsixTableEntryData = 8; static const constexpr size_t kMsixTableEntryControl = 12; + static const constexpr size_t kMsixTableEntryAddrLow = 0; + static const constexpr size_t kMsixTableEntryAddrHigh = 4; static const constexpr uint32_t kMsixTableEntryControlMaskBit = 1; diff --git a/src/native/VirtioNet.cc b/src/native/VirtioNet.cc index 61064a2f..11f3f746 100644 --- a/src/native/VirtioNet.cc +++ b/src/native/VirtioNet.cc @@ -270,7 +270,7 @@ void ebbrt::VirtioNetRep::ReceivePoll() { // } b->Advance(sizeof(VirtioNetHeader)); - root_.itf_.Receive(std::move(b)); + root_.itf_.Receive(std::move(b), 0); } void ebbrt::VirtioNetRep::FillRxRing() { diff --git a/src/native/config.cmake b/src/native/config.cmake index f3831979..6e51122c 100644 --- a/src/native/config.cmake +++ b/src/native/config.cmake @@ 
-1,6 +1,7 @@ # EbbRT native platform-specific configuration option(__EBBRT_ENABLE_DISTRIBUTED_RUNTIME__ "Enable Distributed Runtime Support" ON) option(__EBBRT_ENABLE_NETWORKING__ "Enable Networking" ON) +option(__EBBRT_ENABLE_BAREMETAL_NIC__ "Enable Baremetal NIC" OFF) option(__EBBRT_ENABLE_TRACE__ "Enable Tracing Subsystem" OFF) option(LARGE_WINDOW_HACK "Enable Large TCP Window Hack" OFF) option(PAGE_CHECKER "Enable Page Checker" OFF) diff --git a/src/native/config.h.in b/src/native/config.h.in index dc9a773e..8ae06f0a 100644 --- a/src/native/config.h.in +++ b/src/native/config.h.in @@ -2,6 +2,7 @@ #cmakedefine __EBBRT_ENABLE_DISTRIBUTED_RUNTIME__ #cmakedefine __EBBRT_ENABLE_NETWORKING__ #cmakedefine __EBBRT_ENABLE_TRACE__ +#cmakedefine __EBBRT_ENABLE_BAREMETAL_NIC__ #cmakedefine LARGE_WINDOW_HACK #cmakedefine PAGE_CHECKER #cmakedefine VIRTIO_ZERO_COPY