From 22f7470c6921e0fca923498766981c4c2d591403 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 15 May 2025 18:41:58 +0300 Subject: [PATCH 1/3] Update kernel headers To commit: ?? ("RDMA/efa: Add CQ with external memory support"). Signed-off-by: Michael Margolin --- kernel-headers/rdma/efa-abi.h | 8 +++++++- kernel-headers/rdma/rdma_netlink.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel-headers/rdma/efa-abi.h b/kernel-headers/rdma/efa-abi.h index 11b94b0b0..f2bcef789 100644 --- a/kernel-headers/rdma/efa-abi.h +++ b/kernel-headers/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef EFA_ABI_USER_H @@ -56,6 +56,7 @@ struct efa_ibv_alloc_pd_resp { enum { EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, EFA_CREATE_CQ_WITH_SGID = 1 << 1, + EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF = 1 << 2, }; struct efa_ibv_create_cq { @@ -64,6 +65,10 @@ struct efa_ibv_create_cq { __u16 num_sub_cqs; __u8 flags; __u8 reserved_58[5]; + __aligned_u64 ext_mem_offset; + __aligned_u64 ext_mem_length; + __u32 ext_mem_fd; + __u8 reserved_120[4]; }; enum { @@ -131,6 +136,7 @@ enum { EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4, EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5, EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV = 1 << 6, + EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 7, }; struct efa_ibv_ex_query_device_resp { diff --git a/kernel-headers/rdma/rdma_netlink.h b/kernel-headers/rdma/rdma_netlink.h index 9f9cf20c1..f41f0228f 100644 --- a/kernel-headers/rdma/rdma_netlink.h +++ b/kernel-headers/rdma/rdma_netlink.h @@ -580,6 +580,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */ + + RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, /* u8 */ /* * Always the end */ From 
dffe7615f7e4a23b1688609b9725ed6a001b3469 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 15 May 2025 19:15:59 +0300 Subject: [PATCH 2/3] efa: Add direct verbs query QP and CQ Extend EFA direct verbs to allow querying of QP and CQ parameters. As a first stage, these new verbs expose the queues' virtual addresses as well as their structure, which can be used for an accelerator-driven datapath. Reviewed-by: Daniel Kranzdorf Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin --- debian/ibverbs-providers.symbols | 3 + providers/efa/CMakeLists.txt | 2 +- providers/efa/efadv.h | 76 ++++++++++++++-------- providers/efa/libefa.map | 6 ++ providers/efa/man/CMakeLists.txt | 2 + providers/efa/man/efadv_query_cq.3.md | 69 ++++++++++++++++++++ providers/efa/man/efadv_query_qp_wqs.3.md | 78 +++++++++++++++++++++++ providers/efa/verbs.c | 56 +++++++++++++++- 8 files changed, 263 insertions(+), 29 deletions(-) create mode 100644 providers/efa/man/efadv_query_cq.3.md create mode 100644 providers/efa/man/efadv_query_qp_wqs.3.md diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols index d0da2e11f..e1897f381 100644 --- a/debian/ibverbs-providers.symbols +++ b/debian/ibverbs-providers.symbols @@ -170,6 +170,7 @@ libefa.so.1 ibverbs-providers #MINVER# EFA_1.1@EFA_1.1 26 EFA_1.2@EFA_1.2 43 EFA_1.3@EFA_1.3 50 + EFA_1.4@EFA_1.4 58 efadv_create_driver_qp@EFA_1.0 24 efadv_create_qp_ex@EFA_1.1 26 efadv_query_device@EFA_1.1 26 @@ -177,6 +178,8 @@ libefa.so.1 ibverbs-providers #MINVER# efadv_cq_from_ibv_cq_ex@EFA_1.2 43 efadv_create_cq@EFA_1.2 43 efadv_query_mr@EFA_1.3 50 + efadv_query_qp_wqs@EFA_1.4 58 + efadv_query_cq@EFA_1.4 58 libhns.so.1 ibverbs-providers #MINVER# * Build-Depends-Package: libibverbs-dev HNS_1.0@HNS_1.0 51 diff --git a/providers/efa/CMakeLists.txt b/providers/efa/CMakeLists.txt index e999f3b77..c4ce3c0fe 100644 --- a/providers/efa/CMakeLists.txt +++ b/providers/efa/CMakeLists.txt @@ -3,7 +3,7 @@ if (ENABLE_LTTNG AND LTTNGUST_FOUND) 
endif() rdma_shared_provider(efa libefa.map - 1 1.3.${PACKAGE_VERSION} + 1 1.4.${PACKAGE_VERSION} ${TRACE_FILE} efa.c verbs.c diff --git a/providers/efa/efadv.h b/providers/efa/efadv.h index 16c769571..0bb9f7e74 100644 --- a/providers/efa/efadv.h +++ b/providers/efa/efadv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef __EFADV_H__ @@ -16,32 +16,6 @@ extern "C" { #endif -enum { - /* Values must match the values in efa-abi.h */ - EFADV_QP_DRIVER_TYPE_SRD = 0, -}; - -struct ibv_qp *efadv_create_driver_qp(struct ibv_pd *ibvpd, - struct ibv_qp_init_attr *attr, - uint32_t driver_qp_type); - -enum { - EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV = 1 << 0, -}; - -struct efadv_qp_init_attr { - uint64_t comp_mask; - uint32_t driver_qp_type; - uint16_t flags; - uint8_t sl; - uint8_t reserved[1]; -}; - -struct ibv_qp *efadv_create_qp_ex(struct ibv_context *ibvctx, - struct ibv_qp_init_attr_ex *attr_ex, - struct efadv_qp_init_attr *efa_attr, - uint32_t inlen); - enum { EFADV_DEVICE_ATTR_CAPS_RDMA_READ = 1 << 0, EFADV_DEVICE_ATTR_CAPS_RNR_RETRY = 1 << 1, @@ -75,6 +49,45 @@ struct efadv_ah_attr { int efadv_query_ah(struct ibv_ah *ibvah, struct efadv_ah_attr *attr, uint32_t inlen); +enum { + /* Values must match the values in efa-abi.h */ + EFADV_QP_DRIVER_TYPE_SRD = 0, +}; + +struct ibv_qp *efadv_create_driver_qp(struct ibv_pd *ibvpd, + struct ibv_qp_init_attr *attr, + uint32_t driver_qp_type); + +enum { + EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV = 1 << 0, +}; + +struct efadv_qp_init_attr { + uint64_t comp_mask; + uint32_t driver_qp_type; + uint16_t flags; + uint8_t sl; + uint8_t reserved; +}; + +struct ibv_qp *efadv_create_qp_ex(struct ibv_context *ibvctx, + struct ibv_qp_init_attr_ex *attr_ex, + struct efadv_qp_init_attr *efa_attr, + uint32_t inlen); + +struct efadv_wq_attr { + uint64_t 
comp_mask; + uint8_t *buffer; + uint32_t entry_size; + uint32_t num_entries; + uint32_t *doorbell; + uint32_t max_batch; + uint8_t reserved[4]; +}; + +int efadv_query_qp_wqs(struct ibv_qp *ibvqp, struct efadv_wq_attr *sq_attr, + struct efadv_wq_attr *rq_attr, uint32_t inlen); + struct efadv_cq { uint64_t comp_mask; int (*wc_read_sgid)(struct efadv_cq *efadv_cq, union ibv_gid *sgid); @@ -96,6 +109,15 @@ struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx, struct efadv_cq_init_attr *efa_attr, uint32_t inlen); +struct efadv_cq_attr { + uint64_t comp_mask; + uint8_t *buffer; + uint32_t entry_size; + uint32_t num_entries; +}; + +int efadv_query_cq(struct ibv_cq *ibvcq, struct efadv_cq_attr *attr, uint32_t inlen); + struct efadv_cq *efadv_cq_from_ibv_cq_ex(struct ibv_cq_ex *ibvcqx); static inline int efadv_wc_read_sgid(struct efadv_cq *efadv_cq, diff --git a/providers/efa/libefa.map b/providers/efa/libefa.map index d27b58c16..13fac76a3 100644 --- a/providers/efa/libefa.map +++ b/providers/efa/libefa.map @@ -23,3 +23,9 @@ EFA_1.3 { global: efadv_query_mr; } EFA_1.2; + +EFA_1.4 { + global: + efadv_query_qp_wqs; + efadv_query_cq; +} EFA_1.3; diff --git a/providers/efa/man/CMakeLists.txt b/providers/efa/man/CMakeLists.txt index e7ad126ed..07ca589a3 100644 --- a/providers/efa/man/CMakeLists.txt +++ b/providers/efa/man/CMakeLists.txt @@ -3,6 +3,8 @@ rdma_man_pages( efadv_create_driver_qp.3.md efadv_create_qp_ex.3.md efadv_query_ah.3.md + efadv_query_cq.3.md efadv_query_device.3.md efadv_query_mr.3.md + efadv_query_qp_wqs.3.md ) diff --git a/providers/efa/man/efadv_query_cq.3.md b/providers/efa/man/efadv_query_cq.3.md new file mode 100644 index 000000000..ac9b0cf49 --- /dev/null +++ b/providers/efa/man/efadv_query_cq.3.md @@ -0,0 +1,69 @@ +--- +layout: page +title: EFADV_QUERY_CQ +section: 3 +tagline: Verbs +date: 2025-04-15 +header: "EFA Direct Verbs Manual" +footer: efa +--- + +# NAME + +efadv_query_cq - Query EFA specific Completion Queue attributes + +# SYNOPSIS + 
+```c +#include <infiniband/efadv.h> + +int efadv_query_cq(struct ibv_cq *ibvcq, struct efadv_cq_attr *attr, + uint32_t inlen); +``` + +# DESCRIPTION + +**efadv_query_cq()** queries device-specific Completion Queue attributes. + +Compatibility is handled using the comp_mask and inlen fields. + +```c +struct efadv_cq_attr { + uint64_t comp_mask; + uint8_t *buffer; + uint32_t entry_size; + uint32_t num_entries; +}; +``` + +*inlen* +: In: Size of struct efadv_cq_attr. + +*comp_mask* +: Compatibility mask. + +*buffer* +: Completion queue buffer. + +*entry_size* +: Size of each completion queue entry. + +*num_entries* +: Maximal number of entries in the completion queue. + +# RETURN VALUE + +**efadv_query_cq()** returns 0 on success, or the value of errno on failure +(which indicates the failure reason). + +# SEE ALSO + +**efadv**(7) + +# NOTES + +* Compatibility mask (comp_mask) is an out field and currently has no values. + +# AUTHORS + +Michael Margolin diff --git a/providers/efa/man/efadv_query_qp_wqs.3.md b/providers/efa/man/efadv_query_qp_wqs.3.md new file mode 100644 index 000000000..8a824a0b9 --- /dev/null +++ b/providers/efa/man/efadv_query_qp_wqs.3.md @@ -0,0 +1,78 @@ +--- +layout: page +title: EFADV_QUERY_QP_WQS +section: 3 +tagline: Verbs +date: 2025-05-14 +header: "EFA Direct Verbs Manual" +footer: efa +--- + +# NAME + +efadv_query_qp_wqs - Query EFA specific Queue Pair work queue attributes + +# SYNOPSIS + +```c +#include <infiniband/efadv.h> + +int efadv_query_qp_wqs(struct ibv_qp *ibvqp, struct efadv_wq_attr *sq_attr, + struct efadv_wq_attr *rq_attr, uint32_t inlen); +``` + +# DESCRIPTION + +**efadv_query_qp_wqs()** queries device-specific Queue Pair work queue attributes. + +Compatibility is handled using the comp_mask and inlen fields. + +```c +struct efadv_wq_attr { + uint64_t comp_mask; + uint8_t *buffer; + uint32_t entry_size; + uint32_t num_entries; + uint32_t *doorbell; + uint32_t max_batch; + uint8_t reserved[4]; +}; +``` + +*inlen* +: In: Size of struct efadv_wq_attr. 
+ +*comp_mask* +: Compatibility mask. + +*buffer* +: Queue buffer. + +*entry_size* +: Size of each entry in the queue. + +*num_entries* +: Maximal number of entries in the queue. + +*doorbell* +: Queue doorbell. + +*max_batch* +: Maximum batch size for queue submissions. + +# RETURN VALUE + +**efadv_query_qp_wqs()** returns 0 on success, or the value of errno on failure +(which indicates the failure reason). + +# SEE ALSO + +**efadv**(7) + +# NOTES + +* Compatibility mask (comp_mask) is an out field and currently has no values. + +# AUTHORS + +Michael Margolin diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c index 24bef2844..f4dbc91e5 100644 --- a/providers/efa/verbs.c +++ b/providers/efa/verbs.c @@ -1046,6 +1046,28 @@ struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx, return create_cq(ibvctx, attr_ex, efa_attr); } +int efadv_query_cq(struct ibv_cq *ibvcq, struct efadv_cq_attr *attr, uint32_t inlen) +{ + struct efa_cq *cq = to_efa_cq(ibvcq); + + if (!is_efa_dev(ibvcq->context->device)) { + verbs_err(verbs_get_ctx(ibvcq->context), "Not an EFA device\n"); + return EOPNOTSUPP; + } + + if (!vext_field_avail(typeof(*attr), num_entries, inlen)) { + verbs_err(verbs_get_ctx(ibvcq->context), "Compatibility issues\n"); + return EINVAL; + } + + attr->comp_mask = 0; + attr->buffer = cq->buf; + attr->entry_size = cq->cqe_size; + attr->num_entries = ibvcq->cqe; + + return 0; +} + struct efadv_cq *efadv_cq_from_ibv_cq_ex(struct ibv_cq_ex *ibvcqx) { struct efa_cq *cq = to_efa_cq_ex(ibvcqx); @@ -1644,7 +1666,7 @@ struct ibv_qp *efadv_create_qp_ex(struct ibv_context *ibvctx, !vext_field_avail(struct efadv_qp_init_attr, driver_qp_type, inlen) || efa_attr->comp_mask || - !is_reserved_cleared(efa_attr->reserved) || + efa_attr->reserved || (inlen > sizeof(*efa_attr) && !is_ext_cleared(efa_attr, inlen))) { verbs_err(verbs_get_ctx(ibvctx), "Compatibility issues\n"); errno = EINVAL; @@ -1688,6 +1710,38 @@ int efa_query_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr, 
&cmd, sizeof(cmd)); } +int efadv_query_qp_wqs(struct ibv_qp *ibvqp, struct efadv_wq_attr *sq_attr, + struct efadv_wq_attr *rq_attr, uint32_t inlen) +{ + struct efa_qp *qp = to_efa_qp(ibvqp); + + if (!is_efa_dev(ibvqp->context->device)) { + verbs_err(verbs_get_ctx(ibvqp->context), "Not an EFA device\n"); + return EOPNOTSUPP; + } + + if (!vext_field_avail(typeof(*sq_attr), max_batch, inlen)) { + verbs_err(verbs_get_ctx(ibvqp->context), "Compatibility issues\n"); + return EINVAL; + } + + sq_attr->comp_mask = 0; + sq_attr->buffer = qp->sq.desc; + sq_attr->entry_size = sizeof(struct efa_io_tx_wqe); + sq_attr->num_entries = qp->sq.wq.wqe_cnt; + sq_attr->doorbell = qp->sq.wq.db; + sq_attr->max_batch = qp->sq.max_batch_wr; + + rq_attr->comp_mask = 0; + rq_attr->buffer = qp->rq.buf; + rq_attr->entry_size = sizeof(struct efa_io_rx_desc); + rq_attr->num_entries = qp->rq.wq.wqe_cnt; + rq_attr->doorbell = qp->rq.wq.db; + rq_attr->max_batch = qp->rq.wq.wqe_cnt; + + return 0; +} + int efa_query_qp_data_in_order(struct ibv_qp *ibvqp, enum ibv_wr_opcode op, uint32_t flags) { From 7e8fed39bc303474aad6e8bc6b4431d48ded9d41 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 15 May 2025 19:17:02 +0300 Subject: [PATCH 3/3] efa: Add option to create CQ with external memory Extend the EFA direct verbs interface to enable creation of CQs on top of pre-allocated memory buffers. The memory can be passed by supplying a dmabuf fd and offset. 
Reviewed-by: Daniel Kranzdorf Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin --- providers/efa/efa.h | 1 + providers/efa/efadv.h | 13 +++++ providers/efa/man/efadv_create_cq.3.md | 30 +++++++++++ providers/efa/man/efadv_query_device.3.md | 4 ++ providers/efa/verbs.c | 64 ++++++++++++++++------- 5 files changed, 93 insertions(+), 19 deletions(-) diff --git a/providers/efa/efa.h b/providers/efa/efa.h index 4a8f86a18..c1ec12fe7 100644 --- a/providers/efa/efa.h +++ b/providers/efa/efa.h @@ -69,6 +69,7 @@ struct efa_cq { size_t cqe_size; uint8_t *buf; size_t buf_size; + bool buf_mmaped; uint32_t *db; uint8_t *db_mmap_addr; uint16_t cc; /* Consumer Counter */ diff --git a/providers/efa/efadv.h b/providers/efa/efadv.h index 0bb9f7e74..b77f9a8ad 100644 --- a/providers/efa/efadv.h +++ b/providers/efa/efadv.h @@ -22,6 +22,7 @@ enum { EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID = 1 << 2, EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE = 1 << 3, EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV = 1 << 4, + EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 5, }; struct efadv_device_attr { @@ -99,9 +100,21 @@ enum { EFADV_WC_EX_WITH_IS_UNSOLICITED = 1 << 1, }; +enum { + EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF = 1 << 0, +}; + struct efadv_cq_init_attr { uint64_t comp_mask; uint64_t wc_flags; + uint64_t flags; + struct { + uint8_t *buffer; + uint64_t length; + uint64_t offset; + uint32_t fd; + uint8_t reserved[4]; + } ext_mem_dmabuf; }; struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx, diff --git a/providers/efa/man/efadv_create_cq.3.md b/providers/efa/man/efadv_create_cq.3.md index 4166ab826..1ca1a20ee 100644 --- a/providers/efa/man/efadv_create_cq.3.md +++ b/providers/efa/man/efadv_create_cq.3.md @@ -47,6 +47,14 @@ Compatibility is handled using the comp_mask and inlen fields. 
struct efadv_cq_init_attr { uint64_t comp_mask; uint64_t wc_flags; + uint64_t flags; + struct { + uint8_t *buffer; + uint64_t length; + uint64_t offset; + uint32_t fd; + uint8_t reserved[4]; + } ext_mem_dmabuf; }; ``` @@ -65,6 +73,28 @@ struct efadv_cq_init_attr { EFADV_WC_EX_WITH_IS_UNSOLICITED: request for an option to check whether a receive WC is unsolicited. +*flags* +: A bitwise OR of the various values described below. + + EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF: + create CQ with external memory provided via dmabuf. + +*ext_mem_dmabuf* +: Structure containing information about external memory when using + EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF flag. + + buffer: + Pointer to the memory mapped in the process's virtual address space. The field is + optional, but if not provided, the use of CQ poll interfaces should be avoided. + + length: + Length of the memory region to use. + + fd: + File descriptor of the dmabuf. + + offset: + Offset within the dmabuf. # Completion iterator functions diff --git a/providers/efa/man/efadv_query_device.3.md b/providers/efa/man/efadv_query_device.3.md index fa43b779c..c41bc3d9d 100644 --- a/providers/efa/man/efadv_query_device.3.md +++ b/providers/efa/man/efadv_query_device.3.md @@ -85,6 +85,10 @@ struct efadv_device_attr { requests in order to receive RDMA write with immediate and a WC generated for such receive will be marked as unsolicited. + EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF: + Indicates that creating CQs with external memory buffers by passing dmabuf is + supported. + *max_rdma_size* : Maximum RDMA transfer size in bytes. 
diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c index f4dbc91e5..760333bae 100644 --- a/providers/efa/verbs.c +++ b/providers/efa/verbs.c @@ -175,6 +175,9 @@ int efadv_query_device(struct ibv_context *ibvctx, if (EFA_DEV_CAP(ctx, UNSOLICITED_WRITE_RECV)) attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV; + + if (EFA_DEV_CAP(ctx, CQ_WITH_EXT_MEM_DMABUF)) + attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF; } if (vext_field_avail(typeof(*attr), max_rdma_size, inlen)) { @@ -873,9 +876,9 @@ static void efa_cq_fill_pfns(struct efa_cq *cq, if (attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) ibvcqx->read_dlid_path_bits = efa_wc_read_dlid_path_bits; - if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)) + if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID) cq->dv_cq.wc_read_sgid = efa_wc_read_sgid; - if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED)) + if (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED) cq->dv_cq.wc_is_unsolicited = efa_wc_is_unsolicited; } @@ -925,12 +928,20 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx, if (!cq) return NULL; - if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)) + if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID) cmd.flags |= EFA_CREATE_CQ_WITH_SGID; num_sub_cqs = ctx->sub_cqs_per_cq; cmd.num_sub_cqs = num_sub_cqs; cmd.cq_entry_size = cqe_size; + + if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) { + cmd.ext_mem_fd = efa_attr->ext_mem_dmabuf.fd; + cmd.ext_mem_offset = efa_attr->ext_mem_dmabuf.offset; + cmd.ext_mem_length = efa_attr->ext_mem_dmabuf.length; + cmd.flags |= EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF; + } + if (attr->channel) cmd.flags |= EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL; @@ -945,22 +956,30 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx, sub_cq_size = cq->verbs_cq.cq.cqe; cq->cqn = resp.cq_idx; - cq->buf_size = resp.q_mmap_size; cq->num_sub_cqs = num_sub_cqs; cq->cqe_size = cqe_size; cq->dev = 
ibvctx->device; - cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED, - ibvctx->cmd_fd, resp.q_mmap_key); - if (cq->buf == MAP_FAILED) - goto err_destroy_cq; + if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) { + cq->buf_size = efa_attr->ext_mem_dmabuf.length; + cq->buf = efa_attr->ext_mem_dmabuf.buffer; + } else { + cq->buf_size = resp.q_mmap_size; + cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED, ibvctx->cmd_fd, + resp.q_mmap_key); + if (cq->buf == MAP_FAILED) + goto err_destroy_cq; + + cq->buf_mmaped = true; + } - buf = cq->buf; - sub_buf_size = cq->cqe_size * sub_cq_size; - for (i = 0; i < num_sub_cqs; i++) { - efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size, - cq->cqe_size); - buf += sub_buf_size; + if (cq->buf) { + buf = cq->buf; + sub_buf_size = cq->cqe_size * sub_cq_size; + for (i = 0; i < num_sub_cqs; i++) { + efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size, cq->cqe_size); + buf += sub_buf_size; + } } if (resp.comp_mask & EFA_CREATE_CQ_RESP_DB_OFF) { @@ -979,7 +998,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx, return &cq->verbs_cq.cq_ex; err_unmap_cq: - munmap(cq->buf, cq->buf_size); + if (cq->buf_mmaped) + munmap(cq->buf, cq->buf_size); err_destroy_cq: ibv_cmd_destroy_cq(&cq->verbs_cq.cq); err_free_cq: @@ -991,6 +1011,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx, struct ibv_cq *efa_create_cq(struct ibv_context *ibvctx, int ncqe, struct ibv_comp_channel *channel, int vec) { + struct efadv_cq_init_attr efa_attr = {}; struct ibv_cq_init_attr_ex attr_ex = { .cqe = ncqe, .channel = channel, @@ -998,7 +1019,7 @@ struct ibv_cq *efa_create_cq(struct ibv_context *ibvctx, int ncqe, }; struct ibv_cq_ex *ibvcqx; - ibvcqx = create_cq(ibvctx, &attr_ex, NULL); + ibvcqx = create_cq(ibvctx, &attr_ex, &efa_attr); return ibvcqx ? 
ibv_cq_ex_to_cq(ibvcqx) : NULL; } @@ -1006,7 +1027,9 @@ struct ibv_cq *efa_create_cq(struct ibv_context *ibvctx, int ncqe, struct ibv_cq_ex *efa_create_cq_ex(struct ibv_context *ibvctx, struct ibv_cq_init_attr_ex *attr_ex) { - return create_cq(ibvctx, attr_ex, NULL); + struct efadv_cq_init_attr efa_attr = {}; + + return create_cq(ibvctx, attr_ex, &efa_attr); } struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx, @@ -1014,6 +1037,7 @@ struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx, struct efadv_cq_init_attr *efa_attr, uint32_t inlen) { + struct efadv_cq_init_attr local_efa_attr = {}; uint64_t supp_wc_flags = 0; struct efa_context *ctx; @@ -1043,7 +1067,8 @@ struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx, return NULL; } - return create_cq(ibvctx, attr_ex, efa_attr); + memcpy(&local_efa_attr, efa_attr, min_t(uint32_t, inlen, sizeof(local_efa_attr))); + return create_cq(ibvctx, attr_ex, &local_efa_attr); } int efadv_query_cq(struct ibv_cq *ibvcq, struct efadv_cq_attr *attr, uint32_t inlen) @@ -1088,7 +1113,8 @@ int efa_destroy_cq(struct ibv_cq *ibvcq) } munmap(cq->db_mmap_addr, to_efa_dev(cq->dev)->pg_sz); - munmap(cq->buf, cq->buf_size); + if (cq->buf_mmaped) + munmap(cq->buf, cq->buf_size); pthread_spin_destroy(&cq->lock);