Skip to content

efa: Enable creation of CQs with external memory #1608

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions debian/ibverbs-providers.symbols
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,16 @@ libefa.so.1 ibverbs-providers #MINVER#
EFA_1.1@EFA_1.1 26
EFA_1.2@EFA_1.2 43
EFA_1.3@EFA_1.3 50
EFA_1.4@EFA_1.4 58
efadv_create_driver_qp@EFA_1.0 24
efadv_create_qp_ex@EFA_1.1 26
efadv_query_device@EFA_1.1 26
efadv_query_ah@EFA_1.1 26
efadv_cq_from_ibv_cq_ex@EFA_1.2 43
efadv_create_cq@EFA_1.2 43
efadv_query_mr@EFA_1.3 50
efadv_query_qp_wqs@EFA_1.4 58
efadv_query_cq@EFA_1.4 58
libhns.so.1 ibverbs-providers #MINVER#
* Build-Depends-Package: libibverbs-dev
HNS_1.0@HNS_1.0 51
Expand Down
8 changes: 7 additions & 1 deletion kernel-headers/rdma/efa-abi.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
/*
* Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/

#ifndef EFA_ABI_USER_H
Expand Down Expand Up @@ -56,6 +56,7 @@ struct efa_ibv_alloc_pd_resp {
/*
 * Bit flags for CQ creation; each constant occupies its own bit.
 * NOTE(review): presumably carried in efa_ibv_create_cq.flags, which is a
 * __u8, so only bits 0-7 are available - confirm against the kernel driver.
 */
enum {
EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0,
EFA_CREATE_CQ_WITH_SGID = 1 << 1,
/* NOTE(review): looks like a request for a CQ backed by dmabuf external memory - confirm */
EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF = 1 << 2,
};

struct efa_ibv_create_cq {
Expand All @@ -64,6 +65,10 @@ struct efa_ibv_create_cq {
__u16 num_sub_cqs;
__u8 flags;
__u8 reserved_58[5];
__aligned_u64 ext_mem_offset;
__aligned_u64 ext_mem_length;
__u32 ext_mem_fd;
__u8 reserved_120[4];
};

enum {
Expand Down Expand Up @@ -131,6 +136,7 @@ enum {
EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4,
EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5,
EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV = 1 << 6,
EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 7,
};

struct efa_ibv_ex_query_device_resp {
Expand Down
2 changes: 2 additions & 0 deletions kernel-headers/rdma/rdma_netlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,8 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */

RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */

RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, /* u8 */
/*
* Always the end
*/
Expand Down
2 changes: 1 addition & 1 deletion providers/efa/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ if (ENABLE_LTTNG AND LTTNGUST_FOUND)
endif()

rdma_shared_provider(efa libefa.map
1 1.3.${PACKAGE_VERSION}
1 1.4.${PACKAGE_VERSION}
${TRACE_FILE}
efa.c
verbs.c
Expand Down
1 change: 1 addition & 0 deletions providers/efa/efa.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ struct efa_cq {
size_t cqe_size;
uint8_t *buf;
size_t buf_size;
bool buf_mmaped;
uint32_t *db;
uint8_t *db_mmap_addr;
uint16_t cc; /* Consumer Counter */
Expand Down
89 changes: 62 additions & 27 deletions providers/efa/efadv.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2019-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/

#ifndef __EFADV_H__
Expand All @@ -16,38 +16,13 @@
extern "C" {
#endif

/*
 * EFA driver-specific QP types.
 * NOTE(review): presumably the value passed as driver_qp_type to the EFA
 * QP creation calls - confirm.
 */
enum {
/* Values must match the values in efa-abi.h */
EFADV_QP_DRIVER_TYPE_SRD = 0,
};

/*
 * Create a QP of an EFA driver-specific type from the generic init
 * attributes in @attr.
 * NOTE(review): @driver_qp_type is presumably one of EFADV_QP_DRIVER_TYPE_*
 * and a NULL return presumably signals failure - confirm against
 * efadv_create_driver_qp(3).
 */
struct ibv_qp *efadv_create_driver_qp(struct ibv_pd *ibvpd,
struct ibv_qp_init_attr *attr,
uint32_t driver_qp_type);

/*
 * QP creation flag bits.
 * NOTE(review): presumably OR-ed into efadv_qp_init_attr.flags - confirm.
 */
enum {
EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV = 1 << 0,
};

/*
 * EFA-specific QP creation attributes, passed as @efa_attr to
 * efadv_create_qp_ex().
 * NOTE(review): per-field notes are inferred from the member names - confirm
 * against efadv_create_qp_ex(3).
 */
struct efadv_qp_init_attr {
/* Compatibility mask */
uint64_t comp_mask;
/* presumably one of EFADV_QP_DRIVER_TYPE_* */
uint32_t driver_qp_type;
/* presumably a bitwise OR of EFADV_QP_FLAGS_* */
uint16_t flags;
/* presumably the service level - confirm */
uint8_t sl;
/* Reserved byte; brings the members to 16 bytes in total */
uint8_t reserved[1];
};

/*
 * Create a QP from the generic extended attributes in @attr_ex plus the
 * EFA-specific attributes in @efa_attr.
 * NOTE(review): @inlen is presumably sizeof(*efa_attr) as known to the caller,
 * and a NULL return presumably signals failure - confirm against
 * efadv_create_qp_ex(3).
 */
struct ibv_qp *efadv_create_qp_ex(struct ibv_context *ibvctx,
struct ibv_qp_init_attr_ex *attr_ex,
struct efadv_qp_init_attr *efa_attr,
uint32_t inlen);

/*
 * Device capability bits; each constant occupies its own bit.
 * NOTE(review): presumably reported to the caller through
 * struct efadv_device_attr - confirm against efadv_query_device(3).
 */
enum {
EFADV_DEVICE_ATTR_CAPS_RDMA_READ = 1 << 0,
EFADV_DEVICE_ATTR_CAPS_RNR_RETRY = 1 << 1,
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID = 1 << 2,
EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE = 1 << 3,
EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV = 1 << 4,
/* Creating CQs with external memory buffers by passing a dmabuf is supported */
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 5,
};

struct efadv_device_attr {
Expand Down Expand Up @@ -75,6 +50,45 @@ struct efadv_ah_attr {
/*
 * Query EFA-specific attributes of the address handle @ibvah into @attr.
 * NOTE(review): @inlen is presumably sizeof(*attr), and the return value is
 * presumably 0 on success or an errno value - confirm against
 * efadv_query_ah(3).
 */
int efadv_query_ah(struct ibv_ah *ibvah, struct efadv_ah_attr *attr,
uint32_t inlen);

/*
 * EFA driver-specific QP types.
 * NOTE(review): presumably the value passed as driver_qp_type to the EFA
 * QP creation calls - confirm.
 */
enum {
/* Values must match the values in efa-abi.h */
EFADV_QP_DRIVER_TYPE_SRD = 0,
};

/*
 * Create a QP of an EFA driver-specific type from the generic init
 * attributes in @attr.
 * NOTE(review): @driver_qp_type is presumably one of EFADV_QP_DRIVER_TYPE_*
 * and a NULL return presumably signals failure - confirm against
 * efadv_create_driver_qp(3).
 */
struct ibv_qp *efadv_create_driver_qp(struct ibv_pd *ibvpd,
struct ibv_qp_init_attr *attr,
uint32_t driver_qp_type);

/*
 * QP creation flag bits.
 * NOTE(review): presumably OR-ed into efadv_qp_init_attr.flags - confirm.
 */
enum {
EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV = 1 << 0,
};

/*
 * EFA-specific QP creation attributes, passed as @efa_attr to
 * efadv_create_qp_ex().
 *
 * The trailing reserved member is declared as a one-element array, like the
 * other reserved[] members in this header, so that callers which index or
 * memset it as an array keep compiling. The struct layout (16 bytes) is the
 * same as with a scalar member.
 *
 * NOTE(review): per-field notes are inferred from the member names - confirm
 * against efadv_create_qp_ex(3).
 */
struct efadv_qp_init_attr {
	/* Compatibility mask */
	uint64_t comp_mask;
	/* presumably one of EFADV_QP_DRIVER_TYPE_* */
	uint32_t driver_qp_type;
	/* presumably a bitwise OR of EFADV_QP_FLAGS_* */
	uint16_t flags;
	/* presumably the service level - confirm */
	uint8_t sl;
	/* Reserved byte; brings the members to 16 bytes in total */
	uint8_t reserved[1];
};

/*
 * Create a QP from the generic extended attributes in @attr_ex plus the
 * EFA-specific attributes in @efa_attr.
 * NOTE(review): @inlen is presumably sizeof(*efa_attr) as known to the caller,
 * and a NULL return presumably signals failure - confirm against
 * efadv_create_qp_ex(3).
 */
struct ibv_qp *efadv_create_qp_ex(struct ibv_context *ibvctx,
struct ibv_qp_init_attr_ex *attr_ex,
struct efadv_qp_init_attr *efa_attr,
uint32_t inlen);

/*
 * Attributes of a single work queue. efadv_query_qp_wqs() takes two of these,
 * @sq_attr and @rq_attr (presumably the send and receive queues).
 * NOTE(review): per-field notes are inferred from the member names - confirm
 * against efadv_query_qp_wqs(3).
 */
struct efadv_wq_attr {
/* Compatibility mask */
uint64_t comp_mask;
/* presumably the queue buffer address in the caller's address space */
uint8_t *buffer;
/* presumably the size in bytes of one queue entry */
uint32_t entry_size;
/* presumably the number of entries in the queue */
uint32_t num_entries;
/* presumably the address of the queue's doorbell register */
uint32_t *doorbell;
/* presumably the largest number of entries per doorbell ring - confirm */
uint32_t max_batch;
/* Reserved bytes following max_batch */
uint8_t reserved[4];
};

/*
 * Query work queue attributes of @ibvqp into @sq_attr and @rq_attr.
 * NOTE(review): @inlen is presumably sizeof(struct efadv_wq_attr), and the
 * return value is presumably 0 on success or an errno value - confirm against
 * efadv_query_qp_wqs(3).
 */
int efadv_query_qp_wqs(struct ibv_qp *ibvqp, struct efadv_wq_attr *sq_attr,
struct efadv_wq_attr *rq_attr, uint32_t inlen);

struct efadv_cq {
uint64_t comp_mask;
int (*wc_read_sgid)(struct efadv_cq *efadv_cq, union ibv_gid *sgid);
Expand All @@ -86,16 +100,37 @@ enum {
EFADV_WC_EX_WITH_IS_UNSOLICITED = 1 << 1,
};

/* Flag bits for efadv_cq_init_attr.flags */
enum {
/* Create the CQ with external memory provided via dmabuf */
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF = 1 << 0,
};

/*
 * EFA-specific CQ creation attributes, passed as @efa_attr to
 * efadv_create_cq().
 */
struct efadv_cq_init_attr {
/* Compatibility mask */
uint64_t comp_mask;
/* Requested EFA work-completion options (EFADV_WC_EX_* bits) */
uint64_t wc_flags;
/* Bitwise OR of EFADV_CQ_INIT_FLAGS_* values */
uint64_t flags;
/* External memory description, used with EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF */
struct {
/*
 * Pointer to the memory mapped in the process's virtual address space.
 * Optional, but without it the CQ poll interfaces should be avoided.
 */
uint8_t *buffer;
/* Length of the memory region to use */
uint64_t length;
/* Offset within the dmabuf */
uint64_t offset;
/* File descriptor of the dmabuf */
uint32_t fd;
/* Reserved bytes following fd */
uint8_t reserved[4];
} ext_mem_dmabuf;
};

/*
 * Create a CQ from the generic extended attributes in @attr_ex plus the
 * EFA-specific attributes in @efa_attr. Compatibility is handled using the
 * comp_mask and inlen fields.
 * NOTE(review): @inlen is presumably sizeof(*efa_attr) as known to the caller,
 * and a NULL return presumably signals failure - confirm against
 * efadv_create_cq(3).
 */
struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,
struct ibv_cq_init_attr_ex *attr_ex,
struct efadv_cq_init_attr *efa_attr,
uint32_t inlen);

/* EFA-specific Completion Queue attributes returned by efadv_query_cq() */
struct efadv_cq_attr {
/* Compatibility mask; an out field that currently has no values */
uint64_t comp_mask;
/* Completion queue buffer */
uint8_t *buffer;
/* Size of each completion queue entry */
uint32_t entry_size;
/* Maximum number of entries in the completion queue */
uint32_t num_entries;
};

/*
 * Query device-specific Completion Queue attributes of @ibvcq into @attr.
 * @inlen is the size of struct efadv_cq_attr; compatibility is handled using
 * the comp_mask and inlen fields.
 * Returns 0 on success, or the value of errno on failure.
 */
int efadv_query_cq(struct ibv_cq *ibvcq,
		   struct efadv_cq_attr *attr,
		   uint32_t inlen);

/*
 * Return the struct efadv_cq that corresponds to the extended CQ @ibvcqx.
 * NOTE(review): the behavior for a CQ not created through efadv_create_cq()
 * is not visible here - confirm against verbs.c.
 */
struct efadv_cq *efadv_cq_from_ibv_cq_ex(struct ibv_cq_ex *ibvcqx);

static inline int efadv_wc_read_sgid(struct efadv_cq *efadv_cq,
Expand Down
6 changes: 6 additions & 0 deletions providers/efa/libefa.map
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,9 @@ EFA_1.3 {
global:
efadv_query_mr;
} EFA_1.2;

EFA_1.4 {
global:
efadv_query_qp_wqs;
efadv_query_cq;
} EFA_1.3;
2 changes: 2 additions & 0 deletions providers/efa/man/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ rdma_man_pages(
efadv_create_driver_qp.3.md
efadv_create_qp_ex.3.md
efadv_query_ah.3.md
efadv_query_cq.3.md
efadv_query_device.3.md
efadv_query_mr.3.md
efadv_query_qp_wqs.3.md
)
30 changes: 30 additions & 0 deletions providers/efa/man/efadv_create_cq.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ Compatibility is handled using the comp_mask and inlen fields.
struct efadv_cq_init_attr {
uint64_t comp_mask;
uint64_t wc_flags;
uint64_t flags;
struct {
uint8_t *buffer;
uint64_t length;
uint64_t offset;
uint32_t fd;
uint8_t reserved[4];
} ext_mem_dmabuf;
};
```

Expand All @@ -65,6 +73,28 @@ struct efadv_cq_init_attr {
EFADV_WC_EX_WITH_IS_UNSOLICITED:
request for an option to check whether a receive WC is unsolicited.

*flags*
: A bitwise OR of the various values described below.

EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF:
create CQ with external memory provided via dmabuf.

*ext_mem_dmabuf*
: Structure containing information about external memory when using the
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF flag.

buffer:
Pointer to the memory mapped in the process's virtual address space. The field is
optional, but if not provided, the use of CQ poll interfaces should be avoided.

length:
Length of the memory region to use.

fd:
File descriptor of the dmabuf.

offset:
Offset within the dmabuf.

# Completion iterator functions

Expand Down
69 changes: 69 additions & 0 deletions providers/efa/man/efadv_query_cq.3.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
---
layout: page
title: EFADV_QUERY_CQ
section: 3
tagline: Verbs
date: 2025-04-15
header: "EFA Direct Verbs Manual"
footer: efa
---

# NAME

efadv_query_cq - Query EFA specific Completion Queue attributes

# SYNOPSIS

```c
#include <infiniband/efadv.h>

int efadv_query_cq(struct ibv_cq *ibvcq, struct efadv_cq_attr *attr,
uint32_t inlen);
```

# DESCRIPTION

**efadv_query_cq()** queries device-specific Completion Queue attributes.

Compatibility is handled using the comp_mask and inlen fields.

```c
struct efadv_cq_attr {
uint64_t comp_mask;
uint8_t *buffer;
uint32_t entry_size;
uint32_t num_entries;
};
```

*inlen*
: In: Size of struct efadv_cq_attr.

*comp_mask*
: Compatibility mask.

*buffer*
: Completion queue buffer.

*entry_size*
: Size of each completion queue entry.

*num_entries*
: Maximum number of entries in the completion queue.

# RETURN VALUE

**efadv_query_cq()** returns 0 on success, or the value of errno on failure
(which indicates the failure reason).

# SEE ALSO

**efadv**(7)

# NOTES

* Compatibility mask (comp_mask) is an out field and currently has no values.

# AUTHORS

Michael Margolin <mrgolin@amazon.com>
4 changes: 4 additions & 0 deletions providers/efa/man/efadv_query_device.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ struct efadv_device_attr {
requests in order to receive RDMA write with immediate and a WC generated for such
receive will be marked as unsolicited.

EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF:
Indicates that creating CQs with external memory buffers by passing a dmabuf is
supported.

*max_rdma_size*
: Maximum RDMA transfer size in bytes.

Expand Down
Loading