Skip to content

Commit b12e189

Browse files
committed
efa: Add option to create CQ with external memory
Extend the EFA direct verbs interface to enable creation of CQs on top of pre-allocated memory buffers. The memory can be passed by supplying a dmabuf fd and offset.

Reviewed-by: Daniel Kranzdorf <dkkranzd@amazon.com>
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
1 parent 5716443 commit b12e189

File tree

5 files changed

+93
-19
lines changed

5 files changed

+93
-19
lines changed

providers/efa/efa.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ struct efa_cq {
6969
size_t cqe_size;
7070
uint8_t *buf;
7171
size_t buf_size;
72+
bool buf_mmaped;
7273
uint32_t *db;
7374
uint8_t *db_mmap_addr;
7475
uint16_t cc; /* Consumer Counter */

providers/efa/efadv.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ enum {
2222
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID = 1 << 2,
2323
EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE = 1 << 3,
2424
EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV = 1 << 4,
25+
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 5,
2526
};
2627

2728
struct efadv_device_attr {
@@ -99,9 +100,21 @@ enum {
99100
EFADV_WC_EX_WITH_IS_UNSOLICITED = 1 << 1,
100101
};
101102

103+
enum {
104+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF = 1 << 0,
105+
};
106+
102107
struct efadv_cq_init_attr {
103108
uint64_t comp_mask;
104109
uint64_t wc_flags;
110+
uint64_t flags;
111+
struct {
112+
uint8_t *buffer;
113+
uint64_t length;
114+
uint64_t offset;
115+
uint32_t fd;
116+
uint8_t reserved[4];
117+
} ext_mem_dmabuf;
105118
};
106119

107120
struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,

providers/efa/man/efadv_create_cq.3.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ Compatibility is handled using the comp_mask and inlen fields.
4747
struct efadv_cq_init_attr {
4848
uint64_t comp_mask;
4949
uint64_t wc_flags;
50+
uint64_t flags;
51+
struct {
52+
uint8_t *buffer;
53+
uint64_t length;
54+
uint64_t offset;
55+
uint32_t fd;
56+
uint8_t reserved[4];
57+
} ext_mem_dmabuf;
5058
};
5159
```
5260

@@ -65,6 +73,28 @@ struct efadv_cq_init_attr {
6573
EFADV_WC_EX_WITH_IS_UNSOLICITED:
6674
request for an option to check whether a receive WC is unsolicited.
6775

76+
*flags*
77+
: A bitwise OR of the various values described below.
78+
79+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF:
80+
create CQ with external memory provided via dmabuf.
81+
82+
*ext_mem_dmabuf*
83+
: Structure containing information about the external memory when using the
84+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF flag.
85+
86+
buffer:
87+
Pointer to the memory mapped in the process's virtual address space. The field is
88+
optional; if it is not provided (NULL), the provider does not set up its polling state for the CQ, so the CQ poll interfaces must not be used.
89+
90+
length:
91+
Length, in bytes, of the memory region to use as the CQ buffer.
92+
93+
fd:
94+
File descriptor of the dmabuf.
95+
96+
offset:
97+
Offset within the dmabuf at which the memory region to use starts.
6898

6999
# Completion iterator functions
70100

providers/efa/man/efadv_query_device.3.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ struct efadv_device_attr {
8585
requests in order to receive RDMA write with immediate and a WC generated for such
8686
receive will be marked as unsolicited.
8787

88+
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF:
89+
Indicates that creating CQs with external memory buffers by passing dmabuf is
90+
supported.
91+
8892
*max_rdma_size*
8993
: Maximum RDMA transfer size in bytes.
9094

providers/efa/verbs.c

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ int efadv_query_device(struct ibv_context *ibvctx,
175175

176176
if (EFA_DEV_CAP(ctx, UNSOLICITED_WRITE_RECV))
177177
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV;
178+
179+
if (EFA_DEV_CAP(ctx, CQ_WITH_EXT_MEM_DMABUF))
180+
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF;
178181
}
179182

180183
if (vext_field_avail(typeof(*attr), max_rdma_size, inlen)) {
@@ -873,9 +876,9 @@ static void efa_cq_fill_pfns(struct efa_cq *cq,
873876
if (attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
874877
ibvcqx->read_dlid_path_bits = efa_wc_read_dlid_path_bits;
875878

876-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID))
879+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)
877880
cq->dv_cq.wc_read_sgid = efa_wc_read_sgid;
878-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED))
881+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED)
879882
cq->dv_cq.wc_is_unsolicited = efa_wc_is_unsolicited;
880883
}
881884

@@ -925,12 +928,20 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
925928
if (!cq)
926929
return NULL;
927930

928-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID))
931+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)
929932
cmd.flags |= EFA_CREATE_CQ_WITH_SGID;
930933

931934
num_sub_cqs = ctx->sub_cqs_per_cq;
932935
cmd.num_sub_cqs = num_sub_cqs;
933936
cmd.cq_entry_size = cqe_size;
937+
938+
if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) {
939+
cmd.ext_mem_fd = efa_attr->ext_mem_dmabuf.fd;
940+
cmd.ext_mem_offset = efa_attr->ext_mem_dmabuf.offset;
941+
cmd.ext_mem_length = efa_attr->ext_mem_dmabuf.length;
942+
cmd.flags |= EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF;
943+
}
944+
934945
if (attr->channel)
935946
cmd.flags |= EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL;
936947

@@ -945,22 +956,30 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
945956

946957
sub_cq_size = cq->verbs_cq.cq.cqe;
947958
cq->cqn = resp.cq_idx;
948-
cq->buf_size = resp.q_mmap_size;
949959
cq->num_sub_cqs = num_sub_cqs;
950960
cq->cqe_size = cqe_size;
951961
cq->dev = ibvctx->device;
952962

953-
cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED,
954-
ibvctx->cmd_fd, resp.q_mmap_key);
955-
if (cq->buf == MAP_FAILED)
956-
goto err_destroy_cq;
963+
if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) {
964+
cq->buf_size = efa_attr->ext_mem_dmabuf.length;
965+
cq->buf = efa_attr->ext_mem_dmabuf.buffer;
966+
} else {
967+
cq->buf_size = resp.q_mmap_size;
968+
cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED, ibvctx->cmd_fd,
969+
resp.q_mmap_key);
970+
if (cq->buf == MAP_FAILED)
971+
goto err_destroy_cq;
972+
973+
cq->buf_mmaped = true;
974+
}
957975

958-
buf = cq->buf;
959-
sub_buf_size = cq->cqe_size * sub_cq_size;
960-
for (i = 0; i < num_sub_cqs; i++) {
961-
efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size,
962-
cq->cqe_size);
963-
buf += sub_buf_size;
976+
if (cq->buf) {
977+
buf = cq->buf;
978+
sub_buf_size = cq->cqe_size * sub_cq_size;
979+
for (i = 0; i < num_sub_cqs; i++) {
980+
efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size, cq->cqe_size);
981+
buf += sub_buf_size;
982+
}
964983
}
965984

966985
if (resp.comp_mask & EFA_CREATE_CQ_RESP_DB_OFF) {
@@ -979,7 +998,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
979998
return &cq->verbs_cq.cq_ex;
980999

9811000
err_unmap_cq:
982-
munmap(cq->buf, cq->buf_size);
1001+
if (cq->buf_mmaped)
1002+
munmap(cq->buf, cq->buf_size);
9831003
err_destroy_cq:
9841004
ibv_cmd_destroy_cq(&cq->verbs_cq.cq);
9851005
err_free_cq:
@@ -991,29 +1011,33 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
9911011
struct ibv_cq *efa_create_cq(struct ibv_context *ibvctx, int ncqe,
9921012
struct ibv_comp_channel *channel, int vec)
9931013
{
1014+
struct efadv_cq_init_attr efa_attr = {};
9941015
struct ibv_cq_init_attr_ex attr_ex = {
9951016
.cqe = ncqe,
9961017
.channel = channel,
9971018
.comp_vector = vec
9981019
};
9991020
struct ibv_cq_ex *ibvcqx;
10001021

1001-
ibvcqx = create_cq(ibvctx, &attr_ex, NULL);
1022+
ibvcqx = create_cq(ibvctx, &attr_ex, &efa_attr);
10021023

10031024
return ibvcqx ? ibv_cq_ex_to_cq(ibvcqx) : NULL;
10041025
}
10051026

10061027
struct ibv_cq_ex *efa_create_cq_ex(struct ibv_context *ibvctx,
10071028
struct ibv_cq_init_attr_ex *attr_ex)
10081029
{
1009-
return create_cq(ibvctx, attr_ex, NULL);
1030+
struct efadv_cq_init_attr efa_attr = {};
1031+
1032+
return create_cq(ibvctx, attr_ex, &efa_attr);
10101033
}
10111034

10121035
struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,
10131036
struct ibv_cq_init_attr_ex *attr_ex,
10141037
struct efadv_cq_init_attr *efa_attr,
10151038
uint32_t inlen)
10161039
{
1040+
struct efadv_cq_init_attr local_efa_attr = {};
10171041
uint64_t supp_wc_flags = 0;
10181042
struct efa_context *ctx;
10191043

@@ -1043,7 +1067,8 @@ struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,
10431067
return NULL;
10441068
}
10451069

1046-
return create_cq(ibvctx, attr_ex, efa_attr);
1070+
memcpy(&local_efa_attr, efa_attr, min_t(uint32_t, inlen, sizeof(local_efa_attr)));
1071+
return create_cq(ibvctx, attr_ex, &local_efa_attr);
10471072
}
10481073

10491074
int efadv_query_cq(struct ibv_cq *ibvcq, struct efadv_cq_attr *attr, uint32_t inlen)
@@ -1088,7 +1113,8 @@ int efa_destroy_cq(struct ibv_cq *ibvcq)
10881113
}
10891114

10901115
munmap(cq->db_mmap_addr, to_efa_dev(cq->dev)->pg_sz);
1091-
munmap(cq->buf, cq->buf_size);
1116+
if (cq->buf_mmaped)
1117+
munmap(cq->buf, cq->buf_size);
10921118

10931119
pthread_spin_destroy(&cq->lock);
10941120

0 commit comments

Comments
 (0)