Topics

[PATCH RFC 1/3] afxdp: add ebpf code for afxdp and xskmap.

William Tu
 

AF_XDP requires attaching an XDP program and an xskmap to
each netdev. This patch provides these programs and maps,
along with the code that loads and attaches them.

Signed-off-by: William Tu <u9012063@...>
---
acinclude.m4 | 1 +
bpf/api.h | 6 ++++++
bpf/helpers.h | 2 ++
bpf/maps.h | 12 ++++++++++++
bpf/xdp.h | 34 +++++++++++++++++++++++++++++-----
lib/bpf.c | 41 +++++++++++++++++++++++++++++++++++++----
lib/bpf.h | 6 ++++--
vswitchd/bridge.c | 1 +
8 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index 257de4e178a8..badc1e564487 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -376,6 +376,7 @@ AC_DEFUN([OVS_CHECK_BPF], [
AC_DEFINE([HAVE_BPF], [1],
[Define to 1 if BPF is available.])
BPF_LDADD="-lbpf -lelf"
+ AC_DEFINE([AFXDP_NETDEV], [1], [System uses the AFXDP module.])
AC_SUBST([BPF_LDADD])
fi
])
diff --git a/bpf/api.h b/bpf/api.h
index f2db1f729157..15a44b744e6a 100644
--- a/bpf/api.h
+++ b/bpf/api.h
@@ -131,6 +131,12 @@
sizeof(uint32_t), pin, __NR_CPUS__)
#endif

+#ifndef BPF_XSKMAP
+# define BPF_XSKMAP(name, max_elem) \
+ __BPF_MAP(name, BPF_MAP_TYPE_XSKMAP, 0, sizeof(int), \
+ sizeof(int), 1, max_elem)
+#endif
+
/** Classifier helper */

#ifndef BPF_H_DEFAULT
diff --git a/bpf/helpers.h b/bpf/helpers.h
index fc4c4933e189..424cc06bd6aa 100644
--- a/bpf/helpers.h
+++ b/bpf/helpers.h
@@ -163,6 +163,8 @@ static int (*bpf_skb_change_tail)(void *ctx, int len, int flags) =
(void *) BPF_FUNC_skb_change_tail;
static int (*bpf_get_hash_recalc)(void *ctx) =
(void *) BPF_FUNC_get_hash_recalc;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+ (void *) BPF_FUNC_redirect_map;

static int OVS_UNUSED vlan_push(void *ctx, ovs_be16 proto, u16 tci)
{
diff --git a/bpf/maps.h b/bpf/maps.h
index d0e39c79a098..63953f3b045f 100644
--- a/bpf/maps.h
+++ b/bpf/maps.h
@@ -153,6 +153,18 @@ BPF_PERCPU_ARRAY(percpu_executing_key,
1
);

+/* AF_XDP maps (xskmap):
+ * By our design the key can be anything; the
+ * value is the receive queue id that the userspace
+ * program receives from.
+ * We need one map per device.
+ * The only parameter is the number of queues.
+ */
+BPF_XSKMAP(xsks_map0, 4);
+BPF_XSKMAP(xsks_map1, 4);
+BPF_XSKMAP(xsks_map2, 4);
+BPF_XSKMAP(xsks_map3, 4);
+
struct ebpf_headers_t;
struct ebpf_metadata_t;

diff --git a/bpf/xdp.h b/bpf/xdp.h
index 15c379e7f43c..c007184e950a 100644
--- a/bpf/xdp.h
+++ b/bpf/xdp.h
@@ -68,10 +68,34 @@ static int xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
#endif
}

-__section("af_xdp")
-static int af_xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
+#define AFXDP_REDIRECT(xskmap) { \
+ int idx = 0; \
+ int flags = 0; \
+ int len = (long)ctx->data_end - (long)ctx->data; \
+ printt("ingress_ifindex %d rx_queue_index %d pkt len %d\n", \
+ ctx->ingress_ifindex, ctx->rx_queue_index, len); \
+ printt("send to queue xsk queue 0\n"); \
+ return bpf_redirect_map(xskmap, idx, flags); \
+}\
+
+/* For AFXDP, we need one map and one afxdp program per netdev */
+__section("afxdp0")
+static int af_xdp_ingress0(struct xdp_md *ctx OVS_UNUSED)
{
- /* TODO: see xdpsock_kern.c ans xdpsock_user.c */
- return XDP_PASS;
+ AFXDP_REDIRECT(&xsks_map0);
+}
+__section("afxdp1")
+static int af_xdp_ingress1(struct xdp_md *ctx OVS_UNUSED)
+{
+ AFXDP_REDIRECT(&xsks_map1);
+}
+__section("afxdp2")
+static int af_xdp_ingress2(struct xdp_md *ctx OVS_UNUSED)
+{
+ AFXDP_REDIRECT(&xsks_map2);
+}
+__section("afxdp3")
+static int af_xdp_ingress3(struct xdp_md *ctx OVS_UNUSED)
+{
+ AFXDP_REDIRECT(&xsks_map3);
}
-
diff --git a/lib/bpf.c b/lib/bpf.c
index 48c677e54659..d59ed1bf1e65 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -174,6 +174,7 @@ bpf_format_state(struct ds *ds, struct bpf_state *state)
bpf_format_prog(ds, &state->egress);
bpf_format_prog(ds, &state->ingress);
bpf_format_prog(ds, &state->xdp);
+ //bpf_format_prog(ds, &state->afxdp);
}

/* Populates 'state' with the standard set of programs and maps for openvswitch
@@ -194,6 +195,10 @@ bpf_get(struct bpf_state *state, bool verbose)
{&state->egress.fd, "egress/0"},
{&state->downcall.fd, "downcall/0"},
{&state->xdp.fd, "xdp/0"},
+ {&state->afxdp[0].fd, "afxdp0/0"},
+ {&state->afxdp[1].fd, "afxdp1/0"},
+ {&state->afxdp[2].fd, "afxdp2/0"},
+ {&state->afxdp[3].fd, "afxdp3/0"},
/* BPF Maps */
{&state->upcalls.fd, "upcalls"},
{&state->flow_table.fd, "flow_table"},
@@ -201,6 +206,10 @@ bpf_get(struct bpf_state *state, bool verbose)
{&state->tailcalls.fd, "tailcalls"},
{&state->execute_actions.fd, "execute_actions"},
{&state->dp_flow_stats.fd, "dp_flow_stats"},
+ {&state->xsks_map[0].fd, "xsks_map0"},
+ {&state->xsks_map[1].fd, "xsks_map1"},
+ {&state->xsks_map[2].fd, "xsks_map2"},
+ {&state->xsks_map[3].fd, "xsks_map3"},
};
int i, k, error = 0;
char buf[BUFSIZ];
@@ -217,7 +226,7 @@ bpf_get(struct bpf_state *state, bool verbose)
}
error = bpf_obj_get(buf);
if (error > 0) {
- VLOG_DBG("Loaded BPF object at %s fd %d", buf, error);
+ VLOG_INFO("Loaded BPF object at %s fd %d", buf, error);
*objs[i].fd = error;
error = 0;
continue;
@@ -229,7 +238,7 @@ bpf_get(struct bpf_state *state, bool verbose)

prog_array_fd = state->tailcalls.fd;

- VLOG_DBG("start loading/pinning program array\n");
+ VLOG_INFO("start loading/pinning program array\n");
for (k = 0; k < BPF_MAX_PROG_ARRAY; k++) {
struct stat s;
int prog_fd;
@@ -243,7 +252,7 @@ bpf_get(struct bpf_state *state, bool verbose)

prog_fd = bpf_obj_get(buf);
if (prog_fd > 0) {
- VLOG_DBG("Loaded BPF object at %s", buf);
+ VLOG_INFO("Loaded BPF object at %s", buf);
state->tailarray[k].fd = prog_fd;
error = bpf_map_update_elem(prog_array_fd, &k, &prog_fd, BPF_ANY);
if (error < 0) {
@@ -280,9 +289,17 @@ bpf_get(struct bpf_state *state, bool verbose)
state->downcall.name = xstrdup("ovs_cls_downcall");
state->upcalls.name = xstrdup("upcalls");
state->xdp.name = xstrdup("xdp");
+ state->afxdp[0].name = xstrdup("afxdp0");
+ state->afxdp[1].name = xstrdup("afxdp1");
+ state->afxdp[2].name = xstrdup("afxdp2");
+ state->afxdp[3].name = xstrdup("afxdp3");
state->flow_table.name = xstrdup("flow_table");
state->datapath_stats.name = xstrdup("datapath_stats");
state->dp_flow_stats.name = xstrdup("dp_flow_stats");
+ state->xsks_map[0].name = xstrdup("xsks_map0");
+ state->xsks_map[1].name = xstrdup("xsks_map1");
+ state->xsks_map[2].name = xstrdup("xsks_map2");
+ state->xsks_map[3].name = xstrdup("xsks_map3");
// add parser, lookup, action, deparser
state->tailcalls.name = xstrdup("tailcalls");

@@ -309,17 +326,33 @@ bpf_put(struct bpf_state *state)
xclose(state->downcall.fd, state->downcall.name);
xclose(state->upcalls.fd, state->upcalls.name);
xclose(state->xdp.fd, state->xdp.name);
+ xclose(state->afxdp[0].fd, state->afxdp[0].name);
+ xclose(state->afxdp[1].fd, state->afxdp[1].name);
+ xclose(state->afxdp[2].fd, state->afxdp[2].name);
+ xclose(state->afxdp[3].fd, state->afxdp[3].name);
xclose(state->flow_table.fd, "ovs_map_flow_table");
xclose(state->datapath_stats.fd, "ovs_datapath_stats");
xclose(state->dp_flow_stats.fd, state->dp_flow_stats.name);
+ xclose(state->xsks_map[0].fd, state->xsks_map[0].name);
+ xclose(state->xsks_map[1].fd, state->xsks_map[1].name);
+ xclose(state->xsks_map[2].fd, state->xsks_map[2].name);
+ xclose(state->xsks_map[3].fd, state->xsks_map[3].name);
free((void *)state->ingress.name);
free((void *)state->egress.name);
free((void *)state->downcall.name);
free((void *)state->upcalls.name);
free((void *)state->xdp.name);
+ free((void *)state->afxdp[0].name);
+ free((void *)state->afxdp[1].name);
+ free((void *)state->afxdp[2].name);
+ free((void *)state->afxdp[3].name);
free((void *)state->flow_table.name);
free((void *)state->datapath_stats.name);
free((void *)state->dp_flow_stats.name);
+ free((void *)state->xsks_map[0].name);
+ free((void *)state->xsks_map[1].name);
+ free((void *)state->xsks_map[2].name);
+ free((void *)state->xsks_map[3].name);
}

static void
@@ -335,7 +368,7 @@ process(struct bpf_object *obj)
int error;

VLOG_DBG(" - %s\n", title);
- if (strstr(title, "xdp")) {
+ if (strstr(title, "xdp")) { /* handle both xdp and afxdp */
error = bpf_program__set_xdp(prog);
} else {
error = bpf_program__set_sched_cls(prog); // or sched_act?
diff --git a/lib/bpf.h b/lib/bpf.h
index 4b5afaf4f77f..69091aa640d3 100644
--- a/lib/bpf.h
+++ b/lib/bpf.h
@@ -38,6 +38,7 @@ struct bpf_map {
struct bpf_state;
struct ds;

+#define MAX_AFXDP_DEV 4 /* Max number of supported AFXDP netdev */
#define BPF_MAX_PROG_ARRAY 64
struct bpf_state {
/* File descriptors for programs. */
@@ -46,14 +47,15 @@ struct bpf_state {
struct bpf_prog downcall; /* BPF_PROG_TYPE_SCHED_CLS */
struct bpf_prog tailarray[BPF_MAX_PROG_ARRAY];
struct bpf_prog xdp; /* BPF_PROG_TYPE_XDP */
- // william: struct bpf_prog parser, deparser, action,
-
+ struct bpf_prog afxdp[MAX_AFXDP_DEV]; /* BPF_PROG_TYPE_XDP:
+ each netdev needs one */
struct bpf_map upcalls; /* BPF_MAP_TYPE_PERF_ARRAY */
struct bpf_map flow_table; /* BPF_MAP_TYPE_HASH */
struct bpf_map datapath_stats; /* BPF_MAP_TYPE_ARRAY */
struct bpf_map tailcalls; /* BPF_PROG_TYPE_PROG_ARRARY */
struct bpf_map execute_actions; /* BPF_MAP_TYPE_ARRAY */
struct bpf_map dp_flow_stats; /* BPF_MAP_TYPE_HASH */
+ struct bpf_map xsks_map[MAX_AFXDP_DEV]; /* BPF_MAP_TYPE_XSKMAP */
};

int bpf_get(struct bpf_state *state, bool verbose);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index ca6d73810420..56711c657dd4 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -518,6 +518,7 @@ init_ebpf(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)
if (ovsthread_once_start(&once)) {
char *bpf_elf = xasprintf("%s/bpf/datapath.o", ovs_pkgdatadir());

+ VLOG_DBG("%s bpf elf: %s", __func__, bpf_elf);
error = bpf_init();
if (!error) {
error = bpf_load(bpf_elf);
--
2.7.4