Topics

[RFC PATCH 07/11] bpf: implement OVS BPF datapath.

William Tu
 

This patch adds the OVS-eBPF datapath implementation for dpif-bpf.
Three stages are added: parse, lookup, and actions. Each stage
tail-calls the next stage. When executing multiple actions,
the current action also tail-calls the subsequent action, based on
the result of the flow table lookup.

The protocol headers are auto-generated and defined in generated_headers.h.
The bpf_flow_key is extracted using the P4-to-eBPF compiler from
the bcc project. A couple of manual tweaks are required; see parser.h.

Signed-off-by: William Tu <u9012063@...>
Signed-off-by: Yifeng Sun <pkusunyifeng@...>
Signed-off-by: Joe Stringer <joe@...>
Co-authored-by: Joe Stringer <joe@...>
Co-authored-by: Yifeng Sun <pkusunyifeng@...>
---
Makefile.am | 1 +
bpf/action.h | 628 ++++++++++++++++++++++++++++++++++++++++++++++++
bpf/api.h | 279 +++++++++++++++++++++
bpf/automake.mk | 60 +++++
bpf/datapath.c | 187 ++++++++++++++
bpf/datapath.h | 71 ++++++
bpf/generated_headers.h | 185 ++++++++++++++
bpf/helpers.h | 209 ++++++++++++++++
bpf/lookup.h | 227 +++++++++++++++++
bpf/maps.h | 170 +++++++++++++
bpf/odp-bpf.h | 254 ++++++++++++++++++++
bpf/openvswitch.h | 49 ++++
bpf/ovs-p4.h | 112 +++++++++
bpf/ovs-proto.p4 | 329 +++++++++++++++++++++++++
bpf/parser.h | 412 +++++++++++++++++++++++++++++++
bpf/xdp.h | 35 +++
16 files changed, 3208 insertions(+)
create mode 100644 bpf/action.h
create mode 100644 bpf/api.h
create mode 100644 bpf/automake.mk
create mode 100644 bpf/datapath.c
create mode 100644 bpf/datapath.h
create mode 100644 bpf/generated_headers.h
create mode 100644 bpf/helpers.h
create mode 100644 bpf/lookup.h
create mode 100644 bpf/maps.h
create mode 100644 bpf/odp-bpf.h
create mode 100644 bpf/openvswitch.h
create mode 100644 bpf/ovs-p4.h
create mode 100644 bpf/ovs-proto.p4
create mode 100644 bpf/parser.h
create mode 100644 bpf/xdp.h

diff --git a/Makefile.am b/Makefile.am
index 21e27fa32965..ec1fc53b1060 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -440,6 +440,7 @@ dist-docs:

include Documentation/automake.mk
include m4/automake.mk
+include bpf/automake.mk
include lib/automake.mk
include ofproto/automake.mk
include utilities/automake.mk
diff --git a/bpf/action.h b/bpf/action.h
new file mode 100644
index 000000000000..49213698c00b
--- /dev/null
+++ b/bpf/action.h
@@ -0,0 +1,628 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+/* OVS Datapath Execution
+ * ======================
+ *
+ * When a lookup is successful the eBPF program gets a list of actions to
+ * be executed, such as outputting the packet to a certain port, or
+ * pushing a VLAN tag. The list of actions is configured in ovs-vswitchd
+ * and may be of variable length depending on the desired network
+ * processing behaviour. For example, an L2 switch flooding a packet with
+ * an unknown destination sends the packet to all its current ports. The
+ * OVS datapath's actions are derived from the OpenFlow action
+ * specification and the OVSDB schema for ovs-vswitchd.
+ *
+ */
+#include <errno.h>
+#include <stdint.h>
+#include <iproute2/bpf_elf.h>
+#include <linux/ip.h>
+
+#include "api.h"
+#include "maps.h"
+#include "helpers.h"
+
+#define ALIGNED_CAST(TYPE, ATTR) ((TYPE) (void *) (ATTR)) /* Cast ATTR to TYPE through an intermediate void * cast. */
+/* Byte offsets of IPv4 header fields, measured from the start of the packet.
+ * Each one is ETH_HLEN plus the field's offset inside struct iphdr, i.e. the
+ * IPv4 header is taken to begin ETH_HLEN bytes into the packet. */
+#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
+#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
+#define TTL_OFF (ETH_HLEN + offsetof(struct iphdr, ttl))
+#define DST_OFF (ETH_HLEN + offsetof(struct iphdr, daddr))
+#define SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
+
+/*
+ * Rewrite the IPv4 TOS byte of 'skb' to 'new_tos' and patch the IPv4 header
+ * checksum to match.
+ *
+ * bpf_l3_csum_replace() with size 2 treats 'from' and 'to' as 16-bit words
+ * laid out as they are in the packet.  TOS is the second byte of the first
+ * 16-bit word of the IPv4 header, so the byte values have to go through
+ * htons() to end up in the correct half of that word.  Passing them raw
+ * yields a wrong checksum on little-endian hosts.
+ */
+static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
+{
+    __u8 old_tos = load_byte(skb, TOS_OFF);
+
+    bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
+
+    /* Use helper here because using direct packet
+     * access causes verifier error
+     */
+    bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
+}
+
+/*
+ * Rewrite the IPv4 TTL byte of 'skb' to 'new_ttl' and patch the IPv4 header
+ * checksum to match.
+ *
+ * TTL is the first byte of its 16-bit word (TTL, protocol) in the packet.
+ * Expressing the byte as htons(ttl << 8) places it in the correct half of
+ * the word regardless of host endianness; on little-endian hosts this
+ * evaluates to the plain byte value, on big-endian hosts to ttl << 8.
+ */
+static inline void set_ip_ttl(struct __sk_buff *skb, __u8 new_ttl)
+{
+    __u8 old_ttl = load_byte(skb, TTL_OFF);
+
+    bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_ttl << 8),
+                        htons(new_ttl << 8), 2);
+    bpf_skb_store_bytes(skb, TTL_OFF, &new_ttl, sizeof(new_ttl), 0);
+}
+
+/*
+ * Rewrite the IPv4 destination address of 'skb' to 'new_dst' and patch the
+ * IPv4 header checksum to match.
+ *
+ * 'new_dst' is stored into the packet unchanged, so it must already be in
+ * network byte order.  load_word() hands back the old address converted to
+ * host byte order, so it is converted back with htonl() before both values
+ * are given to bpf_l3_csum_replace(); otherwise the checksum delta would
+ * mix byte orders.
+ *
+ * Only the IPv4 header checksum is adjusted here; no L4 checksum is touched.
+ */
+static inline void set_ip_dst(struct __sk_buff *skb, __u32 new_dst)
+{
+    __u32 old_dst = load_word(skb, DST_OFF);
+
+    /* Separate statement: htonl() may expand its argument several times. */
+    old_dst = htonl(old_dst);
+
+    bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_dst, new_dst, 4);
+    bpf_skb_store_bytes(skb, DST_OFF, &new_dst, sizeof(new_dst), 0);
+}
+
+/*
+ * Rewrite the IPv4 source address of 'skb' to 'new_src' and patch the IPv4
+ * header checksum to match.
+ *
+ * 'new_src' is stored into the packet unchanged, so it must already be in
+ * network byte order.  load_word() hands back the old address converted to
+ * host byte order, so it is converted back with htonl() before both values
+ * are given to bpf_l3_csum_replace(); otherwise the checksum delta would
+ * mix byte orders.
+ *
+ * Only the IPv4 header checksum is adjusted here; no L4 checksum is touched.
+ */
+static inline void set_ip_src(struct __sk_buff *skb, __u32 new_src)
+{
+    __u32 old_src = load_word(skb, SRC_OFF);
+
+    /* Separate statement: htonl() may expand its argument several times. */
+    old_src = htonl(old_src);
+
+    bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_src, new_src, 4);
+    bpf_skb_store_bytes(skb, SRC_OFF, &new_src, sizeof(new_src), 0);
+}
+
+/*
+ * Common prologue of every action program: locate the action batch that is
+ * being executed for 'skb' and return the entry selected by the action index
+ * kept in the skb control block.
+ *
+ * On success '*__batch' receives the batch and the return value points at
+ * batch->actions[index].  NULL is returned, and '*__batch' is left untouched,
+ * when the index is out of range or any map lookup fails.
+ */
+static inline struct bpf_action *pre_tail_action(struct __sk_buff *skb,
+                                                 struct bpf_action_batch **__batch)
+{
+    struct bpf_action_batch *found;
+    uint32_t slot = ovs_cb_get_action_index(skb);
+    int key0 = 0;
+
+    if (slot >= BPF_DP_MAX_ACTION) {
+        printt("ERR max ebpf action hit\n");
+        return NULL;
+    }
+
+    if (!skb->cb[OVS_CB_DOWNCALL_EXE]) {
+        /* Regular packet: the batch is the flow-table entry of the flow key
+         * currently recorded in percpu_executing_key. */
+        struct bpf_flow_key *current_key;
+        struct bpf_flow_key key_copy;
+
+        current_key = bpf_map_lookup_elem(&percpu_executing_key, &key0);
+        if (!current_key) {
+            printt("empty percpu_executing_key\n");
+            return NULL;
+        }
+
+        /* The flow table is queried with a local copy of the key. */
+        key_copy = *current_key;
+        found = bpf_map_lookup_elem(&flow_table, &key_copy);
+    } else {
+        /* Downcall packet has a dedicated action list */
+        found = bpf_map_lookup_elem(&execute_actions, &key0);
+    }
+
+    if (!found) {
+        printt("no batch action found\n");
+        return NULL;
+    }
+
+    *__batch = found;
+    return &found->actions[slot];
+}
+
+/*
+ * Common epilogue of every action program: advance the action index stored
+ * in the skb control block and tail call the program of the next action.
+ *
+ * Returns TC_ACT_SHOT when 'batch' is NULL or when the tail call falls
+ * through.  When the list is exhausted (index reaches BPF_DP_MAX_ACTION or
+ * the next action has type 0), the downcall action list is deleted for
+ * downcall packets and TC_ACT_STOLEN is returned.
+ */
+static inline int post_tail_action(struct __sk_buff *skb,
+                                   struct bpf_action_batch *batch)
+{
+    uint32_t next_idx;
+
+    if (!batch) {
+        return TC_ACT_SHOT;
+    }
+
+    next_idx = skb->cb[OVS_CB_ACT_IDX] + 1;
+    skb->cb[OVS_CB_ACT_IDX] = next_idx;
+
+    if (next_idx < BPF_DP_MAX_ACTION) {
+        struct bpf_action *following = &batch->actions[next_idx];
+
+        if (following->type != 0) {
+            printt("next action type = %d\n", following->type);
+            bpf_tail_call(skb, &tailcalls, following->type);
+
+            /* Only reached if the tail call did not transfer control. */
+            printt("[BUG] tail call missing\n");
+            return TC_ACT_SHOT;
+        }
+    }
+
+    /* End of the action list. */
+    if (skb->cb[OVS_CB_DOWNCALL_EXE]) {
+        int slot0 = 0;
+
+        bpf_map_delete_elem(&execute_actions, &slot0);
+    }
+    return TC_ACT_STOLEN;
+}
+
+/*
+ * End-of-action-list marker.
+ * BPF program: tail-0
+ *
+ * This program covers actions=drop: it returns TC_ACT_SHOT so that the
+ * device's dropped counter is incremented (see sch_handle_ingress).
+ * A drop that follows other actions (e.g. actions=a1,a2,drop) never gets
+ * here; post_tail_action() ends that list and returns TC_ACT_STOLEN.
+ */
+__section_tail(OVS_ACTION_ATTR_UNSPEC)
+static int tail_action_unspec(struct __sk_buff *skb)
+{
+    /* Only consumed by the trace message below. */
+    int act_idx OVS_UNUSED = ovs_cb_get_action_index(skb);
+
+    printt("action index = %d, end of processing\n", act_idx);
+
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements output to a port.
+ * BPF program: tail-1
+ *
+ * Clone-redirects the packet to the ifindex stored in the action, then
+ * continues with the next action.  A failing bpf_clone_redirect() is logged
+ * but does not abort the remaining actions.
+ */
+__section_tail(OVS_ACTION_ATTR_OUTPUT)
+static int tail_action_output(struct __sk_buff *skb)
+{
+    int ret;
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+    int flags;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* Internal dev is tap type and hooked only to bpf egress filter.
+       When output to an internal device, a packet is clone-redirected to
+       this device's ingress so that this packet is processed by kernel stack.
+       Why? Since if the packet is sent to its egress, it is delivered to the
+       tap device's socket, not kernel.
+    */
+    flags = action->u.out.flags & OVS_BPF_FLAGS_TX_STACK ? BPF_F_INGRESS : 0;
+    printt("output action port = %d ingress? %d\n",
+           action->u.out.port, (flags));
+
+    ret = bpf_clone_redirect(skb, action->u.out.port, flags);
+    if (ret < 0) {
+        /* Braces keep this well-formed when printt() expands to nothing. */
+        printt("ERR clone_redirect to port %d failed: %d\n",
+               action->u.out.port, ret);
+    }
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements OVS userspace
+ * BPF program: tail-2
+ *
+ * Builds a bpf_upcall of type OVS_UPCALL_ACTION that carries the action's
+ * netlink attributes and the parsed headers, and emits it together with
+ * skb->len bytes of the packet through the 'upcalls' map.  The packet is
+ * dropped (TC_ACT_SHOT) if any step fails.
+ */
+__section_tail(OVS_ACTION_ATTR_USERSPACE)
+static int tail_action_userspace(struct __sk_buff *skb)
+{
+    struct bpf_action_batch *batch;
+    struct bpf_action *action = pre_tail_action(skb, &batch);
+
+    if (!action) {
+        return TC_ACT_SHOT;
+    }
+
+    /* XXX Declared here rather than at the top of the function: per the
+     * original note, moving it up makes the stack overflow. */
+    struct bpf_upcall md = {
+        .type = OVS_UPCALL_ACTION,
+        .skb_len = skb->len,
+        .ifindex = skb->ifindex,
+    };
+
+    if (sizeof(md.uactions) < action->u.userspace.nlattr_len) {
+        printt("userspace action is too large\n");
+        return TC_ACT_SHOT;
+    }
+
+    /* The whole fixed-size buffer is copied; the valid length travels
+     * separately in uactions_len. */
+    memcpy(md.uactions, action->u.userspace.nlattr_data, sizeof(md.uactions));
+    md.uactions_len = action->u.userspace.nlattr_len;
+
+    struct ebpf_headers_t *hdrs = bpf_get_headers();
+
+    if (!hdrs) {
+        printt("headers is NULL\n");
+        return TC_ACT_SHOT;
+    }
+    memcpy(&md.key.headers, hdrs, sizeof(*hdrs));
+
+    /* Upper 32 bits: packet length; lower bits: BPF_F_CURRENT_CPU. */
+    uint64_t flags = ((uint64_t) skb->len << 32) | BPF_F_CURRENT_CPU;
+    int err = skb_event_output(skb, &upcalls, flags, &md, sizeof md);
+
+    if (err) {
+        printt("skb_event_output of userspace action: %d", err);
+        return TC_ACT_SHOT;
+    }
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements BPF tunnel
+ * BPF program: tail-3
+ *
+ * Fills a bpf_tunnel_key from the action's tunnel description (id, tos, ttl
+ * and remote address) and attaches it to the skb, optionally followed by the
+ * tunnel options.  Failures of the two tunnel helpers are only logged;
+ * processing continues with the next action either way.
+ */
+__section_tail(OVS_ACTION_ATTR_SET)
+static int tail_action_tunnel_set(struct __sk_buff *skb)
+{
+    struct bpf_action_batch *batch;
+    struct bpf_action *action;
+    struct ovs_action_set_tunnel *tun;
+    struct bpf_tunnel_key key;
+    uint64_t flags;
+    int ret;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action) {
+        return TC_ACT_SHOT;
+    }
+
+    /* Actions with is_set are rejected by this program. */
+    if (action->is_set) {
+        /* set_masked */
+        printt("ERR: this is set tunnel action\n");
+        return TC_ACT_SHOT;
+    }
+
+    tun = &action->u.tunnel;
+
+    __builtin_memset(&key, 0x0, sizeof(key));
+    key.tunnel_id = tun->tunnel_id;
+    key.tunnel_tos = tun->tunnel_tos;
+    key.tunnel_ttl = tun->tunnel_ttl;
+
+    printt("tunnel_id = %x\n", key.tunnel_id);
+
+    /* TODO: handle BPF_F_DONT_FRAGMENT and BPF_F_SEQ_NUMBER */
+    flags = BPF_F_ZERO_CSUM_TX;
+    if (tun->use_ipv6) {
+        /* 16 bytes are copied starting at the remote_ipv4 members. */
+        memcpy(&key.remote_ipv4, &tun->remote_ipv4, 16);
+        flags |= BPF_F_TUNINFO_IPV6;
+    } else {
+        key.remote_ipv4 = tun->remote_ipv4;
+        flags &= ~BPF_F_TUNINFO_IPV6;
+    }
+
+    ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), flags);
+    if (ret < 0)
+        printt("ERR setting tunnel key\n");
+
+    if (tun->gnvopt_valid) {
+        ret = bpf_skb_set_tunnel_opt(skb, &tun->gnvopt,
+                                     sizeof tun->gnvopt);
+        if (ret < 0)
+            printt("ERR setting tunnel opt\n");
+    }
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements VLAN push
+ * BPF program: tail-4
+ *
+ * Pushes a VLAN header using the TPID from the action and the action's TCI
+ * with the VLAN_TAG_PRESENT bit cleared.  The helper's return value is not
+ * checked.
+ */
+__section_tail(OVS_ACTION_ATTR_PUSH_VLAN)
+static int tail_action_push_vlan(struct __sk_buff *skb)
+{
+    struct bpf_action_batch *batch;
+    struct bpf_action *action = pre_tail_action(skb, &batch);
+
+    if (!action) {
+        return TC_ACT_SHOT;
+    }
+
+    printt("vlan push tci %d\n", action->u.push_vlan.vlan_tci);
+    printt("vlan push tpid %d\n", action->u.push_vlan.vlan_tpid);
+
+    bpf_skb_vlan_push(skb,
+                      action->u.push_vlan.vlan_tpid,
+                      action->u.push_vlan.vlan_tci & ~VLAN_TAG_PRESENT);
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements VLAN pop
+ * BPF program: tail-5
+ *
+ * Pops the outermost VLAN header from the skb and continues with the next
+ * action.  The helper's return value is not checked.
+ */
+__section_tail(OVS_ACTION_ATTR_POP_VLAN)
+static int tail_action_pop_vlan(struct __sk_buff *skb)
+{
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* No conversion specifier here: there is no value to print. */
+    printt("vlan pop\n");
+    bpf_skb_vlan_pop(skb);
+
+    /* FIXME: invalidate_flow_key()? */
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * Placeholder for the sample action, which is not implemented: the packet
+ * is dropped.
+ * BPF program: tail-6
+ */
+__section_tail(OVS_ACTION_ATTR_SAMPLE)
+static int tail_action_sample(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Adjacent string literals instead of a backslash continuation, so the
+     * source indentation does not end up inside the message that printt()
+     * copies onto the stack. */
+    printt("ERR: Sample action not implemented, "
+           "do you want to do it? \n");
+
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements recirculation
+ * BPF program: tail-7
+ *
+ * Stores the action's recirc id in the packet metadata, resets the action
+ * index and the downcall flag in the skb control block, and tail calls the
+ * MATCH_ACTION_CALL program.  Returns TC_ACT_SHOT if the metadata lookup
+ * fails or the tail call falls through.
+ */
+__section_tail(OVS_ACTION_ATTR_RECIRC)
+static int tail_action_recirc(struct __sk_buff *skb)
+{
+    struct bpf_action_batch *batch;
+    struct ebpf_metadata_t *md;
+    struct bpf_action *action;
+    u32 id;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action) {
+        return TC_ACT_SHOT;
+    }
+
+    /* recirc should be the last action.
+     * level does not handle */
+
+    /* is_flow_key_valid() is not consulted here (per the original note,
+     * the header is currently always re-parsed). */
+    id = action->u.recirc_id;
+    printt("recirc id = %d\n", id);
+
+    /* update metadata */
+    md = bpf_get_mds();
+    if (!md) {
+        printt("lookup metadata failed\n");
+        return TC_ACT_SHOT;
+    }
+    md->md.recirc_id = id;
+
+    /* Start over at action 0 and clear the downcall flag. */
+    skb->cb[OVS_CB_ACT_IDX] = 0;
+    skb->cb[OVS_CB_DOWNCALL_EXE] = 0;
+
+    /* FIXME: recirc should not call this. */
+    bpf_tail_call(skb, &tailcalls, MATCH_ACTION_CALL);
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements hash
+ * BPF program: tail-8
+ *
+ * Recomputes the skb hash and saves it as dp_hash in the per-CPU metadata.
+ * A computed hash of zero is stored as 1.
+ */
+__section_tail(OVS_ACTION_ATTR_HASH)
+static int tail_action_hash(struct __sk_buff *skb)
+{
+    struct bpf_action_batch *batch;
+    struct bpf_action *action;
+    struct ebpf_metadata_t *md;
+    int key0 = 0;
+    u32 hash;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action) {
+        return TC_ACT_SHOT;
+    }
+
+    printt("skb->hash before = %x\n", skb->hash);
+    hash = bpf_get_hash_recalc(skb);
+    printt("skb->hash = %x hash \n", skb->hash);
+    if (hash == 0) {
+        hash = 0x1;
+    }
+
+    md = bpf_map_lookup_elem(&percpu_metadata, &key0);
+    if (!md) {
+        printt("LOOKUP metadata failed\n");
+        return TC_ACT_SHOT;
+    }
+
+    printt("save hash to ebpf_md->md.dp_hash\n");
+    md->md.dp_hash = hash; /* or create a ovs_flow_hash?*/
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * Placeholder for the MPLS push action, which is not implemented: the
+ * packet is dropped.
+ * BPF program: tail-9
+ */
+__section_tail(OVS_ACTION_ATTR_PUSH_MPLS)
+static int tail_action_mpls_push(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Adjacent string literals instead of a backslash continuation, so the
+     * source indentation does not end up inside the message that printt()
+     * copies onto the stack. */
+    printt("ERR: Push MPLS action not implemented, "
+           "do you want to do it? \n");
+
+    return TC_ACT_SHOT;
+}
+
+/*
+ * Placeholder for the MPLS pop action, which is not implemented: the
+ * packet is dropped.
+ * BPF program: tail-10
+ */
+__section_tail(OVS_ACTION_ATTR_POP_MPLS)
+static int tail_action_mpls_pop(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Adjacent string literals instead of a backslash continuation, so the
+     * source indentation does not end up inside the message that printt()
+     * copies onto the stack. */
+    printt("ERR: Pop MPLS action not implemented, "
+           "do you want to do it? \n");
+
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements set packet's fields, mask not supported.
+ * Many other fields not implemented yet.
+ * BPF program: tail-11
+ * TODO: hit verifier limit here, maybe create more program and
+ * more tail call.
+ *
+ * Supported key types:
+ *  - OVS_KEY_ATTR_ETHERNET: overwrite destination and source MAC.
+ *  - OVS_KEY_ATTR_IPV4: overwrite source/destination address, protocol,
+ *    TOS and TTL.
+ * Any other key type is logged and the packet is dropped.
+ */
+__section_tail(OVS_ACTION_ATTR_SET_MASKED)
+static int tail_action_set_masked(struct __sk_buff *skb)
+{
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    switch (action->u.mset.key_type) {
+    case OVS_KEY_ATTR_ETHERNET: {
+        u8 *data = (u8 *)(long)skb->data;
+        u8 *data_end = (u8 *)(long)skb->data_end;
+        struct ethhdr *eth;
+        struct ovs_key_ethernet *ether;
+        int i;
+
+        /* packet data */
+        eth = (struct ethhdr *)data;
+        if (data + sizeof(*eth) > data_end)
+            return TC_ACT_SHOT;
+
+        /* value from map */
+        ether = &action->u.mset.key.ether;
+        for (i = 0; i < 6; i++)
+            eth->h_dest[i] = ether->eth_dst.ea[i];
+        for (i = 0; i < 6; i++)
+            eth->h_source[i] = ether->eth_src.ea[i];
+        break;
+    }
+    case OVS_KEY_ATTR_IPV4: {
+        u8 *data = (u8 *)(long)skb->data;
+        u8 *data_end = (u8 *)(long)skb->data_end;
+        struct iphdr *nh;
+        struct ovs_key_ipv4 *ipv4;
+
+        /* packet data */
+        nh = ALIGNED_CAST(struct iphdr *, data + sizeof(struct ethhdr));
+        if ((u8 *)nh + sizeof(struct iphdr) + 12 > data_end) {
+            return TC_ACT_SHOT;
+        }
+
+        /* value from map */
+        ipv4 = &action->u.mset.key.ipv4;
+
+        /* Copies 8 bytes starting at ipv4_src over saddr and daddr.
+         * NOTE(review): the addresses and the protocol are written
+         * directly, without any checksum adjustment -- confirm whether
+         * that is acceptable for the callers of this action. */
+        memcpy(&nh->saddr, &ipv4->ipv4_src, 8);
+        nh->protocol = ipv4->ipv4_proto;
+
+        /* TOS and TTL must NOT be stored through 'nh' first.
+         * set_ip_tos() and set_ip_ttl() read the old byte back from the
+         * packet to compute the checksum delta; if the new value were
+         * already in place they would see old == new and leave the IPv4
+         * header checksum stale.  Both helpers store the new byte
+         * themselves. */
+        set_ip_tos(skb, ipv4->ipv4_tos);
+        set_ip_ttl(skb, ipv4->ipv4_ttl);
+        //set_ip_src(skb, ipv4->ipv4_src);
+        //set_ip_dst(skb, ipv4->ipv4_dst);
+
+        //bpf_l3_csum_replace(skb, IP_CSUM_OFF, nh->saddr, ipv4->ipv4_src, 4);
+        //bpf_l3_csum_replace(skb, IP_CSUM_OFF, nh->daddr, ipv4->ipv4_dst, 4);
+        //bpf_l3_csum_replace(skb, IP_CSUM_OFF, nh->protocol, ipv4->ipv4_proto, 1);
+        //bpf_l3_csum_replace(skb, IP_CSUM_OFF, nh->tos, ipv4->ipv4_tos, 2);
+        //bpf_l3_csum_replace(skb, IP_CSUM_OFF, nh->ttl, ipv4->ipv4_ttl, 1);
+
+        /* XXX ignore frag */
+
+        break;
+    }
+    case OVS_KEY_ATTR_UNSPEC:
+    case OVS_KEY_ATTR_ENCAP:
+    case OVS_KEY_ATTR_PRIORITY: /* u32 skb->priority */
+    case OVS_KEY_ATTR_IN_PORT: /* u32 OVS dp port number */
+    case OVS_KEY_ATTR_VLAN: /* be16 VLAN TCI */
+    case OVS_KEY_ATTR_ETHERTYPE: /* be16 Ethernet type */
+    case OVS_KEY_ATTR_IPV6: /* struct ovs_key_ipv6 */
+    case OVS_KEY_ATTR_TCP: /* struct ovs_key_tcp */
+    case OVS_KEY_ATTR_UDP: /* struct ovs_key_udp */
+    case OVS_KEY_ATTR_ICMP: /* struct ovs_key_icmp */
+    case OVS_KEY_ATTR_ICMPV6: /* struct ovs_key_icmpv6 */
+    case OVS_KEY_ATTR_ARP: /* struct ovs_key_arp */
+    case OVS_KEY_ATTR_ND: /* struct ovs_key_nd */
+    case OVS_KEY_ATTR_SKB_MARK: /* u32 skb mark */
+    case OVS_KEY_ATTR_TUNNEL: /* Nested set of ovs_tunnel attributes */
+    case OVS_KEY_ATTR_SCTP: /* struct ovs_key_sctp */
+    case OVS_KEY_ATTR_TCP_FLAGS: /* be16 TCP flags. */
+    case OVS_KEY_ATTR_DP_HASH: /* u32 hash value. Value 0 indicates the hash */
+    case OVS_KEY_ATTR_RECIRC_ID: /* u32 recirc id */
+    case OVS_KEY_ATTR_MPLS: /* array of struct ovs_key_mpls. */
+    case OVS_KEY_ATTR_CT_STATE: /* u32 bitmask of OVS_CS_F_* */
+    case OVS_KEY_ATTR_CT_ZONE: /* u16 connection tracking zone. */
+    case OVS_KEY_ATTR_CT_MARK: /* u32 connection tracking mark */
+    case OVS_KEY_ATTR_CT_LABELS: /* 16-octet connection tracking labels */
+    case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: /* struct ovs_key_ct_tuple_ipv4 */
+    case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: /* struct ovs_key_ct_tuple_ipv6 */
+    case OVS_KEY_ATTR_NSH: /* Nested set of ovs_nsh_key_* */
+#ifdef __KERNEL__
+    case OVS_KEY_ATTR_TUNNEL_INFO: /* struct ovs_tunnel_info */
+#endif
+#ifndef __KERNEL__
+    case OVS_KEY_ATTR_PACKET_TYPE: /* be32 packet type */
+#endif
+    case __OVS_KEY_ATTR_MAX:
+    default:
+        /* Report the key type that was rejected; the action type is the
+         * same for every packet that reaches this program. */
+        printt("ERR Un-implemented set %d\n", action->u.mset.key_type);
+        return TC_ACT_SHOT;
+    }
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * Placeholder for the connection tracking action, which is not implemented:
+ * the packet is dropped.
+ * BPF program: tail-12
+ */
+__section_tail(OVS_ACTION_ATTR_CT)
+static int tail_action_ct(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Adjacent string literals instead of a backslash continuation, so the
+     * source indentation does not end up inside the message that printt()
+     * copies onto the stack. */
+    printt("ERR: CT (connection tracking) not implemented, "
+           "do you want to do it? \n");
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements packet truncate
+ * BPF program: tail-13
+ *
+ * Asks bpf_skb_change_tail() to resize the skb to the action's max_len and
+ * continues with the next action.  The helper's return value is not
+ * checked; the lengths before and after are only traced.
+ */
+__section_tail(OVS_ACTION_ATTR_TRUNC)
+static int tail_action_trunc(struct __sk_buff *skb)
+{
+    struct bpf_action_batch *batch;
+    struct bpf_action *action = pre_tail_action(skb, &batch);
+
+    if (!action) {
+        return TC_ACT_SHOT;
+    }
+
+    printt("len before: %d\n", skb->len);
+    printt("truncate to %d\n", action->u.trunc.max_len);
+
+    /* The helper will resize the skb to the given new size */
+    bpf_skb_change_tail(skb, action->u.trunc.max_len, 0);
+
+    printt("len after: %d\n", skb->len);
+
+    return post_tail_action(skb, batch);
+}
diff --git a/bpf/api.h b/bpf/api.h
new file mode 100644
index 000000000000..f2db1f729157
--- /dev/null
+++ b/bpf/api.h
@@ -0,0 +1,279 @@
+#ifndef __BPF_API__
+#define __BPF_API__
+
+/* Note:
+ *
+ * This file can be included into eBPF kernel programs. It contains
+ * a couple of useful helper functions, map/section ABI (bpf_elf.h),
+ * misc macros and some eBPF specific LLVM built-ins.
+ */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#define UNSPEC_CALL 0 /* Numeric program slots.  UPCALL_CALL is used as a
+                       * __section_tail() key and MATCH_ACTION_CALL as an
+                       * index for bpf_tail_call() on 'tailcalls';
+                       * presumably all six index that same program
+                       * array -- TODO confirm. */
+#define OUTPUT_CALL 1
+#define PARSER_CALL 32
+#define MATCH_ACTION_CALL 33
+#define DEPARSER_CALL 34
+#define UPCALL_CALL 35
+
+#ifndef TC_ACT_OK /* Fallback tc action codes, defined only when TC_ACT_OK is not already provided. */
+#define TC_ACT_OK 0
+#define TC_ACT_RECLASSIFY 1
+#define TC_ACT_SHOT 2
+#define TC_ACT_PIPE 3
+#define TC_ACT_STOLEN 4
+#define TC_ACT_QUEUED 5
+#define TC_ACT_REPEAT 6
+#define TC_ACT_REDIRECT 7
+#endif
+
+/** Misc macros.
+ *
+ * Every macro below is wrapped in #ifndef, so a definition that is already
+ * in effect when this header is included takes precedence.
+ */
+
+#ifndef __stringify /* Turns its argument into a string literal exactly as written. */
+# define __stringify(X) #X
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((__unused__))
+#endif
+
+#ifndef htons /* Byte-order conversions, mapped onto the __constant_*() forms. */
+# define htons(X) __constant_htons((X))
+#endif
+
+#ifndef ntohs
+# define ntohs(X) __constant_ntohs((X))
+#endif
+
+#ifndef htonl
+# define htonl(X) __constant_htonl((X))
+#endif
+
+#ifndef ntohl
+# define ntohl(X) __constant_ntohl((X))
+#endif
+
+#ifndef __inline__
+# define __inline__ __attribute__((always_inline))
+#endif
+
+#ifndef __section /* Places a symbol in ELF section NAME and marks it 'used'. */
+# define __section(NAME) \
+ __attribute__((section(NAME), used))
+#endif
+
+#ifndef __section_tail /* Section "tail-<KEY>"; e.g. __section_tail(UPCALL_CALL) is commented as "tail-35" in datapath.c. */
+# define __section_tail(KEY) \
+ __section("tail-" __stringify(KEY))
+#endif
+
+#ifndef __section_license
+# define __section_license \
+ __section(ELF_SECTION_LICENSE)
+#endif
+
+#ifndef __section_maps
+# define __section_maps \
+ __section(ELF_SECTION_MAPS)
+#endif
+
+#ifndef BPF_LICENSE /* Defines the license string in the license section. */
+# define BPF_LICENSE(NAME) \
+ char ____license[] __section_license = NAME
+#endif
+
+#ifndef __BPF_MAP /* Defines map NAME in the maps section.  The ID and PIN arguments are accepted but not used in the initializer. */
+# define __BPF_MAP(NAME, TYPE, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ struct bpf_map_def __section_maps NAME = { \
+ .type = (TYPE), \
+ .key_size = (SIZE_KEY), \
+ .value_size = (SIZE_VALUE), \
+ .max_entries = (MAX_ELEM), \
+ .map_flags = 0, \
+ }
+#endif
+
+#ifndef BPF_HASH /* The wrappers below fix the map type; the array-like ones also fix the key size to sizeof(uint32_t). */
+# define BPF_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_HASH, ID, SIZE_KEY, SIZE_VALUE, \
+ PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PERCPU_HASH
+# define BPF_PERCPU_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PERCPU_HASH, ID, SIZE_KEY, SIZE_VALUE, \
+ PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY
+# define BPF_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_ARRAY, ID, sizeof(uint32_t), \
+ SIZE_VALUE, PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PERCPU_ARRAY
+# define BPF_PERCPU_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PERCPU_ARRAY, ID, sizeof(uint32_t), \
+ SIZE_VALUE, PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PROG_ARRAY
+# define BPF_PROG_ARRAY(NAME, ID, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PROG_ARRAY, ID, sizeof(uint32_t), \
+ sizeof(uint32_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PERF_OUTPUT /* Perf event array with __NR_CPUS__ entries; __NR_CPUS__ must be supplied by the build. */
+# define BPF_PERF_OUTPUT(name, pin) \
+ __BPF_MAP(name, BPF_MAP_TYPE_PERF_EVENT_ARRAY, 0, sizeof(uint32_t), \
+ sizeof(uint32_t), pin, __NR_CPUS__)
+#endif
+
+/** Classifier helper */
+
+#ifndef BPF_H_DEFAULT
+# define BPF_H_DEFAULT -1
+#endif
+
+/** BPF helper functions for tc. Individual flags are in linux/bpf.h
+ *
+ * BPF_FUNC(NAME, args) expands to the declarator of a function pointer
+ * called NAME that is initialized to the helper id BPF_FUNC_<NAME>; the
+ * caller writes the storage class and return type in front of it.
+ * BPF_FUNC2 is the same declarator without an initializer, for cases where
+ * the pointer name differs from the helper id and the caller supplies the
+ * initializer.
+ */
+
+#ifndef BPF_FUNC
+# define BPF_FUNC(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused = (void *) BPF_FUNC_##NAME
+#endif
+
+#ifndef BPF_FUNC2
+# define BPF_FUNC2(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused
+#endif
+
+/* Map access/manipulation.
+ * Each declaration in this header made with BPF_FUNC() is a static function
+ * pointer named after the helper without a "bpf_" prefix. */
+static void *BPF_FUNC(map_lookup_elem, void *map, const void *key);
+static int BPF_FUNC(map_update_elem, void *map, const void *key,
+ const void *value, uint32_t flags);
+static int BPF_FUNC(map_delete_elem, void *map, const void *key);
+
+/* Time access */
+static uint64_t BPF_FUNC(ktime_get_ns, void);
+
+/* Debugging */
+
+/* FIXME: __attribute__ ((format(printf, 1, 3))) not possible unless
+ * llvm bug https://llvm.org/bugs/show_bug.cgi?id=26243 gets resolved.
+ * It would require ____fmt to be made const, which generates a reloc
+ * entry (non-map).
+ */
+static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
+
+#ifndef printt /* printt(fmt, ...): copies 'fmt' into a local char array and hands it to trace_printk(). */
+# ifdef DEBUG_BPF_OFF /* With DEBUG_BPF_OFF defined, printt() expands to nothing. */
+# define printt(fmt, ...)
+# else
+# define printt(fmt, ...) \
+ ({ \
+ char ____fmt[] = fmt; \
+ trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \
+ })
+# endif
+#endif
+
+/* Random numbers */
+static uint32_t BPF_FUNC(get_prandom_u32, void);
+
+/* Tail calls */
+static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
+ uint32_t index);
+
+/* System helpers */
+static uint32_t BPF_FUNC(get_smp_processor_id, void);
+
+/* Packet misc meta data */
+static uint32_t BPF_FUNC(get_hash_recalc, struct __sk_buff *skb);
+
+/* Cgroup membership test.  The kernel helper takes the skb as its first
+ * argument, followed by the cgroup array map and the index into it; the
+ * prototype has to list all three so that calls pass them in the right
+ * argument slots. */
+static int BPF_FUNC(skb_under_cgroup, struct __sk_buff *skb, void *map,
+                    uint32_t index);
+
+/* Packet redirection */
+static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
+static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex,
+ uint32_t flags);
+
+/* Packet manipulation */
+static int BPF_FUNC(skb_load_bytes, struct __sk_buff *skb, uint32_t off,
+ void *to, uint32_t len);
+static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off,
+ const void *from, uint32_t len, uint32_t flags);
+
+static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(csum_diff, void *from, uint32_t from_size, void *to,
+ uint32_t to_size, uint32_t seed);
+
+static int BPF_FUNC(skb_change_type, struct __sk_buff *skb, uint32_t type);
+static int BPF_FUNC(skb_change_proto, struct __sk_buff *skb, uint32_t proto,
+ uint32_t flags);
+static int BPF_FUNC(skb_change_tail, struct __sk_buff *skb, uint32_t nlen,
+ uint32_t flags);
+
+/* Packet vlan encap/decap */
+static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
+ uint16_t vlan_tci);
+static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb);
+
+/* Packet tunnel encap/decap */
+static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb,
+ struct bpf_tunnel_key *to, uint32_t size, uint32_t flags);
+static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb,
+ const struct bpf_tunnel_key *from, uint32_t size,
+ uint32_t flags);
+
+static int BPF_FUNC(skb_get_tunnel_opt, struct __sk_buff *skb,
+ void *to, uint32_t size);
+static int BPF_FUNC(skb_set_tunnel_opt, struct __sk_buff *skb,
+ const void *from, uint32_t size);
+
+/* Events for user space.
+ * skb_event_output is declared with BPF_FUNC2 because its name differs from
+ * the helper id it is bound to, BPF_FUNC_perf_event_output. */
+static int BPF_FUNC2(skb_event_output, struct __sk_buff *skb, void *map, uint64_t index,
+ const void *data, uint32_t size) = (void *)BPF_FUNC_perf_event_output;
+
+/** LLVM built-ins, mem*() routines work for constant size */
+
+#ifndef lock_xadd /* Add via __sync_fetch_and_add(); the fetched value is discarded. */
+# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val))
+#endif
+
+#ifndef memset
+# define memset(s, c, n) __builtin_memset((s), (c), (n))
+#endif
+
+#ifndef memcpy
+# define memcpy(d, s, n) __builtin_memcpy((d), (s), (n))
+#endif
+
+#ifndef memmove
+# define memmove(d, s, n) __builtin_memmove((d), (s), (n))
+#endif
+
+/* FIXME: __builtin_memcmp() is not yet fully useable unless llvm bug
+ * https://llvm.org/bugs/show_bug.cgi?id=26218 gets resolved. Also
+ * this one would generate a reloc entry (non-map), otherwise.
+ */
+#if 0
+#ifndef memcmp
+# define memcmp(a, b, n) __builtin_memcmp((a), (b), (n))
+#endif
+#endif
+/* Packet loads at a byte offset into the skb.  The asm labels bind these
+ * declarations to the LLVM BPF load intrinsics of 1, 2 and 4 bytes. */
+unsigned long long load_byte(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.byte");
+
+unsigned long long load_half(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.half");
+
+unsigned long long load_word(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.word");
+
+#endif /* __BPF_API__ */
diff --git a/bpf/automake.mk b/bpf/automake.mk
new file mode 100644
index 000000000000..3028c585b6cc
--- /dev/null
+++ b/bpf/automake.mk
@@ -0,0 +1,60 @@
+bpf_sources = bpf/datapath.c
+bpf_headers = \
+ bpf/api.h \
+ bpf/datapath.h \
+ bpf/odp-bpf.h \
+ bpf/ovs-p4.h \
+ bpf/helpers.h \
+ bpf/openvswitch.h \
+ bpf/maps.h \
+ bpf/parser.h \
+ bpf/lookup.h \
+ bpf/action.h \
+ bpf/generated_headers.h \
+ bpf/xdp.h
+bpf_extra = \
+ bpf/ovs-proto.p4
+
+# Regardless of configuration with GCC, we must compile the BPF with clang
+# since GCC doesn't have a BPF backend. Clang doesn't support these flags,
+# so we filter them out.
+
+bpf_FILTER_FLAGS := $(filter-out -Wbool-compare, $(AM_CFLAGS))
+bpf_FILTER_FLAGS2 := $(filter-out -Wduplicated-cond, $(bpf_FILTER_FLAGS))
+bpf_FILTER_FLAGS3 := $(filter-out --coverage, $(bpf_FILTER_FLAGS2))
+bpf_CFLAGS := $(bpf_FILTER_FLAGS3)
+# __NR_CPUS__ is set from `nproc` on the machine running the build.
+bpf_CFLAGS += -D__NR_CPUS__=$(shell nproc) -O2 -Wall -Werror -emit-llvm
+bpf_CFLAGS += -I$(top_builddir)/include -I$(top_srcdir)/include
+bpf_CFLAGS += -Wno-error=pointer-arith # Allow skb->data arithmetic
+bpf_CFLAGS += -I${IPROUTE2_SRC_PATH}/include/uapi/
+# FIXME:
+#bpf_CFLAGS += -D__KERNEL__
+
+dist_sources = $(bpf_sources)
+dist_headers = $(bpf_headers)
+build_sources = $(dist_sources)
+build_headers = $(dist_headers)
+build_objects = $(patsubst %.c,%.o,$(build_sources))
+
+# Default tool names; ?= lets the environment or command line override them.
+LLC ?= llc-3.8
+CLANG ?= clang-3.8
+
+bpf: $(build_objects)
+# Compile the single BPF translation unit to LLVM bitcode with clang and
+# pipe it into llc to produce the BPF object file.
+bpf/datapath.o: $(bpf_sources) $(bpf_headers)
+	$(MKDIR_P) $(dir $@)
+	@which $(CLANG) >/dev/null 2>&1 || \
+		(echo "Unable to find clang, Install clang (>=3.7) package"; exit 1)
+	$(AM_V_CC) $(CLANG) $(bpf_CFLAGS) -c $< -o - | \
+	$(LLC) -march=bpf -filetype=obj -o $@
+
+# Debug build: same pipeline with -g and the 4.0 toolchain, plus an annotated
+# disassembly next to the object.  The outputs are named after the target
+# itself so that make sees the target as built.
+bpf/datapath_dbg.o: $(bpf_sources) $(bpf_headers)
+	$(MKDIR_P) $(dir $@)
+	@which clang-4.0 > /dev/null 2>&1 || \
+		(echo "Unable to find clang-4.0 for debugging"; exit 1)
+	clang-4.0 $(bpf_CFLAGS) -g -c $< -o -| llc-4.0 -march=bpf -filetype=obj -o $@
+	llvm-objdump-4.0 -S -no-show-raw-insn $@ > $@.objdump
+
+# Sources, headers and the P4 description always go into the distribution.
+EXTRA_DIST += $(dist_sources) $(dist_headers) $(bpf_extra)
+# The compiled object is added to dist_bpf_DATA only under the HAVE_BPF
+# conditional (presumably set by configure -- TODO confirm).
+if HAVE_BPF
+dist_bpf_DATA += $(build_objects)
+endif
+
diff --git a/bpf/datapath.c b/bpf/datapath.c
new file mode 100644
index 000000000000..627177208059
--- /dev/null
+++ b/bpf/datapath.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <iproute2/bpf_elf.h>
+
+#include "api.h"
+#include "odp-bpf.h"
+#include "datapath.h"
+
+/*
+ * Instead of having multiple BPF object files,
+ * include all headers and generate single datapath.o
+ */
+#include "maps.h"
+#include "parser.h"
+#include "lookup.h"
+#include "action.h"
+#include "xdp.h"
+
+/* We don't rely on specific versions of the kernel; however libbpf requires
+ * this to be both specified and non-zero. */
+static const __maybe_unused __section("version") uint32_t version = 0x1;
+
+/* Emits an OVS_UPCALL_DEBUG record describing 'skb' on the 'upcalls' map.
+ *
+ * 'subtype' and 'error' are copied verbatim into the record, together with
+ * the ingress ifindex, the current CPU id and the packet length. */
+static inline void __maybe_unused
+bpf_debug(struct __sk_buff *skb, enum ovs_dbg_subtype subtype, int error)
+{
+    struct bpf_upcall dbg = {
+        .type = OVS_UPCALL_DEBUG,
+        .subtype = subtype,
+        .ifindex = skb->ingress_ifindex,
+        .cpu = get_smp_processor_id(),
+        .skb_len = skb->len,
+        .error = error
+    };
+    /* Packet length in the upper 32 bits, current-CPU selector below. */
+    uint64_t out_flags = ((uint64_t) skb->len << 32) | BPF_F_CURRENT_CPU;
+
+    skb_event_output(skb, &upcalls, out_flags, &dbg, sizeof(dbg));
+}
+
+/*
+ * This program forwards the packet to userspace, using the
+ * perf_event_output helper function.
+ *
+ * The upcall is sent as an OVS_UPCALL_MISS record on the 'upcalls' map and
+ * carries a copy of the headers and metadata returned by bpf_get_headers()
+ * and bpf_get_mds().  The result is accounted in the datapath stats:
+ * OVS_DP_STATS_MISSED on success, OVS_DP_STATS_LOST on -ENOSPC and
+ * OVS_DP_STATS_ERRORS for any other error.  Always returns TC_ACT_OK.
+ * BPF program: tail-35
+ */
+__section_tail(UPCALL_CALL)
+static inline int process_upcall(struct __sk_buff *skb)
+{
+ struct bpf_upcall md = {
+ .type = OVS_UPCALL_MISS,
+ .skb_len = skb->len,
+ //.ifindex = ovs_cb_get_ifindex(skb),
+ };
+ int stat, err;
+ struct ebpf_headers_t *hdrs = bpf_get_headers();
+ struct ebpf_metadata_t *mds = bpf_get_mds();
+
+ /* Without a parsed key there is nothing to report; let the packet pass. */
+ if (!hdrs || !mds) {
+ printt("headers/mds is NULL\n");
+ return TC_ACT_OK;
+ }
+
+ /* The upcall's ifindex is taken from the metadata's in_port. */
+ md.ifindex = mds->md.in_port;
+
+ memcpy(&md.key.headers, hdrs, sizeof(struct ebpf_headers_t));
+ memcpy(&md.key.mds, mds, sizeof(struct ebpf_metadata_t));
+
+ /* When the key records a VLAN tag, push it onto the packet (with
+ * VLAN_TAG_PRESENT cleared from the TCI) and refresh the reported length. */
+ if (hdrs->valid & VLAN_VALID) {
+ printt("upcall skb->len(%d) with vlan %x %x\n",
+ skb->len, hdrs->vlan.etherType, hdrs->vlan.tci);
+ skb_vlan_push(skb, hdrs->vlan.etherType,
+ hdrs->vlan.tci & ~VLAN_TAG_PRESENT);
+ md.skb_len = skb->len;
+ }
+
+ /* Upper 32 bits: skb->len; lower bits: BPF_F_CURRENT_CPU. */
+ uint64_t flags = skb->len;
+ flags <<= 32;
+ flags |= BPF_F_CURRENT_CPU;
+
+ err = skb_event_output(skb, &upcalls, flags, &md, sizeof(md));
+ stat = !err ? OVS_DP_STATS_MISSED
+ : err == -ENOSPC ? OVS_DP_STATS_LOST
+ : OVS_DP_STATS_ERRORS;
+ stats_account(stat);
+ return TC_ACT_OK;
+}
+
+/*
+ * This is the ENTRY POINT for packet seen at ingress queue
+ *
+ * Resets skb->cb[] with the ingress flag set, then tail calls the
+ * PARSER_CALL program.  The statements after bpf_tail_call() are the
+ * failure path: they log the error and return TC_ACT_SHOT.
+ */
+__section("ingress")
+static int to_stack(struct __sk_buff *skb)
+{
+ printt("\n\ningress from %d (%d)\n", skb->ingress_ifindex, skb->ifindex);
+
+ ovs_cb_init(skb, true);
+ bpf_tail_call(skb, &tailcalls, PARSER_CALL);
+
+ printt("ERR: tail call fail in ingress\n");
+ return TC_ACT_SHOT;
+}
+
+/*
+ * This is the ENTRY POINT for packet seen at egress queue
+ *
+ * Resets skb->cb[] with the ingress flag cleared, then tail calls the
+ * PARSER_CALL program.  The statements after bpf_tail_call() are the
+ * failure path: they log the error and return TC_ACT_SHOT.
+ */
+__section("egress")
+static int from_stack(struct __sk_buff *skb)
+{
+ printt("\n\negress from %d (%d)\n", skb->ingress_ifindex, skb->ifindex);
+
+ ovs_cb_init(skb, false);
+ bpf_tail_call(skb, &tailcalls, PARSER_CALL);
+
+ printt("ERR: tail call fail in egress\n");
+ return TC_ACT_SHOT;
+}
+
+/*
+ * This is the ENTRY POINT for downcall packet
+ *
+ * A downcall packet ends with a 'struct bpf_downcall' trailer.  The trailer
+ * is read, its metadata is stored in percpu_metadata, and the trailer is
+ * trimmed off the packet before dispatching on md.type:
+ *
+ *   - OVS_BPF_DOWNCALL_EXECUTE: tail call the handler of the first action
+ *     in the 'execute_actions' batch.
+ *   - OVS_BPF_DOWNCALL_OUTPUT: redirect the packet to md.ifindex.
+ *
+ * Packets that are too short to hold the trailer, or whose trailer cannot
+ * be read or trimmed, return TC_ACT_SHOT instead of being processed with
+ * uninitialized metadata.  Otherwise returns the redirect() result for
+ * OUTPUT, or TC_ACT_OK.
+ */
+__section("downcall")
+static int execute(struct __sk_buff *skb)
+{
+    struct bpf_downcall md;
+    u32 ebpf_zero = 0;
+    int flags, ofs;
+
+    /* Guard the subtraction below against wrapping on short packets. */
+    if (skb->len < sizeof(md)) {
+        printt("ERR: downcall packet too short, len %d\n", skb->len);
+        return TC_ACT_SHOT;
+    }
+
+    ofs = skb->len - sizeof(md);
+    if (skb_load_bytes(skb, ofs, &md, sizeof(md)) < 0) {
+        printt("ERR: failed to load downcall metadata\n");
+        return TC_ACT_SHOT;
+    }
+    flags = md.flags & OVS_BPF_FLAGS_TX_STACK ? BPF_F_INGRESS : 0;
+
+    printt("downcall (%d) from %d flags %d\n", md.type,
+           md.ifindex, flags);
+
+    bpf_map_update_elem(&percpu_metadata, &ebpf_zero, &md.md, BPF_ANY);
+
+    /* Strip the trailer so that only the original packet data remains. */
+    if (skb_change_tail(skb, ofs, 0) < 0) {
+        printt("ERR: failed to trim downcall metadata\n");
+        return TC_ACT_SHOT;
+    }
+
+    switch (md.type) {
+    case OVS_BPF_DOWNCALL_EXECUTE: {
+        struct bpf_action_batch *action_batch;
+
+        action_batch = bpf_map_lookup_elem(&execute_actions, &ebpf_zero);
+        if (action_batch) {
+            printt("get valid action_batch\n");
+            skb->cb[OVS_CB_DOWNCALL_EXE] = 1;
+            /* Falls through to the break only if the tail call fails. */
+            bpf_tail_call(skb, &tailcalls, action_batch->actions[0].type);
+        } else {
+            printt("get null action_batch\n");
+        }
+        break;
+    }
+    case OVS_BPF_DOWNCALL_OUTPUT: {
+        /* Skip writing the BPF metadata in parser */
+        skb->cb[OVS_CB_ACT_IDX] = -1;
+        /* Redirect to the device this packet came from, so it's as though the
+         * packet was freshly received. This should execute PARSER_CALL. */
+        return redirect(md.ifindex, flags);
+    }
+    default:
+        printt("Unknown downcall type %d\n", md.type);
+        break;
+    }
+    return TC_ACT_OK;
+}
+
+BPF_LICENSE("GPL");
diff --git a/bpf/datapath.h b/bpf/datapath.h
new file mode 100644
index 000000000000..d9f48461cc79
--- /dev/null
+++ b/bpf/datapath.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "odp-bpf.h"
+
+#define SKB_CB_U32S 5 /* According to linux/bpf.h. */
+
+enum ovs_cb_idx {
+ OVS_CB_ACT_IDX, /* Next action to process in action batch. */
+ OVS_CB_INGRESS, /* 0 = egress; nonzero = ingress. */
+ OVS_CB_DOWNCALL_EXE, /* 0 = match/execute, 1 = downcall/execute. */
+};
+
+/* Clears all SKB_CB_U32S words of skb->cb[] and then records the packet
+ * direction in skb->cb[OVS_CB_INGRESS] (nonzero = ingress). */
+static void
+ovs_cb_init(struct __sk_buff *skb, bool ingress)
+{
+    int slot = 0;
+
+    while (slot < SKB_CB_U32S) {
+        skb->cb[slot] = 0;
+        slot++;
+    }
+
+    skb->cb[OVS_CB_INGRESS] = ingress;
+}
+
+/* Returns true when skb->cb[OVS_CB_ACT_IDX] is zero.  A nonzero index is
+ * logged and reported as false. */
+static bool
+ovs_cb_is_initial_parse(struct __sk_buff *skb) {
+    int act_idx = skb->cb[OVS_CB_ACT_IDX];
+
+    if (act_idx == 0) {
+        return true;
+    }
+
+    printt("recirc, don't update metadata, index %d\n", act_idx);
+    return false;
+}
+
+/* Returns the value stored in skb->cb[OVS_CB_ACT_IDX], i.e. the index of the
+ * next action to process in the action batch. */
+static uint32_t
+ovs_cb_get_action_index(struct __sk_buff *skb)
+{
+ return skb->cb[OVS_CB_ACT_IDX];
+}
+
+/* Returns skb->ingress_ifindex when skb->cb[OVS_CB_INGRESS] is nonzero,
+ * skb->ifindex otherwise, or 0 when 'skb' is NULL. */
+static uint32_t OVS_UNUSED
+ovs_cb_get_ifindex(struct __sk_buff *skb)
+{
+ uint32_t ifindex;
+
+ if (!skb)
+ return 0;
+
+ /* The empty asm statements below are compiler barriers; they work around
+ * a compiler optimization issue. */
+ if (skb->cb[OVS_CB_INGRESS]) {
+ __asm__ __volatile__("": : :"memory");
+ return skb->ingress_ifindex;
+ }
+
+ ifindex = skb->ifindex;
+ __asm__ __volatile__("": : :"memory");
+
+ return ifindex;
+}
diff --git a/bpf/generated_headers.h b/bpf/generated_headers.h
new file mode 100644
index 000000000000..52e33a8601a6
--- /dev/null
+++ b/bpf/generated_headers.h
@@ -0,0 +1,185 @@
+#ifndef P4_GENERATED_HEADERS
+#define P4_GENERATED_HEADERS
+
+/* We sometimes disable IPV6 to work
+ * around 512-Byte BPF stack limit
+ */
+#define BPF_ENABLE_IPV6
+
+#ifndef BPF_TYPES
+#define BPF_TYPES
+typedef signed char s8;
+typedef unsigned char u8;
+typedef signed short s16;
+typedef unsigned short u16;
+typedef signed int s32;
+typedef unsigned int u32;
+typedef signed long long s64;
+typedef unsigned long long u64;
+#endif
+
+struct ipv6_t {
+ u8 version; /* 4 bits */
+ u8 trafficClass; /* 8 bits */
+ u32 flowLabel; /* 20 bits */
+ u16 payloadLen; /* 16 bits */
+ u8 nextHdr; /* 8 bits */
+ u8 hopLimit; /* 8 bits */
+ char srcAddr[16]; /* 128 bits */
+ char dstAddr[16]; /* 128 bits */
+};
+struct pkt_metadata_t {
+ u32 recirc_id; /* 32 bits */
+ u32 dp_hash; /* 32 bits */
+ u32 skb_priority; /* 32 bits */
+ u32 pkt_mark; /* 32 bits */
+ u16 ct_state; /* 16 bits */
+ u16 ct_zone; /* 16 bits */
+ u32 ct_mark; /* 32 bits */
+ char ct_label[16]; /* 128 bits */
+ u32 in_port; /* 32 bits */
+ u32 packet_length;
+};
+struct udp_t {
+ u16 srcPort; /* 16 bits */
+ u16 dstPort; /* 16 bits */
+ u16 length_; /* 16 bits */
+ u16 checksum; /* 16 bits */
+};
+struct arp_rarp_t {
+ ovs_be16 ar_hrd; /* format of hardware address */
+ ovs_be16 ar_pro; /* format of protocol address */
+ unsigned char ar_hln; /* length of hardware address */
+ unsigned char ar_pln; /* length of protocol address */
+ ovs_be16 ar_op; /* ARP opcode (command) */
+
+ /* Ethernet+IPv4 specific members. */
+ unsigned char ar_sha[6]; /* sender hardware address */
+ unsigned char ar_sip[4]; /* sender IP address: be32 */
+ unsigned char ar_tha[6]; /* target hardware address */
+ unsigned char ar_tip[4]; /* target IP address: be32 */
+} __attribute__((packed));
+struct icmp_t {
+ u8 type;
+ u8 code;
+};
+struct icmpv6_t {
+ u8 type;
+ u8 code;
+ u16 csum;
+ union {
+ uint32_t data32[1]; /* type-specific field */
+ uint16_t data16[2]; /* type-specific field */
+ uint8_t data8[4]; /* type-specific field */
+ } dataun;
+};
+struct ipv4_t {
+ u8 ttl; /* 8 bits */
+ u8 protocol; /* 8 bits */
+ ovs_be32 srcAddr; /* 32 bits */
+ ovs_be32 dstAddr; /* 32 bits */
+};
+struct gnv_opt {
+ ovs_be16 opt_class;
+ uint8_t type;
+ uint8_t length:5;
+ uint8_t r3:1;
+ uint8_t r2:1;
+ uint8_t r1:1;
+ uint8_t opt_data[4]; /* hard-coded to 4 byte */
+};
+struct flow_tnl_t {
+ union {
+ struct {
+ u32 ip_dst; /* 32 bits */ // BPF uses host byte-order
+ u32 ip_src; /* 32 bits */
+ } ip4;
+#ifdef BPF_ENABLE_IPV6
+ struct {
+ char ipv6_dst[16]; /* 128 bits */
+ char ipv6_src[16]; /* 128 bits */
+ } ip6;
+#endif
+ };
+ u32 tun_id; /* 32 bits */
+ u16 flags; /* 16 bits */
+ u8 ip_tos; /* 8 bits */
+ u8 ip_ttl; /* 8 bits */
+ ovs_be16 tp_src; /* 16 bits */
+ ovs_be16 tp_dst; /* 16 bits */
+ u16 gbp_id; /* 16 bits */
+ u8 gbp_flags; /* 8 bits */
+ u8 use_ipv6: 4,
+ gnvopt_valid: 4;
+ struct gnv_opt gnvopt;
+ char pad1[0]; /* 40 bits */
+};
+struct tcp_t {
+ ovs_be16 srcPort; /* 16 bits */
+ ovs_be16 dstPort; /* 16 bits */
+ u32 seqNo; /* 32 bits */
+ u32 ackNo; /* 32 bits */
+ u8 dataOffset:4, /* 4 bits */
+ res:4; /* 4 bits */
+ u8 flags; /* 8 bits */
+ u16 window; /* 16 bits */
+ u16 checksum; /* 16 bits */
+ u16 urgentPtr; /* 16 bits */
+};
+struct ethernet_t {
+ char dstAddr[6]; /* 48 bits */
+ char srcAddr[6]; /* 48 bits */
+ ovs_be16 etherType; /* 16 bits */
+};
+/* VLAN tag as kept in the flow key: the TCI and the tag's ethertype. */
+struct vlan_tag_t {
+ /* NOTE(review): pcp/cfi/vid are declared directly as members of the
+ * union, so every one of them starts at the same storage as 'tci' rather
+ * than being laid out one after another.  Wrapping the bit-fields in an
+ * anonymous struct is probably what was intended -- confirm the desired
+ * bit order before relying on them. */
+ union {
+ u16 pcp:3,
+ cfi:1,
+ vid:12;
+ ovs_be16 tci; /* host byte order */
+ };
+ ovs_be16 etherType; /* network byte order */
+};
+struct mpls_t {
+ ovs_be32 top_lse; /* top label stack entry */
+};
+
+enum proto_valid {
+ ETHER_VALID = 1 << 0,
+ MPLS_VALID = 1 << 1,
+ IPV4_VALID = 1 << 2,
+ IPV6_VALID = 1 << 3,
+ ARP_VALID = 1 << 4,
+ TCP_VALID = 1 << 5,
+ UDP_VALID = 1 << 6,
+ ICMP_VALID = 1 << 7,
+ VLAN_VALID = 1 << 8,
+ CVLAN_VALID = 1 << 9,
+ ICMPV6_VALID = 1 << 10,
+};
+
+struct ebpf_headers_t {
+ u32 valid;
+ struct ethernet_t ethernet;
+ struct mpls_t mpls;
+ union {
+ struct ipv4_t ipv4;
+#ifdef BPF_ENABLE_IPV6
+ struct ipv6_t ipv6;
+#endif
+ struct arp_rarp_t arp;
+ };
+ union {
+ struct tcp_t tcp;
+ struct udp_t udp;
+ struct icmp_t icmp;
+ struct icmpv6_t icmpv6;
+ };
+ struct vlan_tag_t vlan;
+ struct vlan_tag_t cvlan;
+};
+struct ebpf_metadata_t {
+ struct pkt_metadata_t md;
+ struct flow_tnl_t tnl_md;
+};
+#endif
diff --git a/bpf/helpers.h b/bpf/helpers.h
new file mode 100644
index 000000000000..69fdbb344075
--- /dev/null
+++ b/bpf/helpers.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef __OVSBPF_HELPERS_H
+#define __OVSBPF_HELPERS_H
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/bpf.h>
+
+/* Additional headers */
+# define printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+#define ERR_EXIT() \
+ ({printk("[ERROR] \n"); return TC_ACT_OK;})
+
+#define NOT_HERE() \
+ ({printk("[ERROR] Program should not reach here\n");})
+
+#ifndef BPF_TYPES
+#define BPF_TYPES
+typedef signed char s8;
+typedef unsigned char u8;
+typedef signed short s16;
+typedef unsigned short u16;
+typedef signed int s32;
+typedef unsigned int u32;
+typedef signed long long s64;
+typedef unsigned long long u64;
+#endif
+
+#define ___constant_swab16(x) ((__u16)( \
+ (((__u16)(x) & (__u16)0x00ffU) << 8) | \
+ (((__u16)(x) & (__u16)0xff00U) >> 8)))
+
+#define ___constant_swab32(x) ((__u32)( \
+ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
+ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
+ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
+ (((__u32)(x) & (__u32)0xff000000UL) >> 24)))
+
+#define ___constant_swab64(x) ((__u64)( \
+ (((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \
+ (((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \
+ (((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+ (((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \
+ (((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \
+ (((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+ (((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+ (((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56)))
+
+#define __constant_htonl(x) (___constant_swab32((x)))
+#define __constant_ntohl(x) (___constant_swab32(x))
+#define __constant_htons(x) (___constant_swab16((x)))
+#define __constant_ntohs(x) ___constant_swab16((x))
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* helper functions called from eBPF programs written in C */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+ (void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+ (void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
+static int (*bpf_perf_event_read)(void *map, int index) =
+ (void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+ unsigned long long flags, void *data,
+ int size) =
+ (void *) BPF_FUNC_perf_event_output;
+static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
+ (void *) BPF_FUNC_get_stackid;
+static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+ (void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+ (void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+ (void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_skb_vlan_push)(void *ctx, int vlan_proto, int vlan_tci) =
+ (void *) BPF_FUNC_skb_vlan_push;
+static int (*bpf_skb_vlan_pop)(void *ctx) =
+ (void *) BPF_FUNC_skb_vlan_pop;
+static int (*bpf_skb_change_tail)(void *ctx, int len, int flags) =
+ (void *) BPF_FUNC_skb_change_tail;
+static int (*bpf_get_hash_recalc)(void *ctx) =
+ (void *) BPF_FUNC_get_hash_recalc;
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+ unsigned int id;
+ unsigned int pinning;
+};
+
+/* used in TC */
+/*
+struct bpf_elf_map {
+ __u32 type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ __u32 id;
+ __u32 pinning;
+};
+*/
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+ (void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+ (void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+ (void *) BPF_FUNC_skb_change_head;
+
+#if defined(__x86_64__)
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+#endif
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), \
+ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
diff --git a/bpf/lookup.h b/bpf/lookup.h
new file mode 100644
index 000000000000..db60289b46b9
--- /dev/null
+++ b/bpf/lookup.h
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+#include <openvswitch/compiler.h>
+#include "ovs-p4.h"
+#include "api.h"
+#include "helpers.h"
+#include "maps.h"
+
+/* eBPF executes actions by tailcall because eBPF doesn't support for-loop and
+ * unroll produces oversized code.
+ *
+ * Each action handler uses current packet's key to look for the next action.
+ * However, the key can be changed by some actions like hash, so a stable
+ * key is kept in an eBPF map named percpu_executing_key. In action handler,
+ * firstly, the stable key is got from percpu_executing_key, then it is used
+ * to look up the actions being executed. skb->cb[OVS_CB_ACT_IDX] points to
+ * next action.
+ */
+/* Logs the action described by 'action' and then tail calls the program
+ * stored in 'tailcalls' at index action->type.  The switch only produces
+ * debug output; it does not execute anything itself.  This function returns
+ * only when the tail call fails. */
+static inline void ovs_execute_actions(struct __sk_buff *skb,
+                                       struct bpf_action *action)
+{
+    enum ovs_action_attr type;
+    type = action->type;
+
+    printt("action type %d\n", type);
+
+    /* note: this isn't a for loop, tail call won't return. */
+    switch (type) {
+    case OVS_ACTION_ATTR_UNSPEC:
+        printt("end of action processing\n");
+        break;
+    case OVS_ACTION_ATTR_OUTPUT:
+        printt("output action port = %d\n", action->u.out.port);
+        break;
+    case OVS_ACTION_ATTR_USERSPACE:
+        printt("userspace action, len = %d, ifindex = %d upcall back\n",
+               action->u.userspace.nlattr_len, ovs_cb_get_ifindex(skb));
+        break;
+    case OVS_ACTION_ATTR_SET:
+        printt("set action, remote ipv4 = %x, is_set = %d\n",
+               action->u.tunnel.remote_ipv4, action->is_set);
+        break;
+    case OVS_ACTION_ATTR_PUSH_VLAN:
+        printt("vlan push tci %d\n", action->u.push_vlan.vlan_tci);
+        break;
+    case OVS_ACTION_ATTR_POP_VLAN:
+        printt("vlan pop\n");
+        break;
+    case OVS_ACTION_ATTR_RECIRC:
+        printt("recirc\n");
+        break;
+    case OVS_ACTION_ATTR_HASH:
+        printt("hash\n");
+        break;
+    case OVS_ACTION_ATTR_SET_MASKED:
+        printt("set masked\n");
+        break;
+    case OVS_ACTION_ATTR_CT:
+        printt("ct\n");
+        break;
+    case OVS_ACTION_ATTR_TRUNC:
+        printt("truncate\n");
+        break;
+    case OVS_ACTION_ATTR_SAMPLE: /* Nested case OVS_SAMPLE_ATTR_*. */
+    case OVS_ACTION_ATTR_PUSH_MPLS: /* struct ovs_action_push_mpls. */
+    case OVS_ACTION_ATTR_POP_MPLS: /* __be16 ethertype. */
+    case OVS_ACTION_ATTR_PUSH_ETH: /* struct ovs_action_push_eth. */
+    case OVS_ACTION_ATTR_POP_ETH: /* No argument. */
+    case OVS_ACTION_ATTR_CT_CLEAR: /* No argument. */
+    case OVS_ACTION_ATTR_PUSH_NSH: /* Nested case OVS_NSH_KEY_ATTR_*. */
+    case OVS_ACTION_ATTR_POP_NSH: /* No argument. */
+#ifndef __KERNEL__
+    case OVS_ACTION_ATTR_TUNNEL_PUSH: /* struct ovs_action_push_tnl*/
+    case OVS_ACTION_ATTR_TUNNEL_POP: /* u32 port number. */
+    case OVS_ACTION_ATTR_CLONE: /* Nested case OVS_CLONE_ATTR_*. */
+    case OVS_ACTION_ATTR_METER: /* u32 meter number. */
+#endif
+    case __OVS_ACTION_ATTR_MAX:
+#ifdef __KERNEL__
+    case OVS_ACTION_ATTR_SET_TO_MASKED: /* Kernel module internal masked
+                                         * set action converted from
+                                         * case OVS_ACTION_ATTR_SET. */
+#endif
+    default:
+        /* Fixed: the message used to end in a literal 'n' instead of '\n'. */
+        printt("ERR: action type %d not supported\n", type);
+        break;
+    }
+
+    bpf_tail_call(skb, &tailcalls, type);
+
+    /* OVS_NOT_REACHED */
+    return;
+}
+
+/* Atomically adds one to the 'datapath_stats' counter selected by 'index'.
+ * Does nothing when the map lookup returns no entry. */
+static inline void
+stats_account(enum ovs_bpf_dp_stats index)
+{
+    uint64_t *counter = map_lookup_elem(&datapath_stats, &index);
+
+    if (!counter) {
+        return;
+    }
+
+    __sync_fetch_and_add(counter, 1);
+}
+
+/* The OVS revalidator thread reads each entry in the eBPF maps
+ * (flow_table and dp_flow_stats), reports them as OpenFlow
+ * table statistics, and decides whether to remove or keep an entry
+ * by comparing its timestamp.
+ *
+ * This function accounts one packet of 'bytes' bytes against the flow
+ * identified by 'headers' and 'mds' in dp_flow_stats, creating the entry
+ * when the flow is not present yet.  'used' is stamped with
+ * bpf_ktime_get_ns() converted to milliseconds.
+ *
+ * NOTE(review): the counters of an existing entry are bumped with plain
+ * read-modify-write statements, not with __sync_fetch_and_add() as
+ * stats_account() does.
+ */
+static inline void
+flow_stats_account(struct ebpf_headers_t *headers,
+                   struct ebpf_metadata_t *mds,
+                   size_t bytes)
+{
+    struct bpf_flow_key flow_key;
+    struct bpf_flow_stats *entry;
+
+    flow_key.headers = *headers;
+    flow_key.mds = *mds;
+
+    entry = bpf_map_lookup_elem(&dp_flow_stats, &flow_key);
+    if (entry) {
+        /* Known flow: update the counters in place. */
+        entry->packet_count += 1;
+        entry->byte_count += bytes;
+        entry->used = bpf_ktime_get_ns() / (1000*1000); /* msec */
+        printt("current: packets %d count %d ts %d\n",
+               entry->packet_count, entry->byte_count, entry->used);
+        return;
+    }
+
+    /* First packet of this flow: install a fresh entry. */
+    struct bpf_flow_stats fresh = {0, 0, 0};
+
+    printt("flow not found in flow stats, first install\n");
+    fresh.packet_count = 1;
+    fresh.byte_count = bytes;
+    fresh.used = bpf_ktime_get_ns() / (1000*1000); /* msec */
+    bpf_map_update_elem(&dp_flow_stats, &flow_key, &fresh, BPF_ANY);
+}
+
+/* Builds a flow key from copies of '*headers' and '*mds' and looks it up in
+ * 'flow_table'.  Returns the lookup result; lookup() treats a NULL result as
+ * a flow miss. */
+static inline struct bpf_action_batch *
+ovs_lookup_flow(struct ebpf_headers_t *headers,
+ struct ebpf_metadata_t *mds)
+{
+ struct bpf_flow_key flow_key;
+
+ flow_key.headers = *headers;
+ flow_key.mds = *mds;
+
+ return bpf_map_lookup_elem(&flow_table, &flow_key);
+}
+
+/* Lookup stage (tail call slot MATCH_ACTION_CALL).
+ *
+ * Fetches the headers and metadata via bpf_get_headers()/bpf_get_mds() and
+ * looks the resulting key up in flow_table.  On a miss the UPCALL_CALL
+ * program is tail called.  On a hit the datapath and per-flow statistics
+ * are updated, the key is stored in percpu_executing_key, and the handler
+ * of the first action in the batch is tail called.  Every path that returns
+ * from this function returns TC_ACT_OK.
+ *
+ * NOTE(review): 'skb' is annotated OVS_UNUSED although it is used below. */
+__section_tail(MATCH_ACTION_CALL)
+static int lookup(struct __sk_buff* skb OVS_UNUSED)
+{
+ struct bpf_action_batch *action_batch;
+ struct ebpf_headers_t *headers;
+ struct ebpf_metadata_t *mds;
+
+ headers = bpf_get_headers();
+ if (!headers) {
+ printt("no packet header found\n");
+ ERR_EXIT();
+ }
+
+ mds = bpf_get_mds();
+ if (!mds) {
+ printt("no packet metadata found\n");
+ ERR_EXIT();
+ }
+
+ /* LOOKUP */
+ action_batch = ovs_lookup_flow(headers, mds);
+ if (!action_batch) {
+ printt("no action found, upcall to userspace\n");
+ bpf_tail_call(skb, &tailcalls, UPCALL_CALL);
+
+ /* OVS_NOT_REACHED */
+ return TC_ACT_OK;
+ } else {
+ /* DP Stats Update */
+ stats_account(OVS_DP_STATS_HIT);
+ /* Flow Stats Update */
+ flow_stats_account(headers, mds, skb->len);
+ }
+
+ /* Hit verifier limit when moving declaration up. */
+ struct bpf_flow_key flow_key;
+ flow_key.headers = *headers;
+ flow_key.mds = *mds;
+ int index = 0;
+ /* Save the key at index 0 of percpu_executing_key before any action runs. */
+ int error = bpf_map_update_elem(&percpu_executing_key, &index,
+ &flow_key, BPF_ANY);
+ if (error) {
+ printt("update percpu_executing_key failed: %d\n", error);
+ return TC_ACT_OK;
+ }
+
+ /* the subsequent actions will be tail called. */
+ ovs_execute_actions(skb, &action_batch->actions[0]);
+
+ printt("ERROR: tail call fails\n");
+
+ /* OVS_NOT_REACHED */
+ return TC_ACT_OK;
+}
diff --git a/bpf/maps.h b/bpf/maps.h
new file mode 100644
index 000000000000..aa1c15864975
--- /dev/null
+++ b/bpf/maps.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+#ifndef BPFMAP_OPENVSWITCH_H
+#define BPFMAP_OPENVSWITCH_H 1
+
+#include "api.h"
+#include "openvswitch.h"
+#include "ovs-p4.h"
+
+/* ovs-vswitchd, as the writer, updates these maps.
+ * The BPF datapath, as the reader, looks them up while processing packets. */
+
+/* FIXME: copy from iproute2 */
+enum {
+ BPF_MAP_ID_PROTO,
+ BPF_MAP_ID_QUEUE,
+ BPF_MAP_ID_DROPS,
+ BPF_MAP_ID_ACTION,
+ BPF_MAP_ID_INGRESS,
+ __BPF_MAP_ID_MAX,
+#define BPF_MAP_ID_MAX __BPF_MAP_ID_MAX
+};
+
+/* A bpf flow key is extracted from the
+ * parser.h and saved in
+ * 1) percpu_headers, and
+ * 2) percpu_metadata
+ * Access: BPF is the only writer/reader
+ */
+BPF_PERCPU_ARRAY(percpu_headers,
+ 0,
+ sizeof(struct ebpf_headers_t),
+ 0,
+ 1
+);
+BPF_PERCPU_ARRAY(percpu_metadata,
+ 0,
+ sizeof(struct ebpf_metadata_t),
+ 0,
+ 1
+);
+
+/* BPF flow table
+ * Access: BPF is the reader for lookup,
+ * ovs-vswitchd is the writer
+ */
+BPF_HASH(flow_table,
+ 0,
+ sizeof(struct bpf_flow_key),
+ sizeof(struct bpf_action_batch),
+ 0,
+ 256
+);
+
+/* BPF flow stats table
+ * Access: BPF is the writer for updating,
+ * ovs-vswitchd/revalidator is the reader
+ */
+BPF_HASH(dp_flow_stats,
+ 0,
+ sizeof(struct bpf_flow_key),
+ sizeof(struct bpf_flow_stats),
+ 0,
+ 256
+);
+
+/*
+ * Map for implementing the upcall, which forwards the
+ * first packet (lookup misses) to ovs-vswitchd
+ */
+BPF_PERF_OUTPUT(upcalls, 0);
+
+
+/* BPF datapath stats
+ * Access: BPF is the writer,
+ * ovs-vswitchd is the reader
+ * XXX: switch to percpu to improve performance
+ */
+BPF_ARRAY(datapath_stats,
+ 0,
+ sizeof(uint64_t),
+ 0,
+ __OVS_DP_STATS_MAX
+);
+
+/* Global tail call map:
+ * index 0-31 for actions (OVS_ACTION_ATTR_*)
+ * index 32-63 for others
+ */
+BPF_PROG_ARRAY(tailcalls,
+ 0,
+ 0,
+ 64
+);
+
+/* A dedicated action list for downcall packet.
+ * Access: ovs-vswitchd is the writer,
+ * BPF is the reader
+ */
+BPF_ARRAY(execute_actions,
+ 0,
+ sizeof(struct bpf_action_batch),
+ 0,
+ 1
+);
+
+/* A dedicated key for downcall packet.
+ * Access: ovs-vswitchd is the writer,
+ * BPF is the reader (the BPF lookup stage also stores the key of the
+ * flow whose actions are being executed here)
+ */
+BPF_PERCPU_ARRAY(percpu_executing_key,
+ 0,
+ sizeof(struct bpf_flow_key),
+ 0,
+ 1
+);
+
+struct ebpf_headers_t;
+struct ebpf_metadata_t;
+
+/* Returns the entry at index 0 of the 'percpu_headers' map, or NULL when the
+ * lookup fails.  Declared with (void) so that it is a proper prototype. */
+static inline struct ebpf_headers_t *bpf_get_headers(void)
+{
+    int ebpf_zero = 0;
+
+    return bpf_map_lookup_elem(&percpu_headers, &ebpf_zero);
+}
+
+/* Returns the entry at index 0 of the 'percpu_metadata' map, or NULL when the
+ * lookup fails.  Declared with (void) so that it is a proper prototype. */
+static inline struct ebpf_metadata_t *bpf_get_mds(void)
+{
+    int ebpf_zero = 0;
+
+    return bpf_map_lookup_elem(&percpu_metadata, &ebpf_zero);
+}
+
+#endif /* BPFMAP_OPENVSWITCH_H */
diff --git a/bpf/odp-bpf.h b/bpf/odp-bpf.h
new file mode 100644
index 000000000000..b1df3bbe6840
--- /dev/null
+++ b/bpf/odp-bpf.h
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+#ifndef BPF_OPENVSWITCH_H
+#define BPF_OPENVSWITCH_H 1
+
+#include "odp-netlink.h"
+#include "generated_headers.h"
+
+enum ovs_upcall_cmd { /* Upcall commands; the first four take the values
+ * of the corresponding OVS_PACKET_CMD_* constants. */
+ OVS_UPCALL_UNSPEC = OVS_PACKET_CMD_UNSPEC,
+
+ /* Kernel-to-user notifications. */
+ OVS_UPCALL_MISS = OVS_PACKET_CMD_MISS,
+ OVS_UPCALL_ACTION = OVS_PACKET_CMD_ACTION,
+
+ /* Userspace commands. */
+ OVS_UPCALL_EXECUTE = OVS_PACKET_CMD_EXECUTE,
+
+ OVS_UPCALL_DEBUG, /* No OVS_PACKET_CMD_* counterpart here; its value is
+ * OVS_UPCALL_EXECUTE + 1. */
+};
+
+enum ovs_dbg_subtype { /* Debug subtypes; these index the
+ * 'bpf_upcall_subtypes' name table. */
+ OVS_DBG_ST_UNSPEC,
+ OVS_DBG_ST_REDIRECT,
+ __OVS_DBG_ST_MAX, /* Number of subtypes; not a subtype itself. */
+};
+#define OVS_DBG_ST_MAX (__OVS_DBG_ST_MAX - 1) /* Highest valid subtype. */
+
+static const char *bpf_upcall_subtypes[] OVS_UNUSED = { /* Human-readable
+ * names, indexed by enum ovs_dbg_subtype. */
+ [OVS_DBG_ST_UNSPEC] = "Unspecified",
+ [OVS_DBG_ST_REDIRECT] = "Downcall redirect",
+};
+
+/* Used with 'datapath_stats' map, which is declared with
+ * __OVS_DP_STATS_MAX entries. */
+enum ovs_bpf_dp_stats {
+ OVS_DP_STATS_UNSPEC,
+ OVS_DP_STATS_HIT,
+ OVS_DP_STATS_MISSED,
+ OVS_DP_STATS_LOST,
+ OVS_DP_STATS_FLOWS,
+ OVS_DP_STATS_MASK_HIT,
+ OVS_DP_STATS_MASKS,
+ OVS_DP_STATS_ERRORS,
+ __OVS_DP_STATS_MAX, /* Number of counters; not a counter itself. */
+};
+#define OVS_DP_STATS_MAX (__OVS_DP_STATS_MAX - 1) /* Highest valid index. */
+
+struct bpf_flow { /* NOTE(review): not referenced elsewhere in this
+ * header; purpose is not documented. */
+ uint64_t value; /* XXX */
+};
+
+struct bpf_flow_stats { /* Per-flow counters; the 'dp_flow_stats' map is
+ * sized with sizeof(struct bpf_flow_stats). */
+ uint64_t packet_count; /* Number of packets matched. */
+ uint64_t byte_count; /* Number of bytes matched. */
+ uint64_t used; /* Last used time (in jiffies). */
+ //spinlock_t lock; /* Lock for atomic stats update. */
+ //__be16 tcp_flags; /* Union of seen TCP flags. */
+};
+
+struct bpf_flow_key { /* Flow key: parsed headers plus metadata.  The
+ * 'flow_table', 'dp_flow_stats' and 'percpu_executing_key' maps are
+ * sized with sizeof(struct bpf_flow_key). */
+ struct ebpf_headers_t headers;
+ struct ebpf_metadata_t mds;
+};
+
+struct bpf_upcall { /* Fixed-size header of an upcall message; per the
+ * trailing note below, 'skb_len' bytes of packet data follow it. */
+ uint8_t type; /* Presumably an enum ovs_upcall_cmd -- TODO confirm. */
+ uint8_t subtype; /* Presumably an enum ovs_dbg_subtype -- TODO confirm. */
+ uint32_t ifindex; /* Incoming device */
+ uint32_t cpu;
+ uint32_t error;
+ uint32_t skb_len;
+#ifdef BPF_ENABLE_IPV6
+ uint8_t uactions[24]; /* Contains 'struct nlattr'; note the buffer is
+ * smaller (24 vs. 64 bytes) when IPv6 is
+ * enabled. */
+#else
+ uint8_t uactions[64];
+#endif
+ uint32_t uactions_len;
+ struct bpf_flow_key key;
+ /* Followed by 'skb_len' of packet data. */
+};
+
+#define OVS_BPF_FLAGS_TX_STACK (1 << 0) /* Presumably a bit for
+ * bpf_downcall.flags -- TODO confirm. */
+
+#define OVS_BPF_DOWNCALL_UNSPEC 0 /* Presumably values for
+ * bpf_downcall.type -- TODO confirm. */
+#define OVS_BPF_DOWNCALL_OUTPUT 1
+#define OVS_BPF_DOWNCALL_EXECUTE 2
+
+struct bpf_downcall { /* Fixed-size header of a downcall message; per the
+ * trailing note below, packet data follows it. */
+ uint32_t type;
+ uint32_t ifindex;
+ uint32_t debug;
+ uint32_t flags;
+ struct ebpf_metadata_t md;
+ /* Followed by packet data. */
+};
+
+#define ETH_ALEN 6
+
+#define OVS_ACTION_ATTR_UNSPEC 0 /* Action type numbers.  The 'tailcalls'
+ * map reserves indexes 0-31 for OVS_ACTION_ATTR_* actions.
+ * NOTE(review): struct bpf_action also uses 'enum ovs_action_attr';
+ * confirm these macros agree with that enum's values. */
+#define OVS_ACTION_ATTR_OUTPUT 1
+#define OVS_ACTION_ATTR_USERSPACE 2
+#define OVS_ACTION_ATTR_SET 3
+#define OVS_ACTION_ATTR_PUSH_VLAN 4
+#define OVS_ACTION_ATTR_POP_VLAN 5
+#define OVS_ACTION_ATTR_SAMPLE 6
+#define OVS_ACTION_ATTR_RECIRC 7
+#define OVS_ACTION_ATTR_HASH 8
+#define OVS_ACTION_ATTR_PUSH_MPLS 9
+#define OVS_ACTION_ATTR_POP_MPLS 10
+#define OVS_ACTION_ATTR_SET_MASKED 11
+#define OVS_ACTION_ATTR_CT 12
+#define OVS_ACTION_ATTR_TRUNC 13
+#define OVS_ACTION_ATTR_PUSH_ETH 14
+#define OVS_ACTION_ATTR_POP_ETH 15
+
+#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */
+#define VLAN_TAG_PRESENT VLAN_CFI_MASK /* ovs_parser() ORs this bit into
+ * the stored TCI when the skb carries a VLAN tag. */
+
+struct flow_key { /* Address / port / protocol tuple.  NOTE(review): no
+ * user is visible in this header -- confirm where it is consumed. */
+ __be32 src;
+ __be32 dst;
+ union { /* The same 32 bits viewed whole or as two 16-bit halves. */
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+struct ovs_action_set_tunnel { /* Argument carried in bpf_action.u.tunnel. */
+ /* light weight tunnel key */
+ __u32 tunnel_id; /* tunnel id is host byte order */
+ union { /* Remote endpoint; 'use_ipv6' presumably selects the member --
+ * TODO confirm. */
+ __u32 remote_ipv4; /* host byte order */
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext;
+ __u32 tunnel_label;
+ struct gnv_opt gnvopt;
+ __u8 gnvopt_valid; /* Presumably nonzero when 'gnvopt' is meaningful --
+ * TODO confirm. */
+ __u8 use_ipv6;
+};
+
+struct ovs_action_set_masked { /* Argument carried in bpf_action.u.mset:
+ * a key type plus the new value for that key.  The mask half is
+ * compiled out below. */
+ enum ovs_key_attr key_type; /* Presumably selects the 'key' member --
+ * TODO confirm. */
+ union {
+ struct ovs_key_ethernet ether;
+ struct ovs_key_mpls mpls;
+ struct ovs_key_ipv4 ipv4;
+ struct ovs_key_ipv6 ipv6;
+ struct ovs_key_tcp tcp;
+ struct ovs_key_udp udp;
+ struct ovs_key_sctp sctp;
+ struct ovs_key_icmp icmp;
+ struct ovs_key_icmpv6 icmpv6;
+ struct ovs_key_arp arp;
+ } key;
+#if 0
+ /* BPF datapath does not support mask */
+ union {
+ struct ovs_key_ethernet ether;
+ struct ovs_key_mpls mpls;
+ struct ovs_key_ipv4 ipv4;
+ struct ovs_key_ipv6 ipv6;
+ struct ovs_key_tcp tcp;
+ struct ovs_key_udp udp;
+ struct ovs_key_sctp sctp;
+ struct ovs_key_icmp icmp;
+ struct ovs_key_icmpv6 icmpv6;
+ struct ovs_key_arp arp;
+ } mask;
+#endif
+};
+
+struct ovs_action_output { /* Argument carried in bpf_action.u.out (8 bytes). */
+ uint32_t port;
+ uint32_t flags;
+};
+
+struct ovs_action_ct { /* Argument carried in bpf_action.u.ct; only
+ * 'commit' is present so far. */
+ int commit;
+ /* XXX: Include everything in enum ovs_ct_attr. */
+};
+
+struct ovs_action_userspace { /* Argument carried in bpf_action.u.userspace. */
+ __u16 nlattr_len; /* Presumably the number of valid bytes in
+ * 'nlattr_data' -- TODO confirm. */
+ __u8 nlattr_data[64];
+};
+
+struct bpf_action { /* One datapath action: a type tag plus a union of
+ * per-type arguments. */
+ enum ovs_action_attr type; /* action type */
+ uint32_t is_set; /* NOTE(review): meaning is not shown in this header. */
+ union {
+ struct ovs_action_output out; /* OVS_ACTION_ATTR_OUTPUT: 8B */
+ struct ovs_action_trunc trunc; /* OVS_ACTION_ATTR_TRUNC: 4B */
+ struct ovs_action_hash hash; /* OVS_ACTION_ATTR_HASH: 8B */
+ struct ovs_action_push_mpls mpls; /* OVS_ACTION_ATTR_PUSH_MPLS: 6B */
+ ovs_be16 ethertype; /* OVS_ACTION_ATTR_POP_MPLS: 2B */
+ struct ovs_action_push_vlan push_vlan; /* OVS_ACTION_ATTR_PUSH_VLAN: 4B */
+ /* OVS_ACTION_ATTR_POP_VLAN: 0B */
+ uint32_t recirc_id; /* OVS_ACTION_ATTR_RECIRC: 4B */
+ struct ovs_action_set_tunnel tunnel; /* Tunnel-set argument. */
+ struct ovs_action_set_masked mset; /* OVS_ACTION_ATTR_SET_MASKED: */
+ struct ovs_action_ct ct; /* OVS_ACTION_ATTR_CT: */
+ struct ovs_action_userspace userspace; /* OVS_ACTION_ATTR_USERSPACE: */
+
+ uint64_t aligned[16]; // 16 * 8 = 128 bytes: makes the union at least 128 bytes
+ } u;
+};
+
+#define BPF_DP_MAX_ACTION 32 /* Number of action slots per batch. */
+struct bpf_action_batch { /* Fixed-size action list; the 'flow_table' and
+ * 'execute_actions' maps are sized with sizeof(struct bpf_action_batch). */
+ struct bpf_action actions[BPF_DP_MAX_ACTION];
+};
+
+#endif /* BPF_OPENVSWITCH_H */
diff --git a/bpf/openvswitch.h b/bpf/openvswitch.h
new file mode 100644
index 000000000000..602e223bd280
--- /dev/null
+++ b/bpf/openvswitch.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+#ifndef __BPF_OPENVSWITCH__
+#define __BPF_OPENVSWITCH__
+#include <stdint.h>
+#include "odp-netlink.h"
+
+#ifndef BPFNL_OPENVSWITCH_H
+#define BPFNL_OPENVSWITCH_H 1
+#endif /* BPFNL_OPENVSWITCH_H */
+
+#endif /* __BPF_OPENVSWITCH__ */
diff --git a/bpf/ovs-p4.h b/bpf/ovs-p4.h
new file mode 100644
index 000000000000..49937894083a
--- /dev/null
+++ b/bpf/ovs-p4.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef BPFP4_OPENVSWITCH_H
+#define BPFP4_OPENVSWITCH_H 1
+
+#include "helpers.h"
+#include "generated_headers.h"
+/*
+ * From BCC src/cc/export/helpers.h
+ *
+ * MASK(_n) yields a 64-bit value with the low _n bits set, or all ones
+ * when _n >= 64 (avoiding a shift by the full width).  MASK128 is the
+ * 128-bit equivalent.
+ */
+#define MASK(_n) ((_n) < 64 ? (1ull << (_n)) - 1 : ((u64)-1LL))
+#define MASK128(_n) ((_n) < 128 ? ((unsigned __int128)1 << (_n)) - 1 : ((unsigned __int128)-1))
+
+static inline u16 bpf_ntohs(u16 val) {
+    /* Swap the two bytes.  Written as a shift/or pair that gcc turns into
+     * a rotate instruction (eventually rolw 8). */
+    u16 swapped = (val << 8) | (val >> 8);
+
+    return swapped;
+}
+static inline u32 bpf_ntohl(u32 val) {
+    /* Reverse the four bytes; gcc emits its bswapsi2 insn for this. */
+    u32 swapped = __builtin_bswap32(val);
+
+    return swapped;
+}
+static inline u64 bpf_ntohll(u64 val) {
+    /* Reverse the eight bytes; gcc emits its bswapdi2 insn for this. */
+    u64 swapped = __builtin_bswap64(val);
+
+    return swapped;
+}
+static inline u16 bpf_htons(u16 val) {
+    /* Same byte swap as bpf_ntohs(). */
+    u16 net = bpf_ntohs(val);
+
+    return net;
+}
+static inline u32 bpf_htonl(u32 val) {
+    /* Same byte swap as bpf_ntohl(). */
+    u32 net = bpf_ntohl(val);
+
+    return net;
+}
+static inline u64 bpf_htonll(u64 val) {
+    /* Same byte swap as bpf_ntohll(). */
+    u64 net = bpf_ntohll(val);
+
+    return net;
+}
+static inline u64 load_dword(void *skb, u64 off) {
+    /* Build a 64-bit value from two load_word() reads: the word at 'off'
+     * becomes the upper half, the word at 'off + 4' the lower half. */
+    u64 upper = (u64)load_word(skb, off) << 32;
+
+    return upper | load_word(skb, off + 4);
+}
+
+static inline __attribute__((always_inline))
+void bpf_dins_pkt(void *pkt, u64 off, u64 bofs, u64 bsz, u64 val) { /*
+ * Inserts the low 'bsz' bits of 'val' into the packet 'pkt'.  'off' is
+ * the byte offset of the 8/16/32/64-bit unit holding the field and
+ * 'bofs' is the field's bit offset from that unit's most significant
+ * bit.  Whole-unit writes (bofs == 0 and bsz == 8/16/32/64) store 'val'
+ * directly; narrower fields are read, masked, merged and written back.
+ * Nothing is written when bofs + bsz exceeds 64. */
+ // The load_xxx function does a bswap before returning the short/word/dword,
+ // so the value in register will always be host endian. However, the bytes
+ // written back need to be in network order.
+ if (bofs == 0 && bsz == 8) { /* Whole byte: stores the first byte of
+ * 'val' as laid out in memory.
+ * NOTE(review): that is the low byte
+ * only on a little-endian target. */
+ bpf_skb_store_bytes(pkt, off, &val, 1, 0);
+ } else if (bofs + bsz <= 8) { /* Field inside one byte. */
+ u8 v = load_byte(pkt, off);
+ v &= ~(MASK(bsz) << (8 - (bofs + bsz))); /* Clear the field's bits. */
+ v |= ((val & MASK(bsz)) << (8 - (bofs + bsz))); /* Merge new value. */
+ bpf_skb_store_bytes(pkt, off, &v, 1, 0);
+ } else if (bofs == 0 && bsz == 16) { /* Whole 16-bit unit. */
+ u16 v = bpf_htons(val);
+ bpf_skb_store_bytes(pkt, off, &v, 2, 0);
+ } else if (bofs + bsz <= 16) { /* Field inside a 16-bit unit. */
+ u16 v = load_half(pkt, off);
+ v &= ~(MASK(bsz) << (16 - (bofs + bsz)));
+ v |= ((val & MASK(bsz)) << (16 - (bofs + bsz)));
+ v = bpf_htons(v); /* Swap back before storing. */
+ bpf_skb_store_bytes(pkt, off, &v, 2, 0);
+ } else if (bofs == 0 && bsz == 32) { /* Whole 32-bit unit. */
+ u32 v = bpf_htonl(val);
+ bpf_skb_store_bytes(pkt, off, &v, 4, 0);
+ } else if (bofs + bsz <= 32) { /* Field inside a 32-bit unit. */
+ u32 v = load_word(pkt, off);
+ v &= ~(MASK(bsz) << (32 - (bofs + bsz)));
+ v |= ((val & MASK(bsz)) << (32 - (bofs + bsz)));
+ v = bpf_htonl(v);
+ bpf_skb_store_bytes(pkt, off, &v, 4, 0);
+ } else if (bofs == 0 && bsz == 64) { /* Whole 64-bit unit. */
+ u64 v = bpf_htonll(val);
+ bpf_skb_store_bytes(pkt, off, &v, 8, 0);
+ } else if (bofs + bsz <= 64) { /* Field inside a 64-bit unit. */
+ u64 v = load_dword(pkt, off);
+ v &= ~(MASK(bsz) << (64 - (bofs + bsz)));
+ v |= ((val & MASK(bsz)) << (64 - (bofs + bsz)));
+ v = bpf_htonll(v);
+ bpf_skb_store_bytes(pkt, off, &v, 8, 0);
+ }
+}
+
+enum ErrorCode { /* Parser error codes.  ovs_parser() starts from
+ * p4_pe_no_error and drops the packet when any other
+ * value has been recorded. */
+ p4_pe_no_error,
+ p4_pe_index_out_of_bounds,
+ p4_pe_out_of_packet,
+ p4_pe_header_too_long,
+ p4_pe_header_too_short, /* A header read ran past the packet. */
+ p4_pe_unhandled_select,
+ p4_pe_checksum,
+ p4_pe_too_many_encap, /* Set by ovs_parser() for a third VLAN level. */
+ p4_pe_ipv6_disabled, /* IPv6 seen but BPF_ENABLE_IPV6 is not defined. */
+};
+
+/* EBPF_MASK(t, w): a value of type 't' with the low 'w' bits set. */
+#define EBPF_MASK(t, w) ((((t)(1)) << (w)) - (t)1)
+/* BYTES(w): number of bytes needed to hold 'w' bits, rounded up.  The
+ * argument is parenthesized so that an argument containing an operator of
+ * lower precedence than '+' (e.g. '|' or '?:') still expands correctly. */
+#define BYTES(w) (((w) + 7) / 8)
+
+#endif
diff --git a/bpf/ovs-proto.p4 b/bpf/ovs-proto.p4
new file mode 100644
index 000000000000..c6ebdb510b75
--- /dev/null
+++ b/bpf/ovs-proto.p4
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+/* OVS P4 1.0 protocol file
+ * use bcc to generate eBPF C file
+ * see bcc project: https://github.com/iovisor/bcc.git
+ * under ~/bcc/src/cc/frontends/p4/test/
+ */
+#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
+#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
+#define ETH_P_ARP 0x0806 /* Selects parse_arp. */
+#define ETH_P_IPV4 0x0800 /* Selects parse_ipv4. */
+#define ETH_P_IPV6 0x86DD /* Selects parse_ipv6. */
+
+#define IPPROTO_ICMP 1 /* IP protocol numbers used by the parser selects. */
+#define IPPROTO_IGMP 2
+#define IPPROTO_TCP 6
+#define IPPROTO_UDP 17
+#define IPPROTO_GRE 47
+#define IPPROTO_SCTP 132
+
+header_type ethernet_t { /* Ethernet header: 48 + 48 + 16 = 112 bits. */
+ fields {
+ dstAddr : 48;
+ srcAddr : 48;
+ etherType : 16;
+ }
+}
+
+header_type vlan_tag_t { /* VLAN tag: 3 + 1 + 12 bits of tag control
+ * followed by the 16-bit encapsulated etherType. */
+ fields {
+ pcp : 3;
+ cfi : 1;
+ vid : 12;
+ etherType : 16;
+ }
+}
+
+header_type mpls_t { /* MPLS label stack entry: 32 bits.  No instance of
+ * this type is declared in this file. */
+ fields {
+ label : 20;
+ exp : 3;
+ bos : 1;
+ ttl : 8;
+ }
+}
+
+header_type arp_rarp_t { /* Fixed part of an ARP/RARP message: 64 bits. */
+ fields {
+ hwType : 16;
+ protoType : 16;
+ hwAddrLen : 8;
+ protoAddrLen : 8;
+ opcode : 16;
+ }
+}
+
+header_type arp_rarp_ipv4_t { /* ARP address section for 48-bit hardware
+ * and 32-bit protocol addresses: 160 bits.
+ * No instance is declared in this file. */
+ fields {
+ srcHwAddr : 48;
+ srcProtoAddr : 32;
+ dstHwAddr : 48;
+ dstProtoAddr : 32;
+ }
+}
+
+header_type ipv4_t { /* IPv4 header without options: 160 bits. */
+ fields {
+ version : 4;
+ ihl : 4;
+ diffserv : 8;
+ totalLen : 16;
+ identification : 16;
+ flags : 3;
+ fragOffset : 13;
+ ttl : 8;
+ protocol : 8;
+ hdrChecksum : 16;
+ srcAddr : 32;
+ dstAddr: 32;
+ }
+}
+
+header_type ipv6_t { /* IPv6 fixed header: 320 bits. */
+ fields {
+ version : 4;
+ trafficClass : 8;
+ flowLabel : 20;
+ payloadLen : 16;
+ nextHdr : 8;
+ hopLimit : 8;
+ srcAddr : 128;
+ dstAddr : 128;
+ }
+}
+
+header_type icmp_t { /* First 32 bits of an ICMP message. */
+ fields {
+ typeCode : 16; /* Presumably ICMP type and code packed together --
+ TODO confirm. */
+ hdrChecksum : 16;
+ }
+}
+
+header_type tcp_t { /* TCP header without options: 160 bits. */
+ fields {
+ srcPort : 16;
+ dstPort : 16;
+ seqNo : 32;
+ ackNo : 32;
+ dataOffset : 4;
+ res : 4;
+ flags : 8;
+ window : 16;
+ checksum : 16;
+ urgentPtr : 16;
+ }
+}
+
+header_type udp_t { /* UDP header: 64 bits. */
+ fields {
+ srcPort : 16;
+ dstPort : 16;
+ length_ : 16;
+ checksum : 16;
+ }
+}
+
+header_type sctp_t { /* SCTP common header: 96 bits.  No instance of this
+ * type is declared in this file. */
+ fields {
+ srcPort : 16;
+ dstPort : 16;
+ verifTag : 32;
+ checksum : 32;
+ }
+}
+
+header_type gre_t { /* GRE base header: 16 bits of flags/version plus a
+ * 16-bit protocol.  No instance of this type is
+ * declared in this file. */
+ fields {
+ C : 1;
+ R : 1;
+ K : 1;
+ S : 1;
+ s : 1;
+ recurse : 3;
+ flags : 5;
+ ver : 3;
+ proto : 16;
+ }
+}
+
+/* ----------------- metadata ---------------- */
+header_type pkt_metadata_t { /* Per-packet metadata; instantiated below as
+ * the 'md' metadata instance. */
+ fields {
+ recirc_id : 32; /* Recirculation id carried with the
+ recirculating packets. 0 for packets
+ received from the wire. */
+ dp_hash : 32; /* hash value computed by the recirculation
+ action. */
+ skb_priority : 32; /* Packet priority for QoS. */
+ pkt_mark : 32; /* Packet mark. */
+ ct_state : 16; /* Connection state. */
+ ct_zone : 16; /* Connection zone. */
+ ct_mark : 32; /* Connection mark. */
+ ct_label : 128; /* Connection label. */
+ in_port : 32; /* Input port. */
+ }
+}
+
+header_type flow_tnl_t { /* Tunnel metadata; instantiated below as the
+ * 'tnl_md' metadata instance. */
+ fields {
+ /* struct flow_tnl:
+ * Tunnel information used in flow key and metadata.
+ */
+ ip_dst : 32;
+ ipv6_dst : 64; /* NOTE(review): only 64 bits wide although IPv6
+ addresses are 128 bits -- confirm intent. */
+ ip_src: 32;
+ ipv6_src : 64; /* NOTE(review): same 64-bit width as ipv6_dst. */
+ tun_id : 64;
+ flags : 16;
+ ip_tos : 8;
+ ip_ttl : 8;
+ tp_src : 16;
+ tp_dst : 16;
+ gbp_id : 16;
+ gbp_flags : 8;
+ pad1: 40; /* Pad to 64 bits. */
+ /* struct tun_metadata metadata; */
+ }
+}
+
+header ethernet_t ethernet; /* Header instances the parser can extract. */
+header ipv4_t ipv4;
+header ipv6_t ipv6;
+header arp_rarp_t arp;
+header tcp_t tcp;
+header udp_t udp;
+header icmp_t icmp;
+header vlan_tag_t vlan; /* Single instance: one VLAN level only. */
+metadata pkt_metadata_t md; /* Metadata instances (not extracted). */
+metadata flow_tnl_t tnl_md;
+
+parser start { /* Entry point: every packet begins at the Ethernet header. */
+ return parse_ethernet;
+}
+
+parser parse_ethernet{ /* Extract Ethernet and branch on its etherType;
+ * unrecognized types go straight to ingress. */
+ extract(ethernet);
+ return select(latest.etherType) {
+ ETH_P_8021Q: parse_vlan;
+ ETH_P_8021AD: parse_vlan;
+ ETH_P_ARP: parse_arp;
+ ETH_P_IPV4: parse_ipv4;
+ ETH_P_IPV6: parse_ipv6;
+ default: ingress;
+ }
+}
+
+parser parse_vlan { /* Extract one VLAN tag and branch on the inner
+ * etherType.  A further VLAN etherType is not
+ * selected here, so it falls to the default. */
+ extract(vlan);
+ return select(latest.etherType) {
+ ETH_P_ARP: parse_arp;
+ ETH_P_IPV4: parse_ipv4;
+ ETH_P_IPV6: parse_ipv6;
+ default: ingress;
+ }
+}
+
+parser parse_arp { /* Extract the fixed ARP header, then go to ingress. */
+ extract(arp);
+ return ingress;
+}
+
+parser parse_ipv4 { /* Extract IPv4 and branch on its protocol field. */
+ extract(ipv4);
+ return select(latest.protocol) {
+ IPPROTO_TCP: parse_tcp;
+ IPPROTO_UDP: parse_udp;
+ IPPROTO_ICMP: parse_icmp;
+ default: ingress;
+ }
+}
+
+parser parse_ipv6 { /* Extract IPv6 and branch on its nextHdr field. */
+ extract(ipv6);
+ return select(latest.nextHdr) {
+ IPPROTO_TCP: parse_tcp;
+ IPPROTO_UDP: parse_udp;
+ IPPROTO_ICMP: parse_icmp; /* NOTE(review): this matches protocol 1;
+ the hand-edited C parser in parser.h
+ matches 58 (parse_icmpv6) instead --
+ confirm which is intended. */
+ default: ingress;
+ }
+}
+
+parser parse_tcp { /* Extract the TCP header, then go to ingress. */
+ extract(tcp);
+ return ingress;
+}
+
+parser parse_udp { /* Extract the UDP header, then go to ingress. */
+ extract(udp);
+ return ingress;
+}
+
+parser parse_icmp { /* Extract the ICMP header, then go to ingress. */
+ extract(icmp);
+ return ingress;
+}
+/* ------------------------------------------------------------------------- */
+action nop() {} /* Does nothing; the only action of ovs_tbl. */
+
+table ovs_tbl { /* Dummy table whose only purpose is to reference the
+ * parsed headers and metadata. */
+ reads {
+ /* Reference one field of each header so that the compiler does
+ not optimize the header out, although the table itself is
+ not used at all. */
+ ethernet.dstAddr: exact;
+ vlan.etherType: exact;
+ ipv4.dstAddr: exact;
+ ipv6.dstAddr: exact;
+ icmp.typeCode: exact;
+ tcp.dstPort: exact;
+ udp.dstPort: exact;
+ md.in_port: exact;
+ tnl_md.tun_id: exact;
+ }
+ actions {
+ nop;
+ }
+}
+
+control ingress /* Ingress pipeline: applies the dummy ovs_tbl table. */
+{
+ apply(ovs_tbl);
+}
+
diff --git a/bpf/parser.h b/bpf/parser.h
new file mode 100644
index 000000000000..ab43d5e30730
--- /dev/null
+++ b/bpf/parser.h
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+/*
+ * Protocol parser generated from P4 1.0
+ */
+#include "ovs-p4.h"
+#include "api.h"
+#include "helpers.h"
+#include "maps.h"
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+
+/*
+ * Parser stage of the BPF datapath.
+ *
+ * Extracts the Ethernet, VLAN, ARP, IPv4/IPv6 and TCP/UDP/ICMP fields of
+ * 'skb' into a 'struct ebpf_headers_t' and fills a 'struct ebpf_metadata_t'
+ * from the skb (priority, mark, length, input port, tunnel key and tunnel
+ * options).  The headers are stored at index 0 of 'percpu_headers'; the
+ * metadata is stored at index 0 of 'percpu_metadata' only when
+ * ovs_cb_is_initial_parse() is true.  Finally tail-calls the
+ * MATCH_ACTION_CALL program through 'tailcalls'.
+ *
+ * Ethertypes are compared exactly as loaded from the packet, so the
+ * constants below are byte-swapped (e.g. 0x0008 for 0x0800).
+ * NOTE(review): this presumably assumes a little-endian target -- confirm.
+ *
+ * Returns 0 when the frame is shorter than an Ethernet header or carries a
+ * VLAN ethertype without an skb VLAN tag, TC_ACT_SHOT on a parse error or
+ * an unsupported IPv6 next header, and TC_ACT_OK if the tail call falls
+ * through.
+ */
+__section_tail(PARSER_CALL)
+static int ovs_parser(struct __sk_buff* skb) {
+    struct ebpf_headers_t ebpf_headers = {};
+    struct ebpf_metadata_t ebpf_metadata = {};
+    unsigned skbOffsetInBits = 0;
+    enum ErrorCode ebpf_error = p4_pe_no_error;
+    u32 ebpf_zero = 0;
+    int offset = 0;
+    void *data = (void *)(long)skb->data;
+    struct ethhdr *eth = data;
+
+    /* 'eth' is dereferenced directly below, so bound it first. */
+    if ((char *)data + sizeof(*eth) > (char *)(long)skb->data_end) {
+        return 0;
+    }
+
+    ebpf_headers.valid = 0;
+    printt("proto = %x len = %d vlan_tci = %x\n",
+           eth->h_proto, skb->len, (int)skb->vlan_tci);
+    printt("skb->ingress_ifindex %d skb->ifindex %d\n",
+           skb->ingress_ifindex, skb->ifindex);
+
+    if (skb->cb[OVS_CB_ACT_IDX] != 0) {
+        printt("this is a downcall packet\n");
+    }
+
+    if (skb_load_bytes(skb, offset, &ebpf_headers.ethernet, 14) < 0) {
+        ebpf_error = p4_pe_header_too_short;
+        goto end;
+    }
+    ebpf_headers.valid |= ETHER_VALID;
+    offset += 14;
+    skbOffsetInBits = offset * 8;
+
+    /* vlan_tci is in host byte order. */
+    if (skb->vlan_tci) {
+        ebpf_headers.vlan.tci = skb->vlan_tci | VLAN_TAG_PRESENT;
+        ebpf_headers.vlan.etherType = skb->vlan_proto;
+        ebpf_headers.valid |= VLAN_VALID;
+        printt("vlan proto %x tci %x\n", skb->vlan_proto, skb->vlan_tci);
+    }
+
+    u32 tmp_3 = eth->h_proto;
+    if (tmp_3 == 0x0081 || tmp_3 == 0xA888) {
+        /* The outer tag came from the skb; the in-packet tag is the
+         * inner (customer) VLAN. */
+        if (ebpf_headers.valid & VLAN_VALID) {
+            goto parse_cvlan;
+        }
+
+        printt("Nested vlan? not supported!\n");
+        /* Everything after this unconditional return is currently dead
+         * code; it is kept so that the 'parse_vlan' label stays
+         * referenced. */
+        if (1) return 0;
+        if (skb->vlan_tci) {
+            goto parse_cvlan;
+        } else {
+            goto parse_vlan;
+        }
+    }
+    if (tmp_3 == 0x0608) {
+        goto parse_arp;
+    }
+    if (tmp_3 == 0x0008) {
+        goto parse_ipv4;
+    }
+    if (tmp_3 == 0xDD86) {
+        goto parse_ipv6;
+    } else {
+        goto ovs_tbl_4;
+    }
+
+    parse_vlan: {
+        struct vlan_tag_t *vlan = &ebpf_headers.vlan;
+        /* Load into the header itself.  The original passed '&vlan', the
+         * address of this local pointer, which overwrote the pointer and
+         * left the header untouched. */
+        if (skb_load_bytes(skb, offset, vlan, 4) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        printt("parsing vlan\n");
+        offset += 4;
+        skbOffsetInBits = offset * 8;
+
+        {
+            u32 tmp_5 = ebpf_headers.vlan.etherType;
+            if (tmp_5 == 0x0608)
+                goto parse_arp;
+            if (tmp_5 == 0x0008)
+                goto parse_ipv4;
+            if (tmp_5 == 0xDD86)
+                goto parse_ipv6;
+            if (tmp_5 == 0x0081 || tmp_5 == 0xA888) {
+                printt("not support layer-3 vlan");
+                goto parse_cvlan;
+            } else
+                goto ovs_tbl_4;
+        }
+    }
+    parse_cvlan: {
+        if (skb_load_bytes(skb, offset, &ebpf_headers.cvlan, 4) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        printt("parsing cvlan\n");
+        offset += 4;
+        skbOffsetInBits = offset * 8;
+        ebpf_headers.valid |= CVLAN_VALID;
+        u32 tmp_5 = ebpf_headers.cvlan.etherType;
+        if (tmp_5 == 0x0608)
+            goto parse_arp;
+        if (tmp_5 == 0x0008)
+            goto parse_ipv4;
+        if (tmp_5 == 0xDD86)
+            goto parse_ipv6;
+        if (tmp_5 == 0x0081) {
+            /* A third VLAN level is rejected. */
+            ebpf_error = p4_pe_too_many_encap;
+            goto end;
+        }
+        else
+            goto ovs_tbl_4;
+    }
+    parse_arp: {
+        struct arp_rarp_t *arp = &ebpf_headers.arp;
+        if (skb_load_bytes(skb, offset, arp, sizeof ebpf_headers.arp) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* Only logs whether this is Ethernet/IPv4 ARP; the header is
+         * marked valid either way. */
+        if (arp->ar_hrd == 0x0100 &&
+            arp->ar_pro == 0x0008 &&
+            arp->ar_hln == 6 &&
+            arp->ar_pln == 4) {
+
+            printt("valid arp\n");
+        } else {
+            printt("Invalid arp\n");
+        }
+        offset += sizeof ebpf_headers.arp;
+        skbOffsetInBits = offset * 8;
+        ebpf_headers.valid |= ARP_VALID;
+        goto ovs_tbl_4;
+    }
+    parse_ipv4: {
+        struct iphdr nh;
+        if (skb_load_bytes(skb, offset, &nh, 20) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* NOTE(review): 'ihl' is taken from the packet without checking
+         * that it is at least 5. */
+        offset += nh.ihl * 4;
+        ebpf_headers.ipv4.ttl = nh.ttl;
+        ebpf_headers.ipv4.protocol = nh.protocol;
+        ebpf_headers.ipv4.srcAddr = nh.saddr;
+        ebpf_headers.ipv4.dstAddr = nh.daddr;
+        skbOffsetInBits = offset * 8;
+        ebpf_headers.valid |= IPV4_VALID;
+        u32 tmp_6 = ebpf_headers.ipv4.protocol;
+        if (tmp_6 == 6)
+            goto parse_tcp;
+        if (tmp_6 == 17)
+            goto parse_udp;
+        if (tmp_6 == 1)
+            goto parse_icmp;
+        else
+            goto ovs_tbl_4;
+    }
+    parse_ipv6: {
+#ifdef BPF_ENABLE_IPV6
+        if (skb->len < BYTES(skbOffsetInBits + 4)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        ebpf_headers.ipv6.version = ((load_byte(skb, (skbOffsetInBits + 0) / 8)) >> (4)) & EBPF_MASK(u8, 4);
+        skbOffsetInBits += 4;
+        if (skb->len < BYTES(skbOffsetInBits + 8)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* trafficClass is not extracted; it is left as 0. */
+        ebpf_headers.ipv6.trafficClass = 0;
+        skbOffsetInBits += 8;
+        if (skb->len < BYTES(skbOffsetInBits + 20)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        ebpf_headers.ipv6.flowLabel = ((load_word(skb, (skbOffsetInBits + 0) / 8)) >> (8)) & EBPF_MASK(u32, 20);
+        skbOffsetInBits += 20;
+        if (skb->len < BYTES(skbOffsetInBits + 16)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* payloadLen is not extracted; it is left as 0. */
+        ebpf_headers.ipv6.payloadLen = 0;
+        skbOffsetInBits += 16;
+        if (skb->len < BYTES(skbOffsetInBits + 8)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        ebpf_headers.ipv6.nextHdr = ((load_byte(skb, (skbOffsetInBits + 0) / 8)) >> (0));
+        skbOffsetInBits += 8;
+        if (skb->len < BYTES(skbOffsetInBits + 8)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* hopLimit is not extracted; it is left as 0. */
+        ebpf_headers.ipv6.hopLimit = 0;
+        skbOffsetInBits += 8;
+        if (skb->len < BYTES(skbOffsetInBits + 8*16*2)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* One 32-byte load starting at srcAddr covers both addresses. */
+        if (skb_load_bytes(skb, skbOffsetInBits/8, &ebpf_headers.ipv6.srcAddr, 32) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        skbOffsetInBits += 8*16*2;
+        /* This section only advanced the bit offset.  Bring the byte
+         * offset in step, because parse_tcp and parse_icmpv6 read at
+         * 'offset'; without this they would read the start of the IPv6
+         * header instead of the L4 header. */
+        offset = skbOffsetInBits / 8;
+        ebpf_headers.valid |= IPV6_VALID;
+        u32 tmp_7 = ebpf_headers.ipv6.nextHdr;
+        printt("ipv6 proto %d\n", tmp_7);
+        if (tmp_7 == 6)
+            goto parse_tcp;
+        if (tmp_7 == 17)
+            goto parse_udp;
+        if (tmp_7 == 58)
+            goto parse_icmpv6;
+        /* Next-header values 41, 43, 44 and 51 are not parsed: drop. */
+        if (tmp_7 == 41 || tmp_7 == 43 || tmp_7 == 44 || tmp_7 == 51) {
+            printt("icmpv6 extension header not support");
+            return TC_ACT_SHOT;
+        }
+        else {
+            /* The format has a %x conversion, so pass the value. */
+            printt("ipv6 proto %x not parsed\n", tmp_7);
+            goto ovs_tbl_4;
+        }
+#else
+        ebpf_error = p4_pe_ipv6_disabled;
+        goto end;
+#endif
+    }
+    parse_tcp: {
+        /* Only the first 4 bytes (source and destination port) are
+         * loaded. */
+        if (skb_load_bytes(skb, offset, &ebpf_headers.tcp, 4) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* NOTE(review): the '- 1' looks odd; confirm against the layout
+         * of the tcp member in generated_headers.h. */
+        offset += sizeof ebpf_headers.tcp - 1;
+
+        skbOffsetInBits = offset * 8;
+        ebpf_headers.valid |= TCP_VALID;
+        goto ovs_tbl_4;
+    }
+    parse_udp: {
+        if (skb->len < BYTES(skbOffsetInBits + 16)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        ebpf_headers.udp.srcPort = ((load_half(skb, (skbOffsetInBits + 0) / 8)) >> (0));
+        skbOffsetInBits += 16;
+        if (skb->len < BYTES(skbOffsetInBits + 16)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        ebpf_headers.udp.dstPort = ((load_half(skb, (skbOffsetInBits + 0) / 8)) >> (0));
+        skbOffsetInBits += 16;
+        if (skb->len < BYTES(skbOffsetInBits + 16)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* length_ is not extracted; it is left as 0. */
+        ebpf_headers.udp.length_ = 0;
+        skbOffsetInBits += 16;
+        if (skb->len < BYTES(skbOffsetInBits + 16)) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        /* The checksum is removed from the key; it is left as 0. */
+        ebpf_headers.udp.checksum = 0;
+        skbOffsetInBits += 16;
+        ebpf_headers.valid |= UDP_VALID;
+        goto ovs_tbl_4;
+    }
+    parse_icmp: {
+        /* Only the first 2 bytes (type and code) are loaded. */
+        if (skb_load_bytes(skb, offset, &ebpf_headers.icmp, 2) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        printt("icmp type = %x code = %x\n", ebpf_headers.icmp.type,
+               ebpf_headers.icmp.code);
+
+#if 0 /* the ICMP packet might be ip fragment */
+        if (ebpf_headers.ipv4.flags & IP_FRAGMENT) {
+            ebpf_headers.icmp.type = 0;
+            ebpf_headers.icmp.code = 0;
+        }
+#endif
+        offset += 8;
+        skbOffsetInBits = offset * 8;
+        ebpf_headers.valid |= ICMP_VALID;
+        goto ovs_tbl_4;
+    }
+#ifdef BPF_ENABLE_IPV6
+    parse_icmpv6: {
+        if (skb_load_bytes(skb, offset, &ebpf_headers.icmpv6,
+                           sizeof(struct icmpv6_t)) < 0) {
+            ebpf_error = p4_pe_header_too_short;
+            goto end;
+        }
+        printt("icmpv6 type = %x code = %x\n", ebpf_headers.icmpv6.type,
+               ebpf_headers.icmpv6.code);
+
+        offset += 16;
+        skbOffsetInBits = offset * 8;
+        ebpf_headers.valid |= ICMPV6_VALID;
+        goto ovs_tbl_4;
+    }
+#endif
+
+    /* Most of the code above is generated by P4C-EBPF.
+       Manual code starts here. */
+    ovs_tbl_4:
+    {
+        int ret;
+        struct bpf_tunnel_key key;
+
+        ebpf_metadata.md.skb_priority = skb->priority;
+
+        /* Don't use ovs_cb_get_ifindex(), that gets optimized into something
+         * that can't be verified. >:( */
+        if (skb->cb[OVS_CB_INGRESS]) {
+            ebpf_metadata.md.in_port = skb->ingress_ifindex;
+        }
+        if (!skb->cb[OVS_CB_INGRESS]) {
+            ebpf_metadata.md.in_port = skb->ifindex;
+        }
+        ebpf_metadata.md.pkt_mark = skb->mark;
+        ebpf_metadata.md.packet_length = skb->len;
+
+        /* Try the tunnel key with flags 0 first; on -EPROTO (IPv6 builds
+         * only) retry with BPF_F_TUNINFO_IPV6. */
+        ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+        if (!ret) {
+            printt("bpf_skb_get_tunnel_key id = %d ipv4\n", key.tunnel_id);
+            ebpf_metadata.tnl_md.tun_id = key.tunnel_id;
+            ebpf_metadata.tnl_md.ip4.ip_src = key.remote_ipv4;
+            ebpf_metadata.tnl_md.ip_tos = key.tunnel_tos;
+            ebpf_metadata.tnl_md.ip_ttl = key.tunnel_ttl;
+            ebpf_metadata.tnl_md.use_ipv6 = 0;
+            ebpf_metadata.tnl_md.flags = 0;
+#ifdef BPF_ENABLE_IPV6
+        } else if (ret == -EPROTO) {
+            ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+                                         BPF_F_TUNINFO_IPV6);
+            if (!ret) {
+                printt("bpf_skb_get_tunnel_key id = %d ipv6\n", key.tunnel_id);
+                ebpf_metadata.tnl_md.tun_id = key.tunnel_id;
+                /* Copies 16 bytes starting at 'remote_ipv4'; presumably
+                 * this relies on it sharing storage with the IPv6
+                 * address in struct bpf_tunnel_key -- TODO confirm. */
+                memcpy(&ebpf_metadata.tnl_md.ip6.ipv6_src, &key.remote_ipv4, 16);
+                ebpf_metadata.tnl_md.ip_tos = key.tunnel_tos;
+                ebpf_metadata.tnl_md.ip_ttl = key.tunnel_ttl;
+                ebpf_metadata.tnl_md.use_ipv6 = 1;
+                ebpf_metadata.tnl_md.flags = 0;
+            }
+#endif
+        }
+
+        /* Tunnel options are fetched only when a tunnel key was found. */
+        if (!ret) {
+            ret = bpf_skb_get_tunnel_opt(skb, &ebpf_metadata.tnl_md.gnvopt,
+                                         sizeof ebpf_metadata.tnl_md.gnvopt);
+            if (ret > 0)
+                ebpf_metadata.tnl_md.gnvopt_valid = 1;
+            printt("bpf_skb_get_tunnel_opt ret = %d\n", ret);
+        }
+    }
+
+end:
+    if (ebpf_error != p4_pe_no_error) {
+        printt("parse error, drop\n");
+        return TC_ACT_SHOT;
+    }
+
+    /* write flow key and md to key map */
+    printt("Parser: updating flow key\n");
+    bpf_map_update_elem(&percpu_headers,
+                        &ebpf_zero, &ebpf_headers, BPF_ANY);
+
+    if (ovs_cb_is_initial_parse(skb)) {
+        bpf_map_update_elem(&percpu_metadata,
+                            &ebpf_zero, &ebpf_metadata, BPF_ANY);
+    }
+    skb->cb[OVS_CB_ACT_IDX] = 0;
+
+    /* tail call next stage */
+    printt("tail call match + lookup stage\n");
+    bpf_tail_call(skb, &tailcalls, MATCH_ACTION_CALL);
+
+    /* Only reached when the tail call did not transfer control. */
+    printt("[ERROR] missing tail call\n");
+    return TC_ACT_OK;
+}
diff --git a/bpf/xdp.h b/bpf/xdp.h
new file mode 100644
index 000000000000..2d2102a6ba28
--- /dev/null
+++ b/bpf/xdp.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+#include "ovs-p4.h"
+#include "api.h"
+#include "helpers.h"
+
+__section("xdp")
+static int xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
+{
+    /* Stub XDP hook: logs and passes every packet on unchanged.
+     * TODO: see p4c-xdp project */
+    int verdict = XDP_PASS;
+
+    printt("return XDP_PASS\n");
+    return verdict;
+}
+
+__section("af_xdp")
+static int af_xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
+{
+    /* Stub AF_XDP hook: passes every packet on unchanged.
+     * TODO: see xdpsock_kern.c and xdpsock_user.c */
+    int verdict = XDP_PASS;
+
+    return verdict;
+}
--
2.7.4