diff --git a/kernel/samples/bpf/Makefile b/kernel/samples/bpf/Makefile
index a05a623..6c39177 100644
--- a/kernel/samples/bpf/Makefile
+++ b/kernel/samples/bpf/Makefile
@@ -5,6 +5,8 @@
 # Still depend on a kernel source tree.
 #
 TARGETS := xdp_ddos01_blacklist
+TARGETS += xdp_5tuple_blacklist
+TARGETS += xdp_stateful
 TARGETS += xdp_ttl
 TARGETS += xdp_bench01_mem_access_cost
 TARGETS += xdp_bench02_drop_pattern
@@ -28,6 +30,8 @@ TARGETS += xdp_vlan01
 TARGETS += xdp_redirect_cpu
 
 CMDLINE_TOOLS := xdp_ddos01_blacklist_cmdline
+CMDLINE_TOOLS += xdp_5tuple_blacklist_cmdline
+CMDLINE_TOOLS += xdp_stateful_cmdline
 COMMON_H = ${CMDLINE_TOOLS:_cmdline=_common.h}
 
 # Targets that use the library bpf/libbpf
@@ -131,6 +135,7 @@ clean:
 	rm -f $(TARGETS_ALL)
 	rm -f $(KERN_OBJECTS)
 	rm -f $(USER_OBJECTS)
+	rm -f $(CMDLINE_TOOLS)
 	make -C $(TOOLS_PATH)/lib/bpf clean
 
 dependencies: verify_llvm_target_bpf linux-src-devel-headers
diff --git a/kernel/samples/bpf/xdp_5tuple_blacklist_cmdline.c b/kernel/samples/bpf/xdp_5tuple_blacklist_cmdline.c
new file mode 100755
index 0000000..99f37ee
--- /dev/null
+++ b/kernel/samples/bpf/xdp_5tuple_blacklist_cmdline.c
@@ -0,0 +1,341 @@
+/* Copyright(c) 2018 Justin Iurman
+ */
+static const char *__doc__=
+ " XDP 5tuple: command line tool";
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+/* libbpf.h defines bpf_* function helpers for syscalls,
+ * indirectly via ./tools/lib/bpf/bpf.h */
+#include "libbpf.h"
+
+#include "bpf_util.h"
+
+#include "xdp_5tuple_blacklist_common.h"
+
+enum {
+	ACTION_NONE = 0,
+	ACTION_ADD,
+	ACTION_LIST,
+	ACTION_FLUSH
+};
+
+static const char *xdp_proto_filter_names[DDOS_FILTER_MAX] = {
+	[DDOS_FILTER_TCP] = "TCP",
+	[DDOS_FILTER_UDP] = "UDP",
+};
+
+#define DEFINED_PROTOCOL 1
+#define DEFINED_IP_SOURCE 2
+#define DEFINED_IP_DESTINATION 4
+#define DEFINED_PORT_SOURCE 8
+#define DEFINED_PORT_DESTINATION 16
+
+#define DEFINED_ALL (DEFINED_PROTOCOL | DEFINED_IP_SOURCE | DEFINED_IP_DESTINATION | DEFINED_PORT_SOURCE | DEFINED_PORT_DESTINATION)
+
+static const struct option long_options[] = {
+	{"help", no_argument, NULL, 'h' },
+	{"add", no_argument, NULL, 'a' },
+	{"udp", no_argument, NULL, 'u' },
+	{"tcp", no_argument, NULL, 't' },
+	{"ips", required_argument, NULL, 'i' },
+	{"ipd", required_argument, NULL, 'j' },
+	{"sport", required_argument, NULL, 's' },
+	{"dport", required_argument, NULL, 'd' },
+	{"list", no_argument, NULL, 'l' },
+	{"flush", no_argument, NULL, 'f' },
+	{0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+	printf("\nDOCUMENTATION:\n%s\n", __doc__);
+	printf("\n");
+	printf(" Usage: %s (options-see-below)\n",
+	       argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)",
+			       *long_options[i].flag);
+		else
+			printf(" short-option: -%c",
+			       long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int open_bpf_map(const char *file)
+{
+	int fd;
+
+	fd = bpf_obj_get(file);
+	if (fd < 0) {
+		printf("ERR: Failed to open bpf map file:%s err(%d):%s\n",
+		       file, errno, strerror(errno));
+		exit(EXIT_FAIL_MAP_FILE);
+	}
+	return fd;
+}
+
+/* Add one IPv4 5-tuple to the blacklist map.
+ *
+ * Returns EXIT_OK on success or if the tuple is already blacklisted,
+ * otherwise one of the EXIT_FAIL_* codes.
+ */
+int blacklist_tuple_add(int fd, char *ip_source, char *ip_destination, int port_source, int port_destination, int protocol)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus];
+	int res, err;
+	struct five_tuple key_tuple;
+
+	memset(values, 0, sizeof(__u64) * nr_cpus);
+	/* Padding bytes are part of the hash key: clear the whole struct */
+	memset(&key_tuple, 0, sizeof(key_tuple));
+
+	/* Convert IP-string into 32-bit network byte-order value */
+	res = inet_pton(AF_INET, ip_source, &(key_tuple.ip_source));
+	res &= inet_pton(AF_INET, ip_destination, &(key_tuple.ip_destination));
+	if (res <= 0) {
+		fprintf(stderr,
+			"ERR: either IPv4 \"%s\" or \"%s\" not in presentation format\n",
+			ip_source, ip_destination);
+		return EXIT_FAIL_IP;
+	}
+
+	if (port_source < 0 || port_source > 65535 ||
+	    port_destination < 0 || port_destination > 65535) {
+		fprintf(stderr,
+			"ERR: source port \"%d\" or destination port \"%d\" invalid\n",
+			port_source, port_destination);
+		return EXIT_FAIL_PORT;
+	}
+
+	/* TODO ntohl for src/dst IPs end up with no matching -> why ? */
+	key_tuple.port_source = port_source;
+	key_tuple.port_destination = port_destination;
+	key_tuple.protocol = protocol;
+
+	res = bpf_map_update_elem(fd, &key_tuple, values, BPF_NOEXIST);
+	if (res != 0) { /* 0 == success */
+		err = errno; /* fprintf() below may overwrite errno */
+		res = (protocol == IPPROTO_UDP) ? DDOS_FILTER_UDP : DDOS_FILTER_TCP;
+		fprintf(stderr,
+			"%s() IPsource:%s IPdest:%s sport:%d dport:%d proto:%s errno(%d/%s)",
+			__func__, ip_source, ip_destination, port_source, port_destination, xdp_proto_filter_names[res], err, strerror(err));
+
+		if (err == EEXIST) {
+			fprintf(stderr, ": Already in blacklist\n");
+			return EXIT_OK;
+		}
+		fprintf(stderr, "\n");
+		return EXIT_FAIL_MAP_KEY;
+	}
+	if (verbose) {
+		res = (protocol == IPPROTO_UDP) ? DDOS_FILTER_UDP : DDOS_FILTER_TCP;
+		fprintf(stderr,
+			"%s() IPsource:%s IPdest:%s sport:%d dport:%d proto:%s\n",
+			__func__, ip_source, ip_destination, port_source, port_destination, xdp_proto_filter_names[res]);
+	}
+
+	return EXIT_OK;
+}
+
+static __u64 get_value64_percpu(int fd, struct five_tuple key)
+{
+	/* For percpu maps, userspace gets a value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus];
+	__u64 sum = 0;
+	int i;
+
+	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed\n");
+		return 0;
+	}
+
+	/* Sum values from each CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		sum += values[i];
+	}
+	return sum;
+}
+
+static void blacklist_print_tuple(struct five_tuple tuple, __u64 count)
+{
+	int res;
+	char ip_src[INET_ADDRSTRLEN] = {0}, ip_dst[INET_ADDRSTRLEN] = {0};
+
+	if (inet_ntop(AF_INET, &(tuple.ip_source), ip_src, sizeof(ip_src)) == NULL
+		|| inet_ntop(AF_INET, &(tuple.ip_destination), ip_dst, sizeof(ip_dst)) == NULL) {
+		fprintf(stderr, "Error while reading current 5-tuple data\n");
+	}
+	else {
+		res = (tuple.protocol == IPPROTO_UDP) ? DDOS_FILTER_UDP : DDOS_FILTER_TCP;
+		printf("(%s) <%s> <%s> <%hu> <%hu> : %llu\n",
+			xdp_proto_filter_names[res], ip_src, ip_dst, tuple.port_source, tuple.port_destination, count);
+	}
+}
+
+/* Walk the whole blacklist map and print every tuple with its drop count */
+static void blacklist_print_tuples(int fd)
+{
+	struct five_tuple key, next_key;
+	void *prev = NULL; /* NULL makes the kernel return the first key */
+	__u64 value;
+
+	printf("(Protocol) : DROP_COUNT\n\n");
+	while (bpf_map_get_next_key(fd, prev, &next_key) == 0) {
+		key = next_key;
+		prev = &key;
+		value = get_value64_percpu(fd, key);
+		blacklist_print_tuple(key, value);
+	}
+}
+
+/* Delete every entry of the blacklist map; stops at the first failure */
+static void blacklist_flush(int fd)
+{
+	int res;
+	struct five_tuple key;
+
+	/* NULL as previous key: always fetch the first remaining entry */
+	while (bpf_map_get_next_key(fd, NULL, &key) == 0) {
+		res = bpf_map_delete_elem(fd, &key);
+		if (res != 0) {
+			fprintf(stderr, "Error while deleting a tuple, flushing stopped\n");
+			break;
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int opt;
+	int longindex;
+	unsigned int action = ACTION_NONE;
+	int proto = 0, sport = 0, dport = 0, tmp;
+	__u8 defined = 0;
+#define STR_MAX 16 // For trivial input validation
+	char _ip_src_buf[STR_MAX] = {}, _ip_dst_buf[STR_MAX] = {};
+	char *ip_src = NULL, *ip_dst = NULL;
+	int fd_blacklist;
+
+	while ((opt = getopt_long(argc, argv, "ahltufi:j:s:d:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'a':
+			action = ACTION_ADD;
+			break;
+		case 'i':
+		case 'j':
+			/* >= keeps room for the terminating NUL in the buffer */
+			if (!optarg || strlen(optarg) >= STR_MAX) {
+				printf("ERR: src and/or dst ip too long or empty\n");
+				goto fail_opt;
+			}
+
+			if (opt == 'i') {
+				ip_src = (char *)&_ip_src_buf;
+				strncpy(ip_src, optarg, STR_MAX);
+				defined |= DEFINED_IP_SOURCE;
+			} else {
+				ip_dst = (char *)&_ip_dst_buf;
+				strncpy(ip_dst, optarg, STR_MAX);
+				defined |= DEFINED_IP_DESTINATION;
+			}
+			break;
+		case 's':
+		case 'd':
+			if (!optarg) {
+				printf("ERR: source and/or destination port is empty\n");
+				goto fail_opt;
+			}
+
+			tmp = atoi(optarg);
+			if (tmp < 0 || tmp > 65535) {
+				printf("ERR: source and/or destination port is invalid\n");
+				goto fail_opt;
+			}
+
+			if (opt == 's') {
+				sport = tmp;
+				defined |= DEFINED_PORT_SOURCE;
+			} else {
+				dport = tmp;
+				defined |= DEFINED_PORT_DESTINATION;
+			}
+			break;
+		case 'u':
+			proto = IPPROTO_UDP;
+			defined |= DEFINED_PROTOCOL;
+			break;
+		case 't':
+			proto = IPPROTO_TCP;
+			defined |= DEFINED_PROTOCOL;
+			break;
+		case 'l':
+			action = ACTION_LIST;
+			break;
+		case 'f':
+			action = ACTION_FLUSH;
+			break;
+		case 'h':
+		fail_opt:
+		default:
+			usage(argv);
+			return EXIT_FAIL_OPTION;
+		}
+	}
+
+	// Catch non-option arguments
+	if (argv[optind] != NULL) {
+		fprintf(stderr, "ERR: Unknown non-option argument: %s\n",
+			argv[optind]);
+		goto fail_opt;
+	}
+
+	if (action == ACTION_ADD) {
+		if (defined != DEFINED_ALL) {
+			fprintf(stderr,
+				"ERR: missing fields in the 5-tuple\n");
+			goto fail_opt;
+		}
+
+		fd_blacklist = open_bpf_map(file_blacklist);
+		tmp = blacklist_tuple_add(fd_blacklist, ip_src, ip_dst, sport, dport, proto);
+		close(fd_blacklist);
+		return tmp;
+	}
+	else if (action == ACTION_LIST) {
+		fd_blacklist = open_bpf_map(file_blacklist);
+		blacklist_print_tuples(fd_blacklist);
+		close(fd_blacklist);
+	}
+	else if (action == ACTION_FLUSH) {
+		fd_blacklist = open_bpf_map(file_blacklist);
+		blacklist_flush(fd_blacklist);
+		close(fd_blacklist);
+	}
+
+	return 0;
+}
diff --git a/kernel/samples/bpf/xdp_5tuple_blacklist_common.h b/kernel/samples/bpf/xdp_5tuple_blacklist_common.h
new file mode 100755
index 0000000..5ad22ac
--- /dev/null
+++ b/kernel/samples/bpf/xdp_5tuple_blacklist_common.h
@@ -0,0 +1,41 @@
+#ifndef __XDP_5TUPLE_BLACKLIST_COMMON_H
+#define __XDP_5TUPLE_BLACKLIST_COMMON_H
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_MAP 20
+#define EXIT_FAIL_MAP_KEY 21
+#define EXIT_FAIL_MAP_FILE 22
+#define EXIT_FAIL_MAP_FS 23
+#define EXIT_FAIL_IP 30
+#define EXIT_FAIL_PORT 31
+
+/* Map key. Must stay byte-for-byte identical to the definition in
+ * xdp_5tuple_blacklist_kern.c (padding included).
+ */
+struct five_tuple {
+	__u8 protocol;
+	__u32 ip_source;
+	__u32 ip_destination;
+	__u16 port_source;
+	__u16 port_destination;
+};
+
+enum {
+	DDOS_FILTER_TCP = 0,
+	DDOS_FILTER_UDP,
+	DDOS_FILTER_MAX
+};
+
+static int verbose = 0;
+
+/* Export eBPF map for 5-tuples blacklist as a file
+ * Gotcha need to mount:
+ * mount -t bpf bpf /sys/fs/bpf/
+ */
+static const char *file_blacklist = "/sys/fs/bpf/5tuple_blacklist";
+
+#endif
diff --git a/kernel/samples/bpf/xdp_5tuple_blacklist_kern.c b/kernel/samples/bpf/xdp_5tuple_blacklist_kern.c
new file mode 100755
index 0000000..68ccbc6
--- /dev/null
+++ b/kernel/samples/bpf/xdp_5tuple_blacklist_kern.c
@@ -0,0 +1,217 @@
+/* XDP example: DDoS protection via 5-tuple blacklist
+ *
+ * Copyright(c) 2018 Justin Iurman
+ */
+#define KBUILD_MODNAME "5tuple"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "bpf_helpers.h"
+
+struct five_tuple {
+	__u8 protocol;
+	__u32 ip_source;
+	__u32 ip_destination;
+	__u16 port_source;
+	__u16 port_destination;
+};
+
+struct vlan_hdr {
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+
+struct bpf_map_def SEC("maps") blacklist_5tuple = {
+	.type = BPF_MAP_TYPE_PERCPU_HASH,
+	.key_size = sizeof(struct five_tuple),
+	.value_size = sizeof(u64), /* Drop counter */
+	.max_entries = 100000,
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+
+//#define DEBUG 1
+#ifdef DEBUG
+/* Only use this for debug output. Notice output from bpf_trace_printk()
+ * end-up in /sys/kernel/debug/tracing/trace_pipe
+ */
+#define bpf_debug(fmt, ...) \
+	({ \
+		char ____fmt[] = fmt; \
+		bpf_trace_printk(____fmt, sizeof(____fmt), \
+				 ##__VA_ARGS__); \
+	})
+#else
+#define bpf_debug(fmt, ...) do { } while (0)
+#endif
+
+/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
+ *
+ * Returns false on error and non-supported ether-type
+ */
+static __always_inline
+bool parse_eth(struct ethhdr *eth, void *data_end,
+	       u16 *eth_proto, u64 *l3_offset)
+{
+	u16 eth_type;
+	u64 offset;
+
+	offset = sizeof(*eth);
+	if ((void *)eth + offset > data_end)
+		return false;
+
+	eth_type = eth->h_proto;
+	bpf_debug("Debug: eth_type:0x%x\n", ntohs(eth_type));
+
+	/* Skip non 802.3 Ethertypes */
+	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
+		return false;
+
+	/* Handle VLAN tagged packet */
+	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		offset += sizeof(*vlan_hdr);
+		if ((void *)eth + offset > data_end)
+			return false;
+		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+	}
+	/* Handle double VLAN tagged packet */
+	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		offset += sizeof(*vlan_hdr);
+		if ((void *)eth + offset > data_end)
+			return false;
+		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+	}
+
+	*eth_proto = ntohs(eth_type);
+	*l3_offset = offset;
+	return true;
+}
+
+/* Fill in the L4 ports of key_tuple for UDP and TCP packets; other
+ * protocols leave the ports untouched.
+ *
+ * Returns false when the L4 header does not fit inside the packet.
+ */
+static __always_inline
+bool parse_port(struct xdp_md *ctx, u8 proto, void *hdr, struct five_tuple *key_tuple)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	struct udphdr *udph;
+	struct tcphdr *tcph;
+
+	switch (proto) {
+	case IPPROTO_UDP:
+		udph = hdr;
+		if (udph + 1 > data_end) {
+			bpf_debug("Invalid UDPv4 packet: L4off:%llu\n",
+				  sizeof(struct iphdr) + sizeof(struct udphdr));
+			return false;
+		}
+		key_tuple->port_source = ntohs(udph->source);
+		key_tuple->port_destination = ntohs(udph->dest);
+		break;
+	case IPPROTO_TCP:
+		tcph = hdr;
+		if (tcph + 1 > data_end) {
+			bpf_debug("Invalid TCPv4 packet: L4off:%llu\n",
+				  sizeof(struct iphdr) + sizeof(struct tcphdr));
+			return false;
+		}
+		key_tuple->port_source = ntohs(tcph->source);
+		key_tuple->port_destination = ntohs(tcph->dest);
+		break;
+	default:
+		return true;
+	}
+
+	return true;
+}
+
+static __always_inline
+u32 parse_ipv4(struct xdp_md *ctx, u64 l3_offset)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct iphdr *iph = data + l3_offset;
+	struct five_tuple key_tuple = {}; /* type need to match map */
+	u64 *value; /* DROP counter */
+	bool check_map;
+
+	/* Hint: +1 is sizeof(struct iphdr) */
+	if (iph + 1 > data_end) {
+		bpf_debug("Invalid IPv4 packet: L3off:%llu\n", l3_offset);
+		return XDP_ABORTED;
+	}
+
+	/* Extract key */
+	key_tuple.protocol = iph->protocol;
+	key_tuple.ip_source = ntohl(iph->saddr);
+	key_tuple.ip_destination = ntohl(iph->daddr);
+	key_tuple.port_source = 0;
+	key_tuple.port_destination = 0;
+
+	bpf_debug("Valid IPv4 packet: raw saddr:0x%x\n", iph->saddr);
+
+	check_map = parse_port(ctx, iph->protocol, iph + 1, &key_tuple);
+	if (!check_map)
+		return XDP_PASS;
+
+	value = bpf_map_lookup_elem(&blacklist_5tuple, &key_tuple);
+	if (value) {
+		/* Don't need __sync_fetch_and_add(); as percpu map */
+		*value += 1; /* Keep a counter for drop matches */
+		return XDP_DROP;
+	}
+
+	return XDP_PASS;
+}
+
+static __always_inline
+u32 handle_eth_protocol(struct xdp_md *ctx, u16 eth_proto, u64 l3_offset)
+{
+	switch (eth_proto) {
+	case ETH_P_IP:
+		return parse_ipv4(ctx, l3_offset);
+		break;
+	case ETH_P_IPV6: /* Not handler for IPv6 yet*/
+	case ETH_P_ARP: /* Let OS handle ARP */
+		/* Fall-through */
+	default:
+		bpf_debug("Not handling eth_proto:0x%x\n", eth_proto);
+		return XDP_PASS;
+	}
+	return XDP_PASS;
+}
+
+SEC("xdp_prog")
+int xdp_program(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	u16 eth_proto = 0;
+	u64 l3_offset = 0;
+	u32 action;
+
+	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset))) {
+		bpf_debug("Cannot parse L2: L3off:%llu proto:0x%x\n",
+			  l3_offset, eth_proto);
+		return XDP_PASS; /* Skip */
+	}
+	bpf_debug("Reached L3: L3off:%llu proto:0x%x\n", l3_offset, eth_proto);
+
+	action = handle_eth_protocol(ctx, eth_proto, l3_offset);
+	return action;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/kernel/samples/bpf/xdp_5tuple_blacklist_user.c b/kernel/samples/bpf/xdp_5tuple_blacklist_user.c
new file mode 100755
index 0000000..b6e5156
--- /dev/null
+++ b/kernel/samples/bpf/xdp_5tuple_blacklist_user.c
@@ -0,0 +1,357 @@
+/* Copyright(c) 2018 Justin Iurman
+ */
+static const char *__doc__=
+ " XDP: DDoS protection via 5-tuples blacklist\n"
+ "\n"
+ "This program loads the XDP eBPF program into the kernel.\n"
+ "Use the cmdline tool for add/removing 5-tuples to the blacklist\n"
+ ;
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include /* dirname */
+
+#include
+#include
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+#include "xdp_5tuple_blacklist_common.h"
+
+static char ifname_buf[IF_NAMESIZE];
+static char *ifname = NULL;
+static int ifindex = -1;
+
+#define NR_MAPS 1
+int maps_marked_for_export[MAX_MAPS] = { 0 };
+
+static const char* map_idx_to_export_filename(int idx)
+{
+	const char *file = NULL;
+
+	/* Mapping map_fd[idx] to export filenames */
+	switch (idx) {
+	case 0: /* map_fd[0]: blacklist */
+		file = file_blacklist;
+		break;
+	default:
+		break;
+	}
+	return file;
+}
+
+static void remove_xdp_program(int ifindex, const char *ifname, __u32 xdp_flags)
+{
+	int i;
+	fprintf(stderr, "Removing XDP program on ifindex:%d device:%s\n",
+		ifindex, ifname);
+	if (ifindex > -1)
+		set_link_xdp_fd(ifindex, -1, xdp_flags);
+
+	/* Remove all exported map file */
+	for (i = 0; i < NR_MAPS; i++) {
+		const char *file = map_idx_to_export_filename(i);
+
+		if (unlink(file) < 0) {
+			printf("WARN: cannot rm map(%s) file:%s err(%d):%s\n",
+			       map_data[i].name, file, errno, strerror(errno));
+		}
+	}
+}
+
+static const struct option long_options[] = {
+	{"help", no_argument, NULL, 'h' },
+	{"remove", no_argument, NULL, 'r' },
+	{"dev", required_argument, NULL, 'd' },
+	{"quiet", no_argument, NULL, 'q' },
+	{"owner", required_argument, NULL, 'o' },
+	{"skb-mode", no_argument, NULL, 'S' },
+	{0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+	printf("\nDOCUMENTATION:\n%s\n", __doc__);
+	printf(" Usage: %s (options-see-below)\n",
+	       argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)",
+			       *long_options[i].flag);
+		else
+			printf(" short-option: -%c",
+			       long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+#ifndef BPF_FS_MAGIC
+# define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+/* Verify BPF-filesystem is mounted on given file path */
+static int bpf_fs_check_path(const char *path)
+{
+	struct statfs st_fs;
+	char *dname, *dir;
+	int err = 0;
+
+	if (path == NULL)
+		return -EINVAL;
+
+	dname = strdup(path);
+	if (dname == NULL)
+		return -ENOMEM;
+
+	dir = dirname(dname);
+	if (statfs(dir, &st_fs)) {
+		fprintf(stderr, "ERR: failed to statfs %s: (%d)%s\n",
+			dir, errno, strerror(errno));
+		err = -errno;
+	}
+	free(dname);
+
+	if (!err && st_fs.f_type != BPF_FS_MAGIC) {
+		fprintf(stderr,
+			"ERR: specified path %s is not on BPF FS\n\n"
+			" You need to mount the BPF filesystem type like:\n"
+			" mount -t bpf bpf /sys/fs/bpf/\n\n",
+			path);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+/* Load existing map via filesystem, if possible */
+int load_map_file(const char *file, struct bpf_map_data *map_data)
+{
+	int fd;
+
+	if (bpf_fs_check_path(file) < 0) {
+		exit(EXIT_FAIL_MAP_FS);
+	}
+
+	fd = bpf_obj_get(file);
+	if (fd > 0) { /* Great: map file already existed use it */
+		// FIXME: Verify map size etc is the same before returning it!
+		// data available via map->def.XXX and fdinfo
+		if (verbose)
+			printf(" - Loaded bpf-map:%-30s from file:%s\n",
+			       map_data->name, file);
+		return fd;
+	}
+	return -1;
+}
+
+/* Map callback
+ * ------------
+ * The bpf-ELF loader (bpf_load.c) got support[1] for a callback, just
+ * before creating the map (via bpf_create_map()). It allow assigning
+ * another FD and skips map creation.
+ *
+ * Using this to load map FD from via filesystem, if possible. One
+ * problem, cannot handle exporting the map here, as creation happens
+ * after this step.
+ *
+ * [1] kernel commit 6979bcc731f9 ("samples/bpf: load_bpf.c make
+ * callback fixup more flexible")
+ */
+void pre_load_maps_via_fs(struct bpf_map_data *map_data, int idx)
+{
+	/* This callback gets invoked for every map in ELF file */
+	const char *file;
+	int fd;
+
+	file = map_idx_to_export_filename(idx);
+	fd = load_map_file(file, map_data);
+
+	if (fd > 0) {
+		/* Makes bpf_load.c skip creating map */
+		map_data->fd = fd;
+	} else {
+		/* When map was NOT loaded from filesystem, then
+		 * bpf_load.c will create it. Mark map idx to get
+		 * it exported later
+		 */
+		maps_marked_for_export[idx] = 1;
+	}
+}
+
+int export_map_idx(int map_idx)
+{
+	const char *file;
+
+	file = map_idx_to_export_filename(map_idx);
+
+	/* Export map as a file */
+	if (bpf_obj_pin(map_fd[map_idx], file) != 0) {
+		fprintf(stderr, "ERR: Cannot pin map(%s) file:%s err(%d):%s\n",
+			map_data[map_idx].name, file, errno, strerror(errno));
+		return EXIT_FAIL_MAP;
+	}
+	if (verbose)
+		printf(" - Export bpf-map:%-30s to file:%s\n",
+		       map_data[map_idx].name, file);
+	return 0;
+}
+
+void export_maps(void)
+{
+	int i;
+
+	for (i = 0; i < NR_MAPS; i++) {
+		if (maps_marked_for_export[i] == 1)
+			export_map_idx(i);
+	}
+}
+
+void chown_maps(uid_t owner, gid_t group)
+{
+	const char *file;
+	int i;
+
+	for (i = 0; i < NR_MAPS; i++) {
+		file = map_idx_to_export_filename(i);
+
+		/* Change permissions and user for the map file, as this allow
+		 * an unpriviliged user to operate the cmdline tool.
+		 */
+		if (chown(file, owner, group) < 0)
+			fprintf(stderr,
+				"WARN: Cannot chown file:%s err(%d):%s\n",
+				file, errno, strerror(errno));
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	bool rm_xdp_prog = false;
+	struct passwd *pwd = NULL;
+	__u32 xdp_flags = 0;
+	char filename[256];
+	int longindex = 0;
+	uid_t owner = -1; /* -1 result in no-change of owner */
+	gid_t group = -1;
+	int opt;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	/* Parse commands line args */
+	while ((opt = getopt_long(argc, argv, "hSrqd:o:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'q':
+			verbose = 0;
+			break;
+		case 'r':
+			rm_xdp_prog = true;
+			break;
+		case 'o': /* extract owner and group from username */
+			if (!(pwd = getpwnam(optarg))) {
+				fprintf(stderr,
+					"ERR: unknown owner:%s err(%d):%s\n",
+					optarg, errno, strerror(errno));
+				goto error;
+			}
+			owner = pwd->pw_uid;
+			group = pwd->pw_gid;
+			break;
+		case 'd':
+			if (strlen(optarg) >= IF_NAMESIZE) {
+				fprintf(stderr, "ERR: --dev name too long\n");
+				goto error;
+			}
+			ifname = (char *)&ifname_buf;
+			strncpy(ifname, optarg, IF_NAMESIZE);
+			ifindex = if_nametoindex(ifname);
+			if (ifindex == 0) {
+				fprintf(stderr,
+					"ERR: --dev name unknown err(%d):%s\n",
+					errno, strerror(errno));
+				goto error;
+			}
+			break;
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'h':
+		error:
+		default:
+			usage(argv);
+			return EXIT_FAIL_OPTION;
+		}
+	}
+	/* Required options */
+	if (ifindex == -1) {
+		printf("ERR: required option --dev missing\n");
+		usage(argv);
+		return EXIT_FAIL_OPTION;
+	}
+	if (rm_xdp_prog) {
+		remove_xdp_program(ifindex, ifname, xdp_flags);
+		return EXIT_OK;
+	}
+	if (verbose) {
+		printf("Documentation:\n%s\n", __doc__);
+		printf(" - Attached to device:%s (ifindex:%d)\n",
+		       ifname, ifindex);
+	}
+
+	/* Increase resource limits */
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+		return 1;
+	}
+
+	/* Load bpf-ELF file with callback for loading maps via filesystem */
+	if (load_bpf_file_fixup_map(filename, pre_load_maps_via_fs)) {
+		fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+		return EXIT_FAIL;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	/* Export maps that were not loaded from filesystem */
+	export_maps();
+
+	/* uid_t may be unsigned, so compare against the "unset" sentinel */
+	if (owner != (uid_t)-1)
+		chown_maps(owner, group);
+
+	if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+		printf("link set xdp fd failed\n");
+		return EXIT_FAIL_XDP;
+	}
+
+	return EXIT_OK;
+}
diff --git a/kernel/samples/bpf/xdp_stateful_cmdline.c b/kernel/samples/bpf/xdp_stateful_cmdline.c
new file mode 100755
index 0000000..9a76cb0
--- /dev/null
+++ b/kernel/samples/bpf/xdp_stateful_cmdline.c
@@ -0,0 +1,491 @@
+/* Copyright(c) 2018 Justin Iurman
+ */
+static const char *__doc__=
+ " XDP stateful: command line tool";
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+/* libbpf.h defines bpf_* function helpers for syscalls,
+ * indirectly via ./tools/lib/bpf/bpf.h */
+#include "libbpf.h"
+
+#include "bpf_util.h"
+
+#include "xdp_stateful_common.h"
+
+enum {
+	ACTION_NONE = 0,
+	ACTION_ADD,
+	ACTION_LIST,
+	ACTION_LIST_RULES,
+	ACTION_FLUSH
+};
+
+static const char *xdp_proto_filter_names[PROTO_FILTER_MAX] = {
+	[PROTO_FILTER_TCP] = "TCP",
+	[PROTO_FILTER_UDP] = "UDP",
+	[PROTO_FILTER_OTHER] = "Other",
+};
+
+#define DEFINED_PROTOCOL 1
+#define DEFINED_IP_SOURCE 2
+#define DEFINED_IP_DESTINATION 4
+#define DEFINED_PORT_SOURCE 8
+#define DEFINED_PORT_DESTINATION 16
+
+#define DEFINED_3TUPLE (DEFINED_PROTOCOL | DEFINED_IP_SOURCE | DEFINED_IP_DESTINATION)
+#define DEFINED_5TUPLE (DEFINED_3TUPLE | DEFINED_PORT_SOURCE | DEFINED_PORT_DESTINATION)
+
+static const struct option long_options[] = {
+	{"help", no_argument, NULL, 'h' },
+	{"add", no_argument, NULL, 'a' },
+	{"udp", no_argument, NULL, 'u' },
+	{"tcp", no_argument, NULL, 't' },
+	{"ips", required_argument, NULL, 'i' },
+	{"ipd", required_argument, NULL, 'j' },
+	{"sport", required_argument, NULL, 's' },
+	{"dport", required_argument, NULL, 'd' },
+	{"list", no_argument, NULL, 'l' },
+	{"rules", no_argument, NULL, 'r' },
+	{"flush", no_argument, NULL, 'f' },
+	{0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+	printf("\nDOCUMENTATION:\n%s\n", __doc__);
+	printf("\n");
+	printf(" Usage: %s (options-see-below)\n",
+	       argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)",
+			       *long_options[i].flag);
+		else
+			printf(" short-option: -%c",
+			       long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int open_bpf_map(const char *file)
+{
+	int fd;
+
+	fd = bpf_obj_get(file);
+	if (fd < 0) {
+		printf("ERR: Failed to open bpf map file:%s err(%d):%s\n",
+		       file, errno, strerror(errno));
+		exit(EXIT_FAIL_MAP_FILE);
+	}
+	return fd;
+}
+
+/* Add one 3-tuple rule (TARGET_ACCEPT); an existing rule counts as success */
+int add_3tuple(int fd, char *ip_source, char *ip_destination, int protocol)
+{
+	int res, err;
+	struct three_tuple key_tuple;
+	__u8 action = TARGET_ACCEPT;
+
+	/* The map key is the raw struct bytes, so clear any padding too */
+	memset(&key_tuple, 0, sizeof(key_tuple));
+
+	/* Convert IP-string into 32-bit network byte-order value */
+	res = inet_pton(AF_INET, ip_source, &(key_tuple.ip_source));
+	res &= inet_pton(AF_INET, ip_destination, &(key_tuple.ip_destination));
+	if (res <= 0) {
+		fprintf(stderr,
+			"ERR: either IPv4 \"%s\" or \"%s\" not in presentation format\n",
+			ip_source, ip_destination);
+		return EXIT_FAIL_IP;
+	}
+
+	key_tuple.protocol = protocol;
+
+	res = bpf_map_update_elem(fd, &key_tuple, &action, BPF_NOEXIST);
+	if (res != 0) { /* 0 == success */
+		err = errno; /* fprintf() below may overwrite errno */
+		res = (protocol == IPPROTO_UDP) ? PROTO_FILTER_UDP : (protocol == IPPROTO_TCP) ? PROTO_FILTER_TCP : PROTO_FILTER_OTHER;
+		fprintf(stderr,
+			"%s() IPsource:%s IPdest:%s proto:%s errno(%d/%s)",
+			__func__, ip_source, ip_destination, xdp_proto_filter_names[res], err, strerror(err));
+
+		if (err == EEXIST) {
+			fprintf(stderr, ": Already in 3-tuples\n");
+			return EXIT_OK;
+		}
+		fprintf(stderr, "\n");
+		return EXIT_FAIL_MAP_KEY;
+	}
+	if (verbose) {
+		res = (protocol == IPPROTO_UDP) ? PROTO_FILTER_UDP : (protocol == IPPROTO_TCP) ? PROTO_FILTER_TCP : PROTO_FILTER_OTHER;
+		fprintf(stderr,
+			"%s() IPsource:%s IPdest:%s proto:%s\n",
+			__func__, ip_source, ip_destination, xdp_proto_filter_names[res]);
+	}
+
+	return EXIT_OK;
+}
+
+/* Add one 5-tuple rule (TARGET_DROP); an existing rule counts as success */
+int add_5tuple(int fd, char *ip_source, char *ip_destination, int port_source, int port_destination, int protocol)
+{
+	int res, err;
+	struct five_tuple key_tuple;
+	__u8 action = TARGET_DROP;
+
+	/* The map key is the raw struct bytes, so clear any padding too */
+	memset(&key_tuple, 0, sizeof(key_tuple));
+
+	/* Convert IP-string into 32-bit network byte-order value */
+	res = inet_pton(AF_INET, ip_source, &(key_tuple.ip_source));
+	res &= inet_pton(AF_INET, ip_destination, &(key_tuple.ip_destination));
+	if (res <= 0) {
+		fprintf(stderr,
+			"ERR: either IPv4 \"%s\" or \"%s\" not in presentation format\n",
+			ip_source, ip_destination);
+		return EXIT_FAIL_IP;
+	}
+
+	if (port_source < 0 || port_source > 65535 ||
+	    port_destination < 0 || port_destination > 65535) {
+		fprintf(stderr,
+			"ERR: source port \"%d\" or destination port \"%d\" invalid\n",
+			port_source, port_destination);
+		return EXIT_FAIL_PORT;
+	}
+
+	/* TODO ntohl for src/dst IPs end up with no matching -> why ? */
+	key_tuple.port_source = port_source;
+	key_tuple.port_destination = port_destination;
+	key_tuple.protocol = protocol;
+
+	res = bpf_map_update_elem(fd, &key_tuple, &action, BPF_NOEXIST);
+	if (res != 0) { /* 0 == success */
+		err = errno; /* fprintf() below may overwrite errno */
+		res = (protocol == IPPROTO_UDP) ? PROTO_FILTER_UDP : (protocol == IPPROTO_TCP) ? PROTO_FILTER_TCP : PROTO_FILTER_OTHER;
+		fprintf(stderr,
+			"%s() IPsource:%s IPdest:%s sport:%d dport:%d proto:%s errno(%d/%s)",
+			__func__, ip_source, ip_destination, port_source, port_destination, xdp_proto_filter_names[res], err, strerror(err));
+
+		if (err == EEXIST) {
+			fprintf(stderr, ": Already in 5-tuples\n");
+			return EXIT_OK;
+		}
+		fprintf(stderr, "\n");
+		return EXIT_FAIL_MAP_KEY;
+	}
+	if (verbose) {
+		res = (protocol == IPPROTO_UDP) ? PROTO_FILTER_UDP : (protocol == IPPROTO_TCP) ? PROTO_FILTER_TCP : PROTO_FILTER_OTHER;
+		fprintf(stderr,
+			"%s() IPsource:%s IPdest:%s sport:%d dport:%d proto:%s\n",
+			__func__, ip_source, ip_destination, port_source, port_destination, xdp_proto_filter_names[res]);
+	}
+
+	return EXIT_OK;
+}
+
+static void print_3tuple(struct three_tuple tuple, __u8 action)
+{
+	int res;
+	char ip_src[INET_ADDRSTRLEN] = {0}, ip_dst[INET_ADDRSTRLEN] = {0};
+
+	if (inet_ntop(AF_INET, &(tuple.ip_source), ip_src, sizeof(ip_src)) == NULL
+		|| inet_ntop(AF_INET, &(tuple.ip_destination), ip_dst, sizeof(ip_dst)) == NULL) {
+		fprintf(stderr, "Error while reading current 3-tuple data\n");
+	}
+	else {
+		res = (tuple.protocol == IPPROTO_UDP) ? PROTO_FILTER_UDP : (tuple.protocol == IPPROTO_TCP) ? PROTO_FILTER_TCP : PROTO_FILTER_OTHER;
+		printf("(%s) <%s> <%s> : %s\n",
+			xdp_proto_filter_names[res], ip_src, ip_dst, action == TARGET_DROP ? "XDP_DROP" : "XDP_PASS");
+	}
+}
+
+static void print_5tuple(struct five_tuple tuple, __u8 action)
+{
+	int res;
+	char ip_src[INET_ADDRSTRLEN] = {0}, ip_dst[INET_ADDRSTRLEN] = {0};
+
+	if (inet_ntop(AF_INET, &(tuple.ip_source), ip_src, sizeof(ip_src)) == NULL
+		|| inet_ntop(AF_INET, &(tuple.ip_destination), ip_dst, sizeof(ip_dst)) == NULL) {
+		fprintf(stderr, "Error while reading current 5-tuple data\n");
+	}
+	else {
+		res = (tuple.protocol == IPPROTO_UDP) ? PROTO_FILTER_UDP : (tuple.protocol == IPPROTO_TCP) ? PROTO_FILTER_TCP : PROTO_FILTER_OTHER;
+		printf("(%s) <%s> <%s> <%hu> <%hu> : %s\n",
+			xdp_proto_filter_names[res], ip_src, ip_dst, tuple.port_source, tuple.port_destination, action == TARGET_DROP ? "XDP_DROP" : "XDP_PASS");
+	}
+}
+
+static void print_conntrack(struct five_tuple tuple, struct flow_state state)
+{
+	int res;
+	char ip_src[INET_ADDRSTRLEN] = {0}, ip_dst[INET_ADDRSTRLEN] = {0};
+
+	if (inet_ntop(AF_INET, &(tuple.ip_source), ip_src, sizeof(ip_src)) == NULL
+		|| inet_ntop(AF_INET, &(tuple.ip_destination), ip_dst, sizeof(ip_dst)) == NULL) {
+		fprintf(stderr, "Error while reading current conntrack 5-tuple data\n");
+	}
+	else {
+		res = (tuple.protocol == IPPROTO_UDP) ? PROTO_FILTER_UDP : (tuple.protocol == IPPROTO_TCP) ? PROTO_FILTER_TCP : PROTO_FILTER_OTHER;
+		printf("(%s) <%s> <%s> <%hu> <%hu> : %llu (%llu) %u\n",
+			xdp_proto_filter_names[res], ip_src, ip_dst, tuple.port_source, tuple.port_destination, state.counter, state.timestamp, state.tcp_flags);
+	}
+}
+
+/* Print every 3-tuple rule stored in the map behind fd */
+static void print_3tuples(int fd)
+{
+	struct three_tuple key, next_key;
+	void *prev = NULL; /* NULL makes the kernel return the first key */
+	__u8 action;
+
+	printf("(Protocol) : ACTION\n");
+	printf("===============================================\n");
+	while (bpf_map_get_next_key(fd, prev, &next_key) == 0) {
+		key = next_key;
+		prev = &key;
+		if (bpf_map_lookup_elem(fd, &key, &action) != 0)
+			continue; /* lookup failed: nothing valid to print */
+		print_3tuple(key, action);
+	}
+}
+
+/* Print every 5-tuple rule stored in the map behind fd */
+static void print_5tuples(int fd)
+{
+	struct five_tuple key, next_key;
+	void *prev = NULL; /* NULL makes the kernel return the first key */
+	__u8 action;
+
+	printf("\n(Protocol) : ACTION\n");
+	printf("=====================================================================\n");
+	while (bpf_map_get_next_key(fd, prev, &next_key) == 0) {
+		key = next_key;
+		prev = &key;
+		if (bpf_map_lookup_elem(fd, &key, &action) != 0)
+			continue; /* lookup failed: nothing valid to print */
+		print_5tuple(key, action);
+	}
+}
+
+/* Print every tracked connection stored in the map behind fd */
+static void print_conntracks(int fd)
+{
+	struct five_tuple key, next_key;
+	void *prev = NULL; /* NULL makes the kernel return the first key */
+	struct flow_state state;
+
+	printf("\n(Protocol) : COUNT (TIMESTAMP) TCP_FLAGS\n");
+	printf("================================================================================\n");
+	while (bpf_map_get_next_key(fd, prev, &next_key) == 0) {
+		key = next_key;
+		prev = &key;
+		if (bpf_map_lookup_elem(fd, &key, &state) != 0)
+			continue; /* lookup failed: nothing valid to print */
+		print_conntrack(key, state);
+	}
+}
+
+/* Delete every 3-tuple rule; stops at the first failure */
+static void flush_3tuples(int fd)
+{
+	struct three_tuple key;
+
+	/* NULL as previous key: always fetch the first remaining entry */
+	while (bpf_map_get_next_key(fd, NULL, &key) == 0) {
+		if (bpf_map_delete_elem(fd, &key) != 0) {
+			fprintf(stderr, "Error while deleting a 3-tuple, flushing stopped\n");
+			break;
+		}
+	}
+}
+
+/* Delete every 5-tuple rule; stops at the first failure */
+static void flush_5tuples(int fd)
+{
+	struct five_tuple key;
+
+	/* NULL as previous key: always fetch the first remaining entry */
+	while (bpf_map_get_next_key(fd, NULL, &key) == 0) {
+		if (bpf_map_delete_elem(fd, &key) != 0) {
+			fprintf(stderr, "Error while deleting a 5-tuple, flushing stopped\n");
+			break;
+		}
+	}
+}
+
+/* Delete every tracked connection; stops at the first failure */
+static void flush_conntrack(int fd)
+{
+	struct five_tuple key;
+
+	/* NULL as previous key: always fetch the first remaining entry */
+	while (bpf_map_get_next_key(fd, NULL, &key) == 0) {
+		if (bpf_map_delete_elem(fd, &key) != 0) {
+			fprintf(stderr, "Error while deleting a conn track, flushing stopped\n");
+			break;
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int opt;
+	int longindex;
+	unsigned int action = ACTION_NONE;
+	int proto = 0, sport = 0, dport = 0, tmp;
+	__u8 defined = 0;
+#define STR_MAX 16 // For trivial input validation
+	char _ip_src_buf[STR_MAX] = {}, _ip_dst_buf[STR_MAX] = {};
+	char *ip_src = NULL, *ip_dst = NULL;
+	int fd;
+
+	while ((opt = getopt_long(argc, argv, "ahlrtufi:j:s:d:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'a':
+			action = ACTION_ADD;
+			break;
+		case 'i':
+		case 'j':
+			/* >= keeps room for the terminating NUL in the buffer */
+			if (!optarg || strlen(optarg) >= STR_MAX) {
+				printf("ERR: src and/or dst ip too long or empty\n");
+				goto fail_opt;
+			}
+
+			if (opt == 'i') {
+				ip_src = (char *)&_ip_src_buf;
+				strncpy(ip_src, optarg, STR_MAX);
+				defined |= DEFINED_IP_SOURCE;
+			} else {
+				ip_dst = (char *)&_ip_dst_buf;
+				strncpy(ip_dst, optarg, STR_MAX);
+				defined |= DEFINED_IP_DESTINATION;
+			}
+			break;
+		case 's':
+		case 'd':
+			if (!optarg) {
+				printf("ERR: source and/or destination port is empty\n");
+				goto fail_opt;
+			}
+
+			tmp = atoi(optarg);
+			if (tmp < 0 || tmp > 65535) {
+				printf("ERR: source and/or destination port is invalid\n");
+				goto fail_opt;
+			}
+
+			if (opt == 's') {
+				sport = tmp;
+				defined |= DEFINED_PORT_SOURCE;
+			} else {
+				dport = tmp;
+				defined |= DEFINED_PORT_DESTINATION;
+			}
+			break;
+		case 'u':
+			proto = IPPROTO_UDP;
+			defined |= DEFINED_PROTOCOL;
+			break;
+		case 't':
+			proto = IPPROTO_TCP;
+			defined |= DEFINED_PROTOCOL;
+			break;
+		case 'l':
+			action = ACTION_LIST;
+			break;
+		case 'r':
+			action = ACTION_LIST_RULES;
+			break;
+		case 'f':
+			action = ACTION_FLUSH;
+			break;
+		case 'h':
+		fail_opt:
+		default:
+			usage(argv);
+			return EXIT_FAIL_OPTION;
+		}
+	}
+
+	// Catch non-option arguments
+	if (argv[optind] != NULL) {
+		fprintf(stderr, "ERR: Unknown non-option argument: %s\n",
+			argv[optind]);
+		goto fail_opt;
+	}
+
+	if (action == ACTION_ADD) {
+		if (defined != DEFINED_3TUPLE && defined != DEFINED_5TUPLE) {
+			fprintf(stderr,
+				"ERR: missing fields in the 3-tuple or 5-tuple\n");
+			goto fail_opt;
+		}
+
+		if (defined == DEFINED_3TUPLE) {
+			fd = open_bpf_map(file_three_tuple);
+			tmp = add_3tuple(fd, ip_src, ip_dst, proto);
+			if (tmp == EXIT_OK)
+				tmp = add_3tuple(fd, ip_dst, ip_src, proto);
+		} else {
+			fd = open_bpf_map(file_five_tuple);
+			tmp = add_5tuple(fd, ip_src, ip_dst, sport, dport, proto);
+			if (tmp == EXIT_OK)
+				tmp = add_5tuple(fd, ip_dst, ip_src, dport, sport, proto);
+		}
+
+		close(fd);
+		return tmp;
+	}
+	else if (action == ACTION_LIST) {
+		fd = open_bpf_map(file_conn_track);
+		print_conntracks(fd);
+		close(fd);
+	}
+	else if (action == ACTION_LIST_RULES) {
+		fd = open_bpf_map(file_three_tuple);
+		print_3tuples(fd);
+		close(fd);
+
+		fd = open_bpf_map(file_five_tuple);
+		print_5tuples(fd);
+		close(fd);
+	}
+	else if (action == ACTION_FLUSH) {
+		fd = open_bpf_map(file_conn_track);
+		flush_conntrack(fd);
+		close(fd);
+
+		fd = open_bpf_map(file_three_tuple);
+		flush_3tuples(fd);
+		close(fd);
+
+		fd = open_bpf_map(file_five_tuple);
+		flush_5tuples(fd);
+		close(fd);
+	}
+
+	return 0;
+}
diff --git a/kernel/samples/bpf/xdp_stateful_common.h b/kernel/samples/bpf/xdp_stateful_common.h
new file mode 100755
index 0000000..6f63165
--- /dev/null
+++ b/kernel/samples/bpf/xdp_stateful_common.h
@@ -0,0 +1,59 @@
+#ifndef
__XDP_STATEFUL_COMMON_H +#define __XDP_STATEFUL_COMMON_H + +/* Exit return codes */ +#define EXIT_OK 0 +#define EXIT_FAIL 1 +#define EXIT_FAIL_OPTION 2 +#define EXIT_FAIL_XDP 3 +#define EXIT_FAIL_MAP 20 +#define EXIT_FAIL_MAP_KEY 21 +#define EXIT_FAIL_MAP_FILE 22 +#define EXIT_FAIL_MAP_FS 23 +#define EXIT_FAIL_IP 30 +#define EXIT_FAIL_PORT 31 + +struct three_tuple { + __u8 protocol; + __u32 ip_source; + __u32 ip_destination; +}; + +struct five_tuple { + __u8 protocol; + __u32 ip_source; + __u32 ip_destination; + __u16 port_source; + __u16 port_destination; +}; + +struct flow_state { + __u64 timestamp; + __u8 tcp_flags; + __u64 counter; +}; + +enum { + PROTO_FILTER_TCP = 0, + PROTO_FILTER_UDP, + PROTO_FILTER_OTHER, + PROTO_FILTER_MAX +}; + +enum { + TARGET_DROP = 0, + TARGET_ACCEPT, + TARGET_MAX +}; + +static int verbose = 0; + +/* Export eBPF map for stateful 3-tuple and 5-tuple as a file + * Gotcha need to mount: + * mount -t bpf bpf /sys/fs/bpf/ + */ +static const char *file_conn_track = "/sys/fs/bpf/stateful_conn_track"; +static const char *file_three_tuple = "/sys/fs/bpf/stateful_three_tuple"; +static const char *file_five_tuple = "/sys/fs/bpf/stateful_five_tuple"; + +#endif diff --git a/kernel/samples/bpf/xdp_stateful_kern.c b/kernel/samples/bpf/xdp_stateful_kern.c new file mode 100755 index 0000000..f55c294 --- /dev/null +++ b/kernel/samples/bpf/xdp_stateful_kern.c @@ -0,0 +1,315 @@ +/* XDP Stateful + * + * Copyright(c) 2018 Justin Iurman + */ +#define KBUILD_MODNAME "stateful" +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +enum { + TARGET_DROP = 0, + TARGET_ACCEPT, + TARGET_MAX +}; + +struct three_tuple { + __u8 protocol; + __u32 ip_source; + __u32 ip_destination; +}; + +struct five_tuple { + __u8 protocol; + __u32 ip_source; + __u32 ip_destination; + __u16 port_source; + __u16 port_destination; +}; + +struct flow_state { + __u64 timestamp; + __u8 tcp_flags; + __u64 counter; +}; + +struct 
tmp_ports { + __u16 src; + __u16 dst; +}; + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct bpf_map_def SEC("maps") stateful_conn_track = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct five_tuple), + .value_size = sizeof(struct flow_state), + .max_entries = 100000, + .map_flags = BPF_F_NO_PREALLOC, +}; + +struct bpf_map_def SEC("maps") stateful_three_tuple = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct three_tuple), + .value_size = sizeof(__u8), + .max_entries = 100000, + .map_flags = BPF_F_NO_PREALLOC, +}; + +struct bpf_map_def SEC("maps") stateful_five_tuple = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct five_tuple), + .value_size = sizeof(__u8), + .max_entries = 100000, + .map_flags = BPF_F_NO_PREALLOC, +}; + +//#define DEBUG 1 +#ifdef DEBUG +/* Only use this for debug output. Notice output from bpf_trace_printk() + * end-up in /sys/kernel/debug/tracing/trace_pipe + */ +#define bpf_debug(fmt, ...) \ + ({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ + }) +#else +#define bpf_debug(fmt, ...) 
{ } +#endif + +/* Parse Ethernet layer 2, extract network layer 3 offset and protocol + * + * Returns false on error and non-supported ether-type + */ +static __always_inline +bool parse_eth(struct ethhdr *eth, void *data_end, + u16 *eth_proto, u64 *l3_offset) +{ + u16 eth_type; + u64 offset; + + offset = sizeof(*eth); + if ((void *)eth + offset > data_end) + return false; + + eth_type = eth->h_proto; + //bpf_debug("Debug: eth_type:0x%x\n", ntohs(eth_type)); + + /* Skip non 802.3 Ethertypes */ + if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN)) + return false; + + /* Handle VLAN tagged packet */ + if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } + /* Handle double VLAN tagged packet */ + if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } + + *eth_proto = ntohs(eth_type); + *l3_offset = offset; + return true; +} + +bool extract_l4_data(struct xdp_md *ctx, u8 proto, void *hdr, struct tmp_ports *ports, u8 *tcp_flags) +{ + void *data_end = (void *)(long)ctx->data_end; + struct udphdr *udph; + struct tcphdr *tcph; + + switch (proto) { + case IPPROTO_UDP: + udph = hdr; + if (udph + 1 > data_end) { + bpf_debug("Invalid UDPv4 packet: L4off:%llu\n", + sizeof(struct iphdr) + sizeof(struct udphdr)); + return false; + } + ports->src = ntohs(udph->source); + ports->dst = ntohs(udph->dest); + *tcp_flags = 0; + break; + case IPPROTO_TCP: + tcph = hdr; + if (tcph + 1 > data_end) { + bpf_debug("Invalid TCPv4 packet: L4off:%llu\n", + sizeof(struct iphdr) + sizeof(struct tcphdr)); + return false; + } + ports->src = ntohs(tcph->source); 
+ ports->dst = ntohs(tcph->dest); + *tcp_flags = ((u8 *)tcph)[13]; + break; + default: + return true; + } + + return true; +} + +bool lookup_flow(struct five_tuple *key, u8 tcp_flags) +{ + struct flow_state *state = bpf_map_lookup_elem(&stateful_conn_track, key); + if (state) + { + state->timestamp = bpf_ktime_get_ns(); + state->tcp_flags |= tcp_flags; + state->counter++; + + return true; + } + + return false; +} + +bool lookup_match(struct bpf_map_def *map, void *key, u8 *action) +{ + u8 *target = bpf_map_lookup_elem(map, key); + if (target) + { + if (*target != *action) + *action = *target; + + return true; + } + + return false; +} + +void add_flow_entry(struct five_tuple key, u8 tcp_flags) +{ + u16 tmp_port = key.port_source; + u32 tmp_ip = key.ip_source; + + struct flow_state state = {}; + state.timestamp = bpf_ktime_get_ns(); + state.tcp_flags |= tcp_flags; + state.counter = 1L; + + bpf_map_update_elem(&stateful_conn_track, &key, &state, BPF_ANY); + + key.port_source = key.port_destination; + key.ip_source = key.ip_destination; + key.port_destination = tmp_port; + key.ip_destination = tmp_ip; + + state.timestamp = 0L; + state.tcp_flags = 0; + state.counter = 0L; + + bpf_map_update_elem(&stateful_conn_track, &key, &state, BPF_ANY); +} + +static __always_inline +u32 parse_ipv4(struct xdp_md *ctx, u64 l3_offset) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + l3_offset; + struct three_tuple key_three_tuple = {}; + struct five_tuple key_five_tuple = {}; + u8 action = TARGET_ACCEPT; + struct tmp_ports ports = {}; + u32 srcip=0, dstip=0; + u8 tcp_flags = 0; + bool matched; + + /* Hint: +1 is sizeof(struct iphdr) */ + if (iph + 1 > data_end) { + bpf_debug("Invalid IPv4 packet: L3off:%llu\n", l3_offset); + return XDP_ABORTED; + } + + srcip = ntohl(iph->saddr); + dstip = ntohl(iph->daddr); + + if (!extract_l4_data(ctx, iph->protocol, iph + 1, &ports, &tcp_flags)) + return XDP_ABORTED; + + 
bpf_debug("Packet: (proto %u) sport = %u, dport = %u\n", iph->protocol, ports.src, ports.dst); + + // 3-tuple (both sides) + key_three_tuple.protocol = iph->protocol; + key_three_tuple.ip_source = srcip; + key_three_tuple.ip_destination = dstip; + matched = lookup_match(&stateful_three_tuple, &key_three_tuple, &action); + + // 5-tuple (both sides) + key_five_tuple.protocol = iph->protocol; + key_five_tuple.ip_source = srcip; + key_five_tuple.ip_destination = dstip; + key_five_tuple.port_source = ports.src; + key_five_tuple.port_destination = ports.dst; + matched |= lookup_match(&stateful_five_tuple, &key_five_tuple, &action); + + // If matched, Flow tracking based on 5-tuple + if (matched && !lookup_flow(&key_five_tuple, tcp_flags)) + { + add_flow_entry(key_five_tuple, tcp_flags); + } + + return (action == TARGET_DROP) ? XDP_DROP : XDP_PASS; +} + +static __always_inline +u32 handle_eth_protocol(struct xdp_md *ctx, u16 eth_proto, u64 l3_offset) +{ + switch (eth_proto) { + case ETH_P_IP: + return parse_ipv4(ctx, l3_offset); + break; + case ETH_P_IPV6: /* Not handler for IPv6 yet*/ + case ETH_P_ARP: /* Let OS handle ARP */ + /* Fall-through */ + default: + //bpf_debug("Not handling eth_proto:0x%x\n", eth_proto); + return XDP_PASS; + } + return XDP_PASS; +} + +SEC("xdp_prog") +int xdp_program(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + u16 eth_proto; + u64 l3_offset; + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) { + bpf_debug("Cannot parse L2: L3off:%llu proto:0x%x\n", + l3_offset, eth_proto); + return XDP_PASS; /* Skip */ + } + //bpf_debug("Reached L3: L3off:%llu proto:0x%x\n", l3_offset, eth_proto); + + return handle_eth_protocol(ctx, eth_proto, l3_offset); +} + +char _license[] SEC("license") = "GPL"; diff --git a/kernel/samples/bpf/xdp_stateful_user.c b/kernel/samples/bpf/xdp_stateful_user.c new file mode 100755 index 0000000..651f5f2 --- /dev/null +++ 
b/kernel/samples/bpf/xdp_stateful_user.c @@ -0,0 +1,371 @@ +/* Copyright(c) 2018 Justin Iurman + */ +static const char *__doc__= + " XDP: Stateful\n" + "\n" + "This program loads the XDP eBPF program into the kernel.\n" + "Use the cmdline tool for add/removing rules\n" + ; + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include /* dirname */ + +#include +#include + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +#include "xdp_stateful_common.h" + +#define MAX_NB_INTF 4 +static int ifindex[MAX_NB_INTF]; +static char interfaces[MAX_NB_INTF][IF_NAMESIZE]; +static int nb_intf; + +#define NR_MAPS 3 +int maps_marked_for_export[MAX_MAPS] = { 0 }; + +static const char* map_idx_to_export_filename(int idx) +{ + const char *file = NULL; + + /* Mapping map_fd[idx] to export filenames */ + switch (idx) { + case 0: + file = file_conn_track; + break; + case 1: + file = file_three_tuple; + break; + case 2: + file = file_five_tuple; + break; + default: + break; + } + return file; +} + +static void remove_xdp_program(__u32 xdp_flags) +{ + int i; + for(i=0; i -1) + set_link_xdp_fd(ifindex[i], -1, xdp_flags); + } + + /* Remove all exported map file */ + for (i = 0; i < NR_MAPS; i++) { + const char *file = map_idx_to_export_filename(i); + + if (unlink(file) < 0) { + printf("WARN: cannot rm map(%s) file:%s err(%d):%s\n", + map_data[i].name, file, errno, strerror(errno)); + } + } +} + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"remove", no_argument, NULL, 'r' }, + {"dev", required_argument, NULL, 'd' }, + {"quiet", no_argument, NULL, 'q' }, + {"owner", required_argument, NULL, 'o' }, + {"skb-mode", no_argument, NULL, 'S' }, + {0, 0, NULL, 0 } +}; + +static void usage(char *argv[]) +{ + int i; + printf("\nDOCUMENTATION:\n%s\n", __doc__); + printf(" Usage: %s (options-see-below)\n", + 
argv[0]); + printf(" Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-12s", long_options[i].name); + if (long_options[i].flag != NULL) + printf(" flag (internal value:%d)", + *long_options[i].flag); + else + printf(" short-option: -%c", + long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +#ifndef BPF_FS_MAGIC +# define BPF_FS_MAGIC 0xcafe4a11 +#endif + +/* Verify BPF-filesystem is mounted on given file path */ +static int bpf_fs_check_path(const char *path) +{ + struct statfs st_fs; + char *dname, *dir; + int err = 0; + + if (path == NULL) + return -EINVAL; + + dname = strdup(path); + if (dname == NULL) + return -ENOMEM; + + dir = dirname(dname); + if (statfs(dir, &st_fs)) { + fprintf(stderr, "ERR: failed to statfs %s: (%d)%s\n", + dir, errno, strerror(errno)); + err = -errno; + } + free(dname); + + if (!err && st_fs.f_type != BPF_FS_MAGIC) { + fprintf(stderr, + "ERR: specified path %s is not on BPF FS\n\n" + " You need to mount the BPF filesystem type like:\n" + " mount -t bpf bpf /sys/fs/bpf/\n\n", + path); + err = -EINVAL; + } + + return err; +} + +/* Load existing map via filesystem, if possible */ +int load_map_file(const char *file, struct bpf_map_data *map_data) +{ + int fd; + + if (bpf_fs_check_path(file) < 0) { + exit(EXIT_FAIL_MAP_FS); + } + + fd = bpf_obj_get(file); + if (fd > 0) { /* Great: map file already existed use it */ + // FIXME: Verify map size etc is the same before returning it! + // data available via map->def.XXX and fdinfo + if (verbose) + printf(" - Loaded bpf-map:%-30s from file:%s\n", + map_data->name, file); + return fd; + } + return -1; +} + +/* Map callback + * ------------ + * The bpf-ELF loader (bpf_load.c) got support[1] for a callback, just + * before creating the map (via bpf_create_map()). It allow assigning + * another FD and skips map creation. + * + * Using this to load map FD from via filesystem, if possible. 
One + * problem, cannot handle exporting the map here, as creation happens + * after this step. + * + * [1] kernel commit 6979bcc731f9 ("samples/bpf: load_bpf.c make + * callback fixup more flexible") + */ +void pre_load_maps_via_fs(struct bpf_map_data *map_data, int idx) +{ + /* This callback gets invoked for every map in ELF file */ + const char *file; + int fd; + + file = map_idx_to_export_filename(idx); + fd = load_map_file(file, map_data); + + if (fd > 0) { + /* Makes bpf_load.c skip creating map */ + map_data->fd = fd; + } else { + /* When map was NOT loaded from filesystem, then + * bpf_load.c will create it. Mark map idx to get + * it exported later + */ + maps_marked_for_export[idx] = 1; + } +} + +int export_map_idx(int map_idx) +{ + const char *file; + + file = map_idx_to_export_filename(map_idx); + + /* Export map as a file */ + if (bpf_obj_pin(map_fd[map_idx], file) != 0) { + fprintf(stderr, "ERR: Cannot pin map(%s) file:%s err(%d):%s\n", + map_data[map_idx].name, file, errno, strerror(errno)); + return EXIT_FAIL_MAP; + } + if (verbose) + printf(" - Export bpf-map:%-30s to file:%s\n", + map_data[map_idx].name, file); + return 0; +} + +void export_maps(void) +{ + int i; + + for (i = 0; i < NR_MAPS; i++) { + if (maps_marked_for_export[i] == 1) + export_map_idx(i); + } +} + +void chown_maps(uid_t owner, gid_t group) +{ + const char *file; + int i; + + for (i = 0; i < NR_MAPS; i++) { + file = map_idx_to_export_filename(i); + + /* Change permissions and user for the map file, as this allow + * an unpriviliged user to operate the cmdline tool. 
+ */ + if (chown(file, owner, group) < 0) + fprintf(stderr, + "WARN: Cannot chown file:%s err(%d):%s\n", + file, errno, strerror(errno)); + } +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + bool rm_xdp_prog = false; + struct passwd *pwd = NULL; + __u32 xdp_flags = 0; + char filename[256]; + int longindex = 0; + uid_t owner = -1; /* -1 result in no-change of owner */ + gid_t group = -1; + int opt, i; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + /* Parse commands line args */ + while ((opt = getopt_long(argc, argv, "hSrqd:", + long_options, &longindex)) != -1) { + switch (opt) { + case 'q': + verbose = 0; + break; + case 'r': + rm_xdp_prog = true; + break; + case 'o': /* extract owner and group from username */ + if (!(pwd = getpwnam(optarg))) { + fprintf(stderr, + "ERR: unknown owner:%s err(%d):%s\n", + optarg, errno, strerror(errno)); + goto error; + } + owner = pwd->pw_uid; + group = pwd->pw_gid; + break; + case 'd': + if (nb_intf >= MAX_NB_INTF) { + fprintf(stderr, "ERR: --dev maximum 4 interfaces\n"); + goto error; + } else if (strlen(optarg) >= IF_NAMESIZE) { + fprintf(stderr, "ERR: --dev name too long\n"); + goto error; + } + strncpy(interfaces[nb_intf], optarg, IF_NAMESIZE); + ifindex[nb_intf] = if_nametoindex(interfaces[nb_intf]); + if (ifindex[nb_intf] == 0) { + fprintf(stderr, + "ERR: --dev name unknown err(%d):%s\n", + errno, strerror(errno)); + goto error; + } + nb_intf++; + break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'h': + error: + default: + usage(argv); + return EXIT_FAIL_OPTION; + } + } + /* Required options */ + if (nb_intf == 0) { + printf("ERR: required option --dev missing"); + usage(argv); + return EXIT_FAIL_OPTION; + } + if (rm_xdp_prog) { + remove_xdp_program(xdp_flags); + return EXIT_OK; + } + if (verbose) { + printf("Documentation:\n%s\n", __doc__); + for(i=0; i= 0) + chown_maps(owner, group); + + for(i=0; i