
neirac
Hi All,
I have added tests under samples/bpf, here is test calling this new helper. Inside the container ping-10619 [000] d.s1 5319.547909: 0x00000001: ns_id 4026532197 tgid 10619 pid 10619 Outside the container ping-12174 [000] d.s1 5480.582818: 0x00000001: ns_id 4026531836 tgid 12174 pid 12174
Let me know if something needs to be changed. Thanks again for your help and comments. Here is the patch
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b69e7a5..34b608e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -227,6 +227,12 @@ struct bpf_event_entry { struct rcu_head rcu; }; +struct bpf_current_pidns_info { + u64 ns_id; + u32 tgid; + u32 pid; +}; + u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -375,6 +381,8 @@ extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto; + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e99e3e6..c1b94fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -539,6 +539,15 @@ union bpf_attr { * @mode: operation mode (enum bpf_adj_room_mode) * @flags: reserved for future use * Return: 0 on success or negative error code + * + * int bpf_get_current_pidns_info(void *buf, int size_of_buf) + * stores the following namespace data into + * bpf_current_pins_info struct: + * namespace id + * tgid inside namespace + * pid inside namespace + * Return: 0 on success or negative error + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -591,7 +600,9 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(get_current_pidns_info), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ad5f559..c81ffa0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1379,6 +1379,9 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; +const struct bpf_func_proto bpf_get_current_pidns_info __weak; + + const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { return NULL; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3d24e23..682d623 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <linux/uidgid.h> #include <linux/filter.h> +#include <linux/pid_namespace.h> /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -179,3 +180,45 @@ const struct bpf_func_proto bpf_get_current_comm_proto = { .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE, }; + +BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size) +{ + + if (!buf) + return -EINVAL; + + struct task_struct *ts = current; + struct task_struct *ns_task = NULL; + const struct cred *cred = NULL; + pid_t pid = 0; + + if (unlikely(!ts)) + return -EINVAL; + + ((struct bpf_current_pidns_info*)buf)->ns_id = + ts->nsproxy->pid_ns_for_children->ns.inum; + + pid = task_pid_nr_ns(ts, ts->nsproxy->pid_ns_for_children); + + if (!pid) + return -EINVAL; + + ns_task = find_task_by_pid_ns(pid, ts->nsproxy->pid_ns_for_children); + + if (unlikely(!ns_task)) + return -EINVAL; + + ((struct bpf_current_pidns_info*)buf)->tgid = ns_task->tgid; + ((struct bpf_current_pidns_info*)buf)->pid = ns_task->pid; + + return 0; +} + + +const struct bpf_func_proto bpf_get_current_pidns_info_proto = { + .func = bpf_get_current_pidns_info, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index dc498b6..06297e7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -487,6 +487,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_probe_read_str: return &bpf_probe_read_str_proto; + case BPF_FUNC_get_current_pidns_info: + return &bpf_get_current_pidns_info_proto; default: return NULL; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 87246be..0af924f 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -37,6 +37,7 @@ hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example hostprogs-y += load_sock_ops +hostprogs-y += trace_ns_info # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -78,7 +79,7 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o - +trace_ns_info-objs := bpf_load.o $(LIBBPF) trace_ns_info_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) always += sockex1_kern.o @@ -119,6 +120,7 @@ always += tcp_bufs_kern.o always += tcp_cong_kern.o always += tcp_iw_kern.o always += tcp_clamp_kern.o +always += trace_ns_info_user_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -155,6 +157,7 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf +HOSTLOADLIBES_trace_ns_info += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c new file mode 100644 index 0000000..0dce083 --- /dev/null +++ b/samples/bpf/trace_ns_info_user.c @@ -0,0 +1,32 @@ +/* Copyright (c) 2017 cneirabustos@... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <stdio.h> +#include <linux/bpf.h> +#include <unistd.h> +#include "libbpf.h" +#include "bpf_load.h" + +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]); + printf("loading %s\n",filename); + + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + f = popen("taskset 1 ping localhost", "r"); + (void) f; + read_trace_pipe(); + return 0; +} diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c new file mode 100644 index 0000000..f8ed052 --- /dev/null +++ b/samples/bpf/trace_ns_info_user_kern.c @@ -0,0 +1,40 @@ +/* Copyright (c) 2017 cneirabustos@... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct pidns_info { + u64 ns_id; + u32 ns_tgid; + u32 ns_pid; +}; + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + */ +SEC("kprobe/__netif_receive_skb_core") +int bpf_prog1(struct pt_regs *ctx) +{ + struct pidns_info nsinfo; + int ok=0; + char fmt[] = "ns_id %lld tgid %d pid %d\n"; + ok = bpf_get_current_pidns_info(&nsinfo,sizeof (nsinfo)); + + if(!ok) + bpf_trace_printk(fmt, sizeof(fmt), nsinfo.ns_id, + nsinfo.ns_tgid, nsinfo.ns_pid); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d50ac34..202102d 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -64,6 +64,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; +static int (*bpf_get_current_pidns_info)(void *buf, int buf_size) = + (void *) BPF_FUNC_get_current_pidns_info; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */
|
On Thu, Sep 28, 2017 at 2:02 PM, carlos antonio neira bustos <cneirabustos@...> wrote: Hi All,
I'm still working this issue https://github.com/iovisor/bcc/issues/1329. I have added tests under samples/bpf, here is test calling this new helper.
Inside the container
ping-10619 [000] d.s1 5319.547909: 0x00000001: ns_id 4026532197 tgid 10619 pid 10619
Outside the container
ping-12174 [000] d.s1 5480.582818: 0x00000001: ns_id 4026531836 tgid 12174 pid 12174
Let me know if something needs to be changed.
Thanks again for your help and comments.
Here is the patch
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b69e7a5..34b608e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -227,6 +227,12 @@ struct bpf_event_entry { struct rcu_head rcu; };
+struct bpf_current_pidns_info { + u64 ns_id; + u32 tgid; + u32 pid; +}; During linux plumbers conference, I talked to Eric Biederman (kernel namespace maintainer). The below is an example output of "stat -L /proc/self/ns/pid": -bash-4.3$ stat -L /proc/self/ns/pid File: '/proc/self/ns/pid' Size: 0 Blocks: 0 IO Block: 4096 regular empty file Device: 3h/3d Inode: 4026531836 Links: 1 Access: (0444/-r--r--r--) Uid: ( 0/ root) Gid: ( 0/ root) Context: system_u:object_r:nsfs_t:s0 Access: 2017-09-28 21:21:41.496571299 -0700 Modify: 2017-09-28 21:21:41.496571299 -0700 Change: 2017-09-28 21:21:41.496571299 -0700 Birth: - -bash-4.3$ You will notice that there is a "Device" field with major and minor number. Currently, all namespace files will have the same device. However, in the future, it is possible (maybe under really rare cases) that different pid_ns files may belong to different device. So he suggests that we should put device major/minor numbers in the pidns_info structure as well. Could you help do that as well? + u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -375,6 +381,8 @@ extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto; + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e99e3e6..c1b94fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -539,6 +539,15 @@ union bpf_attr { * @mode: operation mode (enum bpf_adj_room_mode) * @flags: reserved for future use * Return: 0 on success or negative error code + * + * int bpf_get_current_pidns_info(void *buf, int size_of_buf) + * stores the following namespace data into + * bpf_current_pins_info struct: + * namespace id + * tgid inside namespace + * pid inside namespace + * Return: 0 on success or negative error + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -591,7 +600,9 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(get_current_pidns_info), +
/* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ad5f559..c81ffa0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1379,6 +1379,9 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak; + + const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { return NULL; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3d24e23..682d623 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <linux/uidgid.h> #include <linux/filter.h> +#include <linux/pid_namespace.h>
/* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -179,3 +180,45 @@ const struct bpf_func_proto bpf_get_current_comm_proto = { .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE, }; + +BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size) +{ + + if (!buf) + return -EINVAL; + + struct task_struct *ts = current; + struct task_struct *ns_task = NULL; + const struct cred *cred = NULL; + pid_t pid = 0; + + if (unlikely(!ts)) + return -EINVAL; + + ((struct bpf_current_pidns_info*)buf)->ns_id = + ts->nsproxy->pid_ns_for_children->ns.inum; + + pid = task_pid_nr_ns(ts, ts->nsproxy->pid_ns_for_children); + + if (!pid) + return -EINVAL; + + ns_task = find_task_by_pid_ns(pid, ts->nsproxy->pid_ns_for_children);
I did not check the code. But pid name space could be deeply nested. Have you tested the case for more than two level nesting? + if (unlikely(!ns_task)) + return -EINVAL; + + ((struct bpf_current_pidns_info*)buf)->tgid = ns_task->tgid; + ((struct bpf_current_pidns_info*)buf)->pid = ns_task->pid; + + return 0; +} + + +const struct bpf_func_proto bpf_get_current_pidns_info_proto = { + .func = bpf_get_current_pidns_info, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index dc498b6..06297e7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -487,6 +487,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_probe_read_str: return &bpf_probe_read_str_proto; + case BPF_FUNC_get_current_pidns_info: + return &bpf_get_current_pidns_info_proto; default: return NULL; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 87246be..0af924f 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -37,6 +37,7 @@ hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example hostprogs-y += load_sock_ops +hostprogs-y += trace_ns_info
# Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -78,7 +79,7 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o - +trace_ns_info-objs := bpf_load.o $(LIBBPF) trace_ns_info_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) always += sockex1_kern.o @@ -119,6 +120,7 @@ always += tcp_bufs_kern.o always += tcp_cong_kern.o always += tcp_iw_kern.o always += tcp_clamp_kern.o +always += trace_ns_info_user_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -155,6 +157,7 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf +HOSTLOADLIBES_trace_ns_info += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c new file mode 100644 index 0000000..0dce083 --- /dev/null +++ b/samples/bpf/trace_ns_info_user.c @@ -0,0 +1,32 @@ +/* Copyright (c) 2017 cneirabustos@... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <stdio.h> +#include <linux/bpf.h> +#include <unistd.h> +#include "libbpf.h" +#include "bpf_load.h" + +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]); + printf("loading %s\n",filename); + + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + f = popen("taskset 1 ping localhost", "r"); + (void) f; + read_trace_pipe(); The test does not really test return values. The following test is probably better: bpf prog: get pidns_info and put it into the map. user space app: 1. get device, nsid, pid from getpid(), "stat -L /proc/<pid>/ns/pid". I think you could use "stat" syscall to do this as well. 2. attach bpf prog to some kernel operation, for example, an tracepoint or kernel function. 3. add some operation to trigger the tracepoint or kernel funciton (there are some examples in kernel:samples/bpf you can look.) 4. get the pidns_info from the bpf map and compare the result is the same. + return 0; +} diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c new file mode 100644 index 0000000..f8ed052 --- /dev/null +++ b/samples/bpf/trace_ns_info_user_kern.c @@ -0,0 +1,40 @@ +/* Copyright (c) 2017 cneirabustos@... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct pidns_info { + u64 ns_id; + u32 ns_tgid; + u32 ns_pid; +}; + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + */ +SEC("kprobe/__netif_receive_skb_core") +int bpf_prog1(struct pt_regs *ctx) +{ + struct pidns_info nsinfo; + int ok=0; + char fmt[] = "ns_id %lld tgid %d pid %d\n"; + ok = bpf_get_current_pidns_info(&nsinfo,sizeof (nsinfo)); + + if(!ok) + bpf_trace_printk(fmt, sizeof(fmt), nsinfo.ns_id, + nsinfo.ns_tgid, nsinfo.ns_pid); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d50ac34..202102d 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -64,6 +64,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt;
+static int (*bpf_get_current_pidns_info)(void *buf, int buf_size) = + (void *) BPF_FUNC_get_current_pidns_info; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */
|

neirac
Hi,
I'll add the device major/minor numbers into the pidns_info structure and work on those 4 points. Thanks for your guidance on this.
Bests
toggle quoted message
Show quoted text
On Sat, Sep 30, 2017 at 9:55 PM, Y Song <ys114321@...> wrote: On Thu, Sep 28, 2017 at 2:02 PM, carlos antonio neira bustos
< cneirabustos@...> wrote:
> Hi All,
>
> I'm still working this issue https://github.com/iovisor/bcc/issues/1329.
> I have added tests under samples/bpf, here is test calling this new helper.
>
> Inside the container
>
> ping-10619 [000] d.s1 5319.547909: 0x00000001: ns_id 4026532197 tgid 10619
> pid 10619
>
> Outside the container
>
> ping-12174 [000] d.s1 5480.582818: 0x00000001: ns_id 4026531836 tgid 12174
> pid 12174
>
>
> Let me know if something needs to be changed.
>
> Thanks again for your help and comments.
>
> Here is the patch
>
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index b69e7a5..34b608e 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -227,6 +227,12 @@ struct bpf_event_entry {
> struct rcu_head rcu;
> };
>
> +struct bpf_current_pidns_info {
> + u64 ns_id;
> + u32 tgid;
> + u32 pid;
> +};
During linux plumbers conference, I talked to
Eric Biederman (kernel namespace maintainer).
The below is an example output of "stat -L /proc/self/ns/pid":
-bash-4.3$ stat -L /proc/self/ns/pid
File: '/proc/self/ns/pid'
Size: 0 Blocks: 0 IO Block: 4096 regular empty file
Device: 3h/3d Inode: 4026531836 Links: 1
Access: (0444/-r--r--r--) Uid: ( 0/ root) Gid: ( 0/ root)
Context: system_u:object_r:nsfs_t:s0
Access: 2017-09-28 21:21:41.496571299 -0700
Modify: 2017-09-28 21:21:41.496571299 -0700
Change: 2017-09-28 21:21:41.496571299 -0700
Birth: -
-bash-4.3$
You will notice that there is a "Device" field with major and minor
number. Currently, all namespace files will have the same device.
However, in the future, it is possible (maybe under really rare
cases) that different pid_ns files may belong to different device.
So he suggests that we should put device major/minor numbers
in the pidns_info structure as well. Could you help do that as well?
> +
> u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
> u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>
> @@ -375,6 +381,8 @@ extern const struct bpf_func_proto
> bpf_skb_vlan_push_proto;
> extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
> extern const struct bpf_func_proto bpf_get_stackid_proto;
>
> +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
> +
> /* Shared helpers among cBPF and eBPF. */
> void bpf_user_rnd_init_once(void);
> u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e99e3e6..c1b94fa 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -539,6 +539,15 @@ union bpf_attr {
> * @mode: operation mode (enum bpf_adj_room_mode)
> * @flags: reserved for future use
> * Return: 0 on success or negative error code
> + *
> + * int bpf_get_current_pidns_info(void *buf, int size_of_buf)
> + * stores the following namespace data into
> + * bpf_current_pins_info struct:
> + * namespace id
> + * tgid inside namespace
> + * pid inside namespace
> + * Return: 0 on success or negative error
> + *
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -591,7 +600,9 @@ union bpf_attr {
> FN(get_socket_uid), \
> FN(set_hash), \
> FN(setsockopt), \
> - FN(skb_adjust_room),
> + FN(skb_adjust_room), \
> + FN(get_current_pidns_info),
> +
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which
> helper
> * function eBPF program intends to call
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index ad5f559..c81ffa0 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1379,6 +1379,9 @@ const struct bpf_func_proto
> bpf_get_current_pid_tgid_proto __weak;
> const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> const struct bpf_func_proto bpf_get_current_comm_proto __weak;
>
> +const struct bpf_func_proto bpf_get_current_pidns_info __weak;
> +
> +
> const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> {
> return NULL;
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 3d24e23..682d623 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -18,6 +18,7 @@
> #include <linux/sched.h>
> #include <linux/uidgid.h>
> #include <linux/filter.h>
> +#include <linux/pid_namespace.h>
>
> /* If kernel subsystem is allowing eBPF programs to call this function,
> * inside its own verifier_ops->get_func_proto() callback it should return
> @@ -179,3 +180,45 @@ const struct bpf_func_proto bpf_get_current_comm_proto
> = {
> .arg1_type = ARG_PTR_TO_UNINIT_MEM,
> .arg2_type = ARG_CONST_SIZE,
> };
> +
> +BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size)
> +{
> +
> + if (!buf)
> + return -EINVAL;
> +
> + struct task_struct *ts = current;
> + struct task_struct *ns_task = NULL;
> + const struct cred *cred = NULL;
> + pid_t pid = 0;
> +
> + if (unlikely(!ts))
> + return -EINVAL;
> +
> + ((struct bpf_current_pidns_info*)buf)->ns_id =
> + ts->nsproxy->pid_ns_for_children->ns.inum;
> +
> + pid = task_pid_nr_ns(ts, ts->nsproxy->pid_ns_for_children);
> +
> + if (!pid)
> + return -EINVAL;
> +
> + ns_task = find_task_by_pid_ns(pid,
> ts->nsproxy->pid_ns_for_children);
>
I did not check the code. But pid name space could be deeply nested.
Have you tested the case for more than two level nesting?
> + if (unlikely(!ns_task))
> + return -EINVAL;
> +
> + ((struct bpf_current_pidns_info*)buf)-> tgid = ns_task->tgid;
> + ((struct bpf_current_pidns_info*)buf)-> pid = ns_task->pid;
> +
> + return 0;
> +}
> +
> +
> +const struct bpf_func_proto bpf_get_current_pidns_info_ proto = {
> + .func = bpf_get_current_pidns_info,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_UNINIT_MEM,
> + .arg2_type = ARG_CONST_SIZE,
> +};
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index dc498b6..06297e7 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -487,6 +487,8 @@ static const struct bpf_func_proto
> *tracing_func_proto(enum bpf_func_id func_id)
> return &bpf_get_prandom_u32_proto;
> case BPF_FUNC_probe_read_str:
> return &bpf_probe_read_str_proto;
> + case BPF_FUNC_get_current_pidns_ info:
> + return &bpf_get_current_pidns_info_ proto;
> default:
> return NULL;
> }
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index 87246be..0af924f 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -37,6 +37,7 @@ hostprogs-y += xdp_tx_iptunnel
> hostprogs-y += test_map_in_map
> hostprogs-y += per_socket_stats_example
> hostprogs-y += load_sock_ops
> +hostprogs-y += trace_ns_info
>
> # Libbpf dependencies
> LIBBPF := ../../tools/lib/bpf/bpf.o
> @@ -78,7 +79,7 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF)
> lwt_len_hist_user.o
> xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
> test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
> per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
> -
> +trace_ns_info-objs := bpf_load.o $(LIBBPF) trace_ns_info_user.o
> # Tell kbuild to always build the programs
> always := $(hostprogs-y)
> always += sockex1_kern.o
> @@ -119,6 +120,7 @@ always += tcp_bufs_kern.o
> always += tcp_cong_kern.o
> always += tcp_iw_kern.o
> always += tcp_clamp_kern.o
> +always += trace_ns_info_user_kern.o
>
> HOSTCFLAGS += -I$(objtree)/usr/include
> HOSTCFLAGS += -I$(srctree)/tools/lib/
> @@ -155,6 +157,7 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf
> HOSTLOADLIBES_lwt_len_hist += -l elf
> HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
> HOSTLOADLIBES_test_map_in_map += -lelf
> +HOSTLOADLIBES_trace_ns_info += -lelf
>
> # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on
> cmdline:
> # make samples/bpf/ LLC=~/git/llvm/build/bin/llc
> CLANG=~/git/llvm/build/bin/ clang
> diff --git a/samples/bpf/trace_ns_info_ user.c
> b/samples/bpf/trace_ns_info_ user.c
> new file mode 100644
> index 0000000..0dce083
> --- /dev/null
> +++ b/samples/bpf/trace_ns_info_ user.c
> @@ -0,0 +1,32 @@
> +/* Copyright (c) 2017 cneirabustos@...
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +
> +#include <stdio.h>
> +#include <linux/bpf.h>
> +#include <unistd.h>
> +#include "libbpf.h"
> +#include "bpf_load.h"
> +
> +int main(int ac, char **argv)
> +{
> + FILE *f;
> + char filename[256];
> +
> + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
> + printf("loading %s\n",filename);
> +
> +
> + if (load_bpf_file(filename)) {
> + printf("%s", bpf_log_buf);
> + return 1;
> + }
> +
> + f = popen("taskset 1 ping localhost", "r");
> + (void) f;
> + read_trace_pipe();
The test does not really test return values.
The following test is probably better:
bpf prog:
get pidns_info and put it into the map.
user space app:
1. get device, nsid, pid from getpid(), "stat -L /proc/<pid>/ns/pid".
I think you could use "stat" syscall to do this as well.
2. attach bpf prog to some kernel operation, for example,
an tracepoint or kernel function.
3. add some operation to trigger the tracepoint or kernel funciton
(there are some examples in kernel:samples/bpf you can look.)
4. get the pidns_info from the bpf map and compare
the result is the same.
> + return 0;
> +}
> diff --git a/samples/bpf/trace_ns_info_ user_kern.c
> b/samples/bpf/trace_ns_info_ user_kern.c
> new file mode 100644
> index 0000000..f8ed052
> --- /dev/null
> +++ b/samples/bpf/trace_ns_info_ user_kern.c
> @@ -0,0 +1,40 @@
> +/* Copyright (c) 2017 cneirabustos@...
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +#include <linux/skbuff.h>
> +#include <linux/netdevice.h>
> +#include <linux/version.h>
> +#include <uapi/linux/bpf.h>
> +#include "bpf_helpers.h"
> +
> +struct pidns_info {
> + u64 ns_id;
> + u32 ns_tgid;
> + u32 ns_pid;
> +};
> +
> +/* kprobe is NOT a stable ABI
> + * kernel functions can be removed, renamed or completely change semantics.
> + * Number of arguments and their positions can change, etc.
> + * In such case this bpf+kprobe example will no longer be meaningful
> + */
> +SEC("kprobe/__netif_receive_ skb_core")
> +int bpf_prog1(struct pt_regs *ctx)
> +{
> + struct pidns_info nsinfo;
> + int ok=0;
> + char fmt[] = "ns_id %lld tgid %d pid %d\n";
> + ok = bpf_get_current_pidns_info(& nsinfo,sizeof (nsinfo));
> +
> + if(!ok)
> + bpf_trace_printk(fmt, sizeof(fmt), nsinfo.ns_id,
> + nsinfo.ns_tgid, nsinfo.ns_pid);
> +
> + return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> +u32 _version SEC("version") = LINUX_VERSION_CODE;
> diff --git a/tools/testing/selftests/bpf/ bpf_helpers.h
> b/tools/testing/selftests/bpf/ bpf_helpers.h
> index d50ac34..202102d 100644
> --- a/tools/testing/selftests/bpf/ bpf_helpers.h
> +++ b/tools/testing/selftests/bpf/ bpf_helpers.h
> @@ -64,6 +64,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int
> optname, void *optval,
> int optlen) =
> (void *) BPF_FUNC_setsockopt;
>
> +static int (*bpf_get_current_pidns_info)( void *buf, int buf_size) =
> + (void *) BPF_FUNC_get_current_pidns_ info;
> /* llvm builtin functions that eBPF C program may use to
> * emit BPF_LD_ABS and BPF_LD_IND instructions
> */
|

neirac
Hi,
I have resumed work on issue https://github.com/iovisor/bcc/issues/1329. I just added the needed fields in this helper structure and also I updated the bcc code to use this helper, but I have a couple of questions, regarding my implementation.
- Should I be reading /proc/self/ns/pid using vfst_stat?.
- For Major/Minor I'm not sure if I should be getting the value from stat.dev or stat.rdev.
I'm missing to put bcc in a container and replicate the actual issue, but I wanted to share what I have at this point and check if I'm in the right direction or something else is needed in this same helper.
BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size) { struct task_struct *ts = current; struct task_struct *ns_task = NULL; struct pid_namespace *pidns = NULL; pid_t pid = 0; int res = 0;
const char ppath[] = "/proc/self/ns/pid";
mm_segment_t oldsegfs; struct kstat stat;
if (unlikely(!ts)) return -EINVAL;
pidns = task_active_pid_ns(ts);
if (unlikely(!pidns)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->ns_id = (u64) pidns->ns.inum;
pid = task_pid_nr_ns(ts, pidns);
if (unlikely(!pid)) return -EINVAL;
ns_task = find_task_by_pid_ns(pid, pidns);
if (unlikely(!ns_task)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->tgid = (s32)ns_task->tgid; ((struct bpf_current_pidns_info*)buf)->pid = (s32) ns_task->pid;
oldsegfs = get_fs(); set_fs(KERNEL_DS); res = vfs_stat((const char __user*)&ppath[0], &stat); set_fs(oldsegfs);
if(unlikely(res)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->major = (u32) MAJOR(stat.dev); ((struct bpf_current_pidns_info*)buf)->minor = (u32) MINOR(stat.dev);
return 0; }
const struct bpf_func_proto bpf_get_current_pidns_info_proto = { .func = bpf_get_current_pidns_info, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_RAW_STACK, .arg2_type = ARG_CONST_STACK_SIZE, };
|

neirac
Hi All,
Just tested the code last night, and it did not work. Now I have fixed it and tested in a container, it's finally working except for the the device major and minor info.
Here is the updated code for helper get_current_pidns_info
BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size) { struct task_struct *ts = current; struct pid_namespace *pidns = NULL; pid_t pid = 0; pid_t tgid = 0; int res = 0; const char *ppath = "/proc/self/ns/pid"; mm_segment_t oldsegfs; struct kstat stat;
if (unlikely(!ts)) return -EINVAL;
pidns = task_active_pid_ns(ts);
if (unlikely(!pidns)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->ns_id = (u64) pidns->ns.inum;
pid = task_pid_nr_ns(ts, pidns);
if (unlikely(!pid)) return -EINVAL;
tgid = task_tgid_nr_ns(ts, pidns);
if (unlikely(!tgid)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->tgid = (s32)tgid; ((struct bpf_current_pidns_info*)buf)->pid = (s32) pid;
oldsegfs = get_fs(); set_fs(KERNEL_DS); res = vfs_stat((const char __user*)ppath, &stat); set_fs(oldsegfs);
if(unlikely(res)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->major = (u32) MAJOR(stat.dev); ((struct bpf_current_pidns_info*)buf)->minor = (u32) MINOR(stat.dev);
return 0; }
const struct bpf_func_proto bpf_get_current_pidns_info_proto = { .func = bpf_get_current_pidns_info, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_RAW_STACK, .arg2_type = ARG_CONST_STACK_SIZE, };
And a BCC test program
#!/usr/bin/python
from bcc import BPF from os import getpid import sys, os
devinfo= os.lstat('/proc/self/ns/pid'); # Get major and minor device number # define BPF program prog = """ struct nsinfo_t { u64 ns; s32 pid; s32 tgid; s32 major; s32 minor; };
BPF_HASH(nsdata,struct nsinfo_t);
int collect_ns(void *ctx) { struct nsinfo_t ns = {}; u64 zero=0, *val; bpf_get_current_pidns_info(&ns,sizeof (ns)); val = nsdata.lookup_or_init(&ns,&zero); (*val)++; return 0; } """
# load BPF program b = BPF(text=prog) b.attach_kprobe(event="sys_read", fn_name="collect_ns") fo = open("./test.py", "r")
minor = os.minor(devinfo.st_dev) major = os.major(devinfo.st_dev) pid = getpid(); pidns = os.readlink('/proc/%d/ns/pid' % pid).split('[')[-1][:-1]
print ('STARTING: user space pid %s ns %s major %s minor %s' % (pid, pidns, major , minor))
#exit() while 1: try: fo.read(20) nsdata = b.get_table('nsdata'); for k, v in nsdata.items(): if k.pid == pid : print ( 'OK : reading matched pid %s ns %s major %s minor %s' % (k.pid, k.ns, k.major, k.minor )) exit() else: print ( 'MAP: pid %s and ns %s' % (k.pid, k.ns)) except ValueError: continue
For testing I created a container using lxd, and copy bcc and kernel modules needed to it, the output of this test inside the container is :
root@test:~# ./test.py
STARTING: user space pid 403 ns 4026532211 major 0 minor 49 MAP: pid 1569 and ns 4026531836 MAP: pid 935 and ns 4026531836 MAP: pid 1596 and ns 4026531836 OK : reading matched pid 403 ns 4026532211 major 0 minor 3
it also works in root namespace
neirac@debian-dev:~$ sudo ./test2.py STARTING: user space pid 1708 ns 4026531836 major 0 minor 4 MAP: pid 1553 and ns 4026531836 OK : reading matched pid 1708 ns 4026531836 major 0 minor 3
But I still I have the following doubts
- Should I be reading /proc/self/ns/pid using vfst_stat?. - I'm using the Major/Minor macros but it seems it's not working Am I pointing to the wrong dev_t ?.
I'll put the full patch on the github issue for further analysis.
Bests
toggle quoted message
Show quoted text
On Thu, Jul 26, 2018 at 6:20 PM, cnb <cneirabustos@...> wrote: Hi,
I have resumed work on issue https://github.com/iovisor/bcc/issues/1329. I just added the needed fields in this helper structure and also I updated the bcc code to use this helper, but I have a couple of questions, regarding my implementation.
- Should I be reading /proc/self/ns/pid using vfst_stat?. - For Major/Minor I'm not sure if I should be getting the value from stat.dev or stat.rdev.
I'm missing to put bcc in a container and replicate the actual issue, but I wanted to share what I have at this point and check if I'm in the right direction or something else is needed in this same helper.
BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size)
{ struct task_struct *ts = current; struct task_struct *ns_task = NULL;
struct pid_namespace *pidns = NULL; pid_t pid = 0; int res = 0; const char ppath[] = "/proc/self/ns/pid";
mm_segment_t oldsegfs; struct kstat stat;
if (unlikely(!ts)) return -EINVAL;
pidns = task_active_pid_ns(ts);
if (unlikely(!pidns)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->ns_id = (u64) pidns->ns.inum;
pid = task_pid_nr_ns(ts, pidns);
if (unlikely(!pid)) return -EINVAL;
ns_task = find_task_by_pid_ns(pid, pidns);
if (unlikely(!ns_task)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->tgid = (s32)ns_task->tgid; ((struct bpf_current_pidns_info*)buf)->pid = (s32) ns_task->pid;
oldsegfs = get_fs(); set_fs(KERNEL_DS); res = vfs_stat((const char __user*)&ppath[0], &stat); set_fs(oldsegfs);
if(unlikely(res)) return -EINVAL;
((struct bpf_current_pidns_info*)buf)->major = (u32) MAJOR(stat.dev); ((struct bpf_current_pidns_info*)buf)->minor = (u32) MINOR(stat.dev);
return 0; }
const struct bpf_func_proto bpf_get_current_pidns_info_proto = { .func = bpf_get_current_pidns_info, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_RAW_STACK, .arg2_type = ARG_CONST_STACK_SIZE, };
|