Re: New helper bpf_get_current_pidns_info


Yonghong Song
 

On Thu, Sep 28, 2017 at 2:02 PM, carlos antonio neira bustos
<cneirabustos@...> wrote:
Hi All,

I'm still working this issue https://github.com/iovisor/bcc/issues/1329.
I have added tests under samples/bpf, here is test calling this new helper.

Inside the container

ping-10619 [000] d.s1 5319.547909: 0x00000001: ns_id 4026532197 tgid 10619
pid 10619

Outside the container

ping-12174 [000] d.s1 5480.582818: 0x00000001: ns_id 4026531836 tgid 12174
pid 12174


Let me know if something needs to be changed.

Thanks again for your help and comments.

Here is the patch


diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b69e7a5..34b608e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -227,6 +227,12 @@ struct bpf_event_entry {
struct rcu_head rcu;
};

+struct bpf_current_pidns_info {
+ u64 ns_id;
+ u32 tgid;
+ u32 pid;
+};
During linux plumbers conference, I talked to
Eric Biederman (kernel namespace maintainer).
The below is an example output of "stat -L /proc/self/ns/pid":

-bash-4.3$ stat -L /proc/self/ns/pid
File: '/proc/self/ns/pid'
Size: 0 Blocks: 0 IO Block: 4096 regular empty file
Device: 3h/3d Inode: 4026531836 Links: 1
Access: (0444/-r--r--r--) Uid: ( 0/ root) Gid: ( 0/ root)
Context: system_u:object_r:nsfs_t:s0
Access: 2017-09-28 21:21:41.496571299 -0700
Modify: 2017-09-28 21:21:41.496571299 -0700
Change: 2017-09-28 21:21:41.496571299 -0700
Birth: -
-bash-4.3$

You will notice that there is a "Device" field with major and minor
number. Currently, all namespace files will have the same device.
However, in the future, it is possible (maybe under really rare
cases) that different pid_ns files may belong to different device.

So he suggests that we should put device major/minor numbers
in the pidns_info structure as well. Could you help do that as well?

+
u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

@@ -375,6 +381,8 @@ extern const struct bpf_func_proto
bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;

+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
+
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e99e3e6..c1b94fa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -539,6 +539,15 @@ union bpf_attr {
* @mode: operation mode (enum bpf_adj_room_mode)
* @flags: reserved for future use
* Return: 0 on success or negative error code
+ *
+ * int bpf_get_current_pidns_info(void *buf, int size_of_buf)
+ * stores the following namespace data into
+ * bpf_current_pins_info struct:
+ * namespace id
+ * tgid inside namespace
+ * pid inside namespace
+ * Return: 0 on success or negative error
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -591,7 +600,9 @@ union bpf_attr {
FN(get_socket_uid), \
FN(set_hash), \
FN(setsockopt), \
- FN(skb_adjust_room),
+ FN(skb_adjust_room), \
+ FN(get_current_pidns_info),
+

/* integer value in 'imm' field of BPF_CALL instruction selects which
helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ad5f559..c81ffa0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1379,6 +1379,9 @@ const struct bpf_func_proto
bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;

+const struct bpf_func_proto bpf_get_current_pidns_info __weak;
+
+
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
return NULL;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3d24e23..682d623 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -179,3 +180,45 @@ const struct bpf_func_proto bpf_get_current_comm_proto
= {
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE,
};
+
+BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size)
+{
+
+ if (!buf)
+ return -EINVAL;
+
+ struct task_struct *ts = current;
+ struct task_struct *ns_task = NULL;
+ const struct cred *cred = NULL;
+ pid_t pid = 0;
+
+ if (unlikely(!ts))
+ return -EINVAL;
+
+ ((struct bpf_current_pidns_info*)buf)->ns_id =
+ ts->nsproxy->pid_ns_for_children->ns.inum;
+
+ pid = task_pid_nr_ns(ts, ts->nsproxy->pid_ns_for_children);
+
+ if (!pid)
+ return -EINVAL;
+
+ ns_task = find_task_by_pid_ns(pid,
ts->nsproxy->pid_ns_for_children);
I did not check the code. But pid name space could be deeply nested.
Have you tested the case for more than two level nesting?

+ if (unlikely(!ns_task))
+ return -EINVAL;
+
+ ((struct bpf_current_pidns_info*)buf)->tgid = ns_task->tgid;
+ ((struct bpf_current_pidns_info*)buf)->pid = ns_task->pid;
+
+ return 0;
+}
+
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index dc498b6..06297e7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -487,6 +487,8 @@ static const struct bpf_func_proto
*tracing_func_proto(enum bpf_func_id func_id)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_probe_read_str:
return &bpf_probe_read_str_proto;
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 87246be..0af924f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -37,6 +37,7 @@ hostprogs-y += xdp_tx_iptunnel
hostprogs-y += test_map_in_map
hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -78,7 +79,7 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF)
lwt_len_hist_user.o
xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
-
+trace_ns_info-objs := bpf_load.o $(LIBBPF) trace_ns_info_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
always += sockex1_kern.o
@@ -119,6 +120,7 @@ always += tcp_bufs_kern.o
always += tcp_cong_kern.o
always += tcp_iw_kern.o
always += tcp_clamp_kern.o
+always += trace_ns_info_user_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -155,6 +157,7 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf
HOSTLOADLIBES_lwt_len_hist += -l elf
HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
+HOSTLOADLIBES_trace_ns_info += -lelf

# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on
cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc
CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/trace_ns_info_user.c
b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 0000000..0dce083
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,32 @@
+/* Copyright (c) 2017 cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n",filename);
+
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
The test does not really test return values.
The following test is probably better:
bpf prog:
get pidns_info and put it into the map.

user space app:
1. get device, nsid, pid from getpid(), "stat -L /proc/<pid>/ns/pid".
I think you could use "stat" syscall to do this as well.
2. attach bpf prog to some kernel operation, for example,
an tracepoint or kernel function.
3. add some operation to trigger the tracepoint or kernel funciton
(there are some examples in kernel:samples/bpf you can look.)
4. get the pidns_info from the bpf map and compare
the result is the same.

+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c
b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 0000000..f8ed052
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,40 @@
+/* Copyright (c) 2017 cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct pidns_info {
+ u64 ns_id;
+ u32 ns_tgid;
+ u32 ns_pid;
+};
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct pidns_info nsinfo;
+ int ok=0;
+ char fmt[] = "ns_id %lld tgid %d pid %d\n";
+ ok = bpf_get_current_pidns_info(&nsinfo,sizeof (nsinfo));
+
+ if(!ok)
+ bpf_trace_printk(fmt, sizeof(fmt), nsinfo.ns_id,
+ nsinfo.ns_tgid, nsinfo.ns_pid);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h
b/tools/testing/selftests/bpf/bpf_helpers.h
index d50ac34..202102d 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -64,6 +64,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int
optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;

+static int (*bpf_get_current_pidns_info)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
*/

Join iovisor-dev@lists.iovisor.org to automatically receive all group messages.