[PATCH] BPF: helpers: New helper bpf_get_current_pidns_info to obtain namespace data from current task.


neirac
 

Hi,

I have modified this patch following suggestion on bpf-next, let me know if
this looks ok. I'm working on testing it on bpf-next current (4.20) but I'm
getting -EINVAL in bpf_load when executing and example(this used to work in 4.18).



From 7aba2dadeca5a95b0eb5c54ae102014e1e3b1210 Mon Sep 17 00:00:00 2001
From: cneira <cneirabustos@...>
Date: Tue, 6 Nov 2018 18:00:52 -0300
Subject: [PATCH] [PATCH bpf-next] BPF: New helper to obtain namespace data
from current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>)
return 0;
Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns pidns;
bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
u32 pid = pidns.tgid;
u32 nsid = pidns.nsid;
if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 20 ++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 68 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 ++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 42 +++++++++++++++++++
tools/include/uapi/linux/bpf.h | 22 ++++++++++
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 196 insertions(+), 1 deletion(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 33014ae73103..2abd879cca0d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -835,6 +835,7 @@ extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;

extern const struct bpf_func_proto bpf_get_local_storage_proto;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17ab47a..6af707a2f77e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2257,6 +2257,18 @@ union bpf_attr {
*
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2346,6 +2358,7 @@ union bpf_attr {
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
FN(sk_release), \
+ FN(get_current_pidns_info), \
FN(map_push_elem), \
FN(map_pop_elem), \
FN(map_peek_elem), \
@@ -2941,5 +2954,10 @@ struct bpf_flow_keys {
};
};
};
-
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a796e0799ec..cb447e081772 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1838,6 +1838,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info_proto __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a74972b07e74..fe653e869da2 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,11 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>
+#include <linux/namei.h>
+#include <linux/version.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -268,3 +273,66 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
};
#endif
#endif
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *nspid = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ struct kstat ks;
+ struct path kp;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int res = 0;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ pidns = task_active_pid_ns(current);
+
+ if (unlikely(!pidns))
+ goto clear;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ kern_path(nspid, 0, &kp);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,11,0)
+ res = vfs_getattr(&kp, &ks, STATX_ALL, 0);
+#else
+ res = vfs_getattr(&kp, &ks);
+#endif
+
+ if (unlikely(res))
+ goto clear;
+
+ pidns_info->dev = ks.dev;
+
+ return 0;
+
+clear:
+ if (pidns_info)
+ memset((void *)pidns, 0, (size_t) size);
+
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 08fcfe440c63..477300fa0d60 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -565,6 +565,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index be0a961450bc..ffa0b4af0659 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -167,6 +169,7 @@ always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o

KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..e06d08db6f30
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbatim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n", filename);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..cc53f2950114
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+ struct bpf_pidns_info nsinfo;
+ int ok = 0;
+
+ ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+ if (ok == 0)
+ bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+ (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+ return 0;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17ab47a..f87bbc4efc4f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2257,6 +2257,18 @@ union bpf_attr {
*
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2346,6 +2358,7 @@ union bpf_attr {
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
FN(sk_release), \
+ FN(get_current_pidns_info), \
FN(map_push_elem), \
FN(map_pop_elem), \
FN(map_peek_elem), \
@@ -2942,4 +2955,13 @@ struct bpf_flow_keys {
};
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/ns/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 686e57ce40f4..36b15ca39483 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -168,6 +168,9 @@ static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
(void *) BPF_FUNC_skb_vlan_push;
static int (*bpf_skb_vlan_pop)(void *ctx) =
(void *) BPF_FUNC_skb_vlan_pop;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.11.0

On Wed, Aug 08, 2018 at 10:07:33PM -0700, Y Song wrote:
On Wed, Aug 8, 2018 at 6:32 PM, Carlos Neira <cneirabustos@...> wrote:
Yonghong Song,
Let me know what you think of this last patch
Looks good. Only one comment in the below. After you addressed it, you can
send the patch to netdev with patch prefix [PATCH bpf-next].



From 49d9dbcbd52c82dc44b6351ac1824538a6db155a Mon Sep 17 00:00:00 2001
From: cneira <cneirabustos@...>
Date: Wed, 8 Aug 2018 17:40:54 -0400
Subject: [PATCH] BPF: helpers: New helper to obtain namespace data from
current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>)
return 0;

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns));
u32 pid = pidns.tgid;
u32 nsid = pidns.nsid;
if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 25 +++++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 25 +++++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 202 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;

extern const struct bpf_func_proto bpf_get_local_storage_proto;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
Please move this definition to the end of file.

+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *ppath = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ mm_segment_t oldsegfs;
+ struct kstat stat;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int res = 0;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ pidns = task_active_pid_ns(current);
+
+ if (unlikely(!pidns))
+ goto clear;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ oldsegfs = get_fs();
+ set_fs(KERNEL_DS);
+ res = vfs_stat((const char __user *)ppath, &stat);
+ set_fs(oldsegfs);
+
+ if (unlikely(res))
+ goto clear;
+
+ pidns_info->dev = stat.dev;
+
+ return 0;
+
+clear:
+ if (pidns_info)
+ memset((void *)pidns, 0, (size_t) size);
+
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..e06d08db6f30
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbatim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n", filename);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+ struct bpf_pidns_info nsinfo;
+ int ok = 0;
+
+ ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+ if (ok == 0)
+ bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+ (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..c3060f3284e0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.11.0




On Wed, 2018-08-08 at 20:01 -0400, carlos antonio neira bustos wrote:
---------- Forwarded message ---------
From: Quentin Monnet <quentin.monnet@...>
Date: Wed, Aug 8, 2018, 10:56 AM
Subject: Re: [iovisor-dev] [PATCH] BPF: helpers: New helper
bpf_get_current_pidns_info to obtain namespace data from current
task.
To: Y Song <ys114321@...>, Carlos Neira <cneirabustos@...
Cc: IO Visor - dev <iovisor-dev@...>, Eric Biederman <e
biederm@...>


2018-08-07 16:03 UTC-0700 ~ Yonghong Song <ys114321@...>
On Tue, Aug 7, 2018 at 3:47 PM, Carlos Neira <cneirabustos@...
m> wrote:
Quentin,

Thanks a lot for your feedback, I think I'm getting closer to
finish this one.
I have included the changes suggested, let me know if there is
anything else that needs to be changed or improved.

Thanks for the changes!

Best regards,
Quentin


Yonghong Song
 

On Wed, Aug 8, 2018 at 6:32 PM, Carlos Neira <cneirabustos@...> wrote:
Yonghong Song,
Let me know what you think of this last patch
Looks good. Only one comment in the below. After you addressed it, you can
send the patch to netdev with patch prefix [PATCH bpf-next].



From 49d9dbcbd52c82dc44b6351ac1824538a6db155a Mon Sep 17 00:00:00 2001
From: cneira <cneirabustos@...>
Date: Wed, 8 Aug 2018 17:40:54 -0400
Subject: [PATCH] BPF: helpers: New helper to obtain namespace data from
current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>)
return 0;

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns));
u32 pid = pidns.tgid;
u32 nsid = pidns.nsid;
if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 25 +++++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 25 +++++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 202 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;

extern const struct bpf_func_proto bpf_get_local_storage_proto;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
Please move this definition to the end of file.

+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *ppath = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ mm_segment_t oldsegfs;
+ struct kstat stat;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int res = 0;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ pidns = task_active_pid_ns(current);
+
+ if (unlikely(!pidns))
+ goto clear;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ oldsegfs = get_fs();
+ set_fs(KERNEL_DS);
+ res = vfs_stat((const char __user *)ppath, &stat);
+ set_fs(oldsegfs);
+
+ if (unlikely(res))
+ goto clear;
+
+ pidns_info->dev = stat.dev;
+
+ return 0;
+
+clear:
+ if (pidns_info)
+ memset((void *)pidns, 0, (size_t) size);
+
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..e06d08db6f30
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbatim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n", filename);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+ struct bpf_pidns_info nsinfo;
+ int ok = 0;
+
+ ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+ if (ok == 0)
+ bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+ (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..c3060f3284e0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.11.0




On Wed, 2018-08-08 at 20:01 -0400, carlos antonio neira bustos wrote:
---------- Forwarded message ---------
From: Quentin Monnet <quentin.monnet@...>
Date: Wed, Aug 8, 2018, 10:56 AM
Subject: Re: [iovisor-dev] [PATCH] BPF: helpers: New helper
bpf_get_current_pidns_info to obtain namespace data from current
task.
To: Y Song <ys114321@...>, Carlos Neira <cneirabustos@...
Cc: IO Visor - dev <iovisor-dev@...>, Eric Biederman <e
biederm@...>


2018-08-07 16:03 UTC-0700 ~ Yonghong Song <ys114321@...>
On Tue, Aug 7, 2018 at 3:47 PM, Carlos Neira <cneirabustos@...
m> wrote:
Quentin,

Thanks a lot for your feedback, I think I'm getting closer to
finish this one.
I have included the changes suggested, let me know if there is
anything else that needs to be changed or improved.

Thanks for the changes!

Best regards,
Quentin


neirac
 

Yonghong Song,
Let me know what you think of this last patch

From 49d9dbcbd52c82dc44b6351ac1824538a6db155a Mon Sep 17 00:00:00 2001
From: cneira <cneirabustos@...>
Date: Wed, 8 Aug 2018 17:40:54 -0400
Subject: [PATCH] BPF: helpers: New helper to obtain namespace data from
current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>)
return 0;

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns));
u32 pid = pidns.tgid;
u32 nsid = pidns.nsid;
if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 25 +++++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 25 +++++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 202 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;

extern const struct bpf_func_proto bpf_get_local_storage_proto;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *ppath = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ mm_segment_t oldsegfs;
+ struct kstat stat;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int res = 0;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ pidns = task_active_pid_ns(current);
+
+ if (unlikely(!pidns))
+ goto clear;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ oldsegfs = get_fs();
+ set_fs(KERNEL_DS);
+ res = vfs_stat((const char __user *)ppath, &stat);
+ set_fs(oldsegfs);
+
+ if (unlikely(res))
+ goto clear;
+
+ pidns_info->dev = stat.dev;
+
+ return 0;
+
+clear:
+ if (pidns_info)
+ memset((void *)pidns, 0, (size_t) size);
+
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..e06d08db6f30
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbatim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n", filename);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+ struct bpf_pidns_info nsinfo;
+ int ok = 0;
+
+ ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+ if (ok == 0)
+ bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+ (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..c3060f3284e0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.11.0

On Wed, 2018-08-08 at 20:01 -0400, carlos antonio neira bustos wrote:
---------- Forwarded message ---------
From: Quentin Monnet <quentin.monnet@...>
Date: Wed, Aug 8, 2018, 10:56 AM
Subject: Re: [iovisor-dev] [PATCH] BPF: helpers: New helper
bpf_get_current_pidns_info to obtain namespace data from current
task.
To: Y Song <ys114321@...>, Carlos Neira <cneirabustos@...
Cc: IO Visor - dev <iovisor-dev@...>, Eric Biederman <e
biederm@...>


2018-08-07 16:03 UTC-0700 ~ Yonghong Song <ys114321@...>
On Tue, Aug 7, 2018 at 3:47 PM, Carlos Neira <cneirabustos@...
m> wrote:
Quentin,

Thanks a lot for your feedback, I think I'm getting closer to
finish this one.
I have included the changes suggested, let me know if there is
anything else that needs to be changed or improved.

Thanks for the changes!

Best regards,
Quentin


Quentin Monnet
 

2018-08-07 16:03 UTC-0700 ~ Yonghong Song <ys114321@...>
On Tue, Aug 7, 2018 at 3:47 PM, Carlos Neira <cneirabustos@...> wrote:
Quentin,

Thanks a lot for your feedback, I think I'm getting closer to finish this one.
I have included the changes suggested, let me know if there is anything else that needs to be changed or improved.
Thanks for the changes!

Best regards,
Quentin


Yonghong Song
 

On Tue, Aug 7, 2018 at 3:47 PM, Carlos Neira <cneirabustos@...> wrote:
Quentin,

Thanks a lot for your feedback, I think I'm getting closer to finish this one.
I have included the changes suggested, let me know if there is anything else that needs to be changed or improved.

From fceaa121bbff332e7ad4efd41660d96c006cbb3d Mon Sep 17 00:00:00 2001
From: cneira <cneirabustos@...>
Date: Tue, 7 Aug 2018 18:43:20 -0400
Subject: [PATCH] BPF: helpers: New helper to obtain namespace data from
current task

This helper obtains the active namespace from current and returns pid, tgid,
device major/minor and namespace as seen from that ns, allowing to instrument
process inside a container.
Major and minor are obtained from /proc/self/pid, as in the future it's
possible that different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid is used. This helper addresses this limitation
bpf_get_current_pid_tgid()

returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>) { return 0; }

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns));
u32 pid = pidns.tgid;
if (pid != <pid_arg_passed_in>) { return 0; }
This may not enough as different name spaces could have the same tgid.
Could you add comparison for pidns.nsid as well? You also need to tell how
user can find nsid inside the container for a particular pid.


Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 25 +++++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 25 +++++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 202 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;

extern const struct bpf_func_proto bpf_get_local_storage_proto;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
It is awkward to put the definition here between bpf_attach_type
and the below MAX_BPF_ATTACH_TYPE. Please put this definition
at the end of bpf.h.

Remember to sync the include/uapi/linux/bpf.h to tools directory
after the patch.

+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *ppath = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ mm_segment_t oldsegfs;
+ struct kstat stat;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int res = 0;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ pidns = task_active_pid_ns(current);
+
+ if (unlikely(!pidns))
+ goto clear;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ oldsegfs = get_fs();
+ set_fs(KERNEL_DS);
+ res = vfs_stat((const char __user *)ppath, &stat);
+ set_fs(oldsegfs);
+
+ if (unlikely(res))
+ goto clear;
+
+ pidns_info->dev = stat.dev;
+
+ return 0;
+
+clear:
+ if (pidns_info)
+ memset((void *)pidns, 0, (size_t) size);
+
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..bd6b358054e6
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbattim from tracex1_user.c, it's used
verbattim -> verbatim

+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n", filename);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+ struct bpf_pidns_info nsinfo;
+ int ok = 0;
+
+ ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+ if (ok == 0)
+ bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+ (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..c3060f3284e0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.11.0




On Tue, 2018-08-07 at 11:45 +0100, Quentin Monnet wrote:
2018-08-006 22:55 UTC-0400 ~ cnb <cneirabustos@...>

This helper obtains the active namespace from current and returns
pid, tgid,
device major/minor and namespace id as seen from that ns, allowing
to do pid
filtering inside a container.
Major and minor are obtained from /proc/self/pid, as in the future
it's
possible that different pid_ns files may belong to different
devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux
plumbers
conference.

Currently bpf_get_current_pid_tgid() is used to do pid filtering in
bcc's
scripts but this helper returns the pid as seen by the root
namespace, which is
fine when a bcc script is not executed inside a container.
When a bcc script is executed inside a container, pid filtering
will not work
if bpf_get_current_pid_tgid is used. This helper addresses this
limitation
returning the pid as it's seen by the current namespace where the
script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as
it can be
used to do pid filtering inside a container.

For example a bcc script using bpf_get_current_pid_tgid()
(tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>) { return 0; }

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns_info pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct
bpf_pidns_info));
u32 pid = pidns.tgid;
if (pid != <pid_arg_passed_in>) { return 0; }
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 20 +++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64
+++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45
++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 20 +++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 192 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c
[…]

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,7 +2123,14 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32
size_of_pidns)
+ * Description
+ * Obtains from current task values for pid,
namespace, tgid and
+ * device major/minor from /proc/self/ns/pid
+ * Return
+ * 0 on success -EINVAL on error.
Hi Carlos,

I just gave a look at the helper documentation. The text is supposed
to
be turned into a man page for users not familiar with the helper, so
we're trying to keep style consistent. Before you send your patch to
netdev, could you please specify in the Description what the
arguments
for the helper are? And while doing so, emphasize their names
*between
stars* (have a look at the descriptions for the other helpers)?
Additional nits: s/Obtains/Obtain/, and a period at the end of the
sentence if you don't mind… Thanks a lot!

*/
+
No need for a new line here.

#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
@@ -2196,7 +2213,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects
which helper
* function eBPF program intends to call
[…]

diff --git a/tools/include/uapi/linux/bpf.h
b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
(And please do not forget to report the changes to this file as
well.)

Thanks!
Quentin


neirac
 

Quentin,

Thanks a lot for your feedback, I think I'm getting closer to finish this one.
I have included the changes suggested, let me know if there is anything else that needs to be changed or improved.

From fceaa121bbff332e7ad4efd41660d96c006cbb3d Mon Sep 17 00:00:00 2001
From: cneira <cneirabustos@...>
Date: Tue, 7 Aug 2018 18:43:20 -0400
Subject: [PATCH] BPF: helpers: New helper to obtain namespace data from
current task

This helper obtains the active namespace from current and returns pid, tgid,
device major/minor and namespace as seen from that ns, allowing to instrument
process inside a container.
Major and minor are obtained from /proc/self/pid, as in the future it's
possible that different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>) { return 0; }

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns));
u32 pid = pidns.tgid;
if (pid != <pid_arg_passed_in>) { return 0; }

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 25 +++++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 25 +++++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 202 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;

extern const struct bpf_func_proto bpf_get_local_storage_proto;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *ppath = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ mm_segment_t oldsegfs;
+ struct kstat stat;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int res = 0;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ pidns = task_active_pid_ns(current);
+
+ if (unlikely(!pidns))
+ goto clear;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ oldsegfs = get_fs();
+ set_fs(KERNEL_DS);
+ res = vfs_stat((const char __user *)ppath, &stat);
+ set_fs(oldsegfs);
+
+ if (unlikely(res))
+ goto clear;
+
+ pidns_info->dev = stat.dev;
+
+ return 0;
+
+clear:
+ if (pidns_info)
+ memset((void *)pidns, 0, (size_t) size);
+
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..bd6b358054e6
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbattim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n", filename);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+ struct bpf_pidns_info nsinfo;
+ int ok = 0;
+
+ ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+ if (ok == 0)
+ bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+ (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..22fe6b0b6a74 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,6 +2123,18 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Copies into *pidns* pid, namespace id and tgid as seen by the
+ * current namespace and also device from /proc/self/ns/pid.
+ * *size_of_pidns* must be the size of *pidns*
+ *
+ * This helper is used when pid filtering is needed inside a
+ * container as bpf_get_current_tgid() helper returns always the
+ * pid id as seen by the root namespace.
+ * Return
+ * 0 on success -EINVAL on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2218,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..c3060f3284e0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.11.0

On Tue, 2018-08-07 at 11:45 +0100, Quentin Monnet wrote:
2018-08-006 22:55 UTC-0400 ~ cnb <cneirabustos@...>

This helper obtains the active namespace from current and returns
pid, tgid,
device major/minor and namespace id as seen from that ns, allowing
to do pid
filtering inside a container.
Major and minor are obtained from /proc/self/pid, as in the future
it's
possible that different pid_ns files may belong to different
devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux
plumbers
conference.

Currently bpf_get_current_pid_tgid() is used to do pid filtering in
bcc's
scripts but this helper returns the pid as seen by the root
namespace, which is
fine when a bcc script is not executed inside a container.
When a bcc script is executed inside a container, pid filtering
will not work
if bpf_get_current_pid_tgid is used. This helper addresses this
limitation
returning the pid as it's seen by the current namespace where the
script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as
it can be
used to do pid filtering inside a container.

For example a bcc script using bpf_get_current_pid_tgid()
(tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>) { return 0; }

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns_info pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct
bpf_pidns_info));
u32 pid = pidns.tgid;
if (pid != <pid_arg_passed_in>) { return 0; }
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 20 +++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64
+++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45
++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 20 +++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 192 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c
[…]

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,7 +2123,14 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32
size_of_pidns)
+ * Description
+ * Obtains from current task values for pid,
namespace, tgid and
+ * device major/minor from /proc/self/ns/pid
+ * Return
+ * 0 on success -EINVAL on error.
Hi Carlos,

I just gave a look at the helper documentation. The text is supposed
to
be turned into a man page for users not familiar with the helper, so
we're trying to keep style consistent. Before you send your patch to
netdev, could you please specify in the Description what the
arguments
for the helper are? And while doing so, emphasize their names
*between
stars* (have a look at the descriptions for the other helpers)?
Additional nits: s/Obtains/Obtain/, and a period at the end of the
sentence if you don't mind… Thanks a lot!

*/
+
No need for a new line here.

#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
@@ -2196,7 +2213,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects
which helper
* function eBPF program intends to call
[…]

diff --git a/tools/include/uapi/linux/bpf.h
b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
(And please do not forget to report the changes to this file as
well.)

Thanks!
Quentin


Quentin Monnet
 

2018-08-006 22:55 UTC-0400 ~ cnb <cneirabustos@...>

This helper obtains the active namespace from current and returns pid, tgid,
device major/minor and namespace id as seen from that ns, allowing to do pid
filtering inside a container.
Major and minor are obtained from /proc/self/pid, as in the future it's
possible that different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid() is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace, which is
fine when a bcc script is not executed inside a container.
When a bcc script is executed inside a container, pid filtering will not work
if bpf_get_current_pid_tgid is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>) { return 0; }

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns_info pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns_info));
u32 pid = pidns.tgid;
if (pid != <pid_arg_passed_in>) { return 0; }
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 20 +++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 20 +++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 192 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c
[…]

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,7 +2123,14 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Obtains from current task values for pid, namespace, tgid and
+ * device major/minor from /proc/self/ns/pid
+ * Return
+ * 0 on success -EINVAL on error.
Hi Carlos,

I just gave a look at the helper documentation. The text is supposed to
be turned into a man page for users not familiar with the helper, so
we're trying to keep style consistent. Before you send your patch to
netdev, could you please specify in the Description what the arguments
for the helper are? And while doing so, emphasize their names *between
stars* (have a look at the descriptions for the other helpers)?
Additional nits: s/Obtains/Obtain/, and a period at the end of the
sentence if you don't mind… Thanks a lot!

*/
+
No need for a new line here.

#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
@@ -2196,7 +2213,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
[…]

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
(And please do not forget to report the changes to this file as well.)

Thanks!
Quentin


neirac
 

This helper obtains the active namespace from current and returns pid, tgid,
device major/minor and namespace id as seen from that ns, allowing to do pid
filtering inside a container.
Major and minor are obtained from /proc/self/pid, as in the future it's
possible that different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid() is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace, which is
fine when a bcc script is not executed inside a container.
When a bcc script is executed inside a container, pid filtering will not work
if bpf_get_current_pid_tgid is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != <pid_arg_passed_in>) { return 0; }

Could be modified to use bpf_get_current_pidns_info() as follows:

struct bpf_pidns_info pidns;
bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns_info));
u32 pid = pidns.tgid;
if (pid != <pid_arg_passed_in>) { return 0; }
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 20 +++++++++-
kernel/bpf/core.c | 1 +
kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 2 +
samples/bpf/Makefile | 3 ++
samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 20 +++++++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
10 files changed, 192 insertions(+), 2 deletions(-)
create mode 100644 samples/bpf/trace_ns_info_user.c
create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;

extern const struct bpf_func_proto bpf_get_local_storage_proto;

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,7 +2123,14 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Obtains from current task values for pid, namespace, tgid and
+ * device major/minor from /proc/self/ns/pid
+ * Return
+ * 0 on success -EINVAL on error.
*/
+
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
@@ -2196,7 +2213,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+ size)
+{
+ const char *ppath = "/proc/self/ns/pid";
+ struct pid_namespace *pidns = NULL;
+ mm_segment_t oldsegfs;
+ struct kstat stat;
+ pid_t tgid = 0;
+ pid_t pid = 0;
+ int res = 0;
+
+ if (unlikely(size != sizeof(struct bpf_pidns_info)))
+ goto clear;
+
+ pidns = task_active_pid_ns(current);
+
+ if (unlikely(!pidns))
+ goto clear;
+
+ pidns_info->nsid = pidns->ns.inum;
+ pid = task_pid_nr_ns(current, pidns);
+
+ if (unlikely(!pid))
+ goto clear;
+
+ tgid = task_tgid_nr_ns(current, pidns);
+
+ if (unlikely(!tgid))
+ goto clear;
+
+ pidns_info->tgid = (u32) tgid;
+ pidns_info->pid = (u32) pid;
+
+ oldsegfs = get_fs();
+ set_fs(KERNEL_DS);
+ res = vfs_stat((const char __user *)ppath, &stat);
+ set_fs(oldsegfs);
+
+ if (unlikely(res))
+ goto clear;
+
+ pidns_info->dev = stat.dev;
+
+ return 0;
+
+clear:
+ if (pidns_info)
+ memset((void *)pidns, 0, (size_t) size);
+
+ return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+ .func = bpf_get_current_pidns_info,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_get_current_pidns_info:
+ return &bpf_get_current_pidns_info_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..bd6b358054e6
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbattim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+ printf("loading %s\n", filename);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping localhost", "r");
+ (void) f;
+ read_trace_pipe();
+ return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+ struct bpf_pidns_info nsinfo;
+ int ok = 0;
+
+ ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+ if (ok == 0)
+ bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+ (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..b53e5c21805a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -173,6 +173,16 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE
};

+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+ __u32 dev;
+ __u32 nsid;
+ __u32 tgid;
+ __u32 pid;
+};
+
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -2113,7 +2123,14 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ * Description
+ * Obtains from current task values for pid, namespace, tgid and
+ * device major/minor from /proc/self/ns/pid
+ * Return
+ * 0 on success -EINVAL on error.
*/
+
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
@@ -2196,7 +2213,8 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(get_current_pidns_info),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..fbc807620839 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info* buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_get_current_pidns_info;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.11.0