From d2b497a973fcb76a6b7a552f081b83a1edd91c86 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Mon, 12 Dec 2022 10:16:00 +0000 Subject: [PATCH 1/7] docs/bpf: Reword docs for BPF_MAP_TYPE_SK_STORAGE Improve the grammar of the function descriptions and highlight that the key is a socket fd. Fixes: f3212ad5b7e9 ("docs/bpf: Add documentation for BPF_MAP_TYPE_SK_STORAGE") Reported-by: Martin KaFai Lau Signed-off-by: Donald Hunter Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20221212101600.56026-1-donald.hunter@gmail.com --- Documentation/bpf/map_sk_storage.rst | 56 +++++++++++++++------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/Documentation/bpf/map_sk_storage.rst b/Documentation/bpf/map_sk_storage.rst index 047e16c8aaa8..4e9d23ab9ecd 100644 --- a/Documentation/bpf/map_sk_storage.rst +++ b/Documentation/bpf/map_sk_storage.rst @@ -34,13 +34,12 @@ bpf_sk_storage_get() void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) -Socket-local storage can be retrieved using the ``bpf_sk_storage_get()`` -helper. The helper gets the storage from ``sk`` that is associated with ``map``. -If the ``BPF_LOCAL_STORAGE_GET_F_CREATE`` flag is used then -``bpf_sk_storage_get()`` will create the storage for ``sk`` if it does not -already exist. ``value`` can be used together with -``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise it -will be zero initialized. Returns a pointer to the storage on success, or +Socket-local storage for ``map`` can be retrieved from socket ``sk`` using the +``bpf_sk_storage_get()`` helper. If the ``BPF_LOCAL_STORAGE_GET_F_CREATE`` +flag is used then ``bpf_sk_storage_get()`` will create the storage for ``sk`` +if it does not already exist. ``value`` can be used together with +``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise +it will be zero initialized. Returns a pointer to the storage on success, or ``NULL`` in case of failure. .. note:: @@ -54,9 +53,9 @@ bpf_sk_storage_delete() long bpf_sk_storage_delete(struct bpf_map *map, void *sk) -Socket-local storage can be deleted using the ``bpf_sk_storage_delete()`` -helper. The helper deletes the storage from ``sk`` that is identified by -``map``. Returns ``0`` on success, or negative error in case of failure. +Socket-local storage for ``map`` can be deleted from socket ``sk`` using the +``bpf_sk_storage_delete()`` helper. Returns ``0`` on success, or negative +error in case of failure. User space ---------- @@ -68,16 +67,20 @@ bpf_map_update_elem() int bpf_map_update_elem(int map_fd, const void *key, const void *value, __u64 flags) -Socket-local storage for the socket identified by ``key`` belonging to -``map_fd`` can be added or updated using the ``bpf_map_update_elem()`` libbpf -function. ``key`` must be a pointer to a valid ``fd`` in the user space -program. The ``flags`` parameter can be used to control the update behaviour: +Socket-local storage for map ``map_fd`` can be added or updated locally to a +socket using the ``bpf_map_update_elem()`` libbpf function. The socket is +identified by a `socket` ``fd`` stored in the pointer ``key``. The pointer +``value`` has the data to be added or updated to the socket ``fd``. The type +and size of ``value`` should be the same as the value type of the map +definition. -- ``BPF_ANY`` will create storage for ``fd`` or update existing storage. -- ``BPF_NOEXIST`` will create storage for ``fd`` only if it did not already - exist, otherwise the call will fail with ``-EEXIST``. -- ``BPF_EXIST`` will update existing storage for ``fd`` if it already exists, - otherwise the call will fail with ``-ENOENT``. +The ``flags`` parameter can be used to control the update behaviour: + +- ``BPF_ANY`` will create storage for `socket` ``fd`` or update existing storage. +- ``BPF_NOEXIST`` will create storage for `socket` ``fd`` only if it did not + already exist, otherwise the call will fail with ``-EEXIST``. +- ``BPF_EXIST`` will update existing storage for `socket` ``fd`` if it already + exists, otherwise the call will fail with ``-ENOENT``. Returns ``0`` on success, or negative error in case of failure. @@ -88,10 +91,10 @@ bpf_map_lookup_elem() int bpf_map_lookup_elem(int map_fd, const void *key, void *value) -Socket-local storage for the socket identified by ``key`` belonging to -``map_fd`` can be retrieved using the ``bpf_map_lookup_elem()`` libbpf -function. ``key`` must be a pointer to a valid ``fd`` in the user space -program. Returns ``0`` on success, or negative error in case of failure. +Socket-local storage for map ``map_fd`` can be retrieved from a socket using +the ``bpf_map_lookup_elem()`` libbpf function. The storage is retrieved from +the socket identified by a `socket` ``fd`` stored in the pointer +``key``. Returns ``0`` on success, or negative error in case of failure. bpf_map_delete_elem() ~~~~~~~~~~~~~~~~~~~~~ @@ -100,9 +103,10 @@ bpf_map_delete_elem() int bpf_map_delete_elem(int map_fd, const void *key) -Socket-local storage for the socket identified by ``key`` belonging to -``map_fd`` can be deleted using the ``bpf_map_delete_elem()`` libbpf -function. Returns ``0`` on success, or negative error in case of failure. +Socket-local storage for map ``map_fd`` can be deleted from a socket using the +``bpf_map_delete_elem()`` libbpf function. The storage is deleted from the +socket identified by a `socket` ``fd`` stored in the pointer ``key``. Returns +``0`` on success, or negative error in case of failure. Examples ======== From ec9230b18b45853287298d70be23f8ec6bd44ff0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 12 Dec 2022 17:22:24 -0800 Subject: [PATCH 2/7] selftests/bpf: Fix a selftest compilation error with CONFIG_SMP=n Kernel test robot reported bpf selftest build failure when CONFIG_SMP is not set. The error message looks below: >> progs/rcu_read_lock.c:256:34: error: no member named 'last_wakee' in 'struct task_struct' last_wakee = task->real_parent->last_wakee; ~~~~~~~~~~~~~~~~~ ^ 1 error generated. When CONFIG_SMP is not set, the field 'last_wakee' is not available in struct 'task_struct'. Hence the above compilation failure. To fix the issue, let us choose another field 'group_leader' which is available regardless of CONFIG_SMP set or not. Fixes: fe147956fca4 ("bpf/selftests: Add selftests for new task kfuncs") Fixes: 48671232fcb8 ("selftests/bpf: Add tests for bpf_rcu_read_lock()") Reported-by: kernel test robot Signed-off-by: David Vernet Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20221213012224.379581-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/rcu_read_lock.c | 8 ++++---- tools/testing/selftests/bpf/progs/task_kfunc_failure.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 125f908024d3..5cecbdbbb16e 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -288,13 +288,13 @@ out: SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int task_untrusted_non_rcuptr(void *ctx) { - struct task_struct *task, *last_wakee; + struct task_struct *task, *group_leader; task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); - /* the pointer last_wakee marked as untrusted */ - last_wakee = task->real_parent->last_wakee; - (void)bpf_task_storage_get(&map_a, last_wakee, 0, 0); + /* the pointer group_leader marked as untrusted */ + group_leader = task->real_parent->group_leader; + (void)bpf_task_storage_get(&map_a, group_leader, 0, 0); bpf_rcu_read_unlock(); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 87fa1db9d9b5..1b47b94dbca0 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -73,7 +73,7 @@ int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 cl struct task_struct *acquired; /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */ - acquired = bpf_task_acquire(task->last_wakee); + acquired = bpf_task_acquire(task->group_leader); bpf_task_release(acquired); return 0; From a8dfde09c90109e3a98af54847e91bde7dc2d5c2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 13 Dec 2022 14:05:00 -0800 Subject: [PATCH 3/7] selftests/bpf: Select CONFIG_FUNCTION_ERROR_INJECTION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BPF selftests require CONFIG_FUNCTION_ERROR_INJECTION to work. However, CONFIG_FUNCTION_ERROR_INJECTION is no longer 'y' by default after recent changes. As a result, we are seeing errors like the following from BPF CI: bpf_testmod_test_read() is not modifiable __x64_sys_setdomainname is not sleepable __x64_sys_getpgid is not sleepable Fix this by explicitly selecting CONFIG_FUNCTION_ERROR_INJECTION in the selftest config. Fixes: a4412fdd49dc ("error-injection: Add prompt for function error injection") Reported-by: Daniel Müller Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Daniel Müller Link: https://lore.kernel.org/bpf/20221213220500.3427947-1-song@kernel.org --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 612f699dc4f7..63cd4ab70171 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -16,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_FUNCTION_TRACER=y CONFIG_GENEVE=y CONFIG_IKCONFIG=y From e89f3edffb860a0f54a9ed16deadb7a4a1fa3862 Mon Sep 17 00:00:00 2001 From: Milan Landaverde Date: Tue, 13 Dec 2022 12:57:14 -0500 Subject: [PATCH 4/7] bpf: prevent leak of lsm program after failed attach In [0], we added the ability to bpf_prog_attach LSM programs to cgroups, but in our validation to make sure the prog is meant to be attached to BPF_LSM_CGROUP, we return too early if the check fails. This results in lack of decrementing prog's refcnt (through bpf_prog_put) leaving the LSM program alive past the point of the expected lifecycle. This fix allows for the decrement to take place. [0] https://lore.kernel.org/all/20220628174314.1216643-4-sdf@google.com/ Fixes: 69fd337a975c ("bpf: per-cgroup lsm flavor") Signed-off-by: Milan Landaverde Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20221213175714.31963-1-milan@mdaverde.com --- kernel/bpf/syscall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 35972afb6850..64131f88c553 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3518,9 +3518,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_LSM: if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) - return -EINVAL; - - ret = cgroup_bpf_prog_attach(attr, ptype, prog); + ret = -EINVAL; + else + ret = cgroup_bpf_prog_attach(attr, ptype, prog); break; default: ret = -EINVAL; From 4121d4481b72501aa4d22680be4ea1096d69d133 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Dec 2022 13:35:42 +0100 Subject: [PATCH 5/7] bpf: Synchronize dispatcher update with bpf_dispatcher_xdp_func Hao Sun reported crash in dispatcher image [1]. Currently we don't have any sync between bpf_dispatcher_update and bpf_dispatcher_xdp_func, so following race is possible: cpu 0: cpu 1: bpf_prog_run_xdp ... bpf_dispatcher_xdp_func in image at offset 0x0 bpf_dispatcher_update update image at offset 0x800 bpf_dispatcher_update update image at offset 0x0 in image at offset 0x0 -> crash Fixing this by synchronizing dispatcher image update (which is done in bpf_dispatcher_update function) with bpf_dispatcher_xdp_func that reads and execute the dispatcher image. Calling synchronize_rcu after updating and installing new image ensures that readers leave old image before it's changed in the next dispatcher update. The update itself is locked with dispatcher's mutex. The bpf_prog_run_xdp is called under local_bh_disable and synchronize_rcu will wait for it to leave [2]. [1] https://lore.kernel.org/bpf/Y5SFho7ZYXr9ifRn@krava/T/#m00c29ece654bc9f332a17df493bbca33e702896c [2] https://lore.kernel.org/bpf/0B62D35A-E695-4B7A-A0D4-774767544C1A@gmail.com/T/#mff43e2c003ae99f4a38f353c7969be4c7162e877 Reported-by: Hao Sun Signed-off-by: Jiri Olsa Acked-by: Yonghong Song Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20221214123542.1389719-1-jolsa@kernel.org Signed-off-by: Martin KaFai Lau --- kernel/bpf/dispatcher.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index c19719f48ce0..fa3e9225aedc 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -125,6 +125,11 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) __BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func); + /* Make sure all the callers executing the previous/old half of the + * image leave it, so following update call can modify it safely. + */ + synchronize_rcu(); + if (new) d->image_off = noff; } From 1c123c567fb138ebd187480b7fc0610fcb0851f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 15 Dec 2022 00:02:53 +0100 Subject: [PATCH 6/7] bpf: Resolve fext program type when checking map compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_prog_map_compatible() check makes sure that BPF program types are not mixed inside BPF map types that can contain programs (tail call maps, cpumaps and devmaps). It does this by setting the fields of the map->owner struct to the values of the first program being checked against, and rejecting any subsequent programs if the values don't match. One of the values being set in the map owner struct is the program type, and since the code did not resolve the prog type for fext programs, the map owner type would be set to PROG_TYPE_EXT and subsequent loading of programs of the target type into the map would fail. This bug is seen in particular for XDP programs that are loaded as PROG_TYPE_EXT using libxdp; these cannot insert programs into devmaps and cpumaps because the check fails as described above. Fix the bug by resolving the fext program type to its target program type as elsewhere in the verifier. v3: - Add Yonghong's ACK Fixes: f45d5b6ce2e8 ("bpf: generalise tail call map compatibility check") Acked-by: Yonghong Song Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221214230254.790066-1-toke@redhat.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7f98dec6e90f..b334f4ddc4d5 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2092,6 +2092,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp) { + enum bpf_prog_type prog_type = resolve_prog_type(fp); bool ret; if (fp->kprobe_override) @@ -2102,12 +2103,12 @@ bool bpf_prog_map_compatible(struct bpf_map *map, /* There's no owner yet where we could check for * compatibility. */ - map->owner.type = fp->type; + map->owner.type = prog_type; map->owner.jited = fp->jited; map->owner.xdp_has_frags = fp->aux->xdp_has_frags; ret = true; } else { - ret = map->owner.type == fp->type && + ret = map->owner.type == prog_type && map->owner.jited == fp->jited && map->owner.xdp_has_frags == fp->aux->xdp_has_frags; } From f506439ec3dee11e0e77b0a1f3fb3eec22c97873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 15 Dec 2022 00:02:54 +0100 Subject: [PATCH 7/7] selftests/bpf: Add a test for using a cpumap from an freplace-to-XDP program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a simple test for inserting an XDP program into a cpumap that is "owned" by an XDP program that was loaded as PROG_TYPE_EXT (as libxdp does). Prior to the kernel fix this would fail because the map type ownership would be set to PROG_TYPE_EXT instead of being resolved to PROG_TYPE_XDP. v5: - Fix a few nits from Andrii, add his ACK v4: - Use skeletons for selftest v3: - Update comment to better explain the cause - Add Yonghong's ACK Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221214230254.790066-2-toke@redhat.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 48 +++++++++++++++++++ .../selftests/bpf/progs/freplace_progmap.c | 24 ++++++++++ 2 files changed, 72 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/freplace_progmap.c diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index d1e32e792536..20f5fa0fcec9 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -4,6 +4,8 @@ #include #include #include "bind4_prog.skel.h" +#include "freplace_progmap.skel.h" +#include "xdp_dummy.skel.h" typedef int (*test_cb)(struct bpf_object *obj); @@ -500,6 +502,50 @@ cleanup: bind4_prog__destroy(skel); } +static void test_func_replace_progmap(void) +{ + struct bpf_cpumap_val value = { .qsize = 1 }; + struct freplace_progmap *skel = NULL; + struct xdp_dummy *tgt_skel = NULL; + __u32 key = 0; + int err; + + skel = freplace_progmap__open(); + if (!ASSERT_OK_PTR(skel, "prog_open")) + return; + + tgt_skel = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(tgt_skel, "tgt_prog_load")) + goto out; + + err = bpf_program__set_attach_target(skel->progs.xdp_cpumap_prog, + bpf_program__fd(tgt_skel->progs.xdp_dummy_prog), + "xdp_dummy_prog"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = freplace_progmap__load(skel); + if (!ASSERT_OK(err, "obj_load")) + goto out; + + /* Prior to fixing the kernel, loading the PROG_TYPE_EXT 'redirect' + * program above will cause the map owner type of 'cpumap' to be set to + * PROG_TYPE_EXT. This in turn will cause the bpf_map_update_elem() + * below to fail, because the program we are inserting into the map is + * of PROG_TYPE_XDP. After fixing the kernel, the initial ownership will + * be correctly resolved to the *target* of the PROG_TYPE_EXT program + * (i.e., PROG_TYPE_XDP) and the map update will succeed. + */ + value.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_drop_prog); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map), + &key, &value, 0); + ASSERT_OK(err, "map_update"); + +out: + xdp_dummy__destroy(tgt_skel); + freplace_progmap__destroy(skel); +} + /* NOTE: affect other tests, must run in serial mode */ void serial_test_fexit_bpf2bpf(void) { @@ -525,4 +571,6 @@ void serial_test_fexit_bpf2bpf(void) test_func_replace_global_func(); if (test__start_subtest("fentry_to_cgroup_bpf")) test_fentry_to_cgroup_bpf(); + if (test__start_subtest("func_replace_progmap")) + test_func_replace_progmap(); } diff --git a/tools/testing/selftests/bpf/progs/freplace_progmap.c b/tools/testing/selftests/bpf/progs/freplace_progmap.c new file mode 100644 index 000000000000..81b56b9aa7d6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_progmap.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __type(key, __u32); + __type(value, struct bpf_cpumap_val); + __uint(max_entries, 1); +} cpu_map SEC(".maps"); + +SEC("xdp/cpumap") +int xdp_drop_prog(struct xdp_md *ctx) +{ + return XDP_DROP; +} + +SEC("freplace") +int xdp_cpumap_prog(struct xdp_md *ctx) +{ + return bpf_redirect_map(&cpu_map, 0, XDP_PASS); +} + +char _license[] SEC("license") = "GPL";