From 1b1a01db17af7a44cae8d8d7d7a18dbb056be40f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:13 +0800 Subject: [PATCH 01/16] bpf: Remove migrate_{disable|enable} from LPM trie Both bpf program and bpf syscall may invoke ->update or ->delete operation for LPM trie. For bpf program, its running context has already disabled migration explicitly through (migrate_disable()) or implicitly through (preempt_disable() or disable irq). For bpf syscall, the migration is disabled through the use of bpf_disable_instrumentation() before invoking the corresponding map operation callback. Therefore, it is safe to remove the migrate_{disable|enable){} pair from LPM trie. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index f8bc1e096182..e8a772e64324 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -289,16 +289,11 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) } static struct lpm_trie_node *lpm_trie_node_alloc(struct lpm_trie *trie, - const void *value, - bool disable_migration) + const void *value) { struct lpm_trie_node *node; - if (disable_migration) - migrate_disable(); node = bpf_mem_cache_alloc(&trie->ma); - if (disable_migration) - migrate_enable(); if (!node) return NULL; @@ -342,10 +337,8 @@ static long trie_update_elem(struct bpf_map *map, if (key->prefixlen > trie->max_prefixlen) return -EINVAL; - /* Allocate and fill a new node. Need to disable migration before - * invoking bpf_mem_cache_alloc(). - */ - new_node = lpm_trie_node_alloc(trie, value, true); + /* Allocate and fill a new node */ + new_node = lpm_trie_node_alloc(trie, value); if (!new_node) return -ENOMEM; @@ -425,8 +418,7 @@ static long trie_update_elem(struct bpf_map *map, goto out; } - /* migration is disabled within the locked scope */ - im_node = lpm_trie_node_alloc(trie, NULL, false); + im_node = lpm_trie_node_alloc(trie, NULL); if (!im_node) { trie->n_entries--; ret = -ENOMEM; @@ -452,11 +444,9 @@ static long trie_update_elem(struct bpf_map *map, out: raw_spin_unlock_irqrestore(&trie->lock, irq_flags); - migrate_disable(); if (ret) bpf_mem_cache_free(&trie->ma, new_node); bpf_mem_cache_free_rcu(&trie->ma, free_node); - migrate_enable(); return ret; } @@ -555,10 +545,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) out: raw_spin_unlock_irqrestore(&trie->lock, irq_flags); - migrate_disable(); bpf_mem_cache_free_rcu(&trie->ma, free_parent); bpf_mem_cache_free_rcu(&trie->ma, free_node); - migrate_enable(); return ret; } From ea5b229630a631ee6a72e1f58bc40029efc1daf8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:14 +0800 Subject: [PATCH 02/16] bpf: Remove migrate_{disable|enable} in ->map_for_each_callback BPF program may call bpf_for_each_map_elem(), and it will call the ->map_for_each_callback callback of related bpf map. Considering the running context of bpf program has already disabled migration, remove the unnecessary migrate_{disable|enable} pair in the implementations of ->map_for_each_callback. To ensure the guarantee will not be voilated later, also add cant_migrate() check in the implementations. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 6 ++---- kernel/bpf/hashtab.c | 11 +++++------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 6cdbb4c33d31..eb28c0f219ee 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -735,13 +735,13 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback u64 ret = 0; void *val; + cant_migrate(); + if (flags != 0) return -EINVAL; is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; array = container_of(map, struct bpf_array, map); - if (is_percpu) - migrate_disable(); for (i = 0; i < map->max_entries; i++) { if (is_percpu) val = this_cpu_ptr(array->pptrs[i]); @@ -756,8 +756,6 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback break; } - if (is_percpu) - migrate_enable(); return num_elems; } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3ec941a0ea41..42af7fac61b9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2208,17 +2208,18 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_ bool is_percpu; u64 ret = 0; + cant_migrate(); + if (flags != 0) return -EINVAL; is_percpu = htab_is_percpu(htab); roundup_key_size = round_up(map->key_size, 8); - /* disable migration so percpu value prepared here will be the - * same as the one seen by the bpf program with bpf_map_lookup_elem(). + /* migration has been disabled, so percpu value prepared here will be + * the same as the one seen by the bpf program with + * bpf_map_lookup_elem(). */ - if (is_percpu) - migrate_disable(); for (i = 0; i < htab->n_buckets; i++) { b = &htab->buckets[i]; rcu_read_lock(); @@ -2244,8 +2245,6 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_ rcu_read_unlock(); } out: - if (is_percpu) - migrate_enable(); return num_elems; } From 53f2ba0b1cc087a597b43e63d35f355e9348bd61 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:15 +0800 Subject: [PATCH 03/16] bpf: Remove migrate_{disable|enable} in htab_elem_free htab_elem_free() has two call-sites: delete_all_elements() has already disabled migration, free_htab_elem() is invoked by other 4 functions: __htab_map_lookup_and_delete_elem, __htab_map_lookup_and_delete_batch, htab_map_update_elem and htab_map_delete_elem. BPF syscall has already disabled migration before invoking ->map_update_elem, ->map_delete_elem, and ->map_lookup_and_delete_elem callbacks for hash map. __htab_map_lookup_and_delete_batch() also disables migration before invoking free_htab_elem(). ->map_update_elem() and ->map_delete_elem() of hash map may be invoked by BPF program and the running context of BPF program has already disabled migration. Therefore, it is safe to remove the migration_{disable|enable} pair in htab_elem_free() Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/hashtab.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 42af7fac61b9..bccae537f9d2 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -897,11 +897,9 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { check_and_free_fields(htab, l); - migrate_disable(); if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); bpf_mem_cache_free(&htab->ma, l); - migrate_enable(); } static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) From 25dc65f75b08281a42823673d1751c82618ce7a3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:16 +0800 Subject: [PATCH 04/16] bpf: Remove migrate_{disable|enable} from bpf_cgrp_storage_lock helpers Three callers of bpf_cgrp_storage_lock() are ->map_lookup_elem, ->map_update_elem, ->map_delete_elem from bpf syscall. BPF syscall for these three operations of cgrp storage has already disabled migration. Two call sites of bpf_cgrp_storage_trylock() are bpf_cgrp_storage_get(), and bpf_cgrp_storage_delete() helpers. The running contexts of these helpers have already disabled migration. Therefore, it is safe to remove migrate_disable() for these callers. However, bpf_cgrp_storage_free() also invokes bpf_cgrp_storage_lock() and its running context doesn't disable migration. Therefore, also add the missed migrate_{disabled|enable} in bpf_cgrp_storage_free(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-5-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_cgrp_storage.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 20f05de92e9c..d5dc65bb1755 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -15,22 +15,20 @@ static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy); static void bpf_cgrp_storage_lock(void) { - migrate_disable(); + cant_migrate(); this_cpu_inc(bpf_cgrp_storage_busy); } static void bpf_cgrp_storage_unlock(void) { this_cpu_dec(bpf_cgrp_storage_busy); - migrate_enable(); } static bool bpf_cgrp_storage_trylock(void) { - migrate_disable(); + cant_migrate(); if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) { this_cpu_dec(bpf_cgrp_storage_busy); - migrate_enable(); return false; } return true; @@ -47,17 +45,18 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup) { struct bpf_local_storage *local_storage; + migrate_disable(); rcu_read_lock(); local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); - if (!local_storage) { - rcu_read_unlock(); - return; - } + if (!local_storage) + goto out; bpf_cgrp_storage_lock(); bpf_local_storage_destroy(local_storage); bpf_cgrp_storage_unlock(); +out: rcu_read_unlock(); + migrate_enable(); } static struct bpf_local_storage_data * From 9e6c958b546692fcdd0da2c2c3b2ac1da6e0233f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:17 +0800 Subject: [PATCH 05/16] bpf: Remove migrate_{disable|enable} from bpf_task_storage_lock helpers Three callers of bpf_task_storage_lock() are ->map_lookup_elem, ->map_update_elem, ->map_delete_elem from bpf syscall. BPF syscall for these three operations of task storage has already disabled migration. Another two callers are bpf_task_storage_get() and bpf_task_storage_delete() helpers which will be used by BPF program. Two callers of bpf_task_storage_trylock() are bpf_task_storage_get() and bpf_task_storage_delete() helpers. The running contexts of these helpers have already disabled migration. Therefore, it is safe to remove migrate_{disable|enable} from task storage lock helpers for these call sites. However, bpf_task_storage_free() also invokes bpf_task_storage_lock() and its running context doesn't disable migration, therefore, add the missed migrate_{disable|enable} in bpf_task_storage_free(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-6-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_task_storage.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index bf7fa15fdcc6..1109475953c0 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -24,22 +24,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy); static void bpf_task_storage_lock(void) { - migrate_disable(); + cant_migrate(); this_cpu_inc(bpf_task_storage_busy); } static void bpf_task_storage_unlock(void) { this_cpu_dec(bpf_task_storage_busy); - migrate_enable(); } static bool bpf_task_storage_trylock(void) { - migrate_disable(); + cant_migrate(); if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { this_cpu_dec(bpf_task_storage_busy); - migrate_enable(); return false; } return true; @@ -72,18 +70,19 @@ void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; + migrate_disable(); rcu_read_lock(); local_storage = rcu_dereference(task->bpf_storage); - if (!local_storage) { - rcu_read_unlock(); - return; - } + if (!local_storage) + goto out; bpf_task_storage_lock(); bpf_local_storage_destroy(local_storage); bpf_task_storage_unlock(); +out: rcu_read_unlock(); + migrate_enable(); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) From e319cdc8956645b6e29a3809924647953500b7e1 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:18 +0800 Subject: [PATCH 06/16] bpf: Disable migration when destroying inode storage When destroying inode storage, it invokes bpf_local_storage_destroy() to remove all storage elements saved in the inode storage. The destroy procedure will call bpf_selem_free() to free the element, and bpf_selem_free() calls bpf_obj_free_fields() to free the special fields in map value (e.g., kptr). Since kptrs may be allocated from bpf memory allocator, migrate_{disable|enable} pairs are necessary for the freeing of these kptrs. To simplify reasoning about when migrate_disable() is needed for the freeing of these dynamically-allocated kptrs, let the caller to guarantee migration is disabled before invoking bpf_obj_free_fields(). Therefore, the patch adds migrate_{disable|enable} pair in bpf_inode_storage_free(). The migrate_{disable|enable} pairs in the underlying implementation of bpf_obj_free_fields() will be removed by the following patch. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-7-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_inode_storage.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index a51c82dee1bd..15a3eb9b02d9 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -62,16 +62,17 @@ void bpf_inode_storage_free(struct inode *inode) if (!bsb) return; + migrate_disable(); rcu_read_lock(); local_storage = rcu_dereference(bsb->storage); - if (!local_storage) { - rcu_read_unlock(); - return; - } + if (!local_storage) + goto out; bpf_local_storage_destroy(local_storage); +out: rcu_read_unlock(); + migrate_enable(); } static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) From 7d1032d1e3031ef38d1a360380d755f75cb639b8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:19 +0800 Subject: [PATCH 07/16] bpf: Disable migration when destroying sock storage When destroying sock storage, it invokes bpf_local_storage_destroy() to remove all storage elements saved in the sock storage. The destroy procedure will call bpf_selem_free() to free the element, and bpf_selem_free() calls bpf_obj_free_fields() to free the special fields in map value (e.g., kptr). Since kptrs may be allocated from bpf memory allocator, migrate_{disable|enable} pairs are necessary for the freeing of these kptrs. To simplify reasoning about when migrate_disable() is needed for the freeing of these dynamically-allocated kptrs, let the caller to guarantee migration is disabled before invoking bpf_obj_free_fields(). Therefore, the patch adds migrate_{disable|enable} pair in bpf_sock_storage_free(). The migrate_{disable|enable} pairs in the underlying implementation of bpf_obj_free_fields() will be removed by The following patch. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-8-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- net/core/bpf_sk_storage.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 2f4ed83a75ae..aa536ecd5d39 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -50,15 +50,16 @@ void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; + migrate_disable(); rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); - if (!sk_storage) { - rcu_read_unlock(); - return; - } + if (!sk_storage) + goto out; bpf_local_storage_destroy(sk_storage); +out: rcu_read_unlock(); + migrate_enable(); } static void bpf_sk_storage_map_free(struct bpf_map *map) From dfccfc47bde53f5df5bf42486d12a8a2ecdcea60 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:20 +0800 Subject: [PATCH 08/16] bpf: Disable migration when cloning sock storage bpf_sk_storage_clone() will call bpf_selem_free() to free the clone element when the allocation of new sock storage fails. bpf_selem_free() will call check_and_free_fields() to free the special fields in the element. Since the allocated element is not visible to bpf syscall or bpf program when bpf_local_storage_alloc() fails, these special fields in the element must be all zero when invoking bpf_selem_free(). To be uniform with other callers of bpf_selem_free(), disabling migration when cloning sock storage. Adding migrate_{disable|enable} pair also benefits the potential switching from kzalloc to bpf memory allocator for sock storage. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-9-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- net/core/bpf_sk_storage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index aa536ecd5d39..7d41cde1bcca 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -161,6 +161,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); + migrate_disable(); rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); @@ -213,6 +214,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) out: rcu_read_unlock(); + migrate_enable(); /* In case of an error, don't free anything explicitly here, the * caller is responsible to call bpf_sk_storage_free. From 090d7f2e640b265335ac0a5a8e09a99f7f28f567 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:21 +0800 Subject: [PATCH 09/16] bpf: Disable migration in bpf_selem_free_rcu bpf_selem_free_rcu() calls bpf_obj_free_fields() to free the special fields in map value (e.g., kptr). Since kptrs may be allocated from bpf memory allocator, migrate_{disable|enable} pairs are necessary for the freeing of these kptrs. To simplify reasoning about when migrate_disable() is needed for the freeing of these dynamically-allocated kptrs, let the caller to guarantee migration is disabled before invoking bpf_obj_free_fields(). Therefore, the patch adds migrate_{disable|enable} pair in bpf_selem_free_rcu(). The migrate_{disable|enable} pairs in the underlying implementation of bpf_obj_free_fields() will be removed by the following patch. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-10-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index e94820f6b0cd..615a3034baeb 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -217,7 +217,10 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) selem = container_of(rcu, struct bpf_local_storage_elem, rcu); /* The bpf_local_storage_map_free will wait for rcu_barrier */ smap = rcu_dereference_check(SDATA(selem)->smap, 1); + + migrate_disable(); bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + migrate_enable(); bpf_mem_cache_raw_free(selem); } From 4b7e7cd1c105cc5881f4054805dfbb92aa24eb78 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:22 +0800 Subject: [PATCH 10/16] bpf: Disable migration before calling ops->map_free() The freeing of all map elements may invoke bpf_obj_free_fields() to free the special fields in the map value. Since these special fields may be allocated from bpf memory allocator, migrate_{disable|enable} pairs are necessary for the freeing of these special fields. To simplify reasoning about when migrate_disable() is needed for the freeing of these special fields, let the caller to guarantee migration is disabled before invoking bpf_obj_free_fields(). Therefore, disabling migration before calling ops->map_free() to simplify the freeing of map values or special fields allocated from bpf memory allocator. After disabling migration in bpf_map_free(), there is no need for additional migration_{disable|enable} pairs in these ->map_free() callbacks. Remove these redundant invocations. The migrate_{disable|enable} pairs in the underlying implementation of bpf_obj_free_fields() will be removed by the following patch. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-11-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 8 ++------ kernel/bpf/hashtab.c | 6 ++---- kernel/bpf/range_tree.c | 2 -- kernel/bpf/syscall.c | 8 +++++++- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 615a3034baeb..12cf6382175e 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -905,15 +905,11 @@ void bpf_local_storage_map_free(struct bpf_map *map, while ((selem = hlist_entry_safe( rcu_dereference_raw(hlist_first_rcu(&b->list)), struct bpf_local_storage_elem, map_node))) { - if (busy_counter) { - migrate_disable(); + if (busy_counter) this_cpu_inc(*busy_counter); - } bpf_selem_unlink(selem, true); - if (busy_counter) { + if (busy_counter) this_cpu_dec(*busy_counter); - migrate_enable(); - } cond_resched_rcu(); } rcu_read_unlock(); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index bccae537f9d2..40095dda891d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1500,10 +1500,9 @@ static void delete_all_elements(struct bpf_htab *htab) { int i; - /* It's called from a worker thread, so disable migration here, - * since bpf_mem_cache_free() relies on that. + /* It's called from a worker thread and migration has been disabled, + * therefore, it is OK to invoke bpf_mem_cache_free() directly. */ - migrate_disable(); for (i = 0; i < htab->n_buckets; i++) { struct hlist_nulls_head *head = select_bucket(htab, i); struct hlist_nulls_node *n; @@ -1515,7 +1514,6 @@ static void delete_all_elements(struct bpf_htab *htab) } cond_resched(); } - migrate_enable(); } static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab) diff --git a/kernel/bpf/range_tree.c b/kernel/bpf/range_tree.c index 5bdf9aadca3a..37b80a23ae1a 100644 --- a/kernel/bpf/range_tree.c +++ b/kernel/bpf/range_tree.c @@ -259,9 +259,7 @@ void range_tree_destroy(struct range_tree *rt) while ((rn = range_it_iter_first(rt, 0, -1U))) { range_it_remove(rn, rt); - migrate_disable(); bpf_mem_free(&bpf_global_ma, rn); - migrate_enable(); } } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e88797fdbeb..5d2420492946 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -835,8 +835,14 @@ static void bpf_map_free(struct bpf_map *map) struct btf_record *rec = map->record; struct btf *btf = map->btf; - /* implementation dependent freeing */ + /* implementation dependent freeing. Disabling migration to simplify + * the free of values or special fields allocated from bpf memory + * allocator. + */ + migrate_disable(); map->ops->map_free(map); + migrate_enable(); + /* Delay freeing of btf_record for maps, as map_free * callback usually needs access to them. It is better to do it here * than require each callback to do the free itself manually. From 1d2dbe7120e89090ed4f6be03d6fbadfbfff59bf Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:23 +0800 Subject: [PATCH 11/16] bpf: Remove migrate_{disable|enable} in bpf_obj_free_fields() The callers of bpf_obj_free_fields() have already guaranteed that the migration is disabled, therefore, there is no need to invoke migrate_{disable,enable} pair in bpf_obj_free_fields()'s underly implementation. This patch removes unnecessary migrate_{disable|enable} pairs from bpf_obj_free_fields() and its callees: bpf_list_head_free() and bpf_rb_root_free(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-12-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 4 ---- kernel/bpf/syscall.c | 2 -- 2 files changed, 6 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index cd5f9884d85b..bcda671feafd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2066,9 +2066,7 @@ unlock: /* The contained type can also have resources, including a * bpf_list_head which needs to be freed. */ - migrate_disable(); __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); - migrate_enable(); } } @@ -2105,9 +2103,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, obj -= field->graph_root.node_offset; - migrate_disable(); __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); - migrate_enable(); } } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5d2420492946..0daf098e3207 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -796,11 +796,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) if (!btf_is_kernel(field->kptr.btf)) { pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, field->kptr.btf_id); - migrate_disable(); __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? pointee_struct_meta->record : NULL, fields[i].type == BPF_KPTR_PERCPU); - migrate_enable(); } else { field->kptr.dtor(xchgd_field); } From 6a52b965ab6f56293dce316f382db3a9a66f0c9f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:24 +0800 Subject: [PATCH 12/16] bpf: Remove migrate_{disable,enable} in bpf_cpumask_release() When BPF program invokes bpf_cpumask_release(), the migration must have been disabled. When bpf_cpumask_release_dtor() invokes bpf_cpumask_release(), the caller bpf_obj_free_fields() also has disabled migration, therefore, it is OK to remove the unnecessary migrate_{disable|enable} pair in bpf_cpumask_release(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-13-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumask.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 33c473d676a5..cfa1c18e3a48 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -91,9 +91,7 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask) if (!refcount_dec_and_test(&cpumask->usage)) return; - migrate_disable(); bpf_mem_cache_free_rcu(&bpf_cpumask_ma, cpumask); - migrate_enable(); } __bpf_kfunc void bpf_cpumask_release_dtor(void *cpumask) From 2269b32ab00e1be663b838a1eed14408cb9ba677 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:25 +0800 Subject: [PATCH 13/16] bpf: Remove migrate_{disable|enable} from bpf_selem_alloc() bpf_selem_alloc() has two callers: (1) bpf_sk_storage_clone_elem() bpf_sk_storage_clone() has already disabled migration before invoking bpf_sk_storage_clone_elem(). (2) bpf_local_storage_update() Its callers include: cgrp/task/inode/sock storage ->map_update_elem() callbacks and bpf_{cgrp|task|inode|sk}_storage_get() helpers. These running contexts have already disabled migration Therefore, there is no need to add extra migrate_{disable|enable} pair in bpf_selem_alloc(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-14-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 12cf6382175e..4b016f29f57e 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -81,9 +81,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; if (smap->bpf_ma) { - migrate_disable(); selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags); - migrate_enable(); if (selem) /* Keep the original bpf_map_kzalloc behavior * before started using the bpf_mem_cache_alloc. From 4855a75ebf485f74b06ba85b16b71c4b71a4086d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:26 +0800 Subject: [PATCH 14/16] bpf: Remove migrate_{disable|enable} from bpf_local_storage_alloc() These two callers of bpf_local_storage_alloc() are the same as bpf_selem_alloc(): bpf_sk_storage_clone() and bpf_local_storage_update(). The running contexts of these two callers have already disabled migration, therefore, there is no need to add extra migrate_{disable|enable} pair in bpf_local_storage_alloc(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-15-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 4b016f29f57e..d78d53b92118 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -498,15 +498,11 @@ int bpf_local_storage_alloc(void *owner, if (err) return err; - if (smap->bpf_ma) { - migrate_disable(); + if (smap->bpf_ma) storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags); - migrate_enable(); - } else { + else storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), gfp_flags | __GFP_NOWARN); - } - if (!storage) { err = -ENOMEM; goto uncharge; From 7b984359e097f30e392e4eee29be7445fc391f29 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:27 +0800 Subject: [PATCH 15/16] bpf: Remove migrate_{disable|enable} from bpf_local_storage_free() bpf_local_storage_free() has three callers: 1) bpf_local_storage_alloc() Its caller must have disabled migration. 2) bpf_local_storage_destroy() Its four callers (bpf_{cgrp|inode|task|sk}_storage_free()) have already invoked migrate_disable() before invoking bpf_local_storage_destroy(). 3) bpf_selem_unlink() Its callers include: cgrp/inode/task/sk storage ->map_delete_elem callbacks, bpf_{cgrp|inode|task|sk}_storage_delete() helpers and bpf_local_storage_map_free(). All of these callers have already disabled migration before invoking bpf_selem_unlink(). Therefore, it is OK to remove migrate_{disable|enable} pair from bpf_local_storage_free(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-16-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index d78d53b92118..7c18298b139c 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -172,17 +172,14 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage, return; } - if (smap) { - migrate_disable(); + if (smap) bpf_mem_cache_free(&smap->storage_ma, local_storage); - migrate_enable(); - } else { + else /* smap could be NULL if the selem that triggered * this 'local_storage' creation had been long gone. * In this case, directly do call_rcu(). */ call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); - } } /* rcu tasks trace callback for bpf_ma == false */ From d86088e2c35de1eeaae757e49f697f8f42b288fa Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 8 Jan 2025 09:07:28 +0800 Subject: [PATCH 16/16] bpf: Remove migrate_{disable|enable} from bpf_selem_free() bpf_selem_free() has the following three callers: (1) bpf_local_storage_update It will be invoked through ->map_update_elem syscall or helpers for storage map. Migration has already been disabled in these running contexts. (2) bpf_sk_storage_clone It has already disabled migration before invoking bpf_selem_free(). (3) bpf_selem_free_list bpf_selem_free_list() has three callers: bpf_selem_unlink_storage(), bpf_local_storage_update() and bpf_local_storage_destroy(). The callers of bpf_selem_unlink_storage() includes: storage map ->map_delete_elem syscall, storage map delete helpers and bpf_local_storage_map_free(). These contexts have already disabled migration when invoking bpf_selem_unlink() which invokes bpf_selem_unlink_storage() and bpf_selem_free_list() correspondingly. bpf_local_storage_update() has been analyzed as the first caller above. bpf_local_storage_destroy() is invoked when freeing the local storage for the kernel object. Now cgroup, task, inode and sock storage have already disabled migration before invoking bpf_local_storage_destroy(). After the analyses above, it is safe to remove migrate_{disable|enable} from bpf_selem_free(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20250108010728.207536-17-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 7c18298b139c..fa56c30833ff 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -254,9 +254,7 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, * bpf_mem_cache_free will be able to reuse selem * immediately. */ - migrate_disable(); bpf_mem_cache_free(&smap->selem_ma, selem); - migrate_enable(); return; }