diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 88845aadc47d..7888ed497432 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -210,6 +210,7 @@ struct btf_field_graph_root { struct btf_field { u32 offset; + u32 size; enum btf_field_type type; union { struct btf_field_kptr kptr; @@ -225,12 +226,6 @@ struct btf_record { struct btf_field fields[]; }; -struct btf_field_offs { - u32 cnt; - u32 field_off[BTF_FIELDS_MAX]; - u8 field_sz[BTF_FIELDS_MAX]; -}; - struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -257,7 +252,6 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - struct btf_field_offs *field_offs; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ @@ -360,14 +354,14 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) +static inline void bpf_obj_init(const struct btf_record *rec, void *obj) { int i; - if (!foffs) + if (IS_ERR_OR_NULL(rec)) return; - for (i = 0; i < foffs->cnt; i++) - memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); + for (i = 0; i < rec->cnt; i++) + memset(obj + rec->fields[i].offset, 0, rec->fields[i].size); } /* 'dst' must be a temporary buffer and should not point to memory that is being @@ -379,7 +373,7 @@ static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) */ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { - bpf_obj_init(map->field_offs, dst); + bpf_obj_init(map->record, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and @@ -399,14 +393,14 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ -static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, +static inline void bpf_obj_memcpy(struct btf_record *rec, void *dst, void *src, u32 size, bool long_memcpy) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { if (long_memcpy) bpf_long_memcpy(dst, src, round_up(size, 8)); else @@ -414,49 +408,49 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false); + bpf_obj_memcpy(map->record, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true); + bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } -static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size) +static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { memset(dst, 0, size); return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memset(dst + curr_off, 0, size - curr_off); } static inline void zero_map_value(struct bpf_map *map, void *dst) { - bpf_obj_memzero(map->field_offs, dst, map->value_size); + bpf_obj_memzero(map->record, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, diff --git a/include/linux/btf.h b/include/linux/btf.h index 495250162422..813227bff58a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -113,7 +113,6 @@ struct btf_id_dtor_kfunc { struct btf_struct_meta { u32 btf_id; struct btf_record *record; - struct btf_field_offs *field_offs; }; struct btf_struct_metas { @@ -207,7 +206,6 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); -struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2c2d1fb9f410..f3c998feeccb 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1666,10 +1666,8 @@ static void btf_struct_metas_free(struct btf_struct_metas *tab) if (!tab) return; - for (i = 0; i < tab->cnt; i++) { + for (i = 0; i < tab->cnt; i++) btf_record_free(tab->types[i].record); - kfree(tab->types[i].field_offs); - } kfree(tab); } @@ -3700,12 +3698,24 @@ static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field, __alignof__(struct bpf_rb_node)); } +static int btf_field_cmp(const void *_a, const void *_b, const void *priv) +{ + const struct btf_field *a = (const struct btf_field *)_a; + const struct btf_field *b = (const struct btf_field *)_b; + + if (a->offset < b->offset) + return -1; + else if (a->offset > b->offset) + return 1; + return 0; +} + struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size) { struct btf_field_info info_arr[BTF_FIELDS_MAX]; + u32 next_off = 0, field_type_size; struct btf_record *rec; - u32 next_off = 0; int ret, i, cnt; ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); @@ -3725,7 +3735,8 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type rec->spin_lock_off = -EINVAL; rec->timer_off = -EINVAL; for (i = 0; i < cnt; i++) { - if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) { + field_type_size = btf_field_type_size(info_arr[i].type); + if (info_arr[i].off + field_type_size > value_size) { WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size); ret = -EFAULT; goto end; @@ -3734,11 +3745,12 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type ret = -EEXIST; goto end; } - next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type); + next_off = info_arr[i].off + field_type_size; rec->field_mask |= info_arr[i].type; rec->fields[i].offset = info_arr[i].off; rec->fields[i].type = info_arr[i].type; + rec->fields[i].size = field_type_size; switch (info_arr[i].type) { case BPF_SPIN_LOCK: @@ -3808,6 +3820,9 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type goto end; } + sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp, + NULL, rec); + return rec; end: btf_record_free(rec); @@ -3889,61 +3904,6 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) return 0; } -static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv) -{ - const u32 a = *(const u32 *)_a; - const u32 b = *(const u32 *)_b; - - if (a < b) - return -1; - else if (a > b) - return 1; - return 0; -} - -static void btf_field_offs_swap(void *_a, void *_b, int size, const void *priv) -{ - struct btf_field_offs *foffs = (void *)priv; - u32 *off_base = foffs->field_off; - u32 *a = _a, *b = _b; - u8 *sz_a, *sz_b; - - sz_a = foffs->field_sz + (a - off_base); - sz_b = foffs->field_sz + (b - off_base); - - swap(*a, *b); - swap(*sz_a, *sz_b); -} - -struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec) -{ - struct btf_field_offs *foffs; - u32 i, *off; - u8 *sz; - - BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz)); - if (IS_ERR_OR_NULL(rec)) - return NULL; - - foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN); - if (!foffs) - return ERR_PTR(-ENOMEM); - - off = foffs->field_off; - sz = foffs->field_sz; - for (i = 0; i < rec->cnt; i++) { - off[i] = rec->fields[i].offset; - sz[i] = btf_field_type_size(rec->fields[i].type); - } - foffs->cnt = rec->cnt; - - if (foffs->cnt == 1) - return foffs; - sort_r(foffs->field_off, foffs->cnt, sizeof(foffs->field_off[0]), - btf_field_offs_cmp, btf_field_offs_swap, foffs); - return foffs; -} - static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) @@ -5386,7 +5346,6 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) for (i = 1; i < n; i++) { struct btf_struct_metas *new_tab; const struct btf_member *member; - struct btf_field_offs *foffs; struct btf_struct_meta *type; struct btf_record *record; const struct btf_type *t; @@ -5428,17 +5387,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; goto free; } - foffs = btf_parse_field_offs(record); - /* We need the field_offs to be valid for a valid record, - * either both should be set or both should be unset. - */ - if (IS_ERR_OR_NULL(foffs)) { - btf_record_free(record); - ret = -EFAULT; - goto free; - } type->record = record; - type->field_offs = foffs; tab->cnt++; } return tab; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f04e60a4847f..e989b6460147 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1893,7 +1893,7 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) if (!p) return NULL; if (meta) - bpf_obj_init(meta->field_offs, p); + bpf_obj_init(meta->record, p); return p; } diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 38136ec4e095..2c5c64c2a53b 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -56,18 +56,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) ret = PTR_ERR(inner_map_meta->record); goto free; } - if (inner_map_meta->record) { - struct btf_field_offs *field_offs; - /* If btf_record is !IS_ERR_OR_NULL, then field_offs is always - * valid. - */ - field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN); - if (!field_offs) { - ret = -ENOMEM; - goto free_rec; - } - inner_map_meta->field_offs = field_offs; - } /* Note: We must use the same BTF, as we also used btf_record_dup above * which relies on BTF being same for both maps, as some members like * record->fields.list_head have pointers like value_rec pointing into @@ -88,8 +76,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) fdput(f); return inner_map_meta; -free_rec: - btf_record_free(inner_map_meta->record); free: kfree(inner_map_meta); put: @@ -99,7 +85,6 @@ put: void bpf_map_meta_free(struct bpf_map *map_meta) { - kfree(map_meta->field_offs); bpf_map_free_record(map_meta); btf_put(map_meta->btf); kfree(map_meta); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6d575505f89c..c08b7933bf8f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -717,14 +717,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); - struct btf_field_offs *foffs = map->field_offs; struct btf_record *rec = map->record; security_bpf_map_free(map); bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); - /* Delay freeing of field_offs and btf_record for maps, as map_free + /* Delay freeing of btf_record for maps, as map_free * callback usually needs access to them. It is better to do it here * than require each callback to do the free itself manually. * @@ -733,7 +732,6 @@ static void bpf_map_free_deferred(struct work_struct *work) * eventually calls bpf_map_free_meta, since inner_map_meta is only a * template bpf_map struct used during verification. */ - kfree(foffs); btf_record_free(rec); } @@ -1125,7 +1123,6 @@ free_map_tab: static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); - struct btf_field_offs *foffs; struct bpf_map *map; int f_flags; int err; @@ -1205,17 +1202,9 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - - foffs = btf_parse_field_offs(map->record); - if (IS_ERR(foffs)) { - err = PTR_ERR(foffs); - goto free_map; - } - map->field_offs = foffs; - err = security_bpf_map_alloc(map); if (err) - goto free_map_field_offs; + goto free_map; err = bpf_map_alloc_id(map); if (err) @@ -1239,8 +1228,6 @@ static int map_create(union bpf_attr *attr) free_map_sec: security_bpf_map_free(map); -free_map_field_offs: - kfree(map->field_offs); free_map: btf_put(map->btf); map->ops->map_free(map);