2020-05-09 10:59:09 -07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
/* Copyright (c) 2020 Facebook */
|
|
|
|
#include <linux/bpf.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/filter.h>
|
|
|
|
#include <linux/kernel.h>
|
2020-07-20 09:34:03 -07:00
|
|
|
#include <linux/btf_ids.h>
|
2020-05-09 10:59:09 -07:00
|
|
|
|
|
|
|
struct bpf_iter_seq_map_info {
|
2020-07-22 12:51:56 -07:00
|
|
|
u32 map_id;
|
2020-05-09 10:59:09 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * seq_file ->start() callback: fetch the map the walk (re)starts from.
 *
 * The lookup is driven by the map ID kept in the seq_file private data; the
 * helper receives the address of that ID, so it is able to update it.
 * Returns NULL when the helper returns no map. When a map is returned and
 * *pos is still 0, *pos is moved to 1.
 *
 * NOTE(review): the returned map presumably holds a reference, given that
 * ->next() and ->stop() call bpf_map_put() on it - confirm in the helper.
 */
static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_map_info *priv = seq->private;
	struct bpf_map *found = bpf_map_get_curr_or_next(&priv->map_id);

	if (found && *pos == 0)
		*pos = 1;

	return found;
}
|
|
|
|
|
|
|
|
/*
 * seq_file ->next() callback: step from map @v to the following one.
 *
 * Advances both the seq_file position and the saved map ID, releases @v
 * (treated as a struct bpf_map *) with bpf_map_put(), and then returns
 * whatever bpf_map_get_curr_or_next() yields for the new ID.
 */
static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_map_info *priv = seq->private;
	struct bpf_map *prev = v;
	struct bpf_map *following;

	(*pos)++;
	priv->map_id++;
	bpf_map_put(prev);

	following = bpf_map_get_curr_or_next(&priv->map_id);
	return following;
}
|
|
|
|
|
|
|
|
struct bpf_iter__bpf_map {
|
|
|
|
__bpf_md_ptr(struct bpf_iter_meta *, meta);
|
|
|
|
__bpf_md_ptr(struct bpf_map *, map);
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Iterator function declaration for the "bpf_map" target; the argument list
 * mirrors the fields of struct bpf_iter__bpf_map (meta, map).
 * NOTE(review): DEFINE_BPF_ITER_FUNC is defined outside this file - confirm
 * exactly what it emits.
 */
DEFINE_BPF_ITER_FUNC(bpf_map, struct bpf_iter_meta *meta, struct bpf_map *map)
|
|
|
|
|
|
|
|
/*
 * Run the attached iterator program, if any, for one map.
 *
 * @seq:     seq_file being read; stored in the iterator metadata.
 * @v:       map to expose to the program (NULL when called from ->stop()).
 * @in_stop: forwarded to bpf_iter_get_info().
 *
 * Returns 0 when bpf_iter_get_info() returns no program, otherwise the
 * return value of bpf_iter_run_prog().
 */
static int __bpf_map_seq_show(struct seq_file *seq, void *v, bool in_stop)
{
	struct bpf_iter_meta meta;
	struct bpf_iter__bpf_map ctx;
	struct bpf_prog *prog;

	meta.seq = seq;
	ctx.meta = &meta;
	ctx.map = v;

	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	return bpf_iter_run_prog(prog, &ctx);
}
|
|
|
|
|
|
|
|
static int bpf_map_seq_show(struct seq_file *seq, void *v)
|
|
|
|
{
|
|
|
|
return __bpf_map_seq_show(seq, v, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void bpf_map_seq_stop(struct seq_file *seq, void *v)
|
|
|
|
{
|
|
|
|
if (!v)
|
|
|
|
(void)__bpf_map_seq_show(seq, v, true);
|
|
|
|
else
|
|
|
|
bpf_map_put((struct bpf_map *)v);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct seq_operations bpf_map_seq_ops = {
|
|
|
|
.start = bpf_map_seq_start,
|
|
|
|
.next = bpf_map_seq_next,
|
|
|
|
.stop = bpf_map_seq_stop,
|
|
|
|
.show = bpf_map_seq_show,
|
|
|
|
};
|
|
|
|
|
2020-07-20 09:34:03 -07:00
|
|
|
BTF_ID_LIST(btf_bpf_map_id)
|
|
|
|
BTF_ID(struct, bpf_map)
|
|
|
|
|
2020-07-23 11:41:09 -07:00
|
|
|
static const struct bpf_iter_seq_info bpf_map_seq_info = {
|
2020-05-13 11:02:19 -07:00
|
|
|
.seq_ops = &bpf_map_seq_ops,
|
|
|
|
.init_seq_private = NULL,
|
|
|
|
.fini_seq_private = NULL,
|
|
|
|
.seq_priv_size = sizeof(struct bpf_iter_seq_map_info),
|
2020-07-23 11:41:09 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct bpf_iter_reg bpf_map_reg_info = {
|
|
|
|
.target = "bpf_map",
|
2020-05-13 11:02:21 -07:00
|
|
|
.ctx_arg_info_size = 1,
|
|
|
|
.ctx_arg_info = {
|
|
|
|
{ offsetof(struct bpf_iter__bpf_map, map),
|
|
|
|
PTR_TO_BTF_ID_OR_NULL },
|
|
|
|
},
|
2020-07-23 11:41:09 -07:00
|
|
|
.seq_info = &bpf_map_seq_info,
|
2020-05-13 11:02:19 -07:00
|
|
|
};
|
|
|
|
|
bpf: Implement bpf iterator for map elements
The bpf iterator for map elements is implemented.
The bpf program will receive four parameters:
bpf_iter_meta *meta: the meta data
bpf_map *map: the bpf_map whose elements are traversed
void *key: the key of one element
void *value: the value of the same element
Here, meta and map pointers are always valid, and
key has register type PTR_TO_RDONLY_BUF_OR_NULL and
value has register type PTR_TO_RDWR_BUF_OR_NULL.
The kernel will track the access range of key and value
during verification time. Later, these values will be compared
against the values in the actual map to ensure all accesses
are within range.
A new field iter_seq_info is added to bpf_map_ops which
is used to add map type specific information, i.e., seq_ops,
init/fini seq_file func and seq_file private data size.
Subsequent patches will have actual implementation
for bpf_map_ops->iter_seq_info.
In user space, BPF_ITER_LINK_MAP_FD needs to be
specified in prog attr->link_create.flags, which indicates
that attr->link_create.target_fd is a map_fd.
The reason for such an explicit flag is for possible
future cases where one bpf iterator may allow more than
one possible customization, e.g., pid and cgroup id for
task_file.
Current kernel internal implementation only allows
the target to register at most one required bpf_iter_link_info.
To support the above case, optional bpf_iter_link_info's
are needed: the target can be extended to register such link
infos, and the user-provided link_info needs to match one of
the target-supported ones.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184112.590360-1-yhs@fb.com
2020-07-23 11:41:12 -07:00
|
|
|
static int bpf_iter_check_map(struct bpf_prog *prog,
|
|
|
|
struct bpf_iter_aux_info *aux)
|
|
|
|
{
|
bpf: Implement bpf iterator for hash maps
The bpf iterators for hash, percpu hash, lru hash
and lru percpu hash are implemented. During link time,
bpf_iter_reg->check_target() will check map type
and ensure the program access key/value region is
within the map defined key/value size limit.
For percpu hash and lru hash maps, the bpf program
will receive values for all cpus. The map element
bpf iterator infrastructure will prepare value
properly before passing the value pointer to the
bpf program.
This patch set supports readonly map keys and
read/write map values. It does not support deleting
map elements, e.g., from hash tables. If there is
a user case for this, the following mechanism can
be used to support map deletion for hashtab, etc.
- permit a new bpf program return value, e.g., 2,
to let bpf iterator know the map element should
be removed.
- since bucket lock is taken, the map element will be
queued.
- once bucket lock is released after all elements under
this bucket are traversed, all to-be-deleted map
elements can be deleted.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184114.590470-1-yhs@fb.com
2020-07-23 11:41:14 -07:00
|
|
|
u32 key_acc_size, value_acc_size, key_size, value_size;
|
|
|
|
struct bpf_map *map = aux->map;
|
|
|
|
bool is_percpu = false;
|
|
|
|
|
|
|
|
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
|
|
|
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH)
|
|
|
|
is_percpu = true;
|
|
|
|
else if (map->map_type != BPF_MAP_TYPE_HASH &&
|
|
|
|
map->map_type != BPF_MAP_TYPE_LRU_HASH)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
key_acc_size = prog->aux->max_rdonly_access;
|
|
|
|
value_acc_size = prog->aux->max_rdwr_access;
|
|
|
|
key_size = map->key_size;
|
|
|
|
if (!is_percpu)
|
|
|
|
value_size = map->value_size;
|
|
|
|
else
|
|
|
|
value_size = round_up(map->value_size, 8) * num_possible_cpus();
|
|
|
|
|
|
|
|
if (key_acc_size > key_size || value_acc_size > value_size)
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
return 0;
|
bpf: Implement bpf iterator for map elements
The bpf iterator for map elements are implemented.
The bpf program will receive four parameters:
bpf_iter_meta *meta: the meta data
bpf_map *map: the bpf_map whose elements are traversed
void *key: the key of one element
void *value: the value of the same element
Here, meta and map pointers are always valid, and
key has register type PTR_TO_RDONLY_BUF_OR_NULL and
value has register type PTR_TO_RDWR_BUF_OR_NULL.
The kernel will track the access range of key and value
during verification time. Later, these values will be compared
against the values in the actual map to ensure all accesses
are within range.
A new field iter_seq_info is added to bpf_map_ops which
is used to add map type specific information, i.e., seq_ops,
init/fini seq_file func and seq_file private data size.
Subsequent patches will have actual implementation
for bpf_map_ops->iter_seq_info.
In user space, BPF_ITER_LINK_MAP_FD needs to be
specified in prog attr->link_create.flags, which indicates
that attr->link_create.target_fd is a map_fd.
The reason for such an explicit flag is for possible
future cases where one bpf iterator may allow more than
one possible customization, e.g., pid and cgroup id for
task_file.
Current kernel internal implementation only allows
the target to register at most one required bpf_iter_link_info.
To support the above case, optional bpf_iter_link_info's
are needed, the target can be extended to register such link
infos, and user provided link_info needs to match one of
target supported ones.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184112.590360-1-yhs@fb.com
2020-07-23 11:41:12 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Iterator function declaration for the "bpf_map_elem" target. The program
 * receives the metadata, the map whose elements are traversed, and the key
 * and value of one element.
 */
DEFINE_BPF_ITER_FUNC(bpf_map_elem,
		     struct bpf_iter_meta *meta,
		     struct bpf_map *map,
		     void *key,
		     void *value)
|
|
|
|
|
|
|
|
static const struct bpf_iter_reg bpf_map_elem_reg_info = {
|
|
|
|
.target = "bpf_map_elem",
|
|
|
|
.check_target = bpf_iter_check_map,
|
|
|
|
.req_linfo = BPF_ITER_LINK_MAP_FD,
|
|
|
|
.ctx_arg_info_size = 2,
|
|
|
|
.ctx_arg_info = {
|
|
|
|
{ offsetof(struct bpf_iter__bpf_map_elem, key),
|
|
|
|
PTR_TO_RDONLY_BUF_OR_NULL },
|
|
|
|
{ offsetof(struct bpf_iter__bpf_map_elem, value),
|
|
|
|
PTR_TO_RDWR_BUF_OR_NULL },
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
2020-05-09 10:59:09 -07:00
|
|
|
/*
 * Register both iterator targets provided by this file.
 *
 * The BTF ID of struct bpf_map is patched into bpf_map_reg_info first, then
 * "bpf_map" is registered, followed by "bpf_map_elem". The second
 * registration is skipped when the first one fails.
 *
 * Returns 0 on success or the error from the failing bpf_iter_reg_target().
 */
static int __init bpf_map_iter_init(void)
{
	int err;

	bpf_map_reg_info.ctx_arg_info[0].btf_id = btf_bpf_map_id[0];

	err = bpf_iter_reg_target(&bpf_map_reg_info);
	if (!err)
		err = bpf_iter_reg_target(&bpf_map_elem_reg_info);

	return err;
}
|
|
|
|
|
|
|
|
/* Run bpf_map_iter_init() as a late initcall. */
late_initcall(bpf_map_iter_init);
|