Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git (synced 2025-01-07 13:43:51 +00:00)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-02-28

The following pull-request contains BPF updates for your *net-next* tree.

We've added 41 non-merge commits during the last 7 day(s) which contain
a total of 49 files changed, 1383 insertions(+), 499 deletions(-).

The main changes are:

1) BPF and Real-Time nicely co-exist.

2) bpftool feature improvements.

3) retrieve bpf_sk_storage via INET_DIAG.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in: commit 9f0ca0c1a5
@@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct receive_queue *rq,
                                   struct page *page, unsigned int offset,
                                   unsigned int len, unsigned int truesize,
-                                  bool hdr_valid)
+                                  bool hdr_valid, unsigned int metasize)
 {
     struct sk_buff *skb;
     struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -393,6 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
     else
         hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
+    /* hdr_valid means no XDP, so we can copy the vnet header */
     if (hdr_valid)
         memcpy(hdr, p, hdr_len);
 
@@ -405,6 +406,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
     copy = skb_tailroom(skb);
     skb_put_data(skb, p, copy);
 
+    if (metasize) {
+        __skb_pull(skb, metasize);
+        skb_metadata_set(skb, metasize);
+    }
+
     len -= copy;
     offset += copy;
 
@@ -450,10 +456,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
     struct virtio_net_hdr_mrg_rxbuf *hdr;
     int err;
 
-    /* virtqueue want to use data area in-front of packet */
-    if (unlikely(xdpf->metasize > 0))
-        return -EOPNOTSUPP;
-
     if (unlikely(xdpf->headroom < vi->hdr_len))
         return -EOVERFLOW;
 
@@ -644,6 +646,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
     unsigned int delta = 0;
     struct page *xdp_page;
     int err;
+    unsigned int metasize = 0;
 
     len -= vi->hdr_len;
     stats->bytes += len;
@@ -683,8 +686,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
 
         xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
         xdp.data = xdp.data_hard_start + xdp_headroom;
-        xdp_set_data_meta_invalid(&xdp);
         xdp.data_end = xdp.data + len;
+        xdp.data_meta = xdp.data;
         xdp.rxq = &rq->xdp_rxq;
         orig_data = xdp.data;
         act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -695,6 +698,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
             /* Recalculate length in case bpf program changed it */
             delta = orig_data - xdp.data;
             len = xdp.data_end - xdp.data;
+            metasize = xdp.data - xdp.data_meta;
             break;
         case XDP_TX:
             stats->xdp_tx++;
@@ -735,10 +739,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
     }
     skb_reserve(skb, headroom - delta);
     skb_put(skb, len);
-    if (!delta) {
+    if (!xdp_prog) {
         buf += header_offset;
         memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
-    } /* keep zeroed vnet hdr since packet was changed by bpf */
+    } /* keep zeroed vnet hdr since XDP is loaded */
+
+    if (metasize)
+        skb_metadata_set(skb, metasize);
+
 err:
     return skb;
@@ -760,8 +767,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
                                    struct virtnet_rq_stats *stats)
 {
     struct page *page = buf;
-    struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
-                                      PAGE_SIZE, true);
+    struct sk_buff *skb =
+        page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
 
     stats->bytes += len - vi->hdr_len;
     if (unlikely(!skb))
@@ -793,6 +800,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
     unsigned int truesize;
     unsigned int headroom = mergeable_ctx_to_headroom(ctx);
     int err;
+    unsigned int metasize = 0;
 
     head_skb = NULL;
     stats->bytes += len - vi->hdr_len;
@@ -839,8 +847,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
         data = page_address(xdp_page) + offset;
         xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
         xdp.data = data + vi->hdr_len;
-        xdp_set_data_meta_invalid(&xdp);
         xdp.data_end = xdp.data + (len - vi->hdr_len);
+        xdp.data_meta = xdp.data;
         xdp.rxq = &rq->xdp_rxq;
 
         act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -848,24 +856,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
         switch (act) {
         case XDP_PASS:
-            /* recalculate offset to account for any header
-             * adjustments. Note other cases do not build an
-             * skb and avoid using offset
-             */
-            offset = xdp.data -
-                    page_address(xdp_page) - vi->hdr_len;
+            metasize = xdp.data - xdp.data_meta;
 
-            /* recalculate len if xdp.data or xdp.data_end were
-             * adjusted
+            /* recalculate offset to account for any header
+             * adjustments and minus the metasize to copy the
+             * metadata in page_to_skb(). Note other cases do not
+             * build an skb and avoid using offset
              */
-            len = xdp.data_end - xdp.data + vi->hdr_len;
+            offset = xdp.data - page_address(xdp_page) -
+                 vi->hdr_len - metasize;
+
+            /* recalculate len if xdp.data, xdp.data_end or
+             * xdp.data_meta were adjusted
+             */
+            len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
             /* We can only create skb based on xdp_page. */
             if (unlikely(xdp_page != page)) {
                 rcu_read_unlock();
                 put_page(page);
-                head_skb = page_to_skb(vi, rq, xdp_page,
-                                       offset, len,
-                                       PAGE_SIZE, false);
+                head_skb = page_to_skb(vi, rq, xdp_page, offset,
+                                       len, PAGE_SIZE, false,
+                                       metasize);
                 return head_skb;
             }
             break;
@@ -921,7 +932,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
         goto err_skb;
     }
 
-    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
+                           metasize);
     curr_skb = head_skb;
 
     if (unlikely(!curr_skb))
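The virtio_net hunks above stop invalidating xdp.data_meta and carry the metadata size through page_to_skb()/skb_metadata_set(), so an XDP program's metadata area now survives into the resulting skb. As a rough illustration only (not part of this commit, with a hypothetical metadata layout), an XDP program could reserve and fill that area like this:

  /* Hypothetical XDP program sketch: reserve 4 bytes of metadata in front
   * of the packet so later stages (or the driver-built skb) can see it.
   */
  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct meta {            /* illustrative layout, not a kernel ABI */
      __u32 mark;
  };

  SEC("xdp")
  int xdp_set_meta(struct xdp_md *ctx)
  {
      struct meta *m;

      /* Grow the metadata area by sizeof(*m); moves data_meta down. */
      if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*m)))
          return XDP_PASS;

      m = (void *)(long)ctx->data_meta;
      if ((void *)(m + 1) > (void *)(long)ctx->data)  /* verifier bounds check */
          return XDP_PASS;

      m->mark = 0x2a;
      return XDP_PASS;
  }

  char _license[] SEC("license") = "GPL";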
@@ -36,7 +36,7 @@ struct bpf_cgroup_storage_map;
 
 struct bpf_storage_buffer {
     struct rcu_head rcu;
-    char data[0];
+    char data[];
 };
 
 struct bpf_cgroup_storage {
@@ -859,7 +859,7 @@ struct bpf_prog_array_item {
 
 struct bpf_prog_array {
     struct rcu_head rcu;
-    struct bpf_prog_array_item items[0];
+    struct bpf_prog_array_item items[];
 };
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
         struct bpf_prog *_prog;             \
         struct bpf_prog_array *_array;      \
         u32 _ret = 1;                       \
-        preempt_disable();                  \
+        migrate_disable();                  \
         rcu_read_lock();                    \
         _array = rcu_dereference(array);    \
         if (unlikely(check_non_null && !_array))\
@@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
         }                                   \
 _out:                                       \
         rcu_read_unlock();                  \
-        preempt_enable();                   \
+        migrate_enable();                   \
         _ret;                               \
      })
 
@@ -932,7 +932,7 @@ _out:                                       \
         u32 ret;                            \
         u32 _ret = 1;                       \
         u32 _cn = 0;                        \
-        preempt_disable();                  \
+        migrate_disable();                  \
         rcu_read_lock();                    \
         _array = rcu_dereference(array);    \
         _item = &_array->items[0];          \
@@ -944,7 +944,7 @@ _out:                                       \
             _item++;                        \
         }                                   \
         rcu_read_unlock();                  \
-        preempt_enable();                   \
+        migrate_enable();                   \
         if (_ret)                           \
             _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
         else                                \
@@ -961,6 +961,36 @@ _out:                                       \
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
+/*
+ * Block execution of BPF programs attached to instrumentation (perf,
+ * kprobes, tracepoints) to prevent deadlocks on map operations as any of
+ * these events can happen inside a region which holds a map bucket lock
+ * and can deadlock on it.
+ *
+ * Use the preemption safe inc/dec variants on RT because migrate disable
+ * is preemptible on RT and preemption in the middle of the RMW operation
+ * might lead to inconsistent state. Use the raw variants for non RT
+ * kernels as migrate_disable() maps to preempt_disable() so the slightly
+ * more expensive save operation can be avoided.
+ */
+static inline void bpf_disable_instrumentation(void)
+{
+    migrate_disable();
+    if (IS_ENABLED(CONFIG_PREEMPT_RT))
+        this_cpu_inc(bpf_prog_active);
+    else
+        __this_cpu_inc(bpf_prog_active);
+}
+
+static inline void bpf_enable_instrumentation(void)
+{
+    if (IS_ENABLED(CONFIG_PREEMPT_RT))
+        this_cpu_dec(bpf_prog_active);
+    else
+        __this_cpu_dec(bpf_prog_active);
+    migrate_enable();
+}
+
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 
|
|||||||
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
|
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
|
||||||
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
|
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
|
||||||
|
|
||||||
|
struct bpf_map *bpf_map_get(u32 ufd);
|
||||||
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
|
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
|
||||||
struct bpf_map *__bpf_map_get(struct fd f);
|
struct bpf_map *__bpf_map_get(struct fd f);
|
||||||
void bpf_map_inc(struct bpf_map *map);
|
void bpf_map_inc(struct bpf_map *map);
|
||||||
|
@@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
 #define __BPF_PROG_RUN(prog, ctx, dfunc)    ({      \
     u32 ret;                                        \
-    cant_sleep();                                   \
+    cant_migrate();                                 \
     if (static_branch_unlikely(&bpf_stats_enabled_key)) {  \
         struct bpf_prog_stats *stats;               \
         u64 start = sched_clock();                  \
@@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
     }                                               \
     ret; })
 
-#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx,  \
-                                               bpf_dispatcher_nopfunc)
+#define BPF_PROG_RUN(prog, ctx)                     \
+    __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
 
+/*
+ * Use in preemptible and therefore migratable context to make sure that
+ * the execution of the BPF program runs on one CPU.
+ *
+ * This uses migrate_disable/enable() explicitly to document that the
+ * invocation of a BPF program does not require reentrancy protection
+ * against a BPF program which is invoked from a preempting task.
+ *
+ * For non RT enabled kernels migrate_disable/enable() maps to
+ * preempt_disable/enable(), i.e. it disables also preemption.
+ */
+static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
+                                          const void *ctx)
+{
+    u32 ret;
+
+    migrate_disable();
+    ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
+    migrate_enable();
+    return ret;
+}
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
|
|||||||
return qdisc_skb_cb(skb)->data;
|
return qdisc_skb_cb(skb)->data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Must be invoked with migration disabled */
|
||||||
static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
|
static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
|
||||||
struct sk_buff *skb)
|
struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
@@ -680,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 {
     u32 res;
 
-    preempt_disable();
+    migrate_disable();
     res = __bpf_prog_run_save_cb(prog, skb);
-    preempt_enable();
+    migrate_enable();
     return res;
 }
 
|
|||||||
if (unlikely(prog->cb_access))
|
if (unlikely(prog->cb_access))
|
||||||
memset(cb_data, 0, BPF_SKB_CB_LEN);
|
memset(cb_data, 0, BPF_SKB_CB_LEN);
|
||||||
|
|
||||||
preempt_disable();
|
res = bpf_prog_run_pin_on_cpu(prog, skb);
|
||||||
res = BPF_PROG_RUN(prog, skb);
|
|
||||||
preempt_enable();
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -15,11 +15,9 @@ struct netlink_callback;
 struct inet_diag_handler {
     void    (*dump)(struct sk_buff *skb,
                     struct netlink_callback *cb,
-                    const struct inet_diag_req_v2 *r,
-                    struct nlattr *bc);
+                    const struct inet_diag_req_v2 *r);
 
-    int     (*dump_one)(struct sk_buff *in_skb,
-                        const struct nlmsghdr *nlh,
+    int     (*dump_one)(struct netlink_callback *cb,
                         const struct inet_diag_req_v2 *req);
 
     void    (*idiag_get_info)(struct sock *sk,
|
|||||||
__u16 idiag_info_size;
|
__u16 idiag_info_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct bpf_sk_storage_diag;
|
||||||
|
struct inet_diag_dump_data {
|
||||||
|
struct nlattr *req_nlas[__INET_DIAG_REQ_MAX];
|
||||||
|
#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE]
|
||||||
|
#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
|
||||||
|
|
||||||
|
struct bpf_sk_storage_diag *bpf_stg_diag;
|
||||||
|
};
|
||||||
|
|
||||||
struct inet_connection_sock;
|
struct inet_connection_sock;
|
||||||
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
||||||
struct sk_buff *skb, const struct inet_diag_req_v2 *req,
|
struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
struct user_namespace *user_ns,
|
const struct inet_diag_req_v2 *req,
|
||||||
u32 pid, u32 seq, u16 nlmsg_flags,
|
u16 nlmsg_flags, bool net_admin);
|
||||||
const struct nlmsghdr *unlh, bool net_admin);
|
|
||||||
void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
|
void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
|
||||||
struct netlink_callback *cb,
|
struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r,
|
const struct inet_diag_req_v2 *r);
|
||||||
struct nlattr *bc);
|
|
||||||
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
|
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
|
||||||
struct sk_buff *in_skb, const struct nlmsghdr *nlh,
|
struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *req);
|
const struct inet_diag_req_v2 *req);
|
||||||
|
|
||||||
struct sock *inet_diag_find_one_icsk(struct net *net,
|
struct sock *inet_diag_find_one_icsk(struct net *net,
|
||||||
|
@@ -257,6 +257,13 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
 
+#ifndef CONFIG_PREEMPT_RT
+# define cant_migrate()     cant_sleep()
+#else
+  /* Placeholder for now */
+# define cant_migrate()     do { } while (0)
+#endif
+
 /**
  * abs - return absolute value of an argument
  * @x: the value.  If it is unsigned type, it is converted to signed type first.
@@ -188,10 +188,10 @@ struct netlink_callback {
     struct module           *module;
     struct netlink_ext_ack  *extack;
     u16                     family;
-    u16                     min_dump_alloc;
-    bool                    strict_check;
     u16                     answer_flags;
+    u32                     min_dump_alloc;
     unsigned int            prev_seq, seq;
+    bool                    strict_check;
     union {
         u8                  ctx[48];
 
@@ -322,4 +322,34 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
 
 #endif
 
+/**
+ * migrate_disable - Prevent migration of the current task
+ *
+ * Maps to preempt_disable() which also disables preemption. Use
+ * migrate_disable() to annotate that the intent is to prevent migration,
+ * but not necessarily preemption.
+ *
+ * Can be invoked nested like preempt_disable() and needs the corresponding
+ * number of migrate_enable() invocations.
+ */
+static __always_inline void migrate_disable(void)
+{
+    preempt_disable();
+}
+
+/**
+ * migrate_enable - Allow migration of the current task
+ *
+ * Counterpart to migrate_disable().
+ *
+ * As migrate_disable() can be invoked nested, only the outermost invocation
+ * reenables migration.
+ *
+ * Currently mapped to preempt_enable().
+ */
+static __always_inline void migrate_enable(void)
+{
+    preempt_enable();
+}
+
 #endif /* __LINUX_PREEMPT_H */
@@ -10,14 +10,41 @@ void bpf_sk_storage_free(struct sock *sk);
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
 
+struct bpf_sk_storage_diag;
+struct sk_buff;
+struct nlattr;
+struct sock;
+
 #ifdef CONFIG_BPF_SYSCALL
 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs);
+void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag);
+int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+                            struct sock *sk, struct sk_buff *skb,
+                            int stg_array_type,
+                            unsigned int *res_diag_size);
 #else
 static inline int bpf_sk_storage_clone(const struct sock *sk,
                                        struct sock *newsk)
 {
     return 0;
 }
+static inline struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla)
+{
+    return NULL;
+}
+static inline void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
+{
+}
+static inline int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+                                          struct sock *sk, struct sk_buff *skb,
+                                          int stg_array_type,
+                                          unsigned int *res_diag_size)
+{
+    return 0;
+}
+#endif
 
 #endif /* _BPF_SK_STORAGE_H */
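The new bpf_sk_storage_diag_* API lets a diag handler dump bpf_sk_storage values for a socket. A hedged sketch of how a dumper might drive it (the call sequence below is an assumption based only on the declarations above, not the actual inet_diag implementation from this series):

  /* Assumed usage sketch: parse the request once, emit per socket, free. */
  static int example_dump_sk_storages(const struct nlattr *stgs_nla,
                                      struct sock *sk, struct sk_buff *skb)
  {
      struct bpf_sk_storage_diag *diag;
      unsigned int res_diag_size;
      int err;

      /* Parse the requested map FDs (SK_DIAG_BPF_STORAGE_REQ_MAP_FD). */
      diag = bpf_sk_storage_diag_alloc(stgs_nla);
      if (IS_ERR_OR_NULL(diag))
          return diag ? PTR_ERR(diag) : -EOPNOTSUPP;

      /* Emit one INET_DIAG_SK_BPF_STORAGES nest for this socket. */
      err = bpf_sk_storage_diag_put(diag, sk, skb,
                                    INET_DIAG_SK_BPF_STORAGES,
                                    &res_diag_size);

      bpf_sk_storage_diag_free(diag);
      return err;
  }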
@@ -73,7 +73,7 @@ struct bpf_insn {
 /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
 struct bpf_lpm_trie_key {
     __u32   prefixlen;  /* up to 32 for AF_INET, 128 for AF_INET6 */
-    __u8    data[0];    /* Arbitrary size */
+    __u8    data[];     /* Arbitrary size */
 };
 
 struct bpf_cgroup_storage_key {
@@ -64,9 +64,11 @@ struct inet_diag_req_raw {
 enum {
     INET_DIAG_REQ_NONE,
     INET_DIAG_REQ_BYTECODE,
+    INET_DIAG_REQ_SK_BPF_STORAGES,
+    __INET_DIAG_REQ_MAX,
 };
 
-#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE
+#define INET_DIAG_REQ_MAX (__INET_DIAG_REQ_MAX - 1)
 
 /* Bytecode is sequence of 4 byte commands followed by variable arguments.
  * All the commands identified by "code" are conditional jumps forward:
@@ -154,6 +156,7 @@ enum {
     INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */
     INET_DIAG_MD5SIG,
     INET_DIAG_ULP_INFO,
+    INET_DIAG_SK_BPF_STORAGES,
     __INET_DIAG_MAX,
 };
 
@@ -36,4 +36,30 @@ enum sknetlink_groups {
 };
 #define SKNLGRP_MAX (__SKNLGRP_MAX - 1)
 
+enum {
+    SK_DIAG_BPF_STORAGE_REQ_NONE,
+    SK_DIAG_BPF_STORAGE_REQ_MAP_FD,
+    __SK_DIAG_BPF_STORAGE_REQ_MAX,
+};
+
+#define SK_DIAG_BPF_STORAGE_REQ_MAX (__SK_DIAG_BPF_STORAGE_REQ_MAX - 1)
+
+enum {
+    SK_DIAG_BPF_STORAGE_REP_NONE,
+    SK_DIAG_BPF_STORAGE,
+    __SK_DIAG_BPF_STORAGE_REP_MAX,
+};
+
+#define SK_DIAB_BPF_STORAGE_REP_MAX (__SK_DIAG_BPF_STORAGE_REP_MAX - 1)
+
+enum {
+    SK_DIAG_BPF_STORAGE_NONE,
+    SK_DIAG_BPF_STORAGE_PAD,
+    SK_DIAG_BPF_STORAGE_MAP_ID,
+    SK_DIAG_BPF_STORAGE_MAP_VALUE,
+    __SK_DIAG_BPF_STORAGE_MAX,
+};
+
+#define SK_DIAG_BPF_STORAGE_MAX (__SK_DIAG_BPF_STORAGE_MAX - 1)
+
 #endif /* _UAPI__SOCK_DIAG_H__ */
|
@ -23,7 +23,7 @@ enum bpf_struct_ops_state {
|
|||||||
|
|
||||||
struct bpf_struct_ops_value {
|
struct bpf_struct_ops_value {
|
||||||
BPF_STRUCT_OPS_COMMON_VALUE;
|
BPF_STRUCT_OPS_COMMON_VALUE;
|
||||||
char data[0] ____cacheline_aligned_in_smp;
|
char data[] ____cacheline_aligned_in_smp;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bpf_struct_ops_map {
|
struct bpf_struct_ops_map {
|
||||||
|
@ -27,9 +27,62 @@
|
|||||||
.map_delete_batch = \
|
.map_delete_batch = \
|
||||||
generic_map_delete_batch
|
generic_map_delete_batch
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The bucket lock has two protection scopes:
|
||||||
|
*
|
||||||
|
* 1) Serializing concurrent operations from BPF programs on differrent
|
||||||
|
* CPUs
|
||||||
|
*
|
||||||
|
* 2) Serializing concurrent operations from BPF programs and sys_bpf()
|
||||||
|
*
|
||||||
|
* BPF programs can execute in any context including perf, kprobes and
|
||||||
|
* tracing. As there are almost no limits where perf, kprobes and tracing
|
||||||
|
* can be invoked from the lock operations need to be protected against
|
||||||
|
* deadlocks. Deadlocks can be caused by recursion and by an invocation in
|
||||||
|
* the lock held section when functions which acquire this lock are invoked
|
||||||
|
* from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
|
||||||
|
* variable bpf_prog_active, which prevents BPF programs attached to perf
|
||||||
|
* events, kprobes and tracing to be invoked before the prior invocation
|
||||||
|
* from one of these contexts completed. sys_bpf() uses the same mechanism
|
||||||
|
* by pinning the task to the current CPU and incrementing the recursion
|
||||||
|
* protection accross the map operation.
|
||||||
|
*
|
||||||
|
* This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
|
||||||
|
* operations like memory allocations (even with GFP_ATOMIC) from atomic
|
||||||
|
* contexts. This is required because even with GFP_ATOMIC the memory
|
||||||
|
* allocator calls into code pathes which acquire locks with long held lock
|
||||||
|
* sections. To ensure the deterministic behaviour these locks are regular
|
||||||
|
* spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
|
||||||
|
* true atomic contexts on an RT kernel are the low level hardware
|
||||||
|
* handling, scheduling, low level interrupt handling, NMIs etc. None of
|
||||||
|
* these contexts should ever do memory allocations.
|
||||||
|
*
|
||||||
|
* As regular device interrupt handlers and soft interrupts are forced into
|
||||||
|
* thread context, the existing code which does
|
||||||
|
* spin_lock*(); alloc(GPF_ATOMIC); spin_unlock*();
|
||||||
|
* just works.
|
||||||
|
*
|
||||||
|
* In theory the BPF locks could be converted to regular spinlocks as well,
|
||||||
|
* but the bucket locks and percpu_freelist locks can be taken from
|
||||||
|
* arbitrary contexts (perf, kprobes, tracepoints) which are required to be
|
||||||
|
* atomic contexts even on RT. These mechanisms require preallocated maps,
|
||||||
|
* so there is no need to invoke memory allocations within the lock held
|
||||||
|
* sections.
|
||||||
|
*
|
||||||
|
* BPF maps which need dynamic allocation are only used from (forced)
|
||||||
|
* thread context on RT and can therefore use regular spinlocks which in
|
||||||
|
* turn allows to invoke memory allocations from the lock held section.
|
||||||
|
*
|
||||||
|
* On a non RT kernel this distinction is neither possible nor required.
|
||||||
|
* spinlock maps to raw_spinlock and the extra code is optimized out by the
|
||||||
|
* compiler.
|
||||||
|
*/
|
||||||
struct bucket {
|
struct bucket {
|
||||||
struct hlist_nulls_head head;
|
struct hlist_nulls_head head;
|
||||||
raw_spinlock_t lock;
|
union {
|
||||||
|
raw_spinlock_t raw_lock;
|
||||||
|
spinlock_t lock;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bpf_htab {
|
struct bpf_htab {
|
||||||
@@ -65,9 +118,54 @@ struct htab_elem {
         struct bpf_lru_node lru_node;
     };
     u32 hash;
-    char key[0] __aligned(8);
+    char key[] __aligned(8);
 };
 
+static inline bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+    return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
+static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
+{
+    return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
+}
+
+static void htab_init_buckets(struct bpf_htab *htab)
+{
+    unsigned i;
+
+    for (i = 0; i < htab->n_buckets; i++) {
+        INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
+        if (htab_use_raw_lock(htab))
+            raw_spin_lock_init(&htab->buckets[i].raw_lock);
+        else
+            spin_lock_init(&htab->buckets[i].lock);
+    }
+}
+
+static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
+                                             struct bucket *b)
+{
+    unsigned long flags;
+
+    if (htab_use_raw_lock(htab))
+        raw_spin_lock_irqsave(&b->raw_lock, flags);
+    else
+        spin_lock_irqsave(&b->lock, flags);
+    return flags;
+}
+
+static inline void htab_unlock_bucket(const struct bpf_htab *htab,
+                                      struct bucket *b,
+                                      unsigned long flags)
+{
+    if (htab_use_raw_lock(htab))
+        raw_spin_unlock_irqrestore(&b->raw_lock, flags);
+    else
+        spin_unlock_irqrestore(&b->lock, flags);
+}
+
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
 
 static bool htab_is_lru(const struct bpf_htab *htab)
@@ -82,11 +180,6 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
         htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
-static bool htab_is_prealloc(const struct bpf_htab *htab)
-{
-    return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-}
-
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
                                      void __percpu *pptr)
 {
@@ -328,8 +421,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
     bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
     bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
     struct bpf_htab *htab;
-    int err, i;
     u64 cost;
+    int err;
 
     htab = kzalloc(sizeof(*htab), GFP_USER);
     if (!htab)
@@ -391,10 +484,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
     else
         htab->hashrnd = get_random_int();
 
-    for (i = 0; i < htab->n_buckets; i++) {
-        INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
-        raw_spin_lock_init(&htab->buckets[i].lock);
-    }
+    htab_init_buckets(htab);
 
     if (prealloc) {
         err = prealloc_init(htab);
@@ -602,7 +692,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
     b = __select_bucket(htab, tgt_l->hash);
     head = &b->head;
 
-    raw_spin_lock_irqsave(&b->lock, flags);
+    flags = htab_lock_bucket(htab, b);
 
     hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
         if (l == tgt_l) {
@@ -610,7 +700,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
             break;
         }
 
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
 
     return l == tgt_l;
 }
@@ -686,15 +776,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
     struct htab_elem *l = container_of(head, struct htab_elem, rcu);
     struct bpf_htab *htab = l->htab;
 
-    /* must increment bpf_prog_active to avoid kprobe+bpf triggering while
-     * we're calling kfree, otherwise deadlock is possible if kprobes
-     * are placed somewhere inside of slub
-     */
-    preempt_disable();
-    __this_cpu_inc(bpf_prog_active);
     htab_elem_free(htab, l);
-    __this_cpu_dec(bpf_prog_active);
-    preempt_enable();
 }
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
@@ -884,8 +966,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
          */
     }
 
-    /* bpf_map_update_elem() can be called in_irq() */
-    raw_spin_lock_irqsave(&b->lock, flags);
+    flags = htab_lock_bucket(htab, b);
 
     l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -926,7 +1007,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
     }
     ret = 0;
 err:
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
     return ret;
 }
 
|
|||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
|
memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
|
||||||
|
|
||||||
/* bpf_map_update_elem() can be called in_irq() */
|
flags = htab_lock_bucket(htab, b);
|
||||||
raw_spin_lock_irqsave(&b->lock, flags);
|
|
||||||
|
|
||||||
l_old = lookup_elem_raw(head, hash, key, key_size);
|
l_old = lookup_elem_raw(head, hash, key, key_size);
|
||||||
|
|
||||||
@@ -984,7 +1064,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
     ret = 0;
 
 err:
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
 
     if (ret)
         bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@ -1019,8 +1099,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
     b = __select_bucket(htab, hash);
     head = &b->head;
 
-    /* bpf_map_update_elem() can be called in_irq() */
-    raw_spin_lock_irqsave(&b->lock, flags);
+    flags = htab_lock_bucket(htab, b);
 
     l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1043,7 +1122,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
     }
     ret = 0;
 err:
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
     return ret;
 }
 
@@ -1083,8 +1162,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
             return -ENOMEM;
     }
 
-    /* bpf_map_update_elem() can be called in_irq() */
-    raw_spin_lock_irqsave(&b->lock, flags);
+    flags = htab_lock_bucket(htab, b);
 
     l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1106,7 +1184,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
     }
     ret = 0;
 err:
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
     if (l_new)
         bpf_lru_push_free(&htab->lru, &l_new->lru_node);
     return ret;
@@ -1144,7 +1222,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
     b = __select_bucket(htab, hash);
     head = &b->head;
 
-    raw_spin_lock_irqsave(&b->lock, flags);
+    flags = htab_lock_bucket(htab, b);
 
     l = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1154,7 +1232,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
         ret = 0;
     }
 
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
     return ret;
 }
 
@@ -1176,7 +1254,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
     b = __select_bucket(htab, hash);
     head = &b->head;
 
-    raw_spin_lock_irqsave(&b->lock, flags);
+    flags = htab_lock_bucket(htab, b);
 
     l = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1185,7 +1263,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
         ret = 0;
     }
 
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
     if (l)
         bpf_lru_push_free(&htab->lru, &l->lru_node);
     return ret;
@@ -1325,8 +1403,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
     }
 
 again:
-    preempt_disable();
-    this_cpu_inc(bpf_prog_active);
+    bpf_disable_instrumentation();
     rcu_read_lock();
 again_nocopy:
     dst_key = keys;
@@ -1335,7 +1412,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
     head = &b->head;
     /* do not grab the lock unless need it (bucket_cnt > 0). */
     if (locked)
-        raw_spin_lock_irqsave(&b->lock, flags);
+        flags = htab_lock_bucket(htab, b);
 
     bucket_cnt = 0;
     hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
@@ -1352,10 +1429,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         /* Note that since bucket_cnt > 0 here, it is implicit
          * that the locked was grabbed, so release it.
          */
-        raw_spin_unlock_irqrestore(&b->lock, flags);
+        htab_unlock_bucket(htab, b, flags);
         rcu_read_unlock();
-        this_cpu_dec(bpf_prog_active);
-        preempt_enable();
+        bpf_enable_instrumentation();
         goto after_loop;
     }
 
@@ -1364,10 +1440,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         /* Note that since bucket_cnt > 0 here, it is implicit
          * that the locked was grabbed, so release it.
          */
-        raw_spin_unlock_irqrestore(&b->lock, flags);
+        htab_unlock_bucket(htab, b, flags);
         rcu_read_unlock();
-        this_cpu_dec(bpf_prog_active);
-        preempt_enable();
+        bpf_enable_instrumentation();
         kvfree(keys);
         kvfree(values);
         goto alloc;
@@ -1418,7 +1493,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
         dst_val += value_size;
     }
 
-    raw_spin_unlock_irqrestore(&b->lock, flags);
+    htab_unlock_bucket(htab, b, flags);
     locked = false;
 
     while (node_to_free) {
@@ -1437,8 +1512,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
     }
 
     rcu_read_unlock();
-    this_cpu_dec(bpf_prog_active);
-    preempt_enable();
+    bpf_enable_instrumentation();
     if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
         key_size * bucket_cnt) ||
         copy_to_user(uvalues + total * value_size, values,
@@ -25,7 +25,7 @@ struct lpm_trie_node {
     struct lpm_trie_node __rcu  *child[2];
     u32                         prefixlen;
     u32                         flags;
-    u8                          data[0];
+    u8                          data[];
 };
 
 struct lpm_trie {
@@ -34,7 +34,7 @@ struct lpm_trie {
     size_t                      n_entries;
     size_t                      max_prefixlen;
     size_t                      data_size;
-    raw_spinlock_t              lock;
+    spinlock_t                  lock;
 };
 
 /* This trie implements a longest prefix match algorithm that can be used to
@@ -315,7 +315,7 @@ static int trie_update_elem(struct bpf_map *map,
     if (key->prefixlen > trie->max_prefixlen)
         return -EINVAL;
 
-    raw_spin_lock_irqsave(&trie->lock, irq_flags);
+    spin_lock_irqsave(&trie->lock, irq_flags);
 
     /* Allocate and fill a new node */
 
@@ -422,7 +422,7 @@ static int trie_update_elem(struct bpf_map *map,
         kfree(im_node);
     }
 
-    raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+    spin_unlock_irqrestore(&trie->lock, irq_flags);
 
     return ret;
 }
@@ -442,7 +442,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
     if (key->prefixlen > trie->max_prefixlen)
         return -EINVAL;
 
-    raw_spin_lock_irqsave(&trie->lock, irq_flags);
+    spin_lock_irqsave(&trie->lock, irq_flags);
 
     /* Walk the tree looking for an exact key/length match and keeping
      * track of the path we traverse. We will need to know the node
@@ -518,7 +518,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
     kfree_rcu(node, rcu);
 
 out:
-    raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+    spin_unlock_irqrestore(&trie->lock, irq_flags);
 
     return ret;
 }
@@ -575,7 +575,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
     if (ret)
         goto out_err;
 
-    raw_spin_lock_init(&trie->lock);
+    spin_lock_init(&trie->lock);
 
     return &trie->map;
 out_err:
@@ -25,12 +25,18 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)
     free_percpu(s->freelist);
 }
 
+static inline void pcpu_freelist_push_node(struct pcpu_freelist_head *head,
+                                           struct pcpu_freelist_node *node)
+{
+    node->next = head->first;
+    head->first = node;
+}
+
 static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
                                          struct pcpu_freelist_node *node)
 {
     raw_spin_lock(&head->lock);
-    node->next = head->first;
-    head->first = node;
+    pcpu_freelist_push_node(head, node);
     raw_spin_unlock(&head->lock);
 }
 
@@ -56,21 +62,16 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
                             u32 nr_elems)
 {
     struct pcpu_freelist_head *head;
-    unsigned long flags;
     int i, cpu, pcpu_entries;
 
     pcpu_entries = nr_elems / num_possible_cpus() + 1;
     i = 0;
 
-    /* disable irq to workaround lockdep false positive
-     * in bpf usage pcpu_freelist_populate() will never race
-     * with pcpu_freelist_push()
-     */
-    local_irq_save(flags);
     for_each_possible_cpu(cpu) {
 again:
         head = per_cpu_ptr(s->freelist, cpu);
-        ___pcpu_freelist_push(head, buf);
+        /* No locking required as this is not visible yet. */
+        pcpu_freelist_push_node(head, buf);
         i++;
         buf += elem_size;
         if (i == nr_elems)
@@ -78,7 +79,6 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
         if (i % pcpu_entries)
             goto again;
     }
-    local_irq_restore(flags);
 }
 
 struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
@@ -40,6 +40,9 @@ static void do_up_read(struct irq_work *entry)
 {
     struct stack_map_irq_work *work;
 
+    if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
+        return;
+
     work = container_of(entry, struct stack_map_irq_work, irq_work);
     up_read_non_owner(work->sem);
     work->sem = NULL;
@@ -288,10 +291,19 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
     struct stack_map_irq_work *work = NULL;
 
     if (irqs_disabled()) {
-        work = this_cpu_ptr(&up_read_work);
-        if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
-            /* cannot queue more up_read, fallback */
+        if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+            work = this_cpu_ptr(&up_read_work);
+            if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) {
+                /* cannot queue more up_read, fallback */
+                irq_work_busy = true;
+            }
+        } else {
+            /*
+             * PREEMPT_RT does not allow to trylock mmap sem in
+             * interrupt disabled context. Force the fallback code.
+             */
             irq_work_busy = true;
+        }
     }
 
     /*
@@ -171,11 +171,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
                                 flags);
     }
 
-    /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
-     * inside bpf map update or delete otherwise deadlocks are possible
-     */
-    preempt_disable();
-    __this_cpu_inc(bpf_prog_active);
+    bpf_disable_instrumentation();
     if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
         map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
         err = bpf_percpu_hash_update(map, key, value, flags);
@@ -206,8 +202,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
         err = map->ops->map_update_elem(map, key, value, flags);
         rcu_read_unlock();
     }
-    __this_cpu_dec(bpf_prog_active);
-    preempt_enable();
+    bpf_enable_instrumentation();
     maybe_wait_bpf_programs(map);
 
     return err;
@@ -222,8 +217,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
     if (bpf_map_is_dev_bound(map))
         return bpf_map_offload_lookup_elem(map, key, value);
 
-    preempt_disable();
-    this_cpu_inc(bpf_prog_active);
+    bpf_disable_instrumentation();
     if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
         map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
         err = bpf_percpu_hash_copy(map, key, value);
@@ -268,8 +262,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
         rcu_read_unlock();
     }
 
-    this_cpu_dec(bpf_prog_active);
-    preempt_enable();
+    bpf_enable_instrumentation();
     maybe_wait_bpf_programs(map);
 
     return err;
@@ -909,6 +902,21 @@ void bpf_map_inc_with_uref(struct bpf_map *map)
 }
 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
 
+struct bpf_map *bpf_map_get(u32 ufd)
+{
+    struct fd f = fdget(ufd);
+    struct bpf_map *map;
+
+    map = __bpf_map_get(f);
+    if (IS_ERR(map))
+        return map;
+
+    bpf_map_inc(map);
+    fdput(f);
+
+    return map;
+}
+
 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 {
     struct fd f = fdget(ufd);
@@ -1136,13 +1144,11 @@ static int map_delete_elem(union bpf_attr *attr)
         goto out;
     }
 
-    preempt_disable();
-    __this_cpu_inc(bpf_prog_active);
+    bpf_disable_instrumentation();
     rcu_read_lock();
     err = map->ops->map_delete_elem(map, key);
     rcu_read_unlock();
-    __this_cpu_dec(bpf_prog_active);
-    preempt_enable();
+    bpf_enable_instrumentation();
     maybe_wait_bpf_programs(map);
 out:
     kfree(key);
@@ -1254,13 +1260,11 @@ int generic_map_delete_batch(struct bpf_map *map,
             break;
         }
 
-        preempt_disable();
-        __this_cpu_inc(bpf_prog_active);
+        bpf_disable_instrumentation();
         rcu_read_lock();
         err = map->ops->map_delete_elem(map, key);
         rcu_read_unlock();
-        __this_cpu_dec(bpf_prog_active);
-        preempt_enable();
+        bpf_enable_instrumentation();
         maybe_wait_bpf_programs(map);
         if (err)
             break;
@@ -367,8 +367,9 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
     mutex_unlock(&trampoline_mutex);
 }
 
-/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that
- * are needed for trampoline. The macro is split into
+/* The logic is similar to BPF_PROG_RUN, but with an explicit
+ * rcu_read_lock() and migrate_disable() which are required
+ * for the trampoline. The macro is split into
  * call _bpf_prog_enter
  * call prog->bpf_func
  * call __bpf_prog_exit
@@ -378,7 +379,7 @@ u64 notrace __bpf_prog_enter(void)
     u64 start = 0;
 
     rcu_read_lock();
-    preempt_disable();
+    migrate_disable();
     if (static_branch_unlikely(&bpf_stats_enabled_key))
         start = sched_clock();
     return start;
@ -401,7 +402,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
|
|||||||
stats->nsecs += sched_clock() - start;
|
stats->nsecs += sched_clock() - start;
|
||||||
u64_stats_update_end(&stats->syncp);
|
u64_stats_update_end(&stats->syncp);
|
||||||
}
|
}
|
||||||
preempt_enable();
|
migrate_enable();
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
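For illustration only (not part of the patch): the comment above describes the enter/run/exit sequence that the BPF trampoline generates. A hedged C sketch of that sequence follows; the real trampoline is emitted as machine code, and only the helper names __bpf_prog_enter()/__bpf_prog_exit() are taken from the hunks above, the wrapper function itself is invented.

	/* Illustration only: roughly what the generated trampoline does. */
	static unsigned int trampoline_run_sketch(struct bpf_prog *prog, u64 *args)
	{
		unsigned int ret;
		u64 start;

		start = __bpf_prog_enter();	/* rcu_read_lock() + migrate_disable() */
		ret = prog->bpf_func(args, prog->insnsi);
		__bpf_prog_exit(prog, start);	/* stats, migrate_enable(), rcu_read_unlock() */
		return ret;
	}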
@@ -8143,26 +8143,48 @@ static bool is_tracing_prog_type(enum bpf_prog_type type)
 	}
 }
 
+static bool is_preallocated_map(struct bpf_map *map)
+{
+	if (!check_map_prealloc(map))
+		return false;
+	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
+		return false;
+	return true;
+}
+
 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 					struct bpf_map *map,
 					struct bpf_prog *prog)
 
 {
-	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
-	 * preallocated hash maps, since doing memory allocation
-	 * in overflow_handler can crash depending on where nmi got
-	 * triggered.
+	/*
+	 * Validate that trace type programs use preallocated hash maps.
+	 *
+	 * For programs attached to PERF events this is mandatory as the
+	 * perf NMI can hit any arbitrary code sequence.
+	 *
+	 * All other trace types using preallocated hash maps are unsafe as
+	 * well because tracepoint or kprobes can be inside locked regions
+	 * of the memory allocator or at a place where a recursion into the
+	 * memory allocator would see inconsistent state.
+	 *
+	 * On RT enabled kernels run-time allocation of all trace type
+	 * programs is strictly prohibited due to lock type constraints. On
+	 * !RT kernels it is allowed for backwards compatibility reasons for
+	 * now, but warnings are emitted so developers are made aware of
+	 * the unsafety and can fix their programs before this is enforced.
 	 */
-	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
-		if (!check_map_prealloc(map)) {
+	if (is_tracing_prog_type(prog->type) && !is_preallocated_map(map)) {
+		if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
 			verbose(env, "perf_event programs can only use preallocated hash map\n");
 			return -EINVAL;
 		}
-		if (map->inner_map_meta &&
-		    !check_map_prealloc(map->inner_map_meta)) {
-			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
+		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+			verbose(env, "trace type programs can only use preallocated hash map\n");
 			return -EINVAL;
 		}
+		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
+		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
 	}
 
 	if ((is_tracing_prog_type(prog->type) ||
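For illustration only (not part of the patch): the verifier change above warns when a trace type program uses a run-time allocated hash map. A hedged user-side sketch of the preallocated alternative, written in the libbpf map-definition style common in this period; the map name and sizes are invented:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Leaving .map_flags unset (i.e. no BPF_F_NO_PREALLOC) keeps the hash
	 * map preallocated, which is what the new verifier message asks
	 * tracing programs to use.
	 */
	struct bpf_map_def SEC("maps") event_counts = {
		.type        = BPF_MAP_TYPE_HASH,
		.key_size    = sizeof(__u32),
		.value_size  = sizeof(__u64),
		.max_entries = 1024,
	};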
@@ -9206,7 +9206,6 @@ static void bpf_overflow_handler(struct perf_event *event,
 	int ret = 0;
 
 	ctx.regs = perf_arch_bpf_user_pt_regs(regs);
-	preempt_disable();
 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
 		goto out;
 	rcu_read_lock();
@@ -9214,7 +9213,6 @@ static void bpf_overflow_handler(struct perf_event *event,
 	rcu_read_unlock();
 out:
 	__this_cpu_dec(bpf_prog_active);
-	preempt_enable();
 	if (!ret)
 		return;
 
@@ -268,16 +268,14 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
-	preempt_disable();
 	for (; f; f = f->prev) {
-		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
+		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);
 
 		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
 			ret = cur_ret;
 			*match = f;
 		}
 	}
-	preempt_enable();
 	return ret;
 }
 #endif /* CONFIG_SECCOMP_FILTER */
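For illustration only (not part of the patch): the seccomp hunk above, like several later call sites in this series (skmsg, kcm, flow dissector), replaces an open-coded preempt_disable()/BPF_PROG_RUN()/preempt_enable() sequence with bpf_prog_run_pin_on_cpu(). A hedged sketch of what such a helper amounts to, inferred from these conversions rather than quoted from the patch:

	/* Sketch: run a BPF program pinned to the current CPU. migrate_disable()
	 * keeps the task on this CPU without having to disable preemption,
	 * which matters on PREEMPT_RT.
	 */
	static inline u32 prog_run_pin_on_cpu_sketch(const struct bpf_prog *prog,
						     const void *ctx)
	{
		u32 ret;

		migrate_disable();
		ret = BPF_PROG_RUN(prog, ctx);
		migrate_enable();
		return ret;
	}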
@@ -83,7 +83,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 	if (in_nmi()) /* not supported yet */
 		return 1;
 
-	preempt_disable();
+	cant_sleep();
 
 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
 		/*
@@ -115,11 +115,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 
 out:
 	__this_cpu_dec(bpf_prog_active);
-	preempt_enable();
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(trace_call_bpf);
 
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
@@ -1516,10 +1514,9 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
 static __always_inline
 void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
 {
+	cant_sleep();
 	rcu_read_lock();
-	preempt_disable();
 	(void) BPF_PROG_RUN(prog, args);
-	preempt_enable();
 	rcu_read_unlock();
 }
 
@@ -1333,8 +1333,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
 	int size, esize;
 	int rctx;
 
-	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
-		return;
+	if (bpf_prog_array_valid(call)) {
+		u32 ret;
+
+		preempt_disable();
+		ret = trace_call_bpf(call, regs);
+		preempt_enable();
+		if (!ret)
+			return;
+	}
 
 	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 
@@ -6660,14 +6660,14 @@ static int __run_one(const struct bpf_prog *fp, const void *data,
 	u64 start, finish;
 	int ret = 0, i;
 
-	preempt_disable();
+	migrate_disable();
 	start = ktime_get_ns();
 
 	for (i = 0; i < runs; i++)
 		ret = BPF_PROG_RUN(fp, data);
 
 	finish = ktime_get_ns();
-	preempt_enable();
+	migrate_enable();
 
 	*duration = finish - start;
 	do_div(*duration, runs);
@@ -37,7 +37,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 		repeat = 1;
 
 	rcu_read_lock();
-	preempt_disable();
+	migrate_disable();
 	time_start = ktime_get_ns();
 	for (i = 0; i < repeat; i++) {
 		bpf_cgroup_storage_set(storage);
@@ -54,18 +54,18 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 		if (need_resched()) {
 			time_spent += ktime_get_ns() - time_start;
-			preempt_enable();
+			migrate_enable();
 			rcu_read_unlock();
 
 			cond_resched();
 
 			rcu_read_lock();
-			preempt_disable();
+			migrate_disable();
 			time_start = ktime_get_ns();
 		}
 	}
 	time_spent += ktime_get_ns() - time_start;
-	preempt_enable();
+	migrate_enable();
 	rcu_read_unlock();
 
 	do_div(time_spent, repeat);
@@ -8,6 +8,7 @@
 #include <linux/bpf.h>
 #include <net/bpf_sk_storage.h>
 #include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
 
 static atomic_t cache_idx;
@@ -606,6 +607,14 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 	kfree(map);
 }
 
+/* U16_MAX is much more than enough for sk local storage
+ * considering a tcp_sock is ~2k.
+ */
+#define MAX_VALUE_SIZE	\
+	min_t(u32,	\
+	      (KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem)),	\
+	      (U16_MAX - sizeof(struct bpf_sk_storage_elem)))
+
 static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
@@ -619,12 +628,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (attr->value_size >= KMALLOC_MAX_SIZE -
-	    MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) ||
-	    /* U16_MAX is much more than enough for sk local storage
-	     * considering a tcp_sock is ~2k.
-	     */
-	    attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem))
+	if (attr->value_size > MAX_VALUE_SIZE)
 		return -E2BIG;
 
 	return 0;
@@ -910,3 +914,270 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_SOCKET,
 };
+
+struct bpf_sk_storage_diag {
+	u32 nr_maps;
+	struct bpf_map *maps[];
+};
+
+/* The reply will be like:
+ * INET_DIAG_BPF_SK_STORAGES (nla_nest)
+ *	SK_DIAG_BPF_STORAGE (nla_nest)
+ *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
+ *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
+ *	SK_DIAG_BPF_STORAGE (nla_nest)
+ *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
+ *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
+ *	....
+ */
+static int nla_value_size(u32 value_size)
+{
+	/* SK_DIAG_BPF_STORAGE (nla_nest)
+	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
+	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
+	 */
+	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
+		nla_total_size_64bit(value_size);
+}
+
+void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
+{
+	u32 i;
+
+	if (!diag)
+		return;
+
+	for (i = 0; i < diag->nr_maps; i++)
+		bpf_map_put(diag->maps[i]);
+
+	kfree(diag);
+}
+EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);
+
+static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
+			   const struct bpf_map *map)
+{
+	u32 i;
+
+	for (i = 0; i < diag->nr_maps; i++) {
+		if (diag->maps[i] == map)
+			return true;
+	}
+
+	return false;
+}
+
+struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
+{
+	struct bpf_sk_storage_diag *diag;
+	struct nlattr *nla;
+	u32 nr_maps = 0;
+	int rem, err;
+
+	/* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as
+	 * the map_alloc_check() side also does.
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	nla_for_each_nested(nla, nla_stgs, rem) {
+		if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+			nr_maps++;
+	}
+
+	diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps,
+		       GFP_KERNEL);
+	if (!diag)
+		return ERR_PTR(-ENOMEM);
+
+	nla_for_each_nested(nla, nla_stgs, rem) {
+		struct bpf_map *map;
+		int map_fd;
+
+		if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+			continue;
+
+		map_fd = nla_get_u32(nla);
+		map = bpf_map_get(map_fd);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
+			goto err_free;
+		}
+		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
+			bpf_map_put(map);
+			err = -EINVAL;
+			goto err_free;
+		}
+		if (diag_check_dup(diag, map)) {
+			bpf_map_put(map);
+			err = -EEXIST;
+			goto err_free;
+		}
+		diag->maps[diag->nr_maps++] = map;
+	}
+
+	return diag;
+
+err_free:
+	bpf_sk_storage_diag_free(diag);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
+
+static int diag_get(struct bpf_sk_storage_data *sdata, struct sk_buff *skb)
+{
+	struct nlattr *nla_stg, *nla_value;
+	struct bpf_sk_storage_map *smap;
+
+	/* It cannot exceed max nlattr's payload */
+	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < MAX_VALUE_SIZE);
+
+	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
+	if (!nla_stg)
+		return -EMSGSIZE;
+
+	smap = rcu_dereference(sdata->smap);
+	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
+		goto errout;
+
+	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
+				      smap->map.value_size,
+				      SK_DIAG_BPF_STORAGE_PAD);
+	if (!nla_value)
+		goto errout;
+
+	if (map_value_has_spin_lock(&smap->map))
+		copy_map_value_locked(&smap->map, nla_data(nla_value),
+				      sdata->data, true);
+	else
+		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
+
+	nla_nest_end(skb, nla_stg);
+	return 0;
+
+errout:
+	nla_nest_cancel(skb, nla_stg);
+	return -EMSGSIZE;
+}
+
+static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
+				       int stg_array_type,
+				       unsigned int *res_diag_size)
+{
+	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
+	unsigned int diag_size = nla_total_size(0);
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_elem *selem;
+	struct bpf_sk_storage_map *smap;
+	struct nlattr *nla_stgs;
+	unsigned int saved_len;
+	int err = 0;
+
+	rcu_read_lock();
+
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+	if (!sk_storage || hlist_empty(&sk_storage->list)) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	nla_stgs = nla_nest_start(skb, stg_array_type);
+	if (!nla_stgs)
+		/* Continue to learn diag_size */
+		err = -EMSGSIZE;
+
+	saved_len = skb->len;
+	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+		smap = rcu_dereference(SDATA(selem)->smap);
+		diag_size += nla_value_size(smap->map.value_size);
+
+		if (nla_stgs && diag_get(SDATA(selem), skb))
+			/* Continue to learn diag_size */
+			err = -EMSGSIZE;
+	}
+
+	rcu_read_unlock();
+
+	if (nla_stgs) {
+		if (saved_len == skb->len)
+			nla_nest_cancel(skb, nla_stgs);
+		else
+			nla_nest_end(skb, nla_stgs);
+	}
+
+	if (diag_size == nla_total_size(0)) {
+		*res_diag_size = 0;
+		return 0;
+	}
+
+	*res_diag_size = diag_size;
+	return err;
+}
+
+int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+			    struct sock *sk, struct sk_buff *skb,
+			    int stg_array_type,
+			    unsigned int *res_diag_size)
+{
+	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
+	unsigned int diag_size = nla_total_size(0);
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_data *sdata;
+	struct nlattr *nla_stgs;
+	unsigned int saved_len;
+	int err = 0;
+	u32 i;
+
+	*res_diag_size = 0;
+
+	/* No map has been specified. Dump all. */
+	if (!diag->nr_maps)
+		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
+						   res_diag_size);
+
+	rcu_read_lock();
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+	if (!sk_storage || hlist_empty(&sk_storage->list)) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	nla_stgs = nla_nest_start(skb, stg_array_type);
+	if (!nla_stgs)
+		/* Continue to learn diag_size */
+		err = -EMSGSIZE;
+
+	saved_len = skb->len;
+	for (i = 0; i < diag->nr_maps; i++) {
+		sdata = __sk_storage_lookup(sk_storage,
+				(struct bpf_sk_storage_map *)diag->maps[i],
+				false);
+
+		if (!sdata)
+			continue;
+
+		diag_size += nla_value_size(diag->maps[i]->value_size);
+
+		if (nla_stgs && diag_get(sdata, skb))
+			/* Continue to learn diag_size */
+			err = -EMSGSIZE;
+	}
+	rcu_read_unlock();
+
+	if (nla_stgs) {
+		if (saved_len == skb->len)
+			nla_nest_cancel(skb, nla_stgs);
+		else
+			nla_nest_end(skb, nla_stgs);
+	}
+
+	if (diag_size == nla_total_size(0)) {
+		*res_diag_size = 0;
+		return 0;
+	}
+
+	*res_diag_size = diag_size;
+	return err;
+}
+EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
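For illustration only (not part of the patch): the bpf_sk_storage hunk above introduces a small diag API (alloc, put, free). A hedged C sketch of how a sock_diag module is expected to drive it, using only the functions and the INET_DIAG_SK_BPF_STORAGES attribute visible in this series; the wrapper function name and the simplified error handling are invented:

	static int dump_sk_storage_sketch(const struct nlattr *req_stgs,
					  struct sock *sk, struct sk_buff *skb)
	{
		struct bpf_sk_storage_diag *diag;
		unsigned int res_size = 0;
		int err;

		/* Parse the requested map FDs once per dump. */
		diag = bpf_sk_storage_diag_alloc(req_stgs);
		if (IS_ERR(diag))
			return PTR_ERR(diag);

		/* Emit the nested sk storage attributes for one socket. */
		err = bpf_sk_storage_diag_put(diag, sk, skb,
					      INET_DIAG_SK_BPF_STORAGES,
					      &res_size);

		bpf_sk_storage_diag_free(diag);
		return err;
	}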
@ -920,9 +920,7 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
|
|||||||
(int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
|
(int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
|
||||||
flow_keys->flags = flags;
|
flow_keys->flags = flags;
|
||||||
|
|
||||||
preempt_disable();
|
result = bpf_prog_run_pin_on_cpu(prog, ctx);
|
||||||
result = BPF_PROG_RUN(prog, ctx);
|
|
||||||
preempt_enable();
|
|
||||||
|
|
||||||
flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
|
flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
|
||||||
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
|
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
|
||||||
|
@ -628,7 +628,6 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
|
|||||||
struct bpf_prog *prog;
|
struct bpf_prog *prog;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
preempt_disable();
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
prog = READ_ONCE(psock->progs.msg_parser);
|
prog = READ_ONCE(psock->progs.msg_parser);
|
||||||
if (unlikely(!prog)) {
|
if (unlikely(!prog)) {
|
||||||
@ -638,7 +637,7 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
|
|||||||
|
|
||||||
sk_msg_compute_data_pointers(msg);
|
sk_msg_compute_data_pointers(msg);
|
||||||
msg->sk = sk;
|
msg->sk = sk;
|
||||||
ret = BPF_PROG_RUN(prog, msg);
|
ret = bpf_prog_run_pin_on_cpu(prog, msg);
|
||||||
ret = sk_psock_map_verd(ret, msg->sk_redir);
|
ret = sk_psock_map_verd(ret, msg->sk_redir);
|
||||||
psock->apply_bytes = msg->apply_bytes;
|
psock->apply_bytes = msg->apply_bytes;
|
||||||
if (ret == __SK_REDIRECT) {
|
if (ret == __SK_REDIRECT) {
|
||||||
@ -653,7 +652,6 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
|
|||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
preempt_enable();
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
|
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
|
||||||
@ -665,9 +663,7 @@ static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
|
|||||||
|
|
||||||
skb->sk = psock->sk;
|
skb->sk = psock->sk;
|
||||||
bpf_compute_data_end_sk_skb(skb);
|
bpf_compute_data_end_sk_skb(skb);
|
||||||
preempt_disable();
|
ret = bpf_prog_run_pin_on_cpu(prog, skb);
|
||||||
ret = BPF_PROG_RUN(prog, skb);
|
|
||||||
preempt_enable();
|
|
||||||
/* strparser clones the skb before handing it to a upper layer,
|
/* strparser clones the skb before handing it to a upper layer,
|
||||||
* meaning skb_orphan has been called. We NULL sk on the way out
|
* meaning skb_orphan has been called. We NULL sk on the way out
|
||||||
* to ensure we don't trigger a BUG_ON() in skb/sk operations
|
* to ensure we don't trigger a BUG_ON() in skb/sk operations
|
||||||
|
@ -46,16 +46,15 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r, struct nlattr *bc)
|
const struct inet_diag_req_v2 *r)
|
||||||
{
|
{
|
||||||
inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
|
inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int dccp_diag_dump_one(struct sk_buff *in_skb,
|
static int dccp_diag_dump_one(struct netlink_callback *cb,
|
||||||
const struct nlmsghdr *nlh,
|
|
||||||
const struct inet_diag_req_v2 *req)
|
const struct inet_diag_req_v2 *req)
|
||||||
{
|
{
|
||||||
return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
|
return inet_diag_dump_one_icsk(&dccp_hashinfo, cb, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct inet_diag_handler dccp_diag_handler = {
|
static const struct inet_diag_handler dccp_diag_handler = {
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include <net/inet_hashtables.h>
|
#include <net/inet_hashtables.h>
|
||||||
#include <net/inet_timewait_sock.h>
|
#include <net/inet_timewait_sock.h>
|
||||||
#include <net/inet6_hashtables.h>
|
#include <net/inet6_hashtables.h>
|
||||||
|
#include <net/bpf_sk_storage.h>
|
||||||
#include <net/netlink.h>
|
#include <net/netlink.h>
|
||||||
|
|
||||||
#include <linux/inet.h>
|
#include <linux/inet.h>
|
||||||
@ -156,26 +157,28 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
|
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
|
||||||
|
|
||||||
|
#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
|
||||||
|
|
||||||
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
||||||
struct sk_buff *skb, const struct inet_diag_req_v2 *req,
|
struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
struct user_namespace *user_ns,
|
const struct inet_diag_req_v2 *req,
|
||||||
u32 portid, u32 seq, u16 nlmsg_flags,
|
u16 nlmsg_flags, bool net_admin)
|
||||||
const struct nlmsghdr *unlh,
|
|
||||||
bool net_admin)
|
|
||||||
{
|
{
|
||||||
const struct tcp_congestion_ops *ca_ops;
|
const struct tcp_congestion_ops *ca_ops;
|
||||||
const struct inet_diag_handler *handler;
|
const struct inet_diag_handler *handler;
|
||||||
|
struct inet_diag_dump_data *cb_data;
|
||||||
int ext = req->idiag_ext;
|
int ext = req->idiag_ext;
|
||||||
struct inet_diag_msg *r;
|
struct inet_diag_msg *r;
|
||||||
struct nlmsghdr *nlh;
|
struct nlmsghdr *nlh;
|
||||||
struct nlattr *attr;
|
struct nlattr *attr;
|
||||||
void *info = NULL;
|
void *info = NULL;
|
||||||
|
|
||||||
|
cb_data = cb->data;
|
||||||
handler = inet_diag_table[req->sdiag_protocol];
|
handler = inet_diag_table[req->sdiag_protocol];
|
||||||
BUG_ON(!handler);
|
BUG_ON(!handler);
|
||||||
|
|
||||||
nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
|
||||||
nlmsg_flags);
|
cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
|
||||||
if (!nlh)
|
if (!nlh)
|
||||||
return -EMSGSIZE;
|
return -EMSGSIZE;
|
||||||
|
|
||||||
@ -187,7 +190,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
|||||||
r->idiag_timer = 0;
|
r->idiag_timer = 0;
|
||||||
r->idiag_retrans = 0;
|
r->idiag_retrans = 0;
|
||||||
|
|
||||||
if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
|
if (inet_diag_msg_attrs_fill(sk, skb, r, ext,
|
||||||
|
sk_user_ns(NETLINK_CB(cb->skb).sk),
|
||||||
|
net_admin))
|
||||||
goto errout;
|
goto errout;
|
||||||
|
|
||||||
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
|
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
|
||||||
@ -302,6 +307,48 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
|||||||
goto errout;
|
goto errout;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Keep it at the end for potential retry with a larger skb,
|
||||||
|
* or else do best-effort fitting, which is only done for the
|
||||||
|
* first_nlmsg.
|
||||||
|
*/
|
||||||
|
if (cb_data->bpf_stg_diag) {
|
||||||
|
bool first_nlmsg = ((unsigned char *)nlh == skb->data);
|
||||||
|
unsigned int prev_min_dump_alloc;
|
||||||
|
unsigned int total_nla_size = 0;
|
||||||
|
unsigned int msg_len;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
|
||||||
|
err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb,
|
||||||
|
INET_DIAG_SK_BPF_STORAGES,
|
||||||
|
&total_nla_size);
|
||||||
|
|
||||||
|
if (!err)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
total_nla_size += msg_len;
|
||||||
|
prev_min_dump_alloc = cb->min_dump_alloc;
|
||||||
|
if (total_nla_size > prev_min_dump_alloc)
|
||||||
|
cb->min_dump_alloc = min_t(u32, total_nla_size,
|
||||||
|
MAX_DUMP_ALLOC_SIZE);
|
||||||
|
|
||||||
|
if (!first_nlmsg)
|
||||||
|
goto errout;
|
||||||
|
|
||||||
|
if (cb->min_dump_alloc > prev_min_dump_alloc)
|
||||||
|
/* Retry with pskb_expand_head() with
|
||||||
|
* __GFP_DIRECT_RECLAIM
|
||||||
|
*/
|
||||||
|
goto errout;
|
||||||
|
|
||||||
|
WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc);
|
||||||
|
|
||||||
|
/* Send what we have for this sk
|
||||||
|
* and move on to the next sk in the following
|
||||||
|
* dump()
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
nlmsg_end(skb, nlh);
|
nlmsg_end(skb, nlh);
|
||||||
return 0;
|
return 0;
|
||||||
@ -312,30 +359,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
|
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
|
||||||
|
|
||||||
static int inet_csk_diag_fill(struct sock *sk,
|
|
||||||
struct sk_buff *skb,
|
|
||||||
const struct inet_diag_req_v2 *req,
|
|
||||||
struct user_namespace *user_ns,
|
|
||||||
u32 portid, u32 seq, u16 nlmsg_flags,
|
|
||||||
const struct nlmsghdr *unlh,
|
|
||||||
bool net_admin)
|
|
||||||
{
|
|
||||||
return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
|
|
||||||
portid, seq, nlmsg_flags, unlh, net_admin);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int inet_twsk_diag_fill(struct sock *sk,
|
static int inet_twsk_diag_fill(struct sock *sk,
|
||||||
struct sk_buff *skb,
|
struct sk_buff *skb,
|
||||||
u32 portid, u32 seq, u16 nlmsg_flags,
|
struct netlink_callback *cb,
|
||||||
const struct nlmsghdr *unlh)
|
u16 nlmsg_flags)
|
||||||
{
|
{
|
||||||
struct inet_timewait_sock *tw = inet_twsk(sk);
|
struct inet_timewait_sock *tw = inet_twsk(sk);
|
||||||
struct inet_diag_msg *r;
|
struct inet_diag_msg *r;
|
||||||
struct nlmsghdr *nlh;
|
struct nlmsghdr *nlh;
|
||||||
long tmo;
|
long tmo;
|
||||||
|
|
||||||
nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
|
||||||
nlmsg_flags);
|
cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
|
||||||
|
sizeof(*r), nlmsg_flags);
|
||||||
if (!nlh)
|
if (!nlh)
|
||||||
return -EMSGSIZE;
|
return -EMSGSIZE;
|
||||||
|
|
||||||
@ -359,16 +395,16 @@ static int inet_twsk_diag_fill(struct sock *sk,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
|
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
|
||||||
u32 portid, u32 seq, u16 nlmsg_flags,
|
struct netlink_callback *cb,
|
||||||
const struct nlmsghdr *unlh, bool net_admin)
|
u16 nlmsg_flags, bool net_admin)
|
||||||
{
|
{
|
||||||
struct request_sock *reqsk = inet_reqsk(sk);
|
struct request_sock *reqsk = inet_reqsk(sk);
|
||||||
struct inet_diag_msg *r;
|
struct inet_diag_msg *r;
|
||||||
struct nlmsghdr *nlh;
|
struct nlmsghdr *nlh;
|
||||||
long tmo;
|
long tmo;
|
||||||
|
|
||||||
nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
|
||||||
nlmsg_flags);
|
cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
|
||||||
if (!nlh)
|
if (!nlh)
|
||||||
return -EMSGSIZE;
|
return -EMSGSIZE;
|
||||||
|
|
||||||
@ -397,21 +433,18 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
|
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
|
||||||
|
struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r,
|
const struct inet_diag_req_v2 *r,
|
||||||
struct user_namespace *user_ns,
|
u16 nlmsg_flags, bool net_admin)
|
||||||
u32 portid, u32 seq, u16 nlmsg_flags,
|
|
||||||
const struct nlmsghdr *unlh, bool net_admin)
|
|
||||||
{
|
{
|
||||||
if (sk->sk_state == TCP_TIME_WAIT)
|
if (sk->sk_state == TCP_TIME_WAIT)
|
||||||
return inet_twsk_diag_fill(sk, skb, portid, seq,
|
return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags);
|
||||||
nlmsg_flags, unlh);
|
|
||||||
|
|
||||||
if (sk->sk_state == TCP_NEW_SYN_RECV)
|
if (sk->sk_state == TCP_NEW_SYN_RECV)
|
||||||
return inet_req_diag_fill(sk, skb, portid, seq,
|
return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
|
||||||
nlmsg_flags, unlh, net_admin);
|
|
||||||
|
|
||||||
return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
|
return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
|
||||||
nlmsg_flags, unlh, net_admin);
|
net_admin);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct sock *inet_diag_find_one_icsk(struct net *net,
|
struct sock *inet_diag_find_one_icsk(struct net *net,
|
||||||
@ -459,10 +492,10 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
|
|||||||
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
|
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
|
||||||
|
|
||||||
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
|
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
|
||||||
struct sk_buff *in_skb,
|
struct netlink_callback *cb,
|
||||||
const struct nlmsghdr *nlh,
|
|
||||||
const struct inet_diag_req_v2 *req)
|
const struct inet_diag_req_v2 *req)
|
||||||
{
|
{
|
||||||
|
struct sk_buff *in_skb = cb->skb;
|
||||||
bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
|
bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
|
||||||
struct net *net = sock_net(in_skb->sk);
|
struct net *net = sock_net(in_skb->sk);
|
||||||
struct sk_buff *rep;
|
struct sk_buff *rep;
|
||||||
@ -479,10 +512,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = sk_diag_fill(sk, rep, req,
|
err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
|
||||||
sk_user_ns(NETLINK_CB(in_skb).sk),
|
|
||||||
NETLINK_CB(in_skb).portid,
|
|
||||||
nlh->nlmsg_seq, 0, nlh, net_admin);
|
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
WARN_ON(err == -EMSGSIZE);
|
WARN_ON(err == -EMSGSIZE);
|
||||||
nlmsg_free(rep);
|
nlmsg_free(rep);
|
||||||
@ -509,14 +539,21 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
|
|||||||
int err;
|
int err;
|
||||||
|
|
||||||
handler = inet_diag_lock_handler(req->sdiag_protocol);
|
handler = inet_diag_lock_handler(req->sdiag_protocol);
|
||||||
if (IS_ERR(handler))
|
if (IS_ERR(handler)) {
|
||||||
err = PTR_ERR(handler);
|
err = PTR_ERR(handler);
|
||||||
else if (cmd == SOCK_DIAG_BY_FAMILY)
|
} else if (cmd == SOCK_DIAG_BY_FAMILY) {
|
||||||
err = handler->dump_one(in_skb, nlh, req);
|
struct inet_diag_dump_data empty_dump_data = {};
|
||||||
else if (cmd == SOCK_DESTROY && handler->destroy)
|
struct netlink_callback cb = {
|
||||||
|
.nlh = nlh,
|
||||||
|
.skb = in_skb,
|
||||||
|
.data = &empty_dump_data,
|
||||||
|
};
|
||||||
|
err = handler->dump_one(&cb, req);
|
||||||
|
} else if (cmd == SOCK_DESTROY && handler->destroy) {
|
||||||
err = handler->destroy(in_skb, req);
|
err = handler->destroy(in_skb, req);
|
||||||
else
|
} else {
|
||||||
err = -EOPNOTSUPP;
|
err = -EOPNOTSUPP;
|
||||||
|
}
|
||||||
inet_diag_unlock_handler(handler);
|
inet_diag_unlock_handler(handler);
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
@ -847,23 +884,6 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
|
|||||||
return len == 0 ? 0 : -EINVAL;
|
return len == 0 ? 0 : -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int inet_csk_diag_dump(struct sock *sk,
|
|
||||||
struct sk_buff *skb,
|
|
||||||
struct netlink_callback *cb,
|
|
||||||
const struct inet_diag_req_v2 *r,
|
|
||||||
const struct nlattr *bc,
|
|
||||||
bool net_admin)
|
|
||||||
{
|
|
||||||
if (!inet_diag_bc_sk(bc, sk))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return inet_csk_diag_fill(sk, skb, r,
|
|
||||||
sk_user_ns(NETLINK_CB(cb->skb).sk),
|
|
||||||
NETLINK_CB(cb->skb).portid,
|
|
||||||
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh,
|
|
||||||
net_admin);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void twsk_build_assert(void)
|
static void twsk_build_assert(void)
|
||||||
{
|
{
|
||||||
BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
|
BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
|
||||||
@ -892,14 +912,17 @@ static void twsk_build_assert(void)
|
|||||||
|
|
||||||
void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
|
void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
|
||||||
struct netlink_callback *cb,
|
struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r, struct nlattr *bc)
|
const struct inet_diag_req_v2 *r)
|
||||||
{
|
{
|
||||||
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
||||||
|
struct inet_diag_dump_data *cb_data = cb->data;
|
||||||
struct net *net = sock_net(skb->sk);
|
struct net *net = sock_net(skb->sk);
|
||||||
u32 idiag_states = r->idiag_states;
|
u32 idiag_states = r->idiag_states;
|
||||||
int i, num, s_i, s_num;
|
int i, num, s_i, s_num;
|
||||||
|
struct nlattr *bc;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
|
||||||
|
bc = cb_data->inet_diag_nla_bc;
|
||||||
if (idiag_states & TCPF_SYN_RECV)
|
if (idiag_states & TCPF_SYN_RECV)
|
||||||
idiag_states |= TCPF_NEW_SYN_RECV;
|
idiag_states |= TCPF_NEW_SYN_RECV;
|
||||||
s_i = cb->args[1];
|
s_i = cb->args[1];
|
||||||
@ -935,8 +958,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
|
|||||||
r->id.idiag_sport)
|
r->id.idiag_sport)
|
||||||
goto next_listen;
|
goto next_listen;
|
||||||
|
|
||||||
if (inet_csk_diag_dump(sk, skb, cb, r,
|
if (!inet_diag_bc_sk(bc, sk))
|
||||||
bc, net_admin) < 0) {
|
goto next_listen;
|
||||||
|
|
||||||
|
if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
|
||||||
|
cb, r, NLM_F_MULTI,
|
||||||
|
net_admin) < 0) {
|
||||||
spin_unlock(&ilb->lock);
|
spin_unlock(&ilb->lock);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
@ -1014,11 +1041,8 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
|
|||||||
res = 0;
|
res = 0;
|
||||||
for (idx = 0; idx < accum; idx++) {
|
for (idx = 0; idx < accum; idx++) {
|
||||||
if (res >= 0) {
|
if (res >= 0) {
|
||||||
res = sk_diag_fill(sk_arr[idx], skb, r,
|
res = sk_diag_fill(sk_arr[idx], skb, cb, r,
|
||||||
sk_user_ns(NETLINK_CB(cb->skb).sk),
|
NLM_F_MULTI, net_admin);
|
||||||
NETLINK_CB(cb->skb).portid,
|
|
||||||
cb->nlh->nlmsg_seq, NLM_F_MULTI,
|
|
||||||
cb->nlh, net_admin);
|
|
||||||
if (res < 0)
|
if (res < 0)
|
||||||
num = num_arr[idx];
|
num = num_arr[idx];
|
||||||
}
|
}
|
||||||
@ -1042,31 +1066,101 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
|
|||||||
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
|
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
|
||||||
|
|
||||||
static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r,
|
const struct inet_diag_req_v2 *r)
|
||||||
struct nlattr *bc)
|
|
||||||
{
|
{
|
||||||
const struct inet_diag_handler *handler;
|
const struct inet_diag_handler *handler;
|
||||||
|
u32 prev_min_dump_alloc;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
|
again:
|
||||||
|
prev_min_dump_alloc = cb->min_dump_alloc;
|
||||||
handler = inet_diag_lock_handler(r->sdiag_protocol);
|
handler = inet_diag_lock_handler(r->sdiag_protocol);
|
||||||
if (!IS_ERR(handler))
|
if (!IS_ERR(handler))
|
||||||
handler->dump(skb, cb, r, bc);
|
handler->dump(skb, cb, r);
|
||||||
else
|
else
|
||||||
err = PTR_ERR(handler);
|
err = PTR_ERR(handler);
|
||||||
inet_diag_unlock_handler(handler);
|
inet_diag_unlock_handler(handler);
|
||||||
|
|
||||||
|
/* The skb is not large enough to fit one sk info and
|
||||||
|
* inet_sk_diag_fill() has requested for a larger skb.
|
||||||
|
*/
|
||||||
|
if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) {
|
||||||
|
err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL);
|
||||||
|
if (!err)
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
return err ? : skb->len;
|
return err ? : skb->len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
|
static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
|
||||||
{
|
{
|
||||||
int hdrlen = sizeof(struct inet_diag_req_v2);
|
return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh));
|
||||||
struct nlattr *bc = NULL;
|
}
|
||||||
|
|
||||||
if (nlmsg_attrlen(cb->nlh, hdrlen))
|
static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
|
||||||
bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
|
{
|
||||||
|
const struct nlmsghdr *nlh = cb->nlh;
|
||||||
|
struct inet_diag_dump_data *cb_data;
|
||||||
|
struct sk_buff *skb = cb->skb;
|
||||||
|
struct nlattr *nla;
|
||||||
|
int rem, err;
|
||||||
|
|
||||||
return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc);
|
cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
|
||||||
|
if (!cb_data)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen),
|
||||||
|
nlmsg_attrlen(nlh, hdrlen), rem) {
|
||||||
|
int type = nla_type(nla);
|
||||||
|
|
||||||
|
if (type < __INET_DIAG_REQ_MAX)
|
||||||
|
cb_data->req_nlas[type] = nla;
|
||||||
|
}
|
||||||
|
|
||||||
|
nla = cb_data->inet_diag_nla_bc;
|
||||||
|
if (nla) {
|
||||||
|
err = inet_diag_bc_audit(nla, skb);
|
||||||
|
if (err) {
|
||||||
|
kfree(cb_data);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nla = cb_data->inet_diag_nla_bpf_stgs;
|
||||||
|
if (nla) {
|
||||||
|
struct bpf_sk_storage_diag *bpf_stg_diag;
|
||||||
|
|
||||||
|
bpf_stg_diag = bpf_sk_storage_diag_alloc(nla);
|
||||||
|
if (IS_ERR(bpf_stg_diag)) {
|
||||||
|
kfree(cb_data);
|
||||||
|
return PTR_ERR(bpf_stg_diag);
|
||||||
|
}
|
||||||
|
cb_data->bpf_stg_diag = bpf_stg_diag;
|
||||||
|
}
|
||||||
|
|
||||||
|
cb->data = cb_data;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int inet_diag_dump_start(struct netlink_callback *cb)
|
||||||
|
{
|
||||||
|
return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int inet_diag_dump_start_compat(struct netlink_callback *cb)
|
||||||
|
{
|
||||||
|
return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int inet_diag_dump_done(struct netlink_callback *cb)
|
||||||
|
{
|
||||||
|
struct inet_diag_dump_data *cb_data = cb->data;
|
||||||
|
|
||||||
|
bpf_sk_storage_diag_free(cb_data->bpf_stg_diag);
|
||||||
|
kfree(cb->data);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int inet_diag_type2proto(int type)
|
static int inet_diag_type2proto(int type)
|
||||||
@ -1085,9 +1179,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb,
|
|||||||
struct netlink_callback *cb)
|
struct netlink_callback *cb)
|
||||||
{
|
{
|
||||||
struct inet_diag_req *rc = nlmsg_data(cb->nlh);
|
struct inet_diag_req *rc = nlmsg_data(cb->nlh);
|
||||||
int hdrlen = sizeof(struct inet_diag_req);
|
|
||||||
struct inet_diag_req_v2 req;
|
struct inet_diag_req_v2 req;
|
||||||
struct nlattr *bc = NULL;
|
|
||||||
|
|
||||||
req.sdiag_family = AF_UNSPEC; /* compatibility */
|
req.sdiag_family = AF_UNSPEC; /* compatibility */
|
||||||
req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
|
req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
|
||||||
@ -1095,10 +1187,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb,
|
|||||||
req.idiag_states = rc->idiag_states;
|
req.idiag_states = rc->idiag_states;
|
||||||
req.id = rc->id;
|
req.id = rc->id;
|
||||||
|
|
||||||
if (nlmsg_attrlen(cb->nlh, hdrlen))
|
return __inet_diag_dump(skb, cb, &req);
|
||||||
bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
|
|
||||||
|
|
||||||
return __inet_diag_dump(skb, cb, &req, bc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
|
static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
|
||||||
@ -1126,22 +1215,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (nlh->nlmsg_flags & NLM_F_DUMP) {
|
if (nlh->nlmsg_flags & NLM_F_DUMP) {
|
||||||
if (nlmsg_attrlen(nlh, hdrlen)) {
|
struct netlink_dump_control c = {
|
||||||
struct nlattr *attr;
|
.start = inet_diag_dump_start_compat,
|
||||||
int err;
|
.done = inet_diag_dump_done,
|
||||||
|
.dump = inet_diag_dump_compat,
|
||||||
attr = nlmsg_find_attr(nlh, hdrlen,
|
};
|
||||||
INET_DIAG_REQ_BYTECODE);
|
return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
|
||||||
err = inet_diag_bc_audit(attr, skb);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
{
|
|
||||||
struct netlink_dump_control c = {
|
|
||||||
.dump = inet_diag_dump_compat,
|
|
||||||
};
|
|
||||||
return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return inet_diag_get_exact_compat(skb, nlh);
|
return inet_diag_get_exact_compat(skb, nlh);
|
||||||
@ -1157,22 +1236,12 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
|
|||||||
|
|
||||||
if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
|
if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
|
||||||
h->nlmsg_flags & NLM_F_DUMP) {
|
h->nlmsg_flags & NLM_F_DUMP) {
|
||||||
if (nlmsg_attrlen(h, hdrlen)) {
|
struct netlink_dump_control c = {
|
||||||
struct nlattr *attr;
|
.start = inet_diag_dump_start,
|
||||||
int err;
|
.done = inet_diag_dump_done,
|
||||||
|
.dump = inet_diag_dump,
|
||||||
attr = nlmsg_find_attr(h, hdrlen,
|
};
|
||||||
INET_DIAG_REQ_BYTECODE);
|
return netlink_dump_start(net->diag_nlsk, skb, h, &c);
|
||||||
err = inet_diag_bc_audit(attr, skb);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
{
|
|
||||||
struct netlink_dump_control c = {
|
|
||||||
.dump = inet_diag_dump,
|
|
||||||
};
|
|
||||||
return netlink_dump_start(net->diag_nlsk, skb, h, &c);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
|
return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
|
||||||
|
@ -87,15 +87,16 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2
|
|||||||
return sk ? sk : ERR_PTR(-ENOENT);
|
return sk ? sk : ERR_PTR(-ENOENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int raw_diag_dump_one(struct sk_buff *in_skb,
|
static int raw_diag_dump_one(struct netlink_callback *cb,
|
||||||
const struct nlmsghdr *nlh,
|
|
||||||
const struct inet_diag_req_v2 *r)
|
const struct inet_diag_req_v2 *r)
|
||||||
{
|
{
|
||||||
struct net *net = sock_net(in_skb->sk);
|
struct sk_buff *in_skb = cb->skb;
|
||||||
struct sk_buff *rep;
|
struct sk_buff *rep;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
struct net *net;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
net = sock_net(in_skb->sk);
|
||||||
sk = raw_sock_get(net, r);
|
sk = raw_sock_get(net, r);
|
||||||
if (IS_ERR(sk))
|
if (IS_ERR(sk))
|
||||||
return PTR_ERR(sk);
|
return PTR_ERR(sk);
|
||||||
@ -108,10 +109,7 @@ static int raw_diag_dump_one(struct sk_buff *in_skb,
|
|||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = inet_sk_diag_fill(sk, NULL, rep, r,
|
err = inet_sk_diag_fill(sk, NULL, rep, cb, r, 0,
|
||||||
sk_user_ns(NETLINK_CB(in_skb).sk),
|
|
||||||
NETLINK_CB(in_skb).portid,
|
|
||||||
nlh->nlmsg_seq, 0, nlh,
|
|
||||||
netlink_net_capable(in_skb, CAP_NET_ADMIN));
|
netlink_net_capable(in_skb, CAP_NET_ADMIN));
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
|
|
||||||
@ -136,25 +134,25 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
|
|||||||
if (!inet_diag_bc_sk(bc, sk))
|
if (!inet_diag_bc_sk(bc, sk))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return inet_sk_diag_fill(sk, NULL, skb, r,
|
return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin);
|
||||||
sk_user_ns(NETLINK_CB(cb->skb).sk),
|
|
||||||
NETLINK_CB(cb->skb).portid,
|
|
||||||
cb->nlh->nlmsg_seq, NLM_F_MULTI,
|
|
||||||
cb->nlh, net_admin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r, struct nlattr *bc)
|
const struct inet_diag_req_v2 *r)
|
||||||
{
|
{
|
||||||
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
||||||
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
|
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
|
||||||
struct net *net = sock_net(skb->sk);
|
struct net *net = sock_net(skb->sk);
|
||||||
|
struct inet_diag_dump_data *cb_data;
|
||||||
int num, s_num, slot, s_slot;
|
int num, s_num, slot, s_slot;
|
||||||
struct sock *sk = NULL;
|
struct sock *sk = NULL;
|
||||||
|
struct nlattr *bc;
|
||||||
|
|
||||||
if (IS_ERR(hashinfo))
|
if (IS_ERR(hashinfo))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
cb_data = cb->data;
|
||||||
|
bc = cb_data->inet_diag_nla_bc;
|
||||||
s_slot = cb->args[0];
|
s_slot = cb->args[0];
|
||||||
num = s_num = cb->args[1];
|
num = s_num = cb->args[1];
|
||||||
|
|
||||||
|
@ -179,15 +179,15 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r, struct nlattr *bc)
|
const struct inet_diag_req_v2 *r)
|
||||||
{
|
{
|
||||||
inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
|
inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
|
static int tcp_diag_dump_one(struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *req)
|
const struct inet_diag_req_v2 *req)
|
||||||
{
|
{
|
||||||
return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
|
return inet_diag_dump_one_icsk(&tcp_hashinfo, cb, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_INET_DIAG_DESTROY
|
#ifdef CONFIG_INET_DIAG_DESTROY
|
||||||
|
@ -21,16 +21,15 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
|
|||||||
if (!inet_diag_bc_sk(bc, sk))
|
if (!inet_diag_bc_sk(bc, sk))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return inet_sk_diag_fill(sk, NULL, skb, req,
|
return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI,
|
||||||
sk_user_ns(NETLINK_CB(cb->skb).sk),
|
net_admin);
|
||||||
NETLINK_CB(cb->skb).portid,
|
|
||||||
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
|
static int udp_dump_one(struct udp_table *tbl,
|
||||||
const struct nlmsghdr *nlh,
|
struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *req)
|
const struct inet_diag_req_v2 *req)
|
||||||
{
|
{
|
||||||
|
struct sk_buff *in_skb = cb->skb;
|
||||||
int err = -EINVAL;
|
int err = -EINVAL;
|
||||||
struct sock *sk = NULL;
|
struct sock *sk = NULL;
|
||||||
struct sk_buff *rep;
|
struct sk_buff *rep;
|
||||||
@ -70,11 +69,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
|
|||||||
if (!rep)
|
if (!rep)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
err = inet_sk_diag_fill(sk, NULL, rep, req,
|
err = inet_sk_diag_fill(sk, NULL, rep, cb, req, 0,
|
||||||
sk_user_ns(NETLINK_CB(in_skb).sk),
|
netlink_net_capable(in_skb, CAP_NET_ADMIN));
|
||||||
NETLINK_CB(in_skb).portid,
|
|
||||||
nlh->nlmsg_seq, 0, nlh,
|
|
||||||
netlink_net_capable(in_skb, CAP_NET_ADMIN));
|
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
WARN_ON(err == -EMSGSIZE);
|
WARN_ON(err == -EMSGSIZE);
|
||||||
kfree_skb(rep);
|
kfree_skb(rep);
|
||||||
@ -93,12 +89,16 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
|
|||||||
|
|
||||||
static void udp_dump(struct udp_table *table, struct sk_buff *skb,
|
static void udp_dump(struct udp_table *table, struct sk_buff *skb,
|
||||||
struct netlink_callback *cb,
|
struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r, struct nlattr *bc)
|
const struct inet_diag_req_v2 *r)
|
||||||
{
|
{
|
||||||
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
||||||
struct net *net = sock_net(skb->sk);
|
struct net *net = sock_net(skb->sk);
|
||||||
|
struct inet_diag_dump_data *cb_data;
|
||||||
int num, s_num, slot, s_slot;
|
int num, s_num, slot, s_slot;
|
||||||
|
struct nlattr *bc;
|
||||||
|
|
||||||
|
cb_data = cb->data;
|
||||||
|
bc = cb_data->inet_diag_nla_bc;
|
||||||
s_slot = cb->args[0];
|
s_slot = cb->args[0];
|
||||||
num = s_num = cb->args[1];
|
num = s_num = cb->args[1];
|
||||||
|
|
||||||
@ -146,15 +146,15 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r, struct nlattr *bc)
|
const struct inet_diag_req_v2 *r)
|
||||||
{
|
{
|
||||||
udp_dump(&udp_table, skb, cb, r, bc);
|
udp_dump(&udp_table, skb, cb, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
|
static int udp_diag_dump_one(struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *req)
|
const struct inet_diag_req_v2 *req)
|
||||||
{
|
{
|
||||||
return udp_dump_one(&udp_table, in_skb, nlh, req);
|
return udp_dump_one(&udp_table, cb, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
|
static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
|
||||||
@ -249,16 +249,15 @@ static const struct inet_diag_handler udp_diag_handler = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *r,
|
const struct inet_diag_req_v2 *r)
|
||||||
struct nlattr *bc)
|
|
||||||
{
|
{
|
||||||
udp_dump(&udplite_table, skb, cb, r, bc);
|
udp_dump(&udplite_table, skb, cb, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
|
static int udplite_diag_dump_one(struct netlink_callback *cb,
|
||||||
const struct inet_diag_req_v2 *req)
|
const struct inet_diag_req_v2 *req)
|
||||||
{
|
{
|
||||||
return udp_dump_one(&udplite_table, in_skb, nlh, req);
|
return udp_dump_one(&udplite_table, cb, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct inet_diag_handler udplite_diag_handler = {
|
static const struct inet_diag_handler udplite_diag_handler = {
|
||||||
|
@ -380,9 +380,7 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
|
|||||||
struct bpf_prog *prog = psock->bpf_prog;
|
struct bpf_prog *prog = psock->bpf_prog;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
preempt_disable();
|
res = bpf_prog_run_pin_on_cpu(prog, skb);
|
||||||
res = BPF_PROG_RUN(prog, skb);
|
|
||||||
preempt_enable();
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -432,11 +432,12 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
 }
 
-static int sctp_diag_dump_one(struct sk_buff *in_skb,
-const struct nlmsghdr *nlh,
+static int sctp_diag_dump_one(struct netlink_callback *cb,
 const struct inet_diag_req_v2 *req)
 {
+struct sk_buff *in_skb = cb->skb;
 struct net *net = sock_net(in_skb->sk);
+const struct nlmsghdr *nlh = cb->nlh;
 union sctp_addr laddr, paddr;
 struct sctp_comm_param commp = {
 .skb = in_skb,
@@ -470,7 +471,7 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb,
 }
 
 static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-const struct inet_diag_req_v2 *r, struct nlattr *bc)
+const struct inet_diag_req_v2 *r)
 {
 u32 idiag_states = r->idiag_states;
 struct net *net = sock_net(skb->sk);
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0-only
 #
 # Copyright (C) 2018-2019 Netronome Systems, Inc.
@@ -19,19 +19,24 @@ SYNOPSIS
 FEATURE COMMANDS
 ================
 
-| **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]]
+| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**macros** [**prefix** *PREFIX*]]
 | **bpftool** **feature help**
 |
 | *COMPONENT* := { **kernel** | **dev** *NAME* }
 
 DESCRIPTION
 ===========
-**bpftool feature probe** [**kernel**] [**macros** [**prefix** *PREFIX*]]
+**bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]]
 Probe the running kernel and dump a number of eBPF-related
 parameters, such as availability of the **bpf()** system call,
 JIT status, eBPF program types availability, eBPF helper
 functions availability, and more.
 
+By default, bpftool **does not run probes** for
+**bpf_probe_write_user**\ () and **bpf_trace_printk**\()
+helpers which print warnings to kernel logs. To enable them
+and run all probes, the **full** keyword should be used.
+
 If the **macros** keyword (but not the **-j** option) is
 passed, a subset of the output is dumped as a list of
 **#define** macros that are ready to be included in a C
@@ -44,16 +49,12 @@ DESCRIPTION
 Keyword **kernel** can be omitted. If no probe target is
 specified, probing the kernel is the default behaviour.
 
-Note that when probed, some eBPF helpers (e.g.
-**bpf_trace_printk**\ () or **bpf_probe_write_user**\ ()) may
-print warnings to kernel logs.
-
-**bpftool feature probe dev** *NAME* [**macros** [**prefix** *PREFIX*]]
+**bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]]
 Probe network device for supported eBPF features and dump
 results to the console.
 
-The two keywords **macros** and **prefix** have the same
-role as when probing the kernel.
+The keywords **full**, **macros** and **prefix** have the
+same role as when probing the kernel.
 
 **bpftool feature help**
 Print short help message.
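To make the new keyword concrete, a few invocations implied by the documentation above (a sketch only: the macro prefix and device name are placeholders, and the exact set of probes reported depends on the running kernel and privileges):

    # default kernel probe; bpf_probe_write_user and bpf_trace_printk are skipped
    bpftool feature probe
    # run all probes, including the helpers that may log warnings to dmesg
    bpftool feature probe kernel full
    # dump the results as #define macros, with a custom prefix (FOO_ is illustrative)
    bpftool feature probe full macros prefix FOO_
    # probe a network device for offloadable features (eth0 is a placeholder)
    bpftool feature probe dev eth0 full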
@@ -42,7 +42,8 @@ PROG COMMANDS
 | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
 | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
 | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-| **cgroup/getsockopt** | **cgroup/setsockopt**
+| **cgroup/getsockopt** | **cgroup/setsockopt** |
+| **struct_ops** | **fentry** | **fexit** | **freplace**
 | }
 | *ATTACH_TYPE* := {
 | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
@@ -469,7 +469,8 @@ _bpftool()
 cgroup/recvmsg4 cgroup/recvmsg6 \
 cgroup/post_bind4 cgroup/post_bind6 \
 cgroup/sysctl cgroup/getsockopt \
-cgroup/setsockopt" -- \
+cgroup/setsockopt struct_ops \
+fentry fexit freplace" -- \
 "$cur" ) )
 return 0
 ;;
@@ -983,11 +984,12 @@ _bpftool()
 probe)
 [[ $prev == "prefix" ]] && return 0
 if _bpftool_search_list 'macros'; then
-COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) )
+_bpftool_once_attr 'prefix'
 else
 COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) )
 fi
 _bpftool_one_of_list 'kernel dev'
+_bpftool_once_attr 'full'
 return 0
 ;;
 *)
@@ -112,18 +112,12 @@ print_start_section(const char *json_title, const char *plain_title,
 }
 }
 
-static void
-print_end_then_start_section(const char *json_title, const char *plain_title,
-const char *define_comment,
-const char *define_prefix)
+static void print_end_section(void)
 {
 if (json_output)
 jsonw_end_object(json_wtr);
 else
 printf("\n");
-
-print_start_section(json_title, plain_title, define_comment,
-define_prefix);
 }
 
 /* Probing functions */
@@ -519,14 +513,39 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
 define_prefix);
 }
 
+static void
+probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
+const char *define_prefix, unsigned int id,
+const char *ptype_name, __u32 ifindex)
+{
+bool res;
+
+if (!supported_type)
+res = false;
+else
+res = bpf_probe_helper(id, prog_type, ifindex);
+
+if (json_output) {
+if (res)
+jsonw_string(json_wtr, helper_name[id]);
+} else if (define_prefix) {
+printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
+define_prefix, ptype_name, helper_name[id],
+res ? "1" : "0");
+} else {
+if (res)
+printf("\n\t- %s", helper_name[id]);
+}
+}
+
 static void
 probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
-const char *define_prefix, __u32 ifindex)
+const char *define_prefix, bool full_mode,
+__u32 ifindex)
 {
 const char *ptype_name = prog_type_name[prog_type];
 char feat_name[128];
 unsigned int id;
-bool res;
 
 if (ifindex)
 /* Only test helpers for offload-able program types */
@@ -548,21 +567,19 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
 }
 
 for (id = 1; id < ARRAY_SIZE(helper_name); id++) {
-if (!supported_type)
-res = false;
-else
-res = bpf_probe_helper(id, prog_type, ifindex);
-
-if (json_output) {
-if (res)
-jsonw_string(json_wtr, helper_name[id]);
-} else if (define_prefix) {
-printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
-define_prefix, ptype_name, helper_name[id],
-res ? "1" : "0");
-} else {
-if (res)
-printf("\n\t- %s", helper_name[id]);
+/* Skip helper functions which emit dmesg messages when not in
+ * the full mode.
+ */
+switch (id) {
+case BPF_FUNC_trace_printk:
+case BPF_FUNC_probe_write_user:
+if (!full_mode)
+continue;
+/* fallthrough */
+default:
+probe_helper_for_progtype(prog_type, supported_type,
+define_prefix, id, ptype_name,
+ifindex);
 }
 }
 
@@ -584,13 +601,132 @@ probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
 res, define_prefix);
 }
 
+static void
+section_system_config(enum probe_component target, const char *define_prefix)
+{
+switch (target) {
+case COMPONENT_KERNEL:
+case COMPONENT_UNSPEC:
+if (define_prefix)
+break;
+
+print_start_section("system_config",
+"Scanning system configuration...",
+NULL, /* define_comment never used here */
+NULL); /* define_prefix always NULL here */
+if (check_procfs()) {
+probe_unprivileged_disabled();
+probe_jit_enable();
+probe_jit_harden();
+probe_jit_kallsyms();
+probe_jit_limit();
+} else {
+p_info("/* procfs not mounted, skipping related probes */");
+}
+probe_kernel_image_config();
+print_end_section();
+break;
+default:
+break;
+}
+}
+
+static bool section_syscall_config(const char *define_prefix)
+{
+bool res;
+
+print_start_section("syscall_config",
+"Scanning system call availability...",
+"/*** System call availability ***/",
+define_prefix);
+res = probe_bpf_syscall(define_prefix);
+print_end_section();
+
+return res;
+}
+
+static void
+section_program_types(bool *supported_types, const char *define_prefix,
+__u32 ifindex)
+{
+unsigned int i;
+
+print_start_section("program_types",
+"Scanning eBPF program types...",
+"/*** eBPF program types ***/",
+define_prefix);
+
+for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+probe_prog_type(i, supported_types, define_prefix, ifindex);
+
+print_end_section();
+}
+
+static void section_map_types(const char *define_prefix, __u32 ifindex)
+{
+unsigned int i;
+
+print_start_section("map_types",
+"Scanning eBPF map types...",
+"/*** eBPF map types ***/",
+define_prefix);
+
+for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
+probe_map_type(i, define_prefix, ifindex);
+
+print_end_section();
+}
+
+static void
+section_helpers(bool *supported_types, const char *define_prefix,
+bool full_mode, __u32 ifindex)
+{
+unsigned int i;
+
+print_start_section("helpers",
+"Scanning eBPF helper functions...",
+"/*** eBPF helper functions ***/",
+define_prefix);
+
+if (define_prefix)
+printf("/*\n"
+" * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
+" * to determine if <helper_name> is available for <prog_type_name>,\n"
+" * e.g.\n"
+" * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
+" * // do stuff with this helper\n"
+" * #elif\n"
+" * // use a workaround\n"
+" * #endif\n"
+" */\n"
+"#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n"
+" %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
+define_prefix, define_prefix, define_prefix,
+define_prefix);
+for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+probe_helpers_for_progtype(i, supported_types[i],
+define_prefix, full_mode, ifindex);
+
+print_end_section();
+}
+
+static void section_misc(const char *define_prefix, __u32 ifindex)
+{
+print_start_section("misc",
+"Scanning miscellaneous eBPF features...",
+"/*** eBPF misc features ***/",
+define_prefix);
+probe_large_insn_limit(define_prefix, ifindex);
+print_end_section();
+}
+
 static int do_probe(int argc, char **argv)
 {
 enum probe_component target = COMPONENT_UNSPEC;
 const char *define_prefix = NULL;
 bool supported_types[128] = {};
+bool full_mode = false;
 __u32 ifindex = 0;
-unsigned int i;
 char *ifname;
 
 /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
@@ -629,6 +765,9 @@ static int do_probe(int argc, char **argv)
 strerror(errno));
 return -1;
 }
+} else if (is_prefix(*argv, "full")) {
+full_mode = true;
+NEXT_ARG();
 } else if (is_prefix(*argv, "macros") && !define_prefix) {
 define_prefix = "";
 NEXT_ARG();
@@ -658,97 +797,19 @@ static int do_probe(int argc, char **argv)
 jsonw_start_object(json_wtr);
 }
 
-switch (target) {
-case COMPONENT_KERNEL:
-case COMPONENT_UNSPEC:
-if (define_prefix)
-break;
-
-print_start_section("system_config",
-"Scanning system configuration...",
-NULL, /* define_comment never used here */
-NULL); /* define_prefix always NULL here */
-if (check_procfs()) {
-probe_unprivileged_disabled();
-probe_jit_enable();
-probe_jit_harden();
-probe_jit_kallsyms();
-probe_jit_limit();
-} else {
-p_info("/* procfs not mounted, skipping related probes */");
-}
-probe_kernel_image_config();
-if (json_output)
-jsonw_end_object(json_wtr);
-else
-printf("\n");
-break;
-default:
-break;
-}
-
-print_start_section("syscall_config",
-"Scanning system call availability...",
-"/*** System call availability ***/",
-define_prefix);
-
-if (!probe_bpf_syscall(define_prefix))
+section_system_config(target, define_prefix);
+if (!section_syscall_config(define_prefix))
 /* bpf() syscall unavailable, don't probe other BPF features */
 goto exit_close_json;
-
-print_end_then_start_section("program_types",
-"Scanning eBPF program types...",
-"/*** eBPF program types ***/",
-define_prefix);
-
-for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
-probe_prog_type(i, supported_types, define_prefix, ifindex);
-
-print_end_then_start_section("map_types",
-"Scanning eBPF map types...",
-"/*** eBPF map types ***/",
-define_prefix);
-
-for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
-probe_map_type(i, define_prefix, ifindex);
-
-print_end_then_start_section("helpers",
-"Scanning eBPF helper functions...",
-"/*** eBPF helper functions ***/",
-define_prefix);
-
-if (define_prefix)
-printf("/*\n"
-" * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
-" * to determine if <helper_name> is available for <prog_type_name>,\n"
-" * e.g.\n"
-" * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
-" * // do stuff with this helper\n"
-" * #elif\n"
-" * // use a workaround\n"
-" * #endif\n"
-" */\n"
-"#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n"
-" %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
-define_prefix, define_prefix, define_prefix,
-define_prefix);
-for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
-probe_helpers_for_progtype(i, supported_types[i],
-define_prefix, ifindex);
-
-print_end_then_start_section("misc",
-"Scanning miscellaneous eBPF features...",
-"/*** eBPF misc features ***/",
-define_prefix);
-probe_large_insn_limit(define_prefix, ifindex);
+section_program_types(supported_types, define_prefix, ifindex);
+section_map_types(define_prefix, ifindex);
+section_helpers(supported_types, define_prefix, full_mode, ifindex);
+section_misc(define_prefix, ifindex);
 
 exit_close_json:
-if (json_output) {
-/* End current "section" of probes */
-jsonw_end_object(json_wtr);
+if (json_output)
 /* End root object */
 jsonw_end_object(json_wtr);
-}
 
 return 0;
 }
@@ -761,7 +822,7 @@ static int do_help(int argc, char **argv)
 }
 
 fprintf(stderr,
-"Usage: %s %s probe [COMPONENT] [macros [prefix PREFIX]]\n"
+"Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n"
 " %s %s help\n"
 "\n"
 " COMPONENT := { kernel | dev NAME }\n"
@@ -76,6 +76,9 @@ static const char * const prog_type_name[] = {
 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+[BPF_PROG_TYPE_TRACING] = "tracing",
+[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+[BPF_PROG_TYPE_EXT] = "ext",
 };
 
 extern const char * const map_type_name[];
@@ -1573,8 +1573,8 @@ static int do_help(int argc, char **argv)
 " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 " cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
-" cgroup/recvmsg6 | cgroup/getsockopt |\n"
-" cgroup/setsockopt }\n"
+" cgroup/recvmsg6 | cgroup/getsockopt | cgroup/setsockopt |\n"
+" struct_ops | fentry | fexit | freplace }\n"
 " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 " flow_dissector }\n"
 " " HELP_SPEC_OPTIONS "\n"
 tools/testing/selftests/.gitignore (vendored) | 5
@@ -3,4 +3,7 @@ gpiogpio-hammer
 gpioinclude/
 gpiolsgpio
 tpm2/SpaceTest.log
-tpm2/*.pyc
+
+# Python bytecode and cache
+__pycache__/
+*.py[cod]
@@ -20,7 +20,7 @@ CLANG ?= clang
 LLC ?= llc
 LLVM_OBJCOPY ?= llvm-objcopy
 BPF_GCC ?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \
 -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \
 -Dbpf_prog_load=bpf_prog_test_load \
 -Dbpf_load_program=bpf_test_load_program
@@ -62,7 +62,8 @@ TEST_PROGS := test_kmod.sh \
 test_tc_tunnel.sh \
 test_tc_edt.sh \
 test_xdping.sh \
-test_bpftool_build.sh
+test_bpftool_build.sh \
+test_bpftool.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 with_tunnels.sh \
@@ -509,11 +509,6 @@ static void test_syncookie(int type, sa_family_t family)
 .pass_on_failure = 0,
 };
 
-if (type != SOCK_STREAM) {
-test__skip();
-return;
-}
-
 /*
 * +1 for TCP-SYN and
 * +1 for the TCP-ACK (ack the syncookie)
@@ -787,7 +782,7 @@ static const char *sotype_str(int sotype)
 }
 }
 
-#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
+#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
 
 static void test_config(int sotype, sa_family_t family, bool inany)
 {
@@ -795,19 +790,31 @@ static void test_config(int sotype, sa_family_t family, bool inany)
 void (*fn)(int sotype, sa_family_t family);
 const char *name;
 bool no_inner_map;
+int need_sotype;
 } tests[] = {
-TEST_INIT(test_err_inner_map, true /* no_inner_map */),
+TEST_INIT(test_err_inner_map,
+.no_inner_map = true),
 TEST_INIT(test_err_skb_data),
 TEST_INIT(test_err_sk_select_port),
 TEST_INIT(test_pass),
-TEST_INIT(test_syncookie),
+TEST_INIT(test_syncookie,
+.need_sotype = SOCK_STREAM),
 TEST_INIT(test_pass_on_err),
 TEST_INIT(test_detach_bpf),
 };
 char s[MAX_TEST_NAME];
 const struct test *t;
 
+/* SOCKMAP/SOCKHASH don't support UDP yet */
+if (sotype == SOCK_DGRAM &&
+(inner_map_type == BPF_MAP_TYPE_SOCKMAP ||
+inner_map_type == BPF_MAP_TYPE_SOCKHASH))
+return;
+
 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+if (t->need_sotype && t->need_sotype != sotype)
+continue; /* test not compatible with socket type */
+
 snprintf(s, sizeof(s), "%s %s/%s %s %s",
 maptype_str(inner_map_type),
 family_str(family), sotype_str(sotype),
@@ -816,13 +823,6 @@ static void test_config(int sotype, sa_family_t family, bool inany)
 if (!test__start_subtest(s))
 continue;
 
-if (sotype == SOCK_DGRAM &&
-inner_map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
-/* SOCKMAP/SOCKHASH don't support UDP yet */
-test__skip();
-continue;
-}
-
 setup_per_test(sotype, family, inany, t->no_inner_map);
 t->fn(sotype, family);
 cleanup_per_test(t->no_inner_map);
 tools/testing/selftests/bpf/test_bpftool.py (new file) | 178
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+import collections
+import functools
+import json
+import os
+import socket
+import subprocess
+import unittest
+
+
+# Add the source tree of bpftool and /usr/local/sbin to PATH
+cur_dir = os.path.dirname(os.path.realpath(__file__))
+bpftool_dir = os.path.abspath(os.path.join(cur_dir, "..", "..", "..", "..",
+                                           "tools", "bpf", "bpftool"))
+os.environ["PATH"] = bpftool_dir + ":/usr/local/sbin:" + os.environ["PATH"]
+
+
+class IfaceNotFoundError(Exception):
+    pass
+
+
+class UnprivilegedUserError(Exception):
+    pass
+
+
+def _bpftool(args, json=True):
+    _args = ["bpftool"]
+    if json:
+        _args.append("-j")
+    _args.extend(args)
+
+    return subprocess.check_output(_args)
+
+
+def bpftool(args):
+    return _bpftool(args, json=False).decode("utf-8")
+
+
+def bpftool_json(args):
+    res = _bpftool(args)
+    return json.loads(res)
+
+
+def get_default_iface():
+    for iface in socket.if_nameindex():
+        if iface[1] != "lo":
+            return iface[1]
+    raise IfaceNotFoundError("Could not find any network interface to probe")
+
+
+def default_iface(f):
+    @functools.wraps(f)
+    def wrapper(*args, **kwargs):
+        iface = get_default_iface()
+        return f(*args, iface, **kwargs)
+    return wrapper
+
+
+class TestBpftool(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        if os.getuid() != 0:
+            raise UnprivilegedUserError(
+                "This test suite needs root privileges")
+
+    @default_iface
+    def test_feature_dev_json(self, iface):
+        unexpected_helpers = [
+            "bpf_probe_write_user",
+            "bpf_trace_printk",
+        ]
+        expected_keys = [
+            "syscall_config",
+            "program_types",
+            "map_types",
+            "helpers",
+            "misc",
+        ]
+
+        res = bpftool_json(["feature", "probe", "dev", iface])
+        # Check if the result has all expected keys.
+        self.assertCountEqual(res.keys(), expected_keys)
+        # Check if unexpected helpers are not included in helpers probes
+        # result.
+        for helpers in res["helpers"].values():
+            for unexpected_helper in unexpected_helpers:
+                self.assertNotIn(unexpected_helper, helpers)
+
+    def test_feature_kernel(self):
+        test_cases = [
+            bpftool_json(["feature", "probe", "kernel"]),
+            bpftool_json(["feature", "probe"]),
+            bpftool_json(["feature"]),
+        ]
+        unexpected_helpers = [
+            "bpf_probe_write_user",
+            "bpf_trace_printk",
+        ]
+        expected_keys = [
+            "syscall_config",
+            "system_config",
+            "program_types",
+            "map_types",
+            "helpers",
+            "misc",
+        ]
+
+        for tc in test_cases:
+            # Check if the result has all expected keys.
+            self.assertCountEqual(tc.keys(), expected_keys)
+            # Check if unexpected helpers are not included in helpers probes
+            # result.
+            for helpers in tc["helpers"].values():
+                for unexpected_helper in unexpected_helpers:
+                    self.assertNotIn(unexpected_helper, helpers)
+
+    def test_feature_kernel_full(self):
+        test_cases = [
+            bpftool_json(["feature", "probe", "kernel", "full"]),
+            bpftool_json(["feature", "probe", "full"]),
+        ]
+        expected_helpers = [
+            "bpf_probe_write_user",
+            "bpf_trace_printk",
+        ]
+
+        for tc in test_cases:
+            # Check if expected helpers are included at least once in any
+            # helpers list for any program type. Unfortunately we cannot assume
+            # that they will be included in all program types or a specific
+            # subset of programs. It depends on the kernel version and
+            # configuration.
+            found_helpers = False
+
+            for helpers in tc["helpers"].values():
+                if all(expected_helper in helpers
+                       for expected_helper in expected_helpers):
+                    found_helpers = True
+                    break
+
+            self.assertTrue(found_helpers)
+
+    def test_feature_kernel_full_vs_not_full(self):
+        full_res = bpftool_json(["feature", "probe", "full"])
+        not_full_res = bpftool_json(["feature", "probe"])
+        not_full_set = set()
+        full_set = set()
+
+        for helpers in full_res["helpers"].values():
+            for helper in helpers:
+                full_set.add(helper)
+
+        for helpers in not_full_res["helpers"].values():
+            for helper in helpers:
+                not_full_set.add(helper)
+
+        self.assertCountEqual(full_set - not_full_set,
+                              {"bpf_probe_write_user", "bpf_trace_printk"})
+        self.assertCountEqual(not_full_set - full_set, set())
+
+    def test_feature_macros(self):
+        expected_patterns = [
+            r"/\*\*\* System call availability \*\*\*/",
+            r"#define HAVE_BPF_SYSCALL",
+            r"/\*\*\* eBPF program types \*\*\*/",
+            r"#define HAVE.*PROG_TYPE",
+            r"/\*\*\* eBPF map types \*\*\*/",
+            r"#define HAVE.*MAP_TYPE",
+            r"/\*\*\* eBPF helper functions \*\*\*/",
+            r"#define HAVE.*HELPER",
+            r"/\*\*\* eBPF misc features \*\*\*/",
+        ]
+
+        res = bpftool(["feature", "probe", "macros"])
+        for pattern in expected_patterns:
+            self.assertRegex(res, pattern)
 tools/testing/selftests/bpf/test_bpftool.sh (new executable file) | 5
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+python3 -m unittest -v test_bpftool.TestBpftool
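For reference, a usage sketch for the new selftest (assuming a kernel source tree checkout and root privileges, which TestBpftool.setUpClass() enforces):

    cd tools/testing/selftests/bpf
    sudo ./test_bpftool.sh
    # equivalent to what the wrapper script runs:
    sudo python3 -m unittest -v test_bpftool.TestBpftool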
@@ -6,6 +6,8 @@
 #include "bpf_rlimit.h"
 #include <argp.h>
 #include <string.h>
+#include <signal.h>
+#include <execinfo.h> /* backtrace */
 
 /* defined in test_progs.h */
 struct test_env env = {};
@@ -617,6 +619,23 @@ int cd_flavor_subdir(const char *exec_name)
 return chdir(flavor);
 }
 
+#define MAX_BACKTRACE_SZ 128
+void crash_handler(int signum)
+{
+void *bt[MAX_BACKTRACE_SZ];
+size_t sz;
+
+sz = backtrace(bt, ARRAY_SIZE(bt));
+
+if (env.test)
+dump_test_log(env.test, true);
+if (env.stdout)
+stdio_restore();
+
+fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
+backtrace_symbols_fd(bt, sz, STDERR_FILENO);
+}
+
 int main(int argc, char **argv)
 {
 static const struct argp argp = {
@@ -624,8 +643,14 @@ int main(int argc, char **argv)
 .parser = parse_arg,
 .doc = argp_program_doc,
 };
+struct sigaction sigact = {
+.sa_handler = crash_handler,
+.sa_flags = SA_RESETHAND,
+};
 int err, i;
 
+sigaction(SIGSEGV, &sigact, NULL);
+
 err = argp_parse(&argp, argc, argv, 0, NULL, &env);
 if (err)
 return err;