bpf: Abstract loop unrolling pragmas in BPF selftests

[Changes from V1:
- Avoid conflict by rebasing with latest master.]

Some BPF tests use loop unrolling compiler pragmas that are clang
specific and not supported by GCC.  These pragmas, along with their
GCC equivalences are:

  #pragma clang loop unroll_count(N)
  #pragma GCC unroll N

  #pragma clang loop unroll(full)
  #pragma GCC unroll 65534

  #pragma clang loop unroll(disable)
  #pragma GCC unroll 1

  #pragma unroll [aka #pragma clang loop unroll(enable)]
  There is no GCC equivalence to this pragma.  It enables unrolling on
  loops that the compiler would not ordinarily unroll even with
  -O2|-funroll-loops, but it is not equivalent to full unrolling
  either.

This patch adds a new header progs/bpf_compiler.h that defines the
following macros, which correspond to each pair of compiler-specific
pragmas above:

  __pragma_loop_unroll_count(N)
  __pragma_loop_unroll_full
  __pragma_loop_no_unroll
  __pragma_loop_unroll

The selftests using loop unrolling pragmas are then changed to include
the header and use these macros in place of the explicit pragmas.

Tested in bpf-next master.
No regressions.

Signed-off-by: Jose E. Marchesi <jose.marchesi@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/bpf/20240208203612.29611-1-jose.marchesi@oracle.com
This commit is contained in:
Jose E. Marchesi 2024-02-08 21:36:12 +01:00 committed by Andrii Nakryiko
parent fc1c9e40da
commit 52dbd67dff
19 changed files with 102 additions and 42 deletions

View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_COMPILER_H__
#define __BPF_COMPILER_H__
#define DO_PRAGMA_(X) _Pragma(#X)
#if __clang__
#define __pragma_loop_unroll DO_PRAGMA_(clang loop unroll(enable))
#else
/* In GCC -funroll-loops, which is enabled with -O2, should have the
same impact than the loop-unroll-enable pragma above. */
#define __pragma_loop_unroll
#endif
#if __clang__
#define __pragma_loop_unroll_count(N) DO_PRAGMA_(clang loop unroll_count(N))
#else
#define __pragma_loop_unroll_count(N) DO_PRAGMA_(GCC unroll N)
#endif
#if __clang__
#define __pragma_loop_unroll_full DO_PRAGMA_(clang loop unroll(full))
#else
#define __pragma_loop_unroll_full DO_PRAGMA_(GCC unroll 65534)
#endif
#if __clang__
#define __pragma_loop_no_unroll DO_PRAGMA_(clang loop unroll(disable))
#else
#define __pragma_loop_no_unroll DO_PRAGMA_(GCC unroll 1)
#endif
#endif

View File

@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_compiler.h"
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
@ -183,7 +184,7 @@ int iter_pragma_unroll_loop(const void *ctx)
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 2);
#pragma nounroll
__pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
@ -238,7 +239,7 @@ int iter_multiple_sequential_loops(const void *ctx)
bpf_iter_num_destroy(&it);
bpf_iter_num_new(&it, 0, 2);
#pragma nounroll
__pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);

View File

@ -3,6 +3,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
char _license[] SEC("license") = "GPL";
SEC("socket")
@ -10,7 +12,7 @@ int combinations(volatile struct __sk_buff* skb)
{
int ret = 0, i;
#pragma nounroll
__pragma_loop_no_unroll
for (i = 0; i < 20; i++)
if (skb->len)
ret |= 1 << i;

View File

@ -8,6 +8,7 @@
#include "profiler.h"
#include "err.h"
#include "bpf_experimental.h"
#include "bpf_compiler.h"
#ifndef NULL
#define NULL 0
@ -169,7 +170,7 @@ static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
int spid)
{
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == spid)
@ -185,7 +186,7 @@ static INLINE void populate_ancestors(struct task_struct* task,
ancestors_data->num_ancestors = 0;
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
parent = BPF_CORE_READ(parent, real_parent);
@ -212,7 +213,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
size_t filepart_length;
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
filepart_length =
@ -261,7 +262,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys_state* subsys =
@ -402,7 +403,7 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
if (kill_data == NULL)
return 0;
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == 0) {
@ -482,7 +483,7 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
struct dentry* parent_dentry;
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
filepart_length =
@ -508,7 +509,7 @@ is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
struct dentry* parent_dentry;
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
@ -629,7 +630,7 @@ int raw_tracepoint__sched_process_exit(void* ctx)
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
#ifdef UNROLL
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
struct var_kill_data_t* past_kill_data = &arr_struct->array[i];

View File

@ -8,6 +8,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_compiler.h"
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
@ -298,11 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
#elif defined(UNROLL_COUNT)
#pragma clang loop unroll_count(UNROLL_COUNT)
__pragma_loop_unroll_count(UNROLL_COUNT)
#else
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
#endif /* NO_UNROLL */
/* Unwind python stack */
#ifdef USE_ITER

View File

@ -10,6 +10,8 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
typedef uint32_t pid_t;
struct task_struct {};
@ -419,9 +421,9 @@ static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
}
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
#else
#pragma unroll
__pragma_loop_unroll
#endif
for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
if (i >= map.cnt)
@ -560,25 +562,25 @@ static void *read_strobe_meta(struct task_struct *task,
payload_off = sizeof(data->payload);
#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
#else
#pragma unroll
__pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
#else
#pragma unroll
__pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
}
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
#else
#pragma unroll
__pragma_loop_unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);

View File

@ -20,6 +20,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_compiler.h"
#include "test_cls_redirect.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@ -269,7 +270,7 @@ static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
uint32_t acc = 0;
uint16_t *ipw = (uint16_t *)iph;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
acc += ipw[i];
}
@ -296,7 +297,7 @@ bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
};
*is_fragment = false;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (int i = 0; i < 6; i++) {
switch (exthdr.next) {
case IPPROTO_FRAGMENT:

View File

@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_compiler.h"
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@ -131,7 +133,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (int i = 0; i < 10; i++) {
struct sr6_tlv_t tlv;
@ -302,7 +304,7 @@ int __encap_srh(struct __sk_buff *skb)
seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (unsigned long long lo = 0; lo < 4; lo++) {
seg->lo = bpf_cpu_to_be64(4 - lo);
seg->hi = bpf_cpu_to_be64(hi);

View File

@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_compiler.h"
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@ -134,7 +136,7 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
// we can only go as far as ~10 TLVs due to the BPF max stack size
// workaround: define induction variable "i" as "long" instead
// of "int" to prevent alu32 sub-register spilling.
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
for (long i = 0; i < 100; i++) {
struct sr6_tlv_t tlv;

View File

@ -3,12 +3,14 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
char _license[] SEC("license") = "GPL";
SEC("tc")
int process(struct __sk_buff *skb)
{
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (int i = 0; i < 5; i++) {
if (skb->cb[i] != i + 1)
return 1;

View File

@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@ -30,7 +32,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@ -59,7 +61,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);

View File

@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@ -30,7 +32,7 @@ static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);

View File

@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
@ -31,7 +33,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);

View File

@ -19,6 +19,7 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@ -83,7 +84,7 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = 0;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
csum += *iph16++;

View File

@ -19,6 +19,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@ -137,7 +138,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;

View File

@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@ -133,7 +134,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
#pragma clang loop unroll(disable)
__pragma_loop_no_unroll
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;

View File

@ -15,6 +15,7 @@
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_compiler.h"
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
@ -362,7 +363,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
iph->ttl = 4;
next_iph_u16 = (__u16 *) iph;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
@ -409,7 +410,7 @@ int send_icmp_reply(void *data, void *data_end)
iph->saddr = tmp_addr;
iph->check = 0;
next_iph_u16 = (__u16 *) iph;
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));

View File

@ -7,6 +7,8 @@
#include <bpf/bpf_endian.h>
#include <asm/errno.h>
#include "bpf_compiler.h"
#define TC_ACT_OK 0
#define TC_ACT_SHOT 2
@ -151,11 +153,11 @@ static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
__u64 sum = csum;
int i;
#pragma unroll
__pragma_loop_unroll
for (i = 0; i < 4; i++)
sum += (__u32)saddr->in6_u.u6_addr32[i];
#pragma unroll
__pragma_loop_unroll
for (i = 0; i < 4; i++)
sum += (__u32)daddr->in6_u.u6_addr32[i];

View File

@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_compiler.h"
#include "xdping.h"
struct {
@ -116,7 +117,7 @@ int xdping_client(struct xdp_md *ctx)
return XDP_PASS;
if (pinginfo->start) {
#pragma clang loop unroll(full)
__pragma_loop_unroll_full
for (i = 0; i < XDPING_MAX_COUNT; i++) {
if (pinginfo->times[i] == 0)
break;