mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 04:04:19 +00:00
Including fixes from netfilter and bpf.
Current release - regressions: - eth: stmmac: fix failure to probe without MAC interface specified Current release - new code bugs: - docs: netlink: fix missing classic_netlink doc reference Previous releases - regressions: - deal with integer overflows in kmalloc_reserve() - use sk_forward_alloc_get() in sk_get_meminfo() - bpf_sk_storage: fix the missing uncharge in sk_omem_alloc - fib: avoid warn splat in flow dissector after packet mangling - skb_segment: call zero copy functions before using skbuff frags - eth: sfc: check for zero length in EF10 RX prefix Previous releases - always broken: - af_unix: fix msg_controllen test in scm_pidfd_recv() for MSG_CMSG_COMPAT - xsk: fix xsk_build_skb() dereferencing possible ERR_PTR() - netfilter: - nft_exthdr: fix non-linear header modification - xt_u32, xt_sctp: validate user space input - nftables: exthdr: fix 4-byte stack OOB write - nfnetlink_osf: avoid OOB read - one more fix for the garbage collection work from last release - igmp: limit igmpv3_newpack() packet size to IP_MAX_MTU - bpf, sockmap: fix preempt_rt splat when using raw_spin_lock_t - handshake: fix null-deref in handshake_nl_done_doit() - ip: ignore dst hint for multipath routes to ensure packets are hashed across the nexthops - phy: micrel: - correct bit assignments for cable test errata - disable EEE according to the KSZ9477 errata Misc: - docs/bpf: document compile-once-run-everywhere (CO-RE) relocations - Revert "net: macsec: preserve ingress frame ordering", it appears to have been developed against an older kernel, problem doesn't exist upstream Signed-off-by: Jakub Kicinski <kuba@kernel.org> -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEE6jPA+I1ugmIBA4hXMUZtbf5SIrsFAmT6R6wACgkQMUZtbf5S IrsmTg//TgmRjxSZ0lrPQtJwZR/eN3ZR2oQG3rwnssCx+YgHEGGxQsfT4KHEMacR ZgGDZVTpthUJkkACBPi8ZMoy++RdjEmlCcanfeDkGHoYGtiX1lhkofhLMn1KUHbI rIbP9EdNKxQT0SsBlw/U28pD5jKyqOgL23QobEwmcjLTdMpamb+qIsD6/xNv9tEj Tu4BdCIkhjxnBD622hsE3pFTG7oSn2WM6rf5NT1E43mJ3W8RrMcydSB27J7Oryo9 
l3nYMAhz0vQINS2WQ9eCT1/7GI6gg1nDtxFtrnV7ASvxayRBPIUr4kg1vT+Tixsz CZMnwVamEBIYl9agmj7vSji7d5nOUgXPhtWhwWUM2tRoGdeGw3vSi1pgDvRiUCHE PJ4UHv7goa2AgnOlOQCFtRybAu+9nmSGm7V+GkeGLnH7xbFsEa5smQ/+FSPJs8Dn Yf4q5QAhdN8tdnofRlrN/nCssoDF3cfmBsTJ7wo5h71gW+BWhsP58eDCJlXd/r8k +Qnvoe2kw27ktFR1tjsUDZ0AcSmeVARNwmXCOBYZsG4tEek8pLyj008mDvJvdfyn PGPn7Eo5DyaERlHVmPuebHXSyniDEPe2GLTmlHcGiRpGspoUHbB+HRiDAuRLMB9g pkL8RHpNfppnuUXeUoNy3rgEkYwlpTjZX0QHC6N8NQ76ccB6CNM= =YpmE -----END PGP SIGNATURE----- Merge tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net Pull networking updates from Jakub Kicinski: "Including fixes from netfilter and bpf. Current release - regressions: - eth: stmmac: fix failure to probe without MAC interface specified Current release - new code bugs: - docs: netlink: fix missing classic_netlink doc reference Previous releases - regressions: - deal with integer overflows in kmalloc_reserve() - use sk_forward_alloc_get() in sk_get_meminfo() - bpf_sk_storage: fix the missing uncharge in sk_omem_alloc - fib: avoid warn splat in flow dissector after packet mangling - skb_segment: call zero copy functions before using skbuff frags - eth: sfc: check for zero length in EF10 RX prefix Previous releases - always broken: - af_unix: fix msg_controllen test in scm_pidfd_recv() for MSG_CMSG_COMPAT - xsk: fix xsk_build_skb() dereferencing possible ERR_PTR() - netfilter: - nft_exthdr: fix non-linear header modification - xt_u32, xt_sctp: validate user space input - nftables: exthdr: fix 4-byte stack OOB write - nfnetlink_osf: avoid OOB read - one more fix for the garbage collection work from last release - igmp: limit igmpv3_newpack() packet size to IP_MAX_MTU - bpf, sockmap: fix preempt_rt splat when using raw_spin_lock_t - handshake: fix null-deref in handshake_nl_done_doit() - ip: ignore dst hint for multipath routes to ensure packets are hashed across the nexthops - phy: micrel: - correct bit assignments for cable test errata - disable EEE according to the 
KSZ9477 errata Misc: - docs/bpf: document compile-once-run-everywhere (CO-RE) relocations - Revert "net: macsec: preserve ingress frame ordering", it appears to have been developed against an older kernel, problem doesn't exist upstream" * tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (95 commits) net: enetc: distinguish error from valid pointers in enetc_fixup_clear_rss_rfs() Revert "net: team: do not use dynamic lockdep key" net: hns3: remove GSO partial feature bit net: hns3: fix the port information display when sfp is absent net: hns3: fix invalid mutex between tc qdisc and dcb ets command issue net: hns3: fix debugfs concurrency issue between kfree buffer and read net: hns3: fix byte order conversion issue in hclge_dbg_fd_tcam_read() net: hns3: Support query tx timeout threshold by debugfs net: hns3: fix tx timeout issue net: phy: Provide Module 4 KSZ9477 errata (DS80000754C) netfilter: nf_tables: Unbreak audit log reset netfilter: ipset: add the missing IP_SET_HASH_WITH_NET0 macro for ip_set_hash_netportnet.c netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction netfilter: nf_tables: uapi: Describe NFTA_RULE_CHAIN_ID netfilter: nfnetlink_osf: avoid OOB read netfilter: nftables: exthdr: fix 4-byte stack OOB write selftests/bpf: Check bpf_sk_storage has uncharged sk_omem_alloc bpf: bpf_sk_storage: Fix the missing uncharge in sk_omem_alloc bpf: bpf_sk_storage: Fix invalid wait context lockdep report s390/bpf: Pass through tail call counter in trampolines ...
This commit is contained in:
commit
73be7fb14e
@ -726,8 +726,8 @@ same as the one describe in :ref:`BTF_Type_String`.
|
||||
4.2 .BTF.ext section
|
||||
--------------------
|
||||
|
||||
The .BTF.ext section encodes func_info and line_info which needs loader
|
||||
manipulation before loading into the kernel.
|
||||
The .BTF.ext section encodes func_info, line_info and CO-RE relocations
|
||||
which need loader manipulation before loading into the kernel.
|
||||
|
||||
The specification for .BTF.ext section is defined at ``tools/lib/bpf/btf.h``
|
||||
and ``tools/lib/bpf/btf.c``.
|
||||
@ -745,15 +745,20 @@ The current header of .BTF.ext section::
|
||||
__u32 func_info_len;
|
||||
__u32 line_info_off;
|
||||
__u32 line_info_len;
|
||||
|
||||
/* optional part of .BTF.ext header */
|
||||
__u32 core_relo_off;
|
||||
__u32 core_relo_len;
|
||||
};
|
||||
|
||||
It is very similar to .BTF section. Instead of type/string section, it
|
||||
contains func_info and line_info section. See :ref:`BPF_Prog_Load` for details
|
||||
about func_info and line_info record format.
|
||||
contains func_info, line_info and core_relo sub-sections.
|
||||
See :ref:`BPF_Prog_Load` for details about func_info and line_info
|
||||
record format.
|
||||
|
||||
The func_info is organized as below.::
|
||||
|
||||
func_info_rec_size
|
||||
func_info_rec_size /* __u32 value */
|
||||
btf_ext_info_sec for section #1 /* func_info for section #1 */
|
||||
btf_ext_info_sec for section #2 /* func_info for section #2 */
|
||||
...
|
||||
@ -773,7 +778,7 @@ Here, num_info must be greater than 0.
|
||||
|
||||
The line_info is organized as below.::
|
||||
|
||||
line_info_rec_size
|
||||
line_info_rec_size /* __u32 value */
|
||||
btf_ext_info_sec for section #1 /* line_info for section #1 */
|
||||
btf_ext_info_sec for section #2 /* line_info for section #2 */
|
||||
...
|
||||
@ -787,6 +792,20 @@ kernel API, the ``insn_off`` is the instruction offset in the unit of ``struct
|
||||
bpf_insn``. For ELF API, the ``insn_off`` is the byte offset from the
|
||||
beginning of section (``btf_ext_info_sec->sec_name_off``).
|
||||
|
||||
The core_relo is organized as below.::
|
||||
|
||||
core_relo_rec_size /* __u32 value */
|
||||
btf_ext_info_sec for section #1 /* core_relo for section #1 */
|
||||
btf_ext_info_sec for section #2 /* core_relo for section #2 */
|
||||
|
||||
``core_relo_rec_size`` specifies the size of ``bpf_core_relo``
|
||||
structure when .BTF.ext is generated. All ``bpf_core_relo`` structures
|
||||
within a single ``btf_ext_info_sec`` describe relocations applied to
|
||||
section named by ``btf_ext_info_sec->sec_name_off``.
|
||||
|
||||
See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
|
||||
for more information on CO-RE relocations.
|
||||
|
||||
4.3 .BTF_ids section
|
||||
--------------------
|
||||
|
||||
|
@ -29,6 +29,7 @@ that goes into great technical depth about the BPF Architecture.
|
||||
bpf_licensing
|
||||
test_debug
|
||||
clang-notes
|
||||
linux-notes
|
||||
other
|
||||
redirect
|
||||
|
||||
|
@ -240,3 +240,307 @@ The .BTF/.BTF.ext sections has R_BPF_64_NODYLD32 relocations::
|
||||
Offset Info Type Symbol's Value Symbol's Name
|
||||
000000000000002c 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
|
||||
0000000000000040 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
|
||||
|
||||
.. _btf-co-re-relocations:
|
||||
|
||||
=================
|
||||
CO-RE Relocations
|
||||
=================
|
||||
|
||||
From the object file point of view, the CO-RE mechanism is implemented as a set
|
||||
of CO-RE specific relocation records. These relocation records are not
|
||||
related to ELF relocations and are encoded in .BTF.ext section.
|
||||
See :ref:`Documentation/bpf/btf.rst <BTF_Ext_Section>` for more
|
||||
information on .BTF.ext structure.
|
||||
|
||||
CO-RE relocations are applied to BPF instructions to update immediate
|
||||
or offset fields of the instruction at load time with information
|
||||
relevant for target kernel.
|
||||
|
||||
The field to patch is selected based on the instruction class:
|
||||
|
||||
* For BPF_ALU, BPF_ALU64, BPF_LD `immediate` field is patched;
|
||||
* For BPF_LDX, BPF_STX, BPF_ST `offset` field is patched;
|
||||
* BPF_JMP, BPF_JMP32 instructions **should not** be patched.
|
||||
|
||||
Relocation kinds
|
||||
================
|
||||
|
||||
There are several kinds of CO-RE relocations that could be split into
|
||||
three groups:
|
||||
|
||||
* Field-based - patch instruction with field related information, e.g.
|
||||
change offset field of the BPF_LDX instruction to reflect offset
|
||||
of a specific structure field in the target kernel.
|
||||
|
||||
* Type-based - patch instruction with type related information, e.g.
|
||||
change immediate field of the BPF_ALU move instruction to 0 or 1 to
|
||||
reflect if specific type is present in the target kernel.
|
||||
|
||||
* Enum-based - patch instruction with enum related information, e.g.
|
||||
change immediate field of the BPF_LD_IMM64 instruction to reflect
|
||||
value of a specific enum literal in the target kernel.
|
||||
|
||||
The complete list of relocation kinds is represented by the following enum:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
enum bpf_core_relo_kind {
|
||||
BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */
|
||||
BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */
|
||||
BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */
|
||||
BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
|
||||
BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
|
||||
BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
|
||||
BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
|
||||
BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */
|
||||
BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */
|
||||
BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */
|
||||
BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
|
||||
BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */
|
||||
BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */
|
||||
};
|
||||
|
||||
Notes:
|
||||
|
||||
* ``BPF_CORE_FIELD_LSHIFT_U64`` and ``BPF_CORE_FIELD_RSHIFT_U64`` are
|
||||
supposed to be used to read bitfield values using the following
|
||||
algorithm:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
// To read bitfield ``f`` from ``struct s``
|
||||
is_signed = relo(s->f, BPF_CORE_FIELD_SIGNED)
|
||||
off = relo(s->f, BPF_CORE_FIELD_BYTE_OFFSET)
|
||||
sz = relo(s->f, BPF_CORE_FIELD_BYTE_SIZE)
|
||||
l = relo(s->f, BPF_CORE_FIELD_LSHIFT_U64)
|
||||
r = relo(s->f, BPF_CORE_FIELD_RSHIFT_U64)
|
||||
// define ``v`` as signed or unsigned integer of size ``sz``
|
||||
v = *({s|u}<sz> *)((void *)s + off)
|
||||
v <<= l
|
||||
v >>= r
|
||||
|
||||
* The ``BPF_CORE_TYPE_MATCHES`` queries matching relation, defined as
|
||||
follows:
|
||||
|
||||
* for integers: types match if size and signedness match;
|
||||
* for arrays & pointers: target types are recursively matched;
|
||||
* for structs & unions:
|
||||
|
||||
* local members need to exist in target with the same name;
|
||||
|
||||
* for each member we recursively check match unless it is already behind a
|
||||
pointer, in which case we only check matching names and compatible kind;
|
||||
|
||||
* for enums:
|
||||
|
||||
* local variants have to have a match in target by symbolic name (but not
|
||||
numeric value);
|
||||
|
||||
* size has to match (but enum may match enum64 and vice versa);
|
||||
|
||||
* for function pointers:
|
||||
|
||||
* number and position of arguments in local type has to match target;
|
||||
* for each argument and the return value we recursively check match.
|
||||
|
||||
CO-RE Relocation Record
|
||||
=======================
|
||||
|
||||
Relocation record is encoded as the following structure:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct bpf_core_relo {
|
||||
__u32 insn_off;
|
||||
__u32 type_id;
|
||||
__u32 access_str_off;
|
||||
enum bpf_core_relo_kind kind;
|
||||
};
|
||||
|
||||
* ``insn_off`` - instruction offset (in bytes) within a code section
|
||||
associated with this relocation;
|
||||
|
||||
* ``type_id`` - BTF type ID of the "root" (containing) entity of a
|
||||
relocatable type or field;
|
||||
|
||||
* ``access_str_off`` - offset into corresponding .BTF string section.
|
||||
String interpretation depends on specific relocation kind:
|
||||
|
||||
* for field-based relocations, string encodes an accessed field using
|
||||
a sequence of field and array indices, separated by colon (:). It's
|
||||
conceptually very close to LLVM's `getelementptr <GEP_>`_ instruction's
|
||||
arguments for identifying offset to a field. For example, consider the
|
||||
following C code:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct sample {
|
||||
int a;
|
||||
int b;
|
||||
struct { int c[10]; };
|
||||
} __attribute__((preserve_access_index));
|
||||
struct sample *s;
|
||||
|
||||
* Access to ``s[0].a`` would be encoded as ``0:0``:
|
||||
|
||||
* ``0``: first element of ``s`` (as if ``s`` is an array);
|
||||
* ``0``: index of field ``a`` in ``struct sample``.
|
||||
|
||||
* Access to ``s->a`` would be encoded as ``0:0`` as well.
|
||||
* Access to ``s->b`` would be encoded as ``0:1``:
|
||||
|
||||
* ``0``: first element of ``s``;
|
||||
* ``1``: index of field ``b`` in ``struct sample``.
|
||||
|
||||
* Access to ``s[1].c[5]`` would be encoded as ``1:2:0:5``:
|
||||
|
||||
* ``1``: second element of ``s``;
|
||||
* ``2``: index of anonymous structure field in ``struct sample``;
|
||||
* ``0``: index of field ``c`` in anonymous structure;
|
||||
* ``5``: access to array element #5.
|
||||
|
||||
* for type-based relocations, string is expected to be just "0";
|
||||
|
||||
* for enum value-based relocations, string contains an index of enum
|
||||
value within its enum type;
|
||||
|
||||
* ``kind`` - one of ``enum bpf_core_relo_kind``.
|
||||
|
||||
.. _GEP: https://llvm.org/docs/LangRef.html#getelementptr-instruction
|
||||
|
||||
.. _btf_co_re_relocation_examples:
|
||||
|
||||
CO-RE Relocation Examples
|
||||
=========================
|
||||
|
||||
For the following C code:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct foo {
|
||||
int a;
|
||||
int b;
|
||||
unsigned c:15;
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
enum bar { U, V };
|
||||
|
||||
With the following BTF definitions:
|
||||
|
||||
.. code-block::
|
||||
|
||||
...
|
||||
[2] STRUCT 'foo' size=12 vlen=3
|
||||
'a' type_id=3 bits_offset=0
|
||||
'b' type_id=3 bits_offset=32
|
||||
'c' type_id=4 bits_offset=64 bitfield_size=15
|
||||
[3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
|
||||
[4] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
|
||||
...
|
||||
[16] ENUM 'bar' encoding=UNSIGNED size=4 vlen=2
|
||||
'U' val=0
|
||||
'V' val=1
|
||||
|
||||
Field offset relocations are generated automatically when
|
||||
``__attribute__((preserve_access_index))`` is used, for example:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void alpha(struct foo *s, volatile unsigned long *g) {
|
||||
*g = s->a;
|
||||
s->a = 1;
|
||||
}
|
||||
|
||||
00 <alpha>:
|
||||
0: r3 = *(s32 *)(r1 + 0x0)
|
||||
00: CO-RE <byte_off> [2] struct foo::a (0:0)
|
||||
1: *(u64 *)(r2 + 0x0) = r3
|
||||
2: *(u32 *)(r1 + 0x0) = 0x1
|
||||
10: CO-RE <byte_off> [2] struct foo::a (0:0)
|
||||
3: exit
|
||||
|
||||
|
||||
All relocation kinds could be requested via built-in functions.
|
||||
E.g. field-based relocations:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void bravo(struct foo *s, volatile unsigned long *g) {
|
||||
*g = __builtin_preserve_field_info(s->b, 0 /* field byte offset */);
|
||||
*g = __builtin_preserve_field_info(s->b, 1 /* field byte size */);
|
||||
*g = __builtin_preserve_field_info(s->b, 2 /* field existence */);
|
||||
*g = __builtin_preserve_field_info(s->b, 3 /* field signedness */);
|
||||
*g = __builtin_preserve_field_info(s->c, 4 /* bitfield left shift */);
|
||||
*g = __builtin_preserve_field_info(s->c, 5 /* bitfield right shift */);
|
||||
}
|
||||
|
||||
20 <bravo>:
|
||||
4: r1 = 0x4
|
||||
20: CO-RE <byte_off> [2] struct foo::b (0:1)
|
||||
5: *(u64 *)(r2 + 0x0) = r1
|
||||
6: r1 = 0x4
|
||||
30: CO-RE <byte_sz> [2] struct foo::b (0:1)
|
||||
7: *(u64 *)(r2 + 0x0) = r1
|
||||
8: r1 = 0x1
|
||||
40: CO-RE <field_exists> [2] struct foo::b (0:1)
|
||||
9: *(u64 *)(r2 + 0x0) = r1
|
||||
10: r1 = 0x1
|
||||
50: CO-RE <signed> [2] struct foo::b (0:1)
|
||||
11: *(u64 *)(r2 + 0x0) = r1
|
||||
12: r1 = 0x31
|
||||
60: CO-RE <lshift_u64> [2] struct foo::c (0:2)
|
||||
13: *(u64 *)(r2 + 0x0) = r1
|
||||
14: r1 = 0x31
|
||||
70: CO-RE <rshift_u64> [2] struct foo::c (0:2)
|
||||
15: *(u64 *)(r2 + 0x0) = r1
|
||||
16: exit
|
||||
|
||||
|
||||
Type-based relocations:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void charlie(struct foo *s, volatile unsigned long *g) {
|
||||
*g = __builtin_preserve_type_info(*s, 0 /* type existence */);
|
||||
*g = __builtin_preserve_type_info(*s, 1 /* type size */);
|
||||
*g = __builtin_preserve_type_info(*s, 2 /* type matches */);
|
||||
*g = __builtin_btf_type_id(*s, 0 /* type id in this object file */);
|
||||
*g = __builtin_btf_type_id(*s, 1 /* type id in target kernel */);
|
||||
}
|
||||
|
||||
88 <charlie>:
|
||||
17: r1 = 0x1
|
||||
88: CO-RE <type_exists> [2] struct foo
|
||||
18: *(u64 *)(r2 + 0x0) = r1
|
||||
19: r1 = 0xc
|
||||
98: CO-RE <type_size> [2] struct foo
|
||||
20: *(u64 *)(r2 + 0x0) = r1
|
||||
21: r1 = 0x1
|
||||
a8: CO-RE <type_matches> [2] struct foo
|
||||
22: *(u64 *)(r2 + 0x0) = r1
|
||||
23: r1 = 0x2 ll
|
||||
b8: CO-RE <local_type_id> [2] struct foo
|
||||
25: *(u64 *)(r2 + 0x0) = r1
|
||||
26: r1 = 0x2 ll
|
||||
d0: CO-RE <target_type_id> [2] struct foo
|
||||
28: *(u64 *)(r2 + 0x0) = r1
|
||||
29: exit
|
||||
|
||||
Enum-based relocations:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void delta(struct foo *s, volatile unsigned long *g) {
|
||||
*g = __builtin_preserve_enum_value(*(enum bar *)U, 0 /* enum literal existence */);
|
||||
*g = __builtin_preserve_enum_value(*(enum bar *)V, 1 /* enum literal value */);
|
||||
}
|
||||
|
||||
f0 <delta>:
|
||||
30: r1 = 0x1 ll
|
||||
f0: CO-RE <enumval_exists> [16] enum bar::U = 0
|
||||
32: *(u64 *)(r2 + 0x0) = r1
|
||||
33: r1 = 0x1 ll
|
||||
108: CO-RE <enumval_value> [16] enum bar::V = 1
|
||||
35: *(u64 *)(r2 + 0x0) = r1
|
||||
36: exit
|
||||
|
25
Documentation/bpf/standardization/abi.rst
Normal file
25
Documentation/bpf/standardization/abi.rst
Normal file
@ -0,0 +1,25 @@
|
||||
.. contents::
|
||||
.. sectnum::
|
||||
|
||||
===================================================
|
||||
BPF ABI Recommended Conventions and Guidelines v1.0
|
||||
===================================================
|
||||
|
||||
This is version 1.0 of an informational document containing recommended
|
||||
conventions and guidelines for producing portable BPF program binaries.
|
||||
|
||||
Registers and calling convention
|
||||
================================
|
||||
|
||||
BPF has 10 general purpose registers and a read-only frame pointer register,
|
||||
all of which are 64 bits wide.
|
||||
|
||||
The BPF calling convention is defined as:
|
||||
|
||||
* R0: return value from function calls, and exit value for BPF programs
|
||||
* R1 - R5: arguments for function calls
|
||||
* R6 - R9: callee saved registers that function calls will preserve
|
||||
* R10: read-only frame pointer to access stack
|
||||
|
||||
R0 - R5 are scratch registers and BPF programs need to spill/fill them if
|
||||
necessary across calls.
|
@ -12,7 +12,7 @@ for the working group charter, documents, and more.
|
||||
:maxdepth: 1
|
||||
|
||||
instruction-set
|
||||
linux-notes
|
||||
abi
|
||||
|
||||
.. Links:
|
||||
.. _IETF BPF Working Group: https://datatracker.ietf.org/wg/bpf/about/
|
||||
|
@ -1,11 +1,11 @@
|
||||
.. contents::
|
||||
.. sectnum::
|
||||
|
||||
========================================
|
||||
eBPF Instruction Set Specification, v1.0
|
||||
========================================
|
||||
=======================================
|
||||
BPF Instruction Set Specification, v1.0
|
||||
=======================================
|
||||
|
||||
This document specifies version 1.0 of the eBPF instruction set.
|
||||
This document specifies version 1.0 of the BPF instruction set.
|
||||
|
||||
Documentation conventions
|
||||
=========================
|
||||
@ -97,26 +97,10 @@ Definitions
|
||||
A: 10000110
|
||||
B: 11111111 10000110
|
||||
|
||||
Registers and calling convention
|
||||
================================
|
||||
|
||||
eBPF has 10 general purpose registers and a read-only frame pointer register,
|
||||
all of which are 64-bits wide.
|
||||
|
||||
The eBPF calling convention is defined as:
|
||||
|
||||
* R0: return value from function calls, and exit value for eBPF programs
|
||||
* R1 - R5: arguments for function calls
|
||||
* R6 - R9: callee saved registers that function calls will preserve
|
||||
* R10: read-only frame pointer to access stack
|
||||
|
||||
R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
|
||||
necessary across calls.
|
||||
|
||||
Instruction encoding
|
||||
====================
|
||||
|
||||
eBPF has two instruction encodings:
|
||||
BPF has two instruction encodings:
|
||||
|
||||
* the basic instruction encoding, which uses 64 bits to encode an instruction
|
||||
* the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
|
||||
@ -260,7 +244,7 @@ BPF_END 0xd0 0 byte swap operations (see `Byte swap instructions`_ b
|
||||
========= ===== ======= ==========================================================
|
||||
|
||||
Underflow and overflow are allowed during arithmetic operations, meaning
|
||||
the 64-bit or 32-bit value will wrap. If eBPF program execution would
|
||||
the 64-bit or 32-bit value will wrap. If BPF program execution would
|
||||
result in division by zero, the destination register is instead set to zero.
|
||||
If execution would result in modulo by zero, for ``BPF_ALU64`` the value of
|
||||
the destination register is unchanged whereas for ``BPF_ALU`` the upper
|
||||
@ -373,7 +357,7 @@ BPF_JNE 0x5 any PC += offset if dst != src
|
||||
BPF_JSGT 0x6 any PC += offset if dst > src signed
|
||||
BPF_JSGE 0x7 any PC += offset if dst >= src signed
|
||||
BPF_CALL 0x8 0x0 call helper function by address see `Helper functions`_
|
||||
BPF_CALL 0x8 0x1 call PC += offset see `Program-local functions`_
|
||||
BPF_CALL 0x8 0x1 call PC += imm see `Program-local functions`_
|
||||
BPF_CALL 0x8 0x2 call helper function by BTF ID see `Helper functions`_
|
||||
BPF_EXIT 0x9 0x0 return BPF_JMP only
|
||||
BPF_JLT 0xa any PC += offset if dst < src unsigned
|
||||
@ -382,7 +366,7 @@ BPF_JSLT 0xc any PC += offset if dst < src signed
|
||||
BPF_JSLE 0xd any PC += offset if dst <= src signed
|
||||
======== ===== === =========================================== =========================================
|
||||
|
||||
The eBPF program needs to store the return value into register R0 before doing a
|
||||
The BPF program needs to store the return value into register R0 before doing a
|
||||
``BPF_EXIT``.
|
||||
|
||||
Example:
|
||||
@ -424,8 +408,8 @@ Program-local functions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Program-local functions are functions exposed by the same BPF program as the
|
||||
caller, and are referenced by offset from the call instruction, similar to
|
||||
``BPF_JA``. A ``BPF_EXIT`` within the program-local function will return to
|
||||
the caller.
|
||||
``BPF_JA``. The offset is encoded in the imm field of the call instruction.
|
||||
A ``BPF_EXIT`` within the program-local function will return to the caller.
|
||||
|
||||
Load and store instructions
|
||||
===========================
|
||||
@ -502,9 +486,9 @@ Atomic operations
|
||||
|
||||
Atomic operations are operations that operate on memory and cannot be
|
||||
interrupted or corrupted by other access to the same memory region
|
||||
by other eBPF programs or means outside of this specification.
|
||||
by other BPF programs or means outside of this specification.
|
||||
|
||||
All atomic operations supported by eBPF are encoded as store operations
|
||||
All atomic operations supported by BPF are encoded as store operations
|
||||
that use the ``BPF_ATOMIC`` mode modifier as follows:
|
||||
|
||||
* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
|
||||
@ -594,7 +578,7 @@ where
|
||||
Maps
|
||||
~~~~
|
||||
|
||||
Maps are shared memory regions accessible by eBPF programs on some platforms.
|
||||
Maps are shared memory regions accessible by BPF programs on some platforms.
|
||||
A map can have various semantics as defined in a separate document, and may or
|
||||
may not have a single contiguous memory region, but the 'map_val(map)' is
|
||||
currently only defined for maps that do have a single contiguous memory region.
|
||||
@ -616,6 +600,6 @@ identified by the given id.
|
||||
Legacy BPF Packet access instructions
|
||||
-------------------------------------
|
||||
|
||||
eBPF previously introduced special instructions for access to packet data that were
|
||||
BPF previously introduced special instructions for access to packet data that were
|
||||
carried over from classic BPF. However, these instructions are
|
||||
deprecated and should no longer be used.
|
||||
|
@ -98,7 +98,7 @@ If you aren't subscribed to netdev and/or are simply unsure if
|
||||
repository link above for any new networking-related commits. You may
|
||||
also check the following website for the current status:
|
||||
|
||||
https://patchwork.hopto.org/net-next.html
|
||||
https://netdev.bots.linux.dev/net-next.html
|
||||
|
||||
The ``net`` tree continues to collect fixes for the vX.Y content, and is
|
||||
fed back to Linus at regular (~weekly) intervals. Meaning that the
|
||||
@ -120,7 +120,37 @@ queue for netdev:
|
||||
https://patchwork.kernel.org/project/netdevbpf/list/
|
||||
|
||||
The "State" field will tell you exactly where things are at with your
|
||||
patch. Patches are indexed by the ``Message-ID`` header of the emails
|
||||
patch:
|
||||
|
||||
================== =============================================================
|
||||
Patch state Description
|
||||
================== =============================================================
|
||||
New, Under review pending review, patch is in the maintainer’s queue for
|
||||
review; the two states are used interchangeably (depending on
|
||||
the exact co-maintainer handling patchwork at the time)
|
||||
Accepted patch was applied to the appropriate networking tree, this is
|
||||
usually set automatically by the pw-bot
|
||||
Needs ACK waiting for an ack from an area expert or testing
|
||||
Changes requested patch has not passed the review, new revision is expected
|
||||
with appropriate code and commit message changes
|
||||
Rejected patch has been rejected and new revision is not expected
|
||||
Not applicable patch is expected to be applied outside of the networking
|
||||
subsystem
|
||||
Awaiting upstream patch should be reviewed and handled by appropriate
|
||||
sub-maintainer, who will send it on to the networking trees;
|
||||
patches set to ``Awaiting upstream`` in netdev's patchwork
|
||||
will usually remain in this state, whether the sub-maintainer
|
||||
requested changes, accepted or rejected the patch
|
||||
Deferred patch needs to be reposted later, usually due to dependency
|
||||
or because it was posted for a closed tree
|
||||
Superseded new version of the patch was posted, usually set by the
|
||||
pw-bot
|
||||
RFC not to be applied, usually not in maintainer’s review queue,
|
||||
pw-bot can automatically set patches to this state based
|
||||
on subject tags
|
||||
================== =============================================================
|
||||
|
||||
Patches are indexed by the ``Message-ID`` header of the emails
|
||||
which carried them so if you have trouble finding your patch append
|
||||
the value of ``Message-ID`` to the URL above.
|
||||
|
||||
@ -155,7 +185,7 @@ must match the MAINTAINERS entry) and a handful of senior reviewers.
|
||||
|
||||
Bot records its activity here:
|
||||
|
||||
https://patchwork.hopto.org/pw-bot.html
|
||||
https://netdev.bots.linux.dev/pw-bot.html
|
||||
|
||||
Review timelines
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
@ -528,6 +528,8 @@ families may, however, require a larger buffer. 32kB buffer is recommended
|
||||
for most efficient handling of dumps (larger buffer fits more dumped
|
||||
objects and therefore fewer recvmsg() calls are needed).
|
||||
|
||||
.. _classic_netlink:
|
||||
|
||||
Classic Netlink
|
||||
===============
|
||||
|
||||
|
@ -2088,6 +2088,7 @@ struct bpf_tramp_jit {
|
||||
*/
|
||||
int r14_off; /* Offset of saved %r14 */
|
||||
int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */
|
||||
int tccnt_off; /* Offset of saved tailcall counter */
|
||||
int do_fexit; /* do_fexit: label */
|
||||
};
|
||||
|
||||
@ -2258,12 +2259,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
|
||||
tjit->r14_off = alloc_stack(tjit, sizeof(u64));
|
||||
tjit->run_ctx_off = alloc_stack(tjit,
|
||||
sizeof(struct bpf_tramp_run_ctx));
|
||||
tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
|
||||
/* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
|
||||
tjit->stack_size -= STACK_FRAME_OVERHEAD;
|
||||
tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
|
||||
|
||||
/* aghi %r15,-stack_size */
|
||||
EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
|
||||
/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
|
||||
_EMIT6(0xd203f000 | tjit->tccnt_off,
|
||||
0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
|
||||
/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
|
||||
if (nr_reg_args)
|
||||
EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
|
||||
@ -2400,6 +2405,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
|
||||
(nr_stack_args * sizeof(u64) - 1) << 16 |
|
||||
tjit->stack_args_off,
|
||||
0xf000 | tjit->orig_stack_args_off);
|
||||
/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
|
||||
_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
|
||||
/* lgr %r1,%r8 */
|
||||
EMIT4(0xb9040000, REG_1, REG_8);
|
||||
/* %r1() */
|
||||
@ -2456,6 +2463,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
|
||||
if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
|
||||
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
|
||||
tjit->retval_off);
|
||||
/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
|
||||
_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
|
||||
0xf000 | tjit->tccnt_off);
|
||||
/* aghi %r15,stack_size */
|
||||
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
|
||||
/* Emit an expoline for the following indirect jump. */
|
||||
|
@ -2335,13 +2335,27 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port)
|
||||
{
|
||||
struct ksz_device *dev = ds->priv;
|
||||
|
||||
if (dev->chip_id == KSZ8830_CHIP_ID) {
|
||||
switch (dev->chip_id) {
|
||||
case KSZ8830_CHIP_ID:
|
||||
/* Silicon Errata Sheet (DS80000830A):
|
||||
* Port 1 does not work with LinkMD Cable-Testing.
|
||||
* Port 1 does not respond to received PAUSE control frames.
|
||||
*/
|
||||
if (!port)
|
||||
return MICREL_KSZ8_P1_ERRATA;
|
||||
break;
|
||||
case KSZ9477_CHIP_ID:
|
||||
/* KSZ9477 Errata DS80000754C
|
||||
*
|
||||
* Module 4: Energy Efficient Ethernet (EEE) feature select must
|
||||
* be manually disabled
|
||||
* The EEE feature is enabled by default, but it is not fully
|
||||
* operational. It must be manually disabled through register
|
||||
* controls. If not disabled, the PHY ports can auto-negotiate
|
||||
* to enable EEE, and this feature can cause link drops when
|
||||
* linked to another device supporting EEE.
|
||||
*/
|
||||
return MICREL_NO_EEE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -132,6 +132,8 @@ struct sja1105_info {
|
||||
int max_frame_mem;
|
||||
int num_ports;
|
||||
bool multiple_cascade_ports;
|
||||
/* Every {port, TXQ} has its own CBS shaper */
|
||||
bool fixed_cbs_mapping;
|
||||
enum dsa_tag_protocol tag_proto;
|
||||
const struct sja1105_dynamic_table_ops *dyn_ops;
|
||||
const struct sja1105_table_ops *static_ops;
|
||||
|
@ -2115,11 +2115,36 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
|
||||
}
|
||||
|
||||
#define BYTES_PER_KBIT (1000LL / 8)
|
||||
/* Port 0 (the uC port) does not have CBS shapers */
|
||||
#define SJA1110_FIXED_CBS(port, prio) ((((port) - 1) * SJA1105_NUM_TC) + (prio))
|
||||
|
||||
static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
|
||||
int port, int prio)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (priv->info->fixed_cbs_mapping) {
|
||||
i = SJA1110_FIXED_CBS(port, prio);
|
||||
if (i >= 0 && i < priv->info->num_cbs_shapers)
|
||||
return i;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < priv->info->num_cbs_shapers; i++)
|
||||
if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
|
||||
return i;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (priv->info->fixed_cbs_mapping)
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < priv->info->num_cbs_shapers; i++)
|
||||
if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
|
||||
return i;
|
||||
@ -2150,14 +2175,20 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
|
||||
{
|
||||
struct sja1105_private *priv = ds->priv;
|
||||
struct sja1105_cbs_entry *cbs;
|
||||
s64 port_transmit_rate_kbps;
|
||||
int index;
|
||||
|
||||
if (!offload->enable)
|
||||
return sja1105_delete_cbs_shaper(priv, port, offload->queue);
|
||||
|
||||
index = sja1105_find_unused_cbs_shaper(priv);
|
||||
if (index < 0)
|
||||
return -ENOSPC;
|
||||
/* The user may be replacing an existing shaper */
|
||||
index = sja1105_find_cbs_shaper(priv, port, offload->queue);
|
||||
if (index < 0) {
|
||||
/* That isn't the case - see if we can allocate a new one */
|
||||
index = sja1105_find_unused_cbs_shaper(priv);
|
||||
if (index < 0)
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
cbs = &priv->cbs[index];
|
||||
cbs->port = port;
|
||||
@ -2167,9 +2198,17 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
|
||||
*/
|
||||
cbs->credit_hi = offload->hicredit;
|
||||
cbs->credit_lo = abs(offload->locredit);
|
||||
/* User space is in kbits/sec, hardware in bytes/sec */
|
||||
cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
|
||||
cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
|
||||
/* User space is in kbits/sec, while the hardware in bytes/sec times
|
||||
* link speed. Since the given offload->sendslope is good only for the
|
||||
* current link speed anyway, and user space is likely to reprogram it
|
||||
* when that changes, don't even bother to track the port's link speed,
|
||||
* but deduce the port transmit rate from idleslope - sendslope.
|
||||
*/
|
||||
port_transmit_rate_kbps = offload->idleslope - offload->sendslope;
|
||||
cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT,
|
||||
port_transmit_rate_kbps);
|
||||
cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT),
|
||||
port_transmit_rate_kbps);
|
||||
/* Convert the negative values from 64-bit 2's complement
|
||||
* to 32-bit 2's complement (for the case of 0x80000000 whose
|
||||
* negative is still negative).
|
||||
|
@ -781,6 +781,7 @@ const struct sja1105_info sja1110a_info = {
|
||||
.tag_proto = DSA_TAG_PROTO_SJA1110,
|
||||
.can_limit_mcast_flood = true,
|
||||
.multiple_cascade_ports = true,
|
||||
.fixed_cbs_mapping = true,
|
||||
.ptp_ts_bits = 32,
|
||||
.ptpegr_ts_bytes = 8,
|
||||
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
|
||||
@ -831,6 +832,7 @@ const struct sja1105_info sja1110b_info = {
|
||||
.tag_proto = DSA_TAG_PROTO_SJA1110,
|
||||
.can_limit_mcast_flood = true,
|
||||
.multiple_cascade_ports = true,
|
||||
.fixed_cbs_mapping = true,
|
||||
.ptp_ts_bits = 32,
|
||||
.ptpegr_ts_bytes = 8,
|
||||
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
|
||||
@ -881,6 +883,7 @@ const struct sja1105_info sja1110c_info = {
|
||||
.tag_proto = DSA_TAG_PROTO_SJA1110,
|
||||
.can_limit_mcast_flood = true,
|
||||
.multiple_cascade_ports = true,
|
||||
.fixed_cbs_mapping = true,
|
||||
.ptp_ts_bits = 32,
|
||||
.ptpegr_ts_bytes = 8,
|
||||
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
|
||||
@ -931,6 +934,7 @@ const struct sja1105_info sja1110d_info = {
|
||||
.tag_proto = DSA_TAG_PROTO_SJA1110,
|
||||
.can_limit_mcast_flood = true,
|
||||
.multiple_cascade_ports = true,
|
||||
.fixed_cbs_mapping = true,
|
||||
.ptp_ts_bits = 32,
|
||||
.ptpegr_ts_bytes = 8,
|
||||
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
|
||||
|
@ -1402,7 +1402,7 @@ static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
|
||||
return;
|
||||
|
||||
si = enetc_psi_create(pdev);
|
||||
if (si)
|
||||
if (!IS_ERR(si))
|
||||
enetc_psi_destroy(pdev);
|
||||
}
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
|
||||
|
@ -570,7 +570,10 @@ static int gve_rx_append_frags(struct napi_struct *napi,
|
||||
if (!skb)
|
||||
return -1;
|
||||
|
||||
skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
|
||||
if (rx->ctx.skb_tail == rx->ctx.skb_head)
|
||||
skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
|
||||
else
|
||||
rx->ctx.skb_tail->next = skb;
|
||||
rx->ctx.skb_tail = skb;
|
||||
num_frags = 0;
|
||||
}
|
||||
|
@ -814,6 +814,7 @@ struct hnae3_tc_info {
|
||||
u8 max_tc; /* Total number of TCs */
|
||||
u8 num_tc; /* Total number of enabled TCs */
|
||||
bool mqprio_active;
|
||||
bool dcb_ets_active;
|
||||
};
|
||||
|
||||
#define HNAE3_MAX_DSCP 64
|
||||
|
@ -1045,6 +1045,7 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
|
||||
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
|
||||
struct hnae3_dev_specs *dev_specs = &ae_dev->dev_specs;
|
||||
struct hnae3_knic_private_info *kinfo = &h->kinfo;
|
||||
struct net_device *dev = kinfo->netdev;
|
||||
|
||||
*pos += scnprintf(buf + *pos, len - *pos, "dev_spec:\n");
|
||||
*pos += scnprintf(buf + *pos, len - *pos, "MAC entry num: %u\n",
|
||||
@ -1087,6 +1088,9 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
|
||||
dev_specs->mc_mac_size);
|
||||
*pos += scnprintf(buf + *pos, len - *pos, "MAC statistics number: %u\n",
|
||||
dev_specs->mac_stats_num);
|
||||
*pos += scnprintf(buf + *pos, len - *pos,
|
||||
"TX timeout threshold: %d seconds\n",
|
||||
dev->watchdog_timeo / HZ);
|
||||
}
|
||||
|
||||
static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
|
||||
@ -1411,9 +1415,9 @@ int hns3_dbg_init(struct hnae3_handle *handle)
|
||||
return 0;
|
||||
|
||||
out:
|
||||
mutex_destroy(&handle->dbgfs_lock);
|
||||
debugfs_remove_recursive(handle->hnae3_dbgfs);
|
||||
handle->hnae3_dbgfs = NULL;
|
||||
mutex_destroy(&handle->dbgfs_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1421,6 +1425,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
debugfs_remove_recursive(handle->hnae3_dbgfs);
|
||||
handle->hnae3_dbgfs = NULL;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
|
||||
if (handle->dbgfs_buf[i]) {
|
||||
kvfree(handle->dbgfs_buf[i]);
|
||||
@ -1428,8 +1435,6 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
|
||||
}
|
||||
|
||||
mutex_destroy(&handle->dbgfs_lock);
|
||||
debugfs_remove_recursive(handle->hnae3_dbgfs);
|
||||
handle->hnae3_dbgfs = NULL;
|
||||
}
|
||||
|
||||
void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
|
||||
|
@ -2103,8 +2103,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
|
||||
*/
|
||||
if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num &&
|
||||
!ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) {
|
||||
/* This smp_store_release() pairs with smp_load_acquire() in
|
||||
* hns3_nic_reclaim_desc(). Ensure that the BD valid bit
|
||||
* is updated.
|
||||
*/
|
||||
smp_store_release(&ring->last_to_use, ring->next_to_use);
|
||||
hns3_tx_push_bd(ring, num);
|
||||
WRITE_ONCE(ring->last_to_use, ring->next_to_use);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2115,6 +2119,11 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
|
||||
return;
|
||||
}
|
||||
|
||||
/* This smp_store_release() pairs with smp_load_acquire() in
|
||||
* hns3_nic_reclaim_desc(). Ensure that the BD valid bit is updated.
|
||||
*/
|
||||
smp_store_release(&ring->last_to_use, ring->next_to_use);
|
||||
|
||||
if (ring->tqp->mem_base)
|
||||
hns3_tx_mem_doorbell(ring);
|
||||
else
|
||||
@ -2122,7 +2131,6 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
|
||||
ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
|
||||
|
||||
ring->pending_buf = 0;
|
||||
WRITE_ONCE(ring->last_to_use, ring->next_to_use);
|
||||
}
|
||||
|
||||
static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
|
||||
@ -3308,8 +3316,6 @@ static void hns3_set_default_feature(struct net_device *netdev)
|
||||
|
||||
netdev->priv_flags |= IFF_UNICAST_FLT;
|
||||
|
||||
netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
|
||||
|
||||
netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
|
||||
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
|
||||
NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
|
||||
@ -3563,9 +3569,8 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
|
||||
static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
|
||||
int *bytes, int *pkts, int budget)
|
||||
{
|
||||
/* pair with ring->last_to_use update in hns3_tx_doorbell(),
|
||||
* smp_store_release() is not used in hns3_tx_doorbell() because
|
||||
* the doorbell operation already has the needed barrier operation.
|
||||
/* This smp_load_acquire() pairs with smp_store_release() in
|
||||
* hns3_tx_doorbell().
|
||||
*/
|
||||
int ltu = smp_load_acquire(&ring->last_to_use);
|
||||
int ntc = ring->next_to_clean;
|
||||
|
@ -773,7 +773,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
|
||||
hns3_get_ksettings(h, cmd);
|
||||
break;
|
||||
case HNAE3_MEDIA_TYPE_FIBER:
|
||||
if (module_type == HNAE3_MODULE_TYPE_CR)
|
||||
if (module_type == HNAE3_MODULE_TYPE_UNKNOWN)
|
||||
cmd->base.port = PORT_OTHER;
|
||||
else if (module_type == HNAE3_MODULE_TYPE_CR)
|
||||
cmd->base.port = PORT_DA;
|
||||
else
|
||||
cmd->base.port = PORT_FIBRE;
|
||||
|
@ -259,7 +259,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
|
||||
int ret;
|
||||
|
||||
if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
|
||||
hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
|
||||
h->kinfo.tc_info.mqprio_active)
|
||||
return -EINVAL;
|
||||
|
||||
ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
|
||||
@ -275,10 +275,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
|
||||
}
|
||||
|
||||
hclge_tm_schd_info_update(hdev, num_tc);
|
||||
if (num_tc > 1)
|
||||
hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
|
||||
else
|
||||
hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
|
||||
h->kinfo.tc_info.dcb_ets_active = num_tc > 1;
|
||||
|
||||
ret = hclge_ieee_ets_to_tm_info(hdev, ets);
|
||||
if (ret)
|
||||
@ -487,7 +484,7 @@ static u8 hclge_getdcbx(struct hnae3_handle *h)
|
||||
struct hclge_vport *vport = hclge_get_vport(h);
|
||||
struct hclge_dev *hdev = vport->back;
|
||||
|
||||
if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
|
||||
if (h->kinfo.tc_info.mqprio_active)
|
||||
return 0;
|
||||
|
||||
return hdev->dcbx_cap;
|
||||
@ -611,7 +608,8 @@ static int hclge_setup_tc(struct hnae3_handle *h,
|
||||
if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
|
||||
return -EBUSY;
|
||||
|
||||
if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
|
||||
kinfo = &vport->nic.kinfo;
|
||||
if (kinfo->tc_info.dcb_ets_active)
|
||||
return -EINVAL;
|
||||
|
||||
ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt);
|
||||
@ -625,7 +623,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
kinfo = &vport->nic.kinfo;
|
||||
memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info));
|
||||
hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt);
|
||||
kinfo->tc_info.mqprio_active = tc > 0;
|
||||
@ -634,13 +631,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
|
||||
if (ret)
|
||||
goto err_out;
|
||||
|
||||
hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
|
||||
|
||||
if (tc > 1)
|
||||
hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
|
||||
else
|
||||
hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
|
||||
|
||||
return hclge_notify_init_up(hdev);
|
||||
|
||||
err_out:
|
||||
|
@ -1519,7 +1519,7 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
|
||||
struct hclge_desc desc[3];
|
||||
int pos = 0;
|
||||
int ret, i;
|
||||
u32 *req;
|
||||
__le32 *req;
|
||||
|
||||
hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true);
|
||||
desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
|
||||
@ -1544,22 +1544,22 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
|
||||
tcam_msg.loc);
|
||||
|
||||
/* tcam_data0 ~ tcam_data1 */
|
||||
req = (u32 *)req1->tcam_data;
|
||||
req = (__le32 *)req1->tcam_data;
|
||||
for (i = 0; i < 2; i++)
|
||||
pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
|
||||
"%08x\n", *req++);
|
||||
"%08x\n", le32_to_cpu(*req++));
|
||||
|
||||
/* tcam_data2 ~ tcam_data7 */
|
||||
req = (u32 *)req2->tcam_data;
|
||||
req = (__le32 *)req2->tcam_data;
|
||||
for (i = 0; i < 6; i++)
|
||||
pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
|
||||
"%08x\n", *req++);
|
||||
"%08x\n", le32_to_cpu(*req++));
|
||||
|
||||
/* tcam_data8 ~ tcam_data12 */
|
||||
req = (u32 *)req3->tcam_data;
|
||||
req = (__le32 *)req3->tcam_data;
|
||||
for (i = 0; i < 5; i++)
|
||||
pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
|
||||
"%08x\n", *req++);
|
||||
"%08x\n", le32_to_cpu(*req++));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -11026,6 +11026,7 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle,
|
||||
|
||||
static void hclge_info_show(struct hclge_dev *hdev)
|
||||
{
|
||||
struct hnae3_handle *handle = &hdev->vport->nic;
|
||||
struct device *dev = &hdev->pdev->dev;
|
||||
|
||||
dev_info(dev, "PF info begin:\n");
|
||||
@ -11042,9 +11043,9 @@ static void hclge_info_show(struct hclge_dev *hdev)
|
||||
dev_info(dev, "This is %s PF\n",
|
||||
hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
|
||||
dev_info(dev, "DCB %s\n",
|
||||
hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
|
||||
handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable");
|
||||
dev_info(dev, "MQPRIO %s\n",
|
||||
hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
|
||||
handle->kinfo.tc_info.mqprio_active ? "enable" : "disable");
|
||||
dev_info(dev, "Default tx spare buffer size: %u\n",
|
||||
hdev->tx_spare_buf_size);
|
||||
|
||||
|
@ -919,8 +919,6 @@ struct hclge_dev {
|
||||
|
||||
#define HCLGE_FLAG_MAIN BIT(0)
|
||||
#define HCLGE_FLAG_DCB_CAPABLE BIT(1)
|
||||
#define HCLGE_FLAG_DCB_ENABLE BIT(2)
|
||||
#define HCLGE_FLAG_MQPRIO_ENABLE BIT(3)
|
||||
u32 flag;
|
||||
|
||||
u32 pkt_buf_size; /* Total pf buf size for tx/rx */
|
||||
|
@ -34,11 +34,11 @@ struct igb_adapter;
|
||||
/* TX/RX descriptor defines */
|
||||
#define IGB_DEFAULT_TXD 256
|
||||
#define IGB_DEFAULT_TX_WORK 128
|
||||
#define IGB_MIN_TXD 80
|
||||
#define IGB_MIN_TXD 64
|
||||
#define IGB_MAX_TXD 4096
|
||||
|
||||
#define IGB_DEFAULT_RXD 256
|
||||
#define IGB_MIN_RXD 80
|
||||
#define IGB_MIN_RXD 64
|
||||
#define IGB_MAX_RXD 4096
|
||||
|
||||
#define IGB_DEFAULT_ITR 3 /* dynamic */
|
||||
|
@ -3933,8 +3933,9 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
|
||||
struct pci_dev *pdev = adapter->pdev;
|
||||
struct e1000_hw *hw = &adapter->hw;
|
||||
|
||||
/* Virtualization features not supported on i210 family. */
|
||||
if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
|
||||
/* Virtualization features not supported on i210 and 82580 family. */
|
||||
if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
|
||||
(hw->mac.type == e1000_82580))
|
||||
return;
|
||||
|
||||
/* Of the below we really only want the effect of getting
|
||||
|
@ -39,11 +39,11 @@ enum latency_range {
|
||||
/* Tx/Rx descriptor defines */
|
||||
#define IGBVF_DEFAULT_TXD 256
|
||||
#define IGBVF_MAX_TXD 4096
|
||||
#define IGBVF_MIN_TXD 80
|
||||
#define IGBVF_MIN_TXD 64
|
||||
|
||||
#define IGBVF_DEFAULT_RXD 256
|
||||
#define IGBVF_MAX_RXD 4096
|
||||
#define IGBVF_MIN_RXD 80
|
||||
#define IGBVF_MIN_RXD 64
|
||||
|
||||
#define IGBVF_MIN_ITR_USECS 10 /* 100000 irq/sec */
|
||||
#define IGBVF_MAX_ITR_USECS 10000 /* 100 irq/sec */
|
||||
|
@ -379,11 +379,11 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
|
||||
/* TX/RX descriptor defines */
|
||||
#define IGC_DEFAULT_TXD 256
|
||||
#define IGC_DEFAULT_TX_WORK 128
|
||||
#define IGC_MIN_TXD 80
|
||||
#define IGC_MIN_TXD 64
|
||||
#define IGC_MAX_TXD 4096
|
||||
|
||||
#define IGC_DEFAULT_RXD 256
|
||||
#define IGC_MIN_RXD 80
|
||||
#define IGC_MIN_RXD 64
|
||||
#define IGC_MAX_RXD 4096
|
||||
|
||||
/* Supported Rx Buffer Sizes */
|
||||
|
@ -846,6 +846,21 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req,
|
||||
u16 *smq, u16 *smq_mask)
|
||||
{
|
||||
struct nix_cn10k_aq_enq_req *aq_req;
|
||||
|
||||
if (!is_rvu_otx2(rvu)) {
|
||||
aq_req = (struct nix_cn10k_aq_enq_req *)req;
|
||||
*smq = aq_req->sq.smq;
|
||||
*smq_mask = aq_req->sq_mask.smq;
|
||||
} else {
|
||||
*smq = req->sq.smq;
|
||||
*smq_mask = req->sq_mask.smq;
|
||||
}
|
||||
}
|
||||
|
||||
static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
|
||||
struct nix_aq_enq_req *req,
|
||||
struct nix_aq_enq_rsp *rsp)
|
||||
@ -857,6 +872,7 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
|
||||
struct rvu_block *block;
|
||||
struct admin_queue *aq;
|
||||
struct rvu_pfvf *pfvf;
|
||||
u16 smq, smq_mask;
|
||||
void *ctx, *mask;
|
||||
bool ena;
|
||||
u64 cfg;
|
||||
@ -928,13 +944,14 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
nix_get_aq_req_smq(rvu, req, &smq, &smq_mask);
|
||||
/* Check if SQ pointed SMQ belongs to this PF/VF or not */
|
||||
if (req->ctype == NIX_AQ_CTYPE_SQ &&
|
||||
((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) ||
|
||||
(req->op == NIX_AQ_INSTOP_WRITE &&
|
||||
req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) {
|
||||
req->sq_mask.ena && req->sq.ena && smq_mask))) {
|
||||
if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
|
||||
pcifunc, req->sq.smq))
|
||||
pcifunc, smq))
|
||||
return NIX_AF_ERR_AQ_ENQUEUE;
|
||||
}
|
||||
|
||||
|
@ -17,8 +17,10 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (mlx5e_is_eswitch_flow(parse_state->flow))
|
||||
if (mlx5e_is_eswitch_flow(parse_state->flow)) {
|
||||
attr->esw_attr->split_count = attr->esw_attr->out_count;
|
||||
parse_state->if_count = 0;
|
||||
}
|
||||
|
||||
attr->flags |= MLX5_ATTR_FLAG_CT;
|
||||
|
||||
|
@ -294,6 +294,7 @@ parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
parse_state->if_count = 0;
|
||||
esw_attr->out_count++;
|
||||
return 0;
|
||||
}
|
||||
|
@ -98,8 +98,10 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
|
||||
|
||||
attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
|
||||
|
||||
if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
|
||||
if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
|
||||
esw_attr->split_count = esw_attr->out_count;
|
||||
parse_state->if_count = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -66,6 +66,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
parse_state->if_count = 0;
|
||||
esw_attr->out_count++;
|
||||
|
||||
return 0;
|
||||
|
@ -166,6 +166,7 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
|
||||
return err;
|
||||
|
||||
esw_attr->split_count = esw_attr->out_count;
|
||||
parse_state->if_count = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -65,8 +65,10 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
|
||||
if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
|
||||
attr->esw_attr->split_count = attr->esw_attr->out_count;
|
||||
parse_state->if_count = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -3936,6 +3936,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
|
||||
}
|
||||
|
||||
i_split = i + 1;
|
||||
parse_state->if_count = 0;
|
||||
list_add(&attr->list, &flow->attrs);
|
||||
}
|
||||
|
||||
|
@ -1276,12 +1276,19 @@ int
|
||||
mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
|
||||
enum mlx5_eswitch_vport_event enabled_events)
|
||||
{
|
||||
bool pf_needed;
|
||||
int ret;
|
||||
|
||||
pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) ||
|
||||
esw->mode == MLX5_ESWITCH_LEGACY;
|
||||
|
||||
/* Enable PF vport */
|
||||
ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (pf_needed) {
|
||||
ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF,
|
||||
enabled_events);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Enable external host PF HCA */
|
||||
ret = host_pf_enable_hca(esw->dev);
|
||||
@ -1317,7 +1324,8 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
|
||||
ecpf_err:
|
||||
host_pf_disable_hca(esw->dev);
|
||||
pf_hca_err:
|
||||
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
|
||||
if (pf_needed)
|
||||
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1335,7 +1343,10 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
|
||||
}
|
||||
|
||||
host_pf_disable_hca(esw->dev);
|
||||
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
|
||||
|
||||
if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
|
||||
esw->mode == MLX5_ESWITCH_LEGACY)
|
||||
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
|
||||
}
|
||||
|
||||
static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
|
||||
|
@ -3216,26 +3216,47 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
|
||||
esw_acl_ingress_ofld_cleanup(esw, vport);
|
||||
}
|
||||
|
||||
static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
|
||||
static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
|
||||
{
|
||||
struct mlx5_vport *vport;
|
||||
struct mlx5_vport *uplink, *manager;
|
||||
int ret;
|
||||
|
||||
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
|
||||
if (IS_ERR(vport))
|
||||
return PTR_ERR(vport);
|
||||
uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
|
||||
if (IS_ERR(uplink))
|
||||
return PTR_ERR(uplink);
|
||||
|
||||
return esw_vport_create_offloads_acl_tables(esw, vport);
|
||||
ret = esw_vport_create_offloads_acl_tables(esw, uplink);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
|
||||
if (IS_ERR(manager)) {
|
||||
ret = PTR_ERR(manager);
|
||||
goto err_manager;
|
||||
}
|
||||
|
||||
ret = esw_vport_create_offloads_acl_tables(esw, manager);
|
||||
if (ret)
|
||||
goto err_manager;
|
||||
|
||||
return 0;
|
||||
|
||||
err_manager:
|
||||
esw_vport_destroy_offloads_acl_tables(esw, uplink);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
|
||||
static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
|
||||
{
|
||||
struct mlx5_vport *vport;
|
||||
|
||||
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
|
||||
if (IS_ERR(vport))
|
||||
return;
|
||||
vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
|
||||
if (!IS_ERR(vport))
|
||||
esw_vport_destroy_offloads_acl_tables(esw, vport);
|
||||
|
||||
esw_vport_destroy_offloads_acl_tables(esw, vport);
|
||||
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
|
||||
if (!IS_ERR(vport))
|
||||
esw_vport_destroy_offloads_acl_tables(esw, vport);
|
||||
}
|
||||
|
||||
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
|
||||
@ -3280,7 +3301,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
|
||||
}
|
||||
esw->fdb_table.offloads.indir = indir;
|
||||
|
||||
err = esw_create_uplink_offloads_acl_tables(esw);
|
||||
err = esw_create_offloads_acl_tables(esw);
|
||||
if (err)
|
||||
goto create_acl_err;
|
||||
|
||||
@ -3321,7 +3342,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
|
||||
create_restore_err:
|
||||
esw_destroy_offloads_table(esw);
|
||||
create_offloads_err:
|
||||
esw_destroy_uplink_offloads_acl_tables(esw);
|
||||
esw_destroy_offloads_acl_tables(esw);
|
||||
create_acl_err:
|
||||
mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
|
||||
create_indir_err:
|
||||
@ -3337,7 +3358,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
|
||||
esw_destroy_offloads_fdb_tables(esw);
|
||||
esw_destroy_restore_table(esw);
|
||||
esw_destroy_offloads_table(esw);
|
||||
esw_destroy_uplink_offloads_acl_tables(esw);
|
||||
esw_destroy_offloads_acl_tables(esw);
|
||||
mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
|
||||
mutex_destroy(&esw->fdb_table.offloads.vports.lock);
|
||||
}
|
||||
|
@ -359,26 +359,36 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
|
||||
/* Handle a received packet. Second half: Touches packet payload. */
|
||||
void __efx_rx_packet(struct efx_channel *channel)
|
||||
{
|
||||
struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
|
||||
struct efx_nic *efx = channel->efx;
|
||||
struct efx_rx_buffer *rx_buf =
|
||||
efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
|
||||
efx_rx_buffer(rx_queue, channel->rx_pkt_index);
|
||||
u8 *eh = efx_rx_buf_va(rx_buf);
|
||||
|
||||
/* Read length from the prefix if necessary. This already
|
||||
* excludes the length of the prefix itself.
|
||||
*/
|
||||
if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
|
||||
if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) {
|
||||
rx_buf->len = le16_to_cpup((__le16 *)
|
||||
(eh + efx->rx_packet_len_offset));
|
||||
/* A known issue may prevent this being filled in;
|
||||
* if that happens, just drop the packet.
|
||||
* Must do that in the driver since passing a zero-length
|
||||
* packet up to the stack may cause a crash.
|
||||
*/
|
||||
if (unlikely(!rx_buf->len)) {
|
||||
efx_free_rx_buffers(rx_queue, rx_buf,
|
||||
channel->rx_pkt_n_frags);
|
||||
channel->n_rx_frm_trunc++;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we're in loopback test, then pass the packet directly to the
|
||||
* loopback layer, and free the rx_buf here
|
||||
*/
|
||||
if (unlikely(efx->loopback_selftest)) {
|
||||
struct efx_rx_queue *rx_queue;
|
||||
|
||||
efx_loopback_rx_packet(efx, eh, rx_buf->len);
|
||||
rx_queue = efx_channel_get_rx_queue(channel);
|
||||
efx_free_rx_buffers(rx_queue, rx_buf,
|
||||
channel->rx_pkt_n_frags);
|
||||
goto out;
|
||||
|
@ -419,9 +419,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
|
||||
return ERR_PTR(phy_mode);
|
||||
|
||||
plat->phy_interface = phy_mode;
|
||||
plat->mac_interface = stmmac_of_get_mac_mode(np);
|
||||
if (plat->mac_interface < 0)
|
||||
plat->mac_interface = plat->phy_interface;
|
||||
rc = stmmac_of_get_mac_mode(np);
|
||||
plat->mac_interface = rc < 0 ? plat->phy_interface : rc;
|
||||
|
||||
/* Some wrapper drivers still rely on phy_node. Let's save it while
|
||||
* they are not converted to phylink. */
|
||||
|
@ -1330,8 +1330,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
|
||||
struct crypto_aead *tfm;
|
||||
int ret;
|
||||
|
||||
/* Pick a sync gcm(aes) cipher to ensure order is preserved. */
|
||||
tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
|
||||
tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
|
||||
|
||||
if (IS_ERR(tfm))
|
||||
return tfm;
|
||||
|
@ -1800,9 +1800,6 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = {
|
||||
/* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */
|
||||
{0x1c, 0x04, 0x00d0},
|
||||
|
||||
/* Energy Efficient Ethernet (EEE) feature select must be manually disabled */
|
||||
{0x07, 0x3c, 0x0000},
|
||||
|
||||
/* Register settings are required to meet data sheet supply current specifications */
|
||||
{0x1c, 0x13, 0x6eff},
|
||||
{0x1c, 0x14, 0xe6ff},
|
||||
@ -1847,6 +1844,12 @@ static int ksz9477_config_init(struct phy_device *phydev)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
|
||||
* in this switch shall be regarded as broken.
|
||||
*/
|
||||
if (phydev->dev_flags & MICREL_NO_EEE)
|
||||
phydev->eee_broken_modes = -1;
|
||||
|
||||
err = genphy_restart_aneg(phydev);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -344,6 +344,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
|
||||
struct veth_rq *rq = NULL;
|
||||
int ret = NETDEV_TX_OK;
|
||||
struct net_device *rcv;
|
||||
int length = skb->len;
|
||||
bool use_napi = false;
|
||||
@ -378,11 +379,12 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
} else {
|
||||
drop:
|
||||
atomic64_inc(&priv->dropped);
|
||||
ret = NET_XMIT_DROP;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return NETDEV_TX_OK;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
|
||||
|
@ -336,6 +336,7 @@ MODULE_DEVICE_TABLE(of, of_nxp_nci_i2c_match);
|
||||
#ifdef CONFIG_ACPI
|
||||
static const struct acpi_device_id acpi_id[] = {
|
||||
{ "NXP1001" },
|
||||
{ "NXP1002" },
|
||||
{ "NXP7471" },
|
||||
{ }
|
||||
};
|
||||
|
@ -117,6 +117,8 @@ enum audit_nfcfgop {
|
||||
AUDIT_NFT_OP_OBJ_RESET,
|
||||
AUDIT_NFT_OP_FLOWTABLE_REGISTER,
|
||||
AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
|
||||
AUDIT_NFT_OP_SETELEM_RESET,
|
||||
AUDIT_NFT_OP_RULE_RESET,
|
||||
AUDIT_NFT_OP_INVALID,
|
||||
};
|
||||
|
||||
|
@ -438,7 +438,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
|
||||
|
||||
size /= sizeof(long);
|
||||
while (size--)
|
||||
*ldst++ = *lsrc++;
|
||||
data_race(*ldst++ = *lsrc++);
|
||||
}
|
||||
|
||||
/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
|
||||
|
@ -147,6 +147,7 @@ struct inet6_skb_parm {
|
||||
#define IP6SKB_JUMBOGRAM 128
|
||||
#define IP6SKB_SEG6 256
|
||||
#define IP6SKB_FAKEJUMBO 512
|
||||
#define IP6SKB_MULTIPATH 1024
|
||||
};
|
||||
|
||||
#if defined(CONFIG_NET_L3_MASTER_DEV)
|
||||
|
@ -41,9 +41,10 @@
|
||||
#define PHY_ID_KSZ9477 0x00221631
|
||||
|
||||
/* struct phy_device dev_flags definitions */
|
||||
#define MICREL_PHY_50MHZ_CLK 0x00000001
|
||||
#define MICREL_PHY_FXEN 0x00000002
|
||||
#define MICREL_KSZ8_P1_ERRATA 0x00000003
|
||||
#define MICREL_PHY_50MHZ_CLK BIT(0)
|
||||
#define MICREL_PHY_FXEN BIT(1)
|
||||
#define MICREL_KSZ8_P1_ERRATA BIT(2)
|
||||
#define MICREL_NO_EEE BIT(3)
|
||||
|
||||
#define MICREL_KSZ9021_EXTREG_CTRL 0xB
|
||||
#define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC
|
||||
|
@ -600,7 +600,7 @@ void pcs_get_state(struct phylink_pcs *pcs,
|
||||
*
|
||||
* The %neg_mode argument should be tested via the phylink_mode_*() family of
|
||||
* functions, or for PCS that set pcs->neg_mode true, should be tested
|
||||
* against the %PHYLINK_PCS_NEG_* definitions.
|
||||
* against the PHYLINK_PCS_NEG_* definitions.
|
||||
*/
|
||||
int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
|
||||
phy_interface_t interface, const unsigned long *advertising,
|
||||
@ -630,7 +630,7 @@ void pcs_an_restart(struct phylink_pcs *pcs);
|
||||
*
|
||||
* The %mode argument should be tested via the phylink_mode_*() family of
|
||||
* functions, or for PCS that set pcs->neg_mode true, should be tested
|
||||
* against the %PHYLINK_PCS_NEG_* definitions.
|
||||
* against the PHYLINK_PCS_NEG_* definitions.
|
||||
*/
|
||||
void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
|
||||
phy_interface_t interface, int speed, int duplex);
|
||||
|
@ -57,6 +57,7 @@ struct inet_skb_parm {
|
||||
#define IPSKB_FRAG_PMTU BIT(6)
|
||||
#define IPSKB_L3SLAVE BIT(7)
|
||||
#define IPSKB_NOPOLICY BIT(8)
|
||||
#define IPSKB_MULTIPATH BIT(9)
|
||||
|
||||
u16 frag_max_size;
|
||||
};
|
||||
@ -94,7 +95,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
|
||||
ipcm_init(ipcm);
|
||||
|
||||
ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
|
||||
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
|
||||
ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
|
||||
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
|
||||
ipcm->addr = inet->inet_saddr;
|
||||
ipcm->protocol = inet->inet_num;
|
||||
|
@ -642,7 +642,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net,
|
||||
if (!net->ipv6.fib6_rules_require_fldissect)
|
||||
return false;
|
||||
|
||||
skb_flow_dissect_flow_keys(skb, flkeys, flag);
|
||||
memset(flkeys, 0, sizeof(*flkeys));
|
||||
__skb_flow_dissect(net, skb, &flow_keys_dissector,
|
||||
flkeys, NULL, 0, 0, 0, flag);
|
||||
|
||||
fl6->fl6_sport = flkeys->ports.src;
|
||||
fl6->fl6_dport = flkeys->ports.dst;
|
||||
fl6->flowi6_proto = flkeys->basic.ip_proto;
|
||||
|
@ -418,7 +418,10 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
|
||||
if (!net->ipv4.fib_rules_require_fldissect)
|
||||
return false;
|
||||
|
||||
skb_flow_dissect_flow_keys(skb, flkeys, flag);
|
||||
memset(flkeys, 0, sizeof(*flkeys));
|
||||
__skb_flow_dissect(net, skb, &flow_keys_dissector,
|
||||
flkeys, NULL, 0, 0, 0, flag);
|
||||
|
||||
fl4->fl4_sport = flkeys->ports.src;
|
||||
fl4->fl4_dport = flkeys->ports.dst;
|
||||
fl4->flowi4_proto = flkeys->basic.ip_proto;
|
||||
|
@ -483,15 +483,14 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
|
||||
u64_stats_inc(&tstats->tx_packets);
|
||||
u64_stats_update_end(&tstats->syncp);
|
||||
put_cpu_ptr(tstats);
|
||||
} else {
|
||||
struct net_device_stats *err_stats = &dev->stats;
|
||||
return;
|
||||
}
|
||||
|
||||
if (pkt_len < 0) {
|
||||
err_stats->tx_errors++;
|
||||
err_stats->tx_aborted_errors++;
|
||||
} else {
|
||||
err_stats->tx_dropped++;
|
||||
}
|
||||
if (pkt_len < 0) {
|
||||
DEV_STATS_INC(dev, tx_errors);
|
||||
DEV_STATS_INC(dev, tx_aborted_errors);
|
||||
} else {
|
||||
DEV_STATS_INC(dev, tx_dropped);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/pid.h>
|
||||
#include <linux/nsproxy.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <net/compat.h>
|
||||
|
||||
/* Well, we should have at least one descriptor open
|
||||
* to accept passed FDs 8)
|
||||
@ -123,14 +124,17 @@ static inline bool scm_has_secdata(struct socket *sock)
|
||||
static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
|
||||
{
|
||||
struct file *pidfd_file = NULL;
|
||||
int pidfd;
|
||||
int len, pidfd;
|
||||
|
||||
/*
|
||||
* put_cmsg() doesn't return an error if CMSG is truncated,
|
||||
/* put_cmsg() doesn't return an error if CMSG is truncated,
|
||||
* that's why we need to opencode these checks here.
|
||||
*/
|
||||
if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
|
||||
(msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
|
||||
if (msg->msg_flags & MSG_CMSG_COMPAT)
|
||||
len = sizeof(struct compat_cmsghdr) + sizeof(int);
|
||||
else
|
||||
len = sizeof(struct cmsghdr) + sizeof(int);
|
||||
|
||||
if (msg->msg_controllen < len) {
|
||||
msg->msg_flags |= MSG_CTRUNC;
|
||||
return;
|
||||
}
|
||||
|
@ -1053,6 +1053,12 @@ static inline void sk_wmem_queued_add(struct sock *sk, int val)
|
||||
WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
|
||||
}
|
||||
|
||||
static inline void sk_forward_alloc_add(struct sock *sk, int val)
|
||||
{
|
||||
/* Paired with lockless reads of sk->sk_forward_alloc */
|
||||
WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val);
|
||||
}
|
||||
|
||||
void sk_stream_write_space(struct sock *sk);
|
||||
|
||||
/* OOB backlog add */
|
||||
@ -1377,7 +1383,7 @@ static inline int sk_forward_alloc_get(const struct sock *sk)
|
||||
if (sk->sk_prot->forward_alloc_get)
|
||||
return sk->sk_prot->forward_alloc_get(sk);
|
||||
#endif
|
||||
return sk->sk_forward_alloc;
|
||||
return READ_ONCE(sk->sk_forward_alloc);
|
||||
}
|
||||
|
||||
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
|
||||
@ -1673,14 +1679,14 @@ static inline void sk_mem_charge(struct sock *sk, int size)
|
||||
{
|
||||
if (!sk_has_account(sk))
|
||||
return;
|
||||
sk->sk_forward_alloc -= size;
|
||||
sk_forward_alloc_add(sk, -size);
|
||||
}
|
||||
|
||||
static inline void sk_mem_uncharge(struct sock *sk, int size)
|
||||
{
|
||||
if (!sk_has_account(sk))
|
||||
return;
|
||||
sk->sk_forward_alloc += size;
|
||||
sk_forward_alloc_add(sk, size);
|
||||
sk_mem_reclaim(sk);
|
||||
}
|
||||
|
||||
@ -1900,7 +1906,9 @@ struct sockcm_cookie {
|
||||
static inline void sockcm_init(struct sockcm_cookie *sockc,
|
||||
const struct sock *sk)
|
||||
{
|
||||
*sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
|
||||
*sockc = (struct sockcm_cookie) {
|
||||
.tsflags = READ_ONCE(sk->sk_tsflags)
|
||||
};
|
||||
}
|
||||
|
||||
int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
|
||||
@ -2695,9 +2703,9 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
|
||||
static inline void
|
||||
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
ktime_t kt = skb->tstamp;
|
||||
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
|
||||
|
||||
u32 tsflags = READ_ONCE(sk->sk_tsflags);
|
||||
ktime_t kt = skb->tstamp;
|
||||
/*
|
||||
* generate control messages if
|
||||
* - receive time stamping in software requested
|
||||
@ -2705,10 +2713,10 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
|
||||
* - hardware time stamps available and wanted
|
||||
*/
|
||||
if (sock_flag(sk, SOCK_RCVTSTAMP) ||
|
||||
(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
|
||||
(kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
|
||||
(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
|
||||
(kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
|
||||
(hwtstamps->hwtstamp &&
|
||||
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
|
||||
(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
|
||||
__sock_recv_timestamp(msg, sk, skb);
|
||||
else
|
||||
sock_write_timestamp(sk, kt);
|
||||
@ -2730,7 +2738,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
|
||||
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
|
||||
SOF_TIMESTAMPING_RAW_HARDWARE)
|
||||
|
||||
if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
|
||||
if (sk->sk_flags & FLAGS_RECV_CMSGS ||
|
||||
READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
|
||||
__sock_recv_cmsgs(msg, sk, skb);
|
||||
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
|
||||
sock_write_timestamp(sk, skb->tstamp);
|
||||
|
@ -263,6 +263,7 @@ enum nft_chain_attributes {
|
||||
* @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
|
||||
* @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32)
|
||||
* @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32)
|
||||
* @NFTA_RULE_CHAIN_ID: add the rule to chain by ID, alternative to @NFTA_RULE_CHAIN (NLA_U32)
|
||||
*/
|
||||
enum nft_rule_attributes {
|
||||
NFTA_RULE_UNSPEC,
|
||||
|
@ -143,6 +143,8 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
|
||||
{ AUDIT_NFT_OP_OBJ_RESET, "nft_reset_obj" },
|
||||
{ AUDIT_NFT_OP_FLOWTABLE_REGISTER, "nft_register_flowtable" },
|
||||
{ AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, "nft_unregister_flowtable" },
|
||||
{ AUDIT_NFT_OP_SETELEM_RESET, "nft_reset_setelem" },
|
||||
{ AUDIT_NFT_OP_RULE_RESET, "nft_reset_rule" },
|
||||
{ AUDIT_NFT_OP_INVALID, "nft_invalid" },
|
||||
};
|
||||
|
||||
|
@ -553,7 +553,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
void *value, u64 map_flags, gfp_t gfp_flags)
|
||||
{
|
||||
struct bpf_local_storage_data *old_sdata = NULL;
|
||||
struct bpf_local_storage_elem *selem = NULL;
|
||||
struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
|
||||
struct bpf_local_storage *local_storage;
|
||||
unsigned long flags;
|
||||
int err;
|
||||
@ -607,11 +607,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
}
|
||||
}
|
||||
|
||||
if (gfp_flags == GFP_KERNEL) {
|
||||
selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
|
||||
if (!selem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
/* A lookup has just been done before and concluded a new selem is
|
||||
* needed. The chance of an unnecessary alloc is unlikely.
|
||||
*/
|
||||
alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
|
||||
if (!alloc_selem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
|
||||
@ -623,13 +624,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
* simple.
|
||||
*/
|
||||
err = -EAGAIN;
|
||||
goto unlock_err;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
|
||||
err = check_flags(old_sdata, map_flags);
|
||||
if (err)
|
||||
goto unlock_err;
|
||||
goto unlock;
|
||||
|
||||
if (old_sdata && (map_flags & BPF_F_LOCK)) {
|
||||
copy_map_value_locked(&smap->map, old_sdata->data, value,
|
||||
@ -638,23 +639,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (gfp_flags != GFP_KERNEL) {
|
||||
/* local_storage->lock is held. Hence, we are sure
|
||||
* we can unlink and uncharge the old_sdata successfully
|
||||
* later. Hence, instead of charging the new selem now
|
||||
* and then uncharge the old selem later (which may cause
|
||||
* a potential but unnecessary charge failure), avoid taking
|
||||
* a charge at all here (the "!old_sdata" check) and the
|
||||
* old_sdata will not be uncharged later during
|
||||
* bpf_selem_unlink_storage_nolock().
|
||||
*/
|
||||
selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
|
||||
if (!selem) {
|
||||
err = -ENOMEM;
|
||||
goto unlock_err;
|
||||
}
|
||||
}
|
||||
|
||||
alloc_selem = NULL;
|
||||
/* First, link the new selem to the map */
|
||||
bpf_selem_link_map(smap, selem);
|
||||
|
||||
@ -665,20 +650,16 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
if (old_sdata) {
|
||||
bpf_selem_unlink_map(SELEM(old_sdata));
|
||||
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
|
||||
false, false);
|
||||
true, false);
|
||||
}
|
||||
|
||||
unlock:
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
return SDATA(selem);
|
||||
|
||||
unlock_err:
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
if (selem) {
|
||||
if (alloc_selem) {
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
bpf_selem_free(selem, smap, true);
|
||||
bpf_selem_free(alloc_selem, smap, true);
|
||||
}
|
||||
return ERR_PTR(err);
|
||||
return err ? ERR_PTR(err) : SDATA(selem);
|
||||
}
|
||||
|
||||
static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
|
||||
@ -779,7 +760,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
|
||||
* of the loop will set the free_cgroup_storage to true.
|
||||
*/
|
||||
free_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, false, true);
|
||||
local_storage, selem, true, true);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
|
||||
|
@ -5502,9 +5502,9 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
|
||||
}
|
||||
|
||||
run_ctx.bpf_cookie = 0;
|
||||
run_ctx.saved_run_ctx = NULL;
|
||||
if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
|
||||
/* recursion detected */
|
||||
__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
|
||||
bpf_prog_put(prog);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
@ -926,13 +926,12 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
|
||||
migrate_disable();
|
||||
might_fault();
|
||||
|
||||
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
|
||||
|
||||
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
|
||||
bpf_prog_inc_misses_counter(prog);
|
||||
return 0;
|
||||
}
|
||||
|
||||
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
|
||||
|
||||
return bpf_prog_start_time();
|
||||
}
|
||||
|
||||
|
@ -543,6 +543,7 @@ struct bpf_fentry_test_t {
|
||||
|
||||
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
|
||||
{
|
||||
asm volatile ("");
|
||||
return (long)arg;
|
||||
}
|
||||
|
||||
|
@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
|
||||
struct sock_exterr_skb *serr;
|
||||
struct sk_buff *skb;
|
||||
char *state = "UNK";
|
||||
u32 tsflags;
|
||||
int err;
|
||||
|
||||
jsk = j1939_sk(sk);
|
||||
@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
|
||||
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
|
||||
return;
|
||||
|
||||
tsflags = READ_ONCE(sk->sk_tsflags);
|
||||
switch (type) {
|
||||
case J1939_ERRQUEUE_TX_ACK:
|
||||
if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
|
||||
if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
|
||||
return;
|
||||
break;
|
||||
case J1939_ERRQUEUE_TX_SCHED:
|
||||
if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
|
||||
if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
|
||||
return;
|
||||
break;
|
||||
case J1939_ERRQUEUE_TX_ABORT:
|
||||
@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
|
||||
case J1939_ERRQUEUE_RX_DPO:
|
||||
fallthrough;
|
||||
case J1939_ERRQUEUE_RX_ABORT:
|
||||
if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
|
||||
if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
|
||||
}
|
||||
|
||||
serr->opt_stats = true;
|
||||
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
|
||||
if (tsflags & SOF_TIMESTAMPING_OPT_ID)
|
||||
serr->ee.ee_data = session->tskey;
|
||||
|
||||
netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
|
||||
|
@ -1831,8 +1831,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
|
||||
|
||||
memset(&keys, 0, sizeof(keys));
|
||||
__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
|
||||
&keys, NULL, 0, 0, 0,
|
||||
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
|
||||
&keys, NULL, 0, 0, 0, 0);
|
||||
|
||||
return __flow_hash_from_keys(&keys, &hashrnd);
|
||||
}
|
||||
|
@ -550,7 +550,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
|
||||
bool *pfmemalloc)
|
||||
{
|
||||
bool ret_pfmemalloc = false;
|
||||
unsigned int obj_size;
|
||||
size_t obj_size;
|
||||
void *obj;
|
||||
|
||||
obj_size = SKB_HEAD_ALIGN(*size);
|
||||
@ -567,7 +567,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
|
||||
obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
|
||||
goto out;
|
||||
}
|
||||
*size = obj_size = kmalloc_size_roundup(obj_size);
|
||||
|
||||
obj_size = kmalloc_size_roundup(obj_size);
|
||||
/* The following cast might truncate high-order bits of obj_size, this
|
||||
* is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
|
||||
*/
|
||||
*size = (unsigned int)obj_size;
|
||||
|
||||
/*
|
||||
* Try a regular allocation, when that fails and we're not entitled
|
||||
* to the reserves, fail.
|
||||
@ -4423,21 +4429,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
|
||||
struct sk_buff *segs = NULL;
|
||||
struct sk_buff *tail = NULL;
|
||||
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
|
||||
skb_frag_t *frag = skb_shinfo(head_skb)->frags;
|
||||
unsigned int mss = skb_shinfo(head_skb)->gso_size;
|
||||
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
|
||||
struct sk_buff *frag_skb = head_skb;
|
||||
unsigned int offset = doffset;
|
||||
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
|
||||
unsigned int partial_segs = 0;
|
||||
unsigned int headroom;
|
||||
unsigned int len = head_skb->len;
|
||||
struct sk_buff *frag_skb;
|
||||
skb_frag_t *frag;
|
||||
__be16 proto;
|
||||
bool csum, sg;
|
||||
int nfrags = skb_shinfo(head_skb)->nr_frags;
|
||||
int err = -ENOMEM;
|
||||
int i = 0;
|
||||
int pos;
|
||||
int nfrags, pos;
|
||||
|
||||
if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
|
||||
mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
|
||||
@ -4514,6 +4519,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
|
||||
headroom = skb_headroom(head_skb);
|
||||
pos = skb_headlen(head_skb);
|
||||
|
||||
if (skb_orphan_frags(head_skb, GFP_ATOMIC))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
nfrags = skb_shinfo(head_skb)->nr_frags;
|
||||
frag = skb_shinfo(head_skb)->frags;
|
||||
frag_skb = head_skb;
|
||||
|
||||
do {
|
||||
struct sk_buff *nskb;
|
||||
skb_frag_t *nskb_frag;
|
||||
@ -4534,6 +4546,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
|
||||
(skb_headlen(list_skb) == len || sg)) {
|
||||
BUG_ON(skb_headlen(list_skb) > len);
|
||||
|
||||
nskb = skb_clone(list_skb, GFP_ATOMIC);
|
||||
if (unlikely(!nskb))
|
||||
goto err;
|
||||
|
||||
i = 0;
|
||||
nfrags = skb_shinfo(list_skb)->nr_frags;
|
||||
frag = skb_shinfo(list_skb)->frags;
|
||||
@ -4552,12 +4568,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
|
||||
frag++;
|
||||
}
|
||||
|
||||
nskb = skb_clone(list_skb, GFP_ATOMIC);
|
||||
list_skb = list_skb->next;
|
||||
|
||||
if (unlikely(!nskb))
|
||||
goto err;
|
||||
|
||||
if (unlikely(pskb_trim(nskb, len))) {
|
||||
kfree_skb(nskb);
|
||||
goto err;
|
||||
@ -4633,12 +4645,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
|
||||
skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
|
||||
SKBFL_SHARED_FRAG;
|
||||
|
||||
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
|
||||
skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
|
||||
if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
|
||||
goto err;
|
||||
|
||||
while (pos < offset + len) {
|
||||
if (i >= nfrags) {
|
||||
if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
|
||||
skb_zerocopy_clone(nskb, list_skb,
|
||||
GFP_ATOMIC))
|
||||
goto err;
|
||||
|
||||
i = 0;
|
||||
nfrags = skb_shinfo(list_skb)->nr_frags;
|
||||
frag = skb_shinfo(list_skb)->frags;
|
||||
@ -4652,10 +4668,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
|
||||
i--;
|
||||
frag--;
|
||||
}
|
||||
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
|
||||
skb_zerocopy_clone(nskb, frag_skb,
|
||||
GFP_ATOMIC))
|
||||
goto err;
|
||||
|
||||
list_skb = list_skb->next;
|
||||
}
|
||||
@ -5207,7 +5219,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
|
||||
serr->ee.ee_info = tstype;
|
||||
serr->opt_stats = opt_stats;
|
||||
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
|
||||
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
|
||||
if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
|
||||
serr->ee.ee_data = skb_shinfo(skb)->tskey;
|
||||
if (sk_is_tcp(sk))
|
||||
serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
|
||||
@ -5263,21 +5275,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
bool tsonly, opt_stats = false;
|
||||
u32 tsflags;
|
||||
|
||||
if (!sk)
|
||||
return;
|
||||
|
||||
if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
|
||||
tsflags = READ_ONCE(sk->sk_tsflags);
|
||||
if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
|
||||
skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
|
||||
return;
|
||||
|
||||
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
|
||||
tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
|
||||
if (!skb_may_tx_timestamp(sk, tsonly))
|
||||
return;
|
||||
|
||||
if (tsonly) {
|
||||
#ifdef CONFIG_INET
|
||||
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
|
||||
if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
|
||||
sk_is_tcp(sk)) {
|
||||
skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
|
||||
ack_skb);
|
||||
|
@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
|
||||
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
|
||||
u32 off, u32 len, bool ingress)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!ingress) {
|
||||
if (!sock_writeable(psock->sk))
|
||||
return -EAGAIN;
|
||||
return skb_send_sock(psock->sk, skb, off, len);
|
||||
}
|
||||
return sk_psock_skb_ingress(psock, skb, off, len);
|
||||
skb_get(skb);
|
||||
err = sk_psock_skb_ingress(psock, skb, off, len);
|
||||
if (err < 0)
|
||||
kfree_skb(skb);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void sk_psock_skb_state(struct sk_psock *psock,
|
||||
@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
|
||||
} while (len);
|
||||
|
||||
skb = skb_dequeue(&psock->ingress_skb);
|
||||
if (!ingress) {
|
||||
kfree_skb(skb);
|
||||
}
|
||||
kfree_skb(skb);
|
||||
}
|
||||
end:
|
||||
mutex_unlock(&psock->work_mutex);
|
||||
|
@ -765,7 +765,8 @@ bool sk_mc_loop(struct sock *sk)
|
||||
return false;
|
||||
if (!sk)
|
||||
return true;
|
||||
switch (sk->sk_family) {
|
||||
/* IPV6_ADDRFORM can change sk->sk_family under us. */
|
||||
switch (READ_ONCE(sk->sk_family)) {
|
||||
case AF_INET:
|
||||
return inet_test_bit(MC_LOOP, sk);
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
@ -893,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
|
||||
if (!match)
|
||||
return -EINVAL;
|
||||
|
||||
sk->sk_bind_phc = phc_index;
|
||||
WRITE_ONCE(sk->sk_bind_phc, phc_index);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -936,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
|
||||
return ret;
|
||||
}
|
||||
|
||||
sk->sk_tsflags = val;
|
||||
WRITE_ONCE(sk->sk_tsflags, val);
|
||||
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
|
||||
|
||||
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
|
||||
@ -1044,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
|
||||
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
|
||||
return -ENOMEM;
|
||||
}
|
||||
sk->sk_forward_alloc += pages << PAGE_SHIFT;
|
||||
sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
|
||||
|
||||
WRITE_ONCE(sk->sk_reserved_mem,
|
||||
sk->sk_reserved_mem + (pages << PAGE_SHIFT));
|
||||
@ -1718,8 +1719,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
|
||||
|
||||
case SO_TIMESTAMPING_OLD:
|
||||
lv = sizeof(v.timestamping);
|
||||
v.timestamping.flags = sk->sk_tsflags;
|
||||
v.timestamping.bind_phc = sk->sk_bind_phc;
|
||||
v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
|
||||
v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
|
||||
break;
|
||||
|
||||
case SO_RCVTIMEO_OLD:
|
||||
@ -2746,9 +2747,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
|
||||
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
|
||||
if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
|
||||
break;
|
||||
if (sk->sk_shutdown & SEND_SHUTDOWN)
|
||||
if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
|
||||
break;
|
||||
if (sk->sk_err)
|
||||
if (READ_ONCE(sk->sk_err))
|
||||
break;
|
||||
timeo = schedule_timeout(timeo);
|
||||
}
|
||||
@ -2776,7 +2777,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
|
||||
goto failure;
|
||||
|
||||
err = -EPIPE;
|
||||
if (sk->sk_shutdown & SEND_SHUTDOWN)
|
||||
if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
|
||||
goto failure;
|
||||
|
||||
if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
|
||||
@ -3138,10 +3139,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
|
||||
{
|
||||
int ret, amt = sk_mem_pages(size);
|
||||
|
||||
sk->sk_forward_alloc += amt << PAGE_SHIFT;
|
||||
sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
|
||||
ret = __sk_mem_raise_allocated(sk, size, amt, kind);
|
||||
if (!ret)
|
||||
sk->sk_forward_alloc -= amt << PAGE_SHIFT;
|
||||
sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(__sk_mem_schedule);
|
||||
@ -3173,7 +3174,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
|
||||
void __sk_mem_reclaim(struct sock *sk, int amount)
|
||||
{
|
||||
amount >>= PAGE_SHIFT;
|
||||
sk->sk_forward_alloc -= amount << PAGE_SHIFT;
|
||||
sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
|
||||
__sk_mem_reduce_allocated(sk, amount);
|
||||
}
|
||||
EXPORT_SYMBOL(__sk_mem_reclaim);
|
||||
@ -3742,7 +3743,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
|
||||
mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
|
||||
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
|
||||
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
|
||||
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
|
||||
mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
|
||||
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
|
||||
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
|
||||
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
|
||||
|
@ -18,7 +18,7 @@ struct bpf_stab {
|
||||
struct bpf_map map;
|
||||
struct sock **sks;
|
||||
struct sk_psock_progs progs;
|
||||
raw_spinlock_t lock;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
#define SOCK_CREATE_FLAG_MASK \
|
||||
@ -44,7 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
bpf_map_init_from_attr(&stab->map, attr);
|
||||
raw_spin_lock_init(&stab->lock);
|
||||
spin_lock_init(&stab->lock);
|
||||
|
||||
stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
|
||||
sizeof(struct sock *),
|
||||
@ -411,7 +411,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
|
||||
struct sock *sk;
|
||||
int err = 0;
|
||||
|
||||
raw_spin_lock_bh(&stab->lock);
|
||||
spin_lock_bh(&stab->lock);
|
||||
sk = *psk;
|
||||
if (!sk_test || sk_test == sk)
|
||||
sk = xchg(psk, NULL);
|
||||
@ -421,7 +421,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
|
||||
else
|
||||
err = -EINVAL;
|
||||
|
||||
raw_spin_unlock_bh(&stab->lock);
|
||||
spin_unlock_bh(&stab->lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -487,7 +487,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
|
||||
psock = sk_psock(sk);
|
||||
WARN_ON_ONCE(!psock);
|
||||
|
||||
raw_spin_lock_bh(&stab->lock);
|
||||
spin_lock_bh(&stab->lock);
|
||||
osk = stab->sks[idx];
|
||||
if (osk && flags == BPF_NOEXIST) {
|
||||
ret = -EEXIST;
|
||||
@ -501,10 +501,10 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
|
||||
stab->sks[idx] = sk;
|
||||
if (osk)
|
||||
sock_map_unref(osk, &stab->sks[idx]);
|
||||
raw_spin_unlock_bh(&stab->lock);
|
||||
spin_unlock_bh(&stab->lock);
|
||||
return 0;
|
||||
out_unlock:
|
||||
raw_spin_unlock_bh(&stab->lock);
|
||||
spin_unlock_bh(&stab->lock);
|
||||
if (psock)
|
||||
sk_psock_put(sk, psock);
|
||||
out_free:
|
||||
@ -835,7 +835,7 @@ struct bpf_shtab_elem {
|
||||
|
||||
struct bpf_shtab_bucket {
|
||||
struct hlist_head head;
|
||||
raw_spinlock_t lock;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
struct bpf_shtab {
|
||||
@ -910,7 +910,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
|
||||
* is okay since it's going away only after RCU grace period.
|
||||
* However, we need to check whether it's still present.
|
||||
*/
|
||||
raw_spin_lock_bh(&bucket->lock);
|
||||
spin_lock_bh(&bucket->lock);
|
||||
elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
|
||||
elem->key, map->key_size);
|
||||
if (elem_probe && elem_probe == elem) {
|
||||
@ -918,7 +918,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
|
||||
sock_map_unref(elem->sk, elem);
|
||||
sock_hash_free_elem(htab, elem);
|
||||
}
|
||||
raw_spin_unlock_bh(&bucket->lock);
|
||||
spin_unlock_bh(&bucket->lock);
|
||||
}
|
||||
|
||||
static long sock_hash_delete_elem(struct bpf_map *map, void *key)
|
||||
@ -932,7 +932,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
|
||||
hash = sock_hash_bucket_hash(key, key_size);
|
||||
bucket = sock_hash_select_bucket(htab, hash);
|
||||
|
||||
raw_spin_lock_bh(&bucket->lock);
|
||||
spin_lock_bh(&bucket->lock);
|
||||
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
|
||||
if (elem) {
|
||||
hlist_del_rcu(&elem->node);
|
||||
@ -940,7 +940,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
|
||||
sock_hash_free_elem(htab, elem);
|
||||
ret = 0;
|
||||
}
|
||||
raw_spin_unlock_bh(&bucket->lock);
|
||||
spin_unlock_bh(&bucket->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1000,7 +1000,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
|
||||
hash = sock_hash_bucket_hash(key, key_size);
|
||||
bucket = sock_hash_select_bucket(htab, hash);
|
||||
|
||||
raw_spin_lock_bh(&bucket->lock);
|
||||
spin_lock_bh(&bucket->lock);
|
||||
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
|
||||
if (elem && flags == BPF_NOEXIST) {
|
||||
ret = -EEXIST;
|
||||
@ -1026,10 +1026,10 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
|
||||
sock_map_unref(elem->sk, elem);
|
||||
sock_hash_free_elem(htab, elem);
|
||||
}
|
||||
raw_spin_unlock_bh(&bucket->lock);
|
||||
spin_unlock_bh(&bucket->lock);
|
||||
return 0;
|
||||
out_unlock:
|
||||
raw_spin_unlock_bh(&bucket->lock);
|
||||
spin_unlock_bh(&bucket->lock);
|
||||
sk_psock_put(sk, psock);
|
||||
out_free:
|
||||
sk_psock_free_link(link);
|
||||
@ -1115,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
|
||||
|
||||
for (i = 0; i < htab->buckets_num; i++) {
|
||||
INIT_HLIST_HEAD(&htab->buckets[i].head);
|
||||
raw_spin_lock_init(&htab->buckets[i].lock);
|
||||
spin_lock_init(&htab->buckets[i].lock);
|
||||
}
|
||||
|
||||
return &htab->map;
|
||||
@ -1147,11 +1147,11 @@ static void sock_hash_free(struct bpf_map *map)
|
||||
* exists, psock exists and holds a ref to socket. That
|
||||
* lets us to grab a socket ref too.
|
||||
*/
|
||||
raw_spin_lock_bh(&bucket->lock);
|
||||
spin_lock_bh(&bucket->lock);
|
||||
hlist_for_each_entry(elem, &bucket->head, node)
|
||||
sock_hold(elem->sk);
|
||||
hlist_move_list(&bucket->head, &unlink_list);
|
||||
raw_spin_unlock_bh(&bucket->lock);
|
||||
spin_unlock_bh(&bucket->lock);
|
||||
|
||||
/* Process removed entries out of atomic context to
|
||||
* block for socket lock before deleting the psock's
|
||||
|
@ -157,26 +157,24 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
|
||||
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
struct net *net = sock_net(skb->sk);
|
||||
struct handshake_req *req = NULL;
|
||||
struct socket *sock = NULL;
|
||||
struct handshake_req *req;
|
||||
struct socket *sock;
|
||||
int fd, status, err;
|
||||
|
||||
if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
|
||||
return -EINVAL;
|
||||
fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
|
||||
|
||||
err = 0;
|
||||
sock = sockfd_lookup(fd, &err);
|
||||
if (err) {
|
||||
err = -EBADF;
|
||||
goto out_status;
|
||||
}
|
||||
if (!sock)
|
||||
return err;
|
||||
|
||||
req = handshake_req_hash_lookup(sock->sk);
|
||||
if (!req) {
|
||||
err = -EBUSY;
|
||||
trace_handshake_cmd_done_err(net, req, sock->sk, err);
|
||||
fput(sock->file);
|
||||
goto out_status;
|
||||
return err;
|
||||
}
|
||||
|
||||
trace_handshake_cmd_done(net, req, sock->sk, fd);
|
||||
@ -188,10 +186,6 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
|
||||
handshake_complete(req, status, info);
|
||||
fput(sock->file);
|
||||
return 0;
|
||||
|
||||
out_status:
|
||||
trace_handshake_cmd_done_err(net, req, sock->sk, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned int handshake_net_id;
|
||||
|
@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
|
||||
hlist_del(&nexthop_nh->nh_hash);
|
||||
} endfor_nexthops(fi)
|
||||
}
|
||||
fi->fib_dead = 1;
|
||||
/* Paired with READ_ONCE() from fib_table_lookup() */
|
||||
WRITE_ONCE(fi->fib_dead, 1);
|
||||
fib_info_put(fi);
|
||||
}
|
||||
spin_unlock_bh(&fib_info_lock);
|
||||
@ -1581,6 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
|
||||
link_it:
|
||||
ofi = fib_find_info(fi);
|
||||
if (ofi) {
|
||||
/* fib_table_lookup() should not see @fi yet. */
|
||||
fi->fib_dead = 1;
|
||||
free_fib_info(fi);
|
||||
refcount_inc(&ofi->fib_treeref);
|
||||
@ -1619,6 +1621,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
|
||||
|
||||
failure:
|
||||
if (fi) {
|
||||
/* fib_table_lookup() should not see @fi yet. */
|
||||
fi->fib_dead = 1;
|
||||
free_fib_info(fi);
|
||||
}
|
||||
|
@ -1582,7 +1582,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
|
||||
if (fa->fa_dscp &&
|
||||
inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
|
||||
continue;
|
||||
if (fi->fib_dead)
|
||||
/* Paired with WRITE_ONCE() in fib_release_info() */
|
||||
if (READ_ONCE(fi->fib_dead))
|
||||
continue;
|
||||
if (fa->fa_info->fib_scope < flp->flowi4_scope)
|
||||
continue;
|
||||
|
@ -353,8 +353,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
|
||||
struct flowi4 fl4;
|
||||
int hlen = LL_RESERVED_SPACE(dev);
|
||||
int tlen = dev->needed_tailroom;
|
||||
unsigned int size = mtu;
|
||||
unsigned int size;
|
||||
|
||||
size = min(mtu, IP_MAX_MTU);
|
||||
while (1) {
|
||||
skb = alloc_skb(size + hlen + tlen,
|
||||
GFP_ATOMIC | __GFP_NOWARN);
|
||||
|
@ -67,7 +67,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
|
||||
struct ip_options *opt = &(IPCB(skb)->opt);
|
||||
|
||||
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
|
||||
__IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
|
||||
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
if (skb->offload_l3_fwd_mark) {
|
||||
|
@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
|
||||
static struct sk_buff *ip_extract_route_hint(const struct net *net,
|
||||
struct sk_buff *skb, int rt_type)
|
||||
{
|
||||
if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
|
||||
if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
|
||||
IPCB(skb)->flags & IPSKB_MULTIPATH)
|
||||
return NULL;
|
||||
|
||||
return skb;
|
||||
|
@ -207,6 +207,9 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
|
||||
} else if (rt->rt_type == RTN_BROADCAST)
|
||||
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
|
||||
|
||||
/* OUTOCTETS should be counted after fragment */
|
||||
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
|
||||
|
||||
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
|
||||
skb = skb_expand_head(skb, hh_len);
|
||||
if (!skb)
|
||||
@ -366,8 +369,6 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
/*
|
||||
* If the indicated interface is up and running, send the packet.
|
||||
*/
|
||||
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
|
||||
|
||||
skb->dev = dev;
|
||||
skb->protocol = htons(ETH_P_IP);
|
||||
|
||||
@ -424,8 +425,6 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
|
||||
|
||||
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
|
||||
|
||||
skb->dev = dev;
|
||||
skb->protocol = htons(ETH_P_IP);
|
||||
|
||||
@ -982,7 +981,7 @@ static int __ip_append_data(struct sock *sk,
|
||||
paged = !!cork->gso_size;
|
||||
|
||||
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
|
||||
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
|
||||
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
|
||||
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
|
||||
|
||||
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
|
||||
|
@ -511,7 +511,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
|
||||
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
|
||||
*/
|
||||
info = PKTINFO_SKB_CB(skb);
|
||||
if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
|
||||
if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
|
||||
!info->ipi_ifindex)
|
||||
return false;
|
||||
|
||||
|
@ -1804,7 +1804,6 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
|
||||
struct ip_options *opt = &(IPCB(skb)->opt);
|
||||
|
||||
IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
|
||||
IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
|
||||
|
||||
if (unlikely(opt->optlen))
|
||||
ip_forward_options(skb);
|
||||
|
@ -2144,6 +2144,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
|
||||
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
|
||||
|
||||
fib_select_multipath(res, h);
|
||||
IPCB(skb)->flags |= IPSKB_MULTIPATH;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -2256,14 +2256,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
|
||||
}
|
||||
}
|
||||
|
||||
if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
|
||||
if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
|
||||
has_timestamping = true;
|
||||
else
|
||||
tss->ts[0] = (struct timespec64) {0};
|
||||
}
|
||||
|
||||
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
|
||||
if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
|
||||
if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
|
||||
has_timestamping = true;
|
||||
else
|
||||
tss->ts[2] = (struct timespec64) {0};
|
||||
|
@ -3474,7 +3474,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
|
||||
if (delta <= 0)
|
||||
return;
|
||||
amt = sk_mem_pages(delta);
|
||||
sk->sk_forward_alloc += amt << PAGE_SHIFT;
|
||||
sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
|
||||
sk_memory_allocated_add(sk, amt);
|
||||
|
||||
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
|
||||
|
@ -1414,9 +1414,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
|
||||
spin_lock(&sk_queue->lock);
|
||||
|
||||
|
||||
sk->sk_forward_alloc += size;
|
||||
sk_forward_alloc_add(sk, size);
|
||||
amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
|
||||
sk->sk_forward_alloc -= amt;
|
||||
sk_forward_alloc_add(sk, -amt);
|
||||
|
||||
if (amt)
|
||||
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
|
||||
@ -1527,7 +1527,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
|
||||
goto uncharge_drop;
|
||||
}
|
||||
|
||||
sk->sk_forward_alloc -= size;
|
||||
sk_forward_alloc_add(sk, -size);
|
||||
|
||||
/* no need to setup a destructor, we will explicitly release the
|
||||
* forward allocated memory on dequeue
|
||||
|
@ -1378,7 +1378,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
|
||||
* idev->desync_factor if it's larger
|
||||
*/
|
||||
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
|
||||
max_desync_factor = min_t(__u32,
|
||||
max_desync_factor = min_t(long,
|
||||
idev->cnf.max_desync_factor,
|
||||
cnf_temp_preferred_lft - regen_advance);
|
||||
|
||||
|
@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
|
||||
static struct sk_buff *ip6_extract_route_hint(const struct net *net,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
|
||||
if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
|
||||
IP6CB(skb)->flags & IP6SKB_MULTIPATH)
|
||||
return NULL;
|
||||
|
||||
return skb;
|
||||
|
@ -451,7 +451,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
|
||||
struct dst_entry *dst = skb_dst(skb);
|
||||
|
||||
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
|
||||
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
|
||||
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
if (skb->offload_l3_fwd_mark) {
|
||||
@ -1502,7 +1501,7 @@ static int __ip6_append_data(struct sock *sk,
|
||||
orig_mtu = mtu;
|
||||
|
||||
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
|
||||
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
|
||||
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
|
||||
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
|
||||
|
||||
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
|
||||
|
@ -2010,8 +2010,6 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
|
||||
{
|
||||
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
|
||||
IPSTATS_MIB_OUTFORWDATAGRAMS);
|
||||
IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
|
||||
IPSTATS_MIB_OUTOCTETS, skb->len);
|
||||
return dst_output(net, sk, skb);
|
||||
}
|
||||
|
||||
|
@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
return -EINVAL;
|
||||
|
||||
ipcm6_init_sk(&ipc6, np);
|
||||
ipc6.sockc.tsflags = sk->sk_tsflags;
|
||||
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
|
||||
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
|
||||
|
||||
fl6.flowi6_oif = oif;
|
||||
|
@ -772,7 +772,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
fl6.flowi6_uid = sk->sk_uid;
|
||||
|
||||
ipcm6_init(&ipc6);
|
||||
ipc6.sockc.tsflags = sk->sk_tsflags;
|
||||
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
|
||||
ipc6.sockc.mark = fl6.flowi6_mark;
|
||||
|
||||
if (sin6) {
|
||||
|
@ -423,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
|
||||
if (match->nh && have_oif_match && res->nh)
|
||||
return;
|
||||
|
||||
if (skb)
|
||||
IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
|
||||
|
||||
/* We might have already computed the hash for ICMPv6 errors. In such
|
||||
* case it will always be non-zero. Otherwise now is the time to do it.
|
||||
*/
|
||||
|
@ -1339,7 +1339,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
|
||||
ipcm6_init(&ipc6);
|
||||
ipc6.gso_size = READ_ONCE(up->gso_size);
|
||||
ipc6.sockc.tsflags = sk->sk_tsflags;
|
||||
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
|
||||
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
|
||||
|
||||
/* destination address check */
|
||||
|
@ -1859,6 +1859,8 @@ static __net_exit void kcm_exit_net(struct net *net)
|
||||
* that all multiplexors and psocks have been destroyed.
|
||||
*/
|
||||
WARN_ON(!list_empty(&knet->mux_list));
|
||||
|
||||
mutex_destroy(&knet->mutex);
|
||||
}
|
||||
|
||||
static struct pernet_operations kcm_net_ops = {
|
||||
|
@ -134,9 +134,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
|
||||
__kfree_skb(skb);
|
||||
}
|
||||
|
||||
static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
|
||||
{
|
||||
WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
|
||||
mptcp_sk(sk)->rmem_fwd_alloc + size);
|
||||
}
|
||||
|
||||
static void mptcp_rmem_charge(struct sock *sk, int size)
|
||||
{
|
||||
mptcp_sk(sk)->rmem_fwd_alloc -= size;
|
||||
mptcp_rmem_fwd_alloc_add(sk, -size);
|
||||
}
|
||||
|
||||
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
|
||||
@ -177,7 +183,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
|
||||
static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
|
||||
{
|
||||
amount >>= PAGE_SHIFT;
|
||||
mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
|
||||
mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
|
||||
__sk_mem_reduce_allocated(sk, amount);
|
||||
}
|
||||
|
||||
@ -186,7 +192,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
int reclaimable;
|
||||
|
||||
msk->rmem_fwd_alloc += size;
|
||||
mptcp_rmem_fwd_alloc_add(sk, size);
|
||||
reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
|
||||
|
||||
/* see sk_mem_uncharge() for the rationale behind the following schema */
|
||||
@ -341,7 +347,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
|
||||
if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
|
||||
return false;
|
||||
|
||||
msk->rmem_fwd_alloc += amount;
|
||||
mptcp_rmem_fwd_alloc_add(sk, amount);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1800,7 +1806,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
}
|
||||
|
||||
/* data successfully copied into the write queue */
|
||||
sk->sk_forward_alloc -= total_ts;
|
||||
sk_forward_alloc_add(sk, -total_ts);
|
||||
copied += psize;
|
||||
dfrag->data_len += psize;
|
||||
frag_truesize += psize;
|
||||
@ -3257,8 +3263,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
|
||||
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
|
||||
* inet_sock_destruct() will dispose it
|
||||
*/
|
||||
sk->sk_forward_alloc += msk->rmem_fwd_alloc;
|
||||
msk->rmem_fwd_alloc = 0;
|
||||
sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
|
||||
WRITE_ONCE(msk->rmem_fwd_alloc, 0);
|
||||
mptcp_token_destroy(msk);
|
||||
mptcp_pm_free_anno_list(msk);
|
||||
mptcp_free_local_addr_list(msk);
|
||||
@ -3522,7 +3528,8 @@ static void mptcp_shutdown(struct sock *sk, int how)
|
||||
|
||||
static int mptcp_forward_alloc_get(const struct sock *sk)
|
||||
{
|
||||
return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
|
||||
return READ_ONCE(sk->sk_forward_alloc) +
|
||||
READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
|
||||
}
|
||||
|
||||
static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
|
||||
|
@ -36,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
|
||||
#define IP_SET_HASH_WITH_PROTO
|
||||
#define IP_SET_HASH_WITH_NETS
|
||||
#define IPSET_NET_COUNT 2
|
||||
#define IP_SET_HASH_WITH_NET0
|
||||
|
||||
/* IPv4 variant */
|
||||
|
||||
|
@ -102,6 +102,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
|
||||
[NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
|
||||
[NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
|
||||
[NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
|
||||
[NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
|
||||
};
|
||||
|
||||
static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
|
||||
@ -3421,6 +3422,18 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
|
||||
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
|
||||
}
|
||||
|
||||
static void audit_log_rule_reset(const struct nft_table *table,
|
||||
unsigned int base_seq,
|
||||
unsigned int nentries)
|
||||
{
|
||||
char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
|
||||
table->name, base_seq);
|
||||
|
||||
audit_log_nfcfg(buf, table->family, nentries,
|
||||
AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
|
||||
kfree(buf);
|
||||
}
|
||||
|
||||
struct nft_rule_dump_ctx {
|
||||
char *table;
|
||||
char *chain;
|
||||
@ -3467,6 +3480,10 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
|
||||
cont_skip:
|
||||
(*idx)++;
|
||||
}
|
||||
|
||||
if (reset && *idx)
|
||||
audit_log_rule_reset(table, cb->seq, *idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3634,6 +3651,9 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
|
||||
if (err < 0)
|
||||
goto err_fill_rule_info;
|
||||
|
||||
if (reset)
|
||||
audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
|
||||
|
||||
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
|
||||
|
||||
err_fill_rule_info:
|
||||
@ -5624,13 +5644,25 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
|
||||
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
|
||||
}
|
||||
|
||||
static void audit_log_nft_set_reset(const struct nft_table *table,
|
||||
unsigned int base_seq,
|
||||
unsigned int nentries)
|
||||
{
|
||||
char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
|
||||
|
||||
audit_log_nfcfg(buf, table->family, nentries,
|
||||
AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
|
||||
kfree(buf);
|
||||
}
|
||||
|
||||
struct nft_set_dump_ctx {
|
||||
const struct nft_set *set;
|
||||
struct nft_ctx ctx;
|
||||
};
|
||||
|
||||
static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
|
||||
const struct nft_set *set, bool reset)
|
||||
const struct nft_set *set, bool reset,
|
||||
unsigned int base_seq)
|
||||
{
|
||||
struct nft_set_elem_catchall *catchall;
|
||||
u8 genmask = nft_genmask_cur(net);
|
||||
@ -5646,6 +5678,8 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
|
||||
|
||||
elem.priv = catchall->elem;
|
||||
ret = nf_tables_fill_setelem(skb, set, &elem, reset);
|
||||
if (reset && !ret)
|
||||
audit_log_nft_set_reset(set->table, base_seq, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -5725,12 +5759,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
|
||||
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
|
||||
|
||||
if (!args.iter.err && args.iter.count == cb->args[0])
|
||||
args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
|
||||
rcu_read_unlock();
|
||||
|
||||
args.iter.err = nft_set_catchall_dump(net, skb, set,
|
||||
reset, cb->seq);
|
||||
nla_nest_end(skb, nest);
|
||||
nlmsg_end(skb, nlh);
|
||||
|
||||
if (reset && args.iter.count > args.iter.skip)
|
||||
audit_log_nft_set_reset(table, cb->seq,
|
||||
args.iter.count - args.iter.skip);
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
if (args.iter.err && args.iter.err != -EMSGSIZE)
|
||||
return args.iter.err;
|
||||
if (args.iter.count == cb->args[0])
|
||||
@ -5955,13 +5994,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
|
||||
struct netlink_ext_ack *extack = info->extack;
|
||||
u8 genmask = nft_genmask_cur(info->net);
|
||||
u8 family = info->nfmsg->nfgen_family;
|
||||
int rem, err = 0, nelems = 0;
|
||||
struct net *net = info->net;
|
||||
struct nft_table *table;
|
||||
struct nft_set *set;
|
||||
struct nlattr *attr;
|
||||
struct nft_ctx ctx;
|
||||
bool reset = false;
|
||||
int rem, err = 0;
|
||||
|
||||
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
|
||||
genmask, 0);
|
||||
@ -6004,8 +6043,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
|
||||
NL_SET_BAD_ATTR(extack, attr);
|
||||
break;
|
||||
}
|
||||
nelems++;
|
||||
}
|
||||
|
||||
if (reset)
|
||||
audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
|
||||
nelems);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -315,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
|
||||
|
||||
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
|
||||
|
||||
if (f->opt_num > ARRAY_SIZE(f->opt))
|
||||
return -EINVAL;
|
||||
|
||||
if (!memchr(f->genre, 0, MAXGENRELEN) ||
|
||||
!memchr(f->subtype, 0, MAXGENRELEN) ||
|
||||
!memchr(f->version, 0, MAXGENRELEN))
|
||||
return -EINVAL;
|
||||
|
||||
kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
|
||||
if (!kf)
|
||||
return -ENOMEM;
|
||||
|
@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
|
||||
return opt[offset + 1];
|
||||
}
|
||||
|
||||
static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
|
||||
{
|
||||
if (len % NFT_REG32_SIZE)
|
||||
dest[len / NFT_REG32_SIZE] = 0;
|
||||
|
||||
return skb_copy_bits(skb, offset, dest, len);
|
||||
}
|
||||
|
||||
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
|
||||
struct nft_regs *regs,
|
||||
const struct nft_pktinfo *pkt)
|
||||
@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
|
||||
}
|
||||
offset += priv->offset;
|
||||
|
||||
dest[priv->len / NFT_REG32_SIZE] = 0;
|
||||
if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
|
||||
if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
|
||||
goto err;
|
||||
return;
|
||||
err:
|
||||
@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
|
||||
}
|
||||
offset += priv->offset;
|
||||
|
||||
dest[priv->len / NFT_REG32_SIZE] = 0;
|
||||
if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
|
||||
if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
|
||||
goto err;
|
||||
return;
|
||||
err:
|
||||
@ -210,7 +216,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
|
||||
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
|
||||
*dest = 1;
|
||||
} else {
|
||||
dest[priv->len / NFT_REG32_SIZE] = 0;
|
||||
if (priv->len % NFT_REG32_SIZE)
|
||||
dest[priv->len / NFT_REG32_SIZE] = 0;
|
||||
memcpy(dest, opt + offset, priv->len);
|
||||
}
|
||||
|
||||
@ -238,7 +245,12 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
|
||||
if (!tcph)
|
||||
goto err;
|
||||
|
||||
if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
|
||||
goto err;
|
||||
|
||||
tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
|
||||
opt = (u8 *)tcph;
|
||||
|
||||
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
|
||||
union {
|
||||
__be16 v16;
|
||||
@ -253,15 +265,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
|
||||
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
|
||||
goto err;
|
||||
|
||||
if (skb_ensure_writable(pkt->skb,
|
||||
nft_thoff(pkt) + i + priv->len))
|
||||
goto err;
|
||||
|
||||
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
|
||||
&tcphdr_len);
|
||||
if (!tcph)
|
||||
goto err;
|
||||
|
||||
offset = i + priv->offset;
|
||||
|
||||
switch (priv->len) {
|
||||
@ -325,9 +328,9 @@ static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
|
||||
if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
|
||||
goto drop;
|
||||
|
||||
opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
|
||||
if (!opt)
|
||||
goto err;
|
||||
tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
|
||||
opt = (u8 *)tcph;
|
||||
|
||||
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
|
||||
unsigned int j;
|
||||
|
||||
@ -392,9 +395,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
|
||||
offset + ntohs(sch->length) > pkt->skb->len)
|
||||
break;
|
||||
|
||||
dest[priv->len / NFT_REG32_SIZE] = 0;
|
||||
if (skb_copy_bits(pkt->skb, offset + priv->offset,
|
||||
dest, priv->len) < 0)
|
||||
if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
|
||||
dest, priv->len) < 0)
|
||||
break;
|
||||
return;
|
||||
}
|
||||
|
@ -312,6 +312,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
||||
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
|
||||
struct rb_node *node, *next, *parent, **p, *first = NULL;
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
u8 cur_genmask = nft_genmask_cur(net);
|
||||
u8 genmask = nft_genmask_next(net);
|
||||
int d, err;
|
||||
|
||||
@ -357,8 +358,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
||||
if (!nft_set_elem_active(&rbe->ext, genmask))
|
||||
continue;
|
||||
|
||||
/* perform garbage collection to avoid bogus overlap reports. */
|
||||
if (nft_set_elem_expired(&rbe->ext)) {
|
||||
/* perform garbage collection to avoid bogus overlap reports
|
||||
* but skip new elements in this transaction.
|
||||
*/
|
||||
if (nft_set_elem_expired(&rbe->ext) &&
|
||||
nft_set_elem_active(&rbe->ext, cur_genmask)) {
|
||||
err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
|
||||
{
|
||||
const struct xt_sctp_info *info = par->matchinfo;
|
||||
|
||||
if (info->flag_count > ARRAY_SIZE(info->flag_info))
|
||||
return -EINVAL;
|
||||
if (info->flags & ~XT_SCTP_VALID_FLAGS)
|
||||
return -EINVAL;
|
||||
if (info->invflags & ~XT_SCTP_VALID_FLAGS)
|
||||
|
@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
|
||||
return ret ^ data->invert;
|
||||
}
|
||||
|
||||
static int u32_mt_checkentry(const struct xt_mtchk_param *par)
|
||||
{
|
||||
const struct xt_u32 *data = par->matchinfo;
|
||||
const struct xt_u32_test *ct;
|
||||
unsigned int i;
|
||||
|
||||
if (data->ntests > ARRAY_SIZE(data->tests))
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < data->ntests; ++i) {
|
||||
ct = &data->tests[i];
|
||||
|
||||
if (ct->nnums > ARRAY_SIZE(ct->location) ||
|
||||
ct->nvalues > ARRAY_SIZE(ct->value))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct xt_match xt_u32_mt_reg __read_mostly = {
|
||||
.name = "u32",
|
||||
.revision = 0,
|
||||
.family = NFPROTO_UNSPEC,
|
||||
.match = u32_mt,
|
||||
.checkentry = u32_mt_checkentry,
|
||||
.matchsize = sizeof(struct xt_u32),
|
||||
.me = THIS_MODULE,
|
||||
};
|
||||
|
@ -61,6 +61,7 @@ struct fq_pie_sched_data {
|
||||
struct pie_params p_params;
|
||||
u32 ecn_prob;
|
||||
u32 flows_cnt;
|
||||
u32 flows_cursor;
|
||||
u32 quantum;
|
||||
u32 memory_limit;
|
||||
u32 new_flow_count;
|
||||
@ -375,22 +376,32 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
|
||||
static void fq_pie_timer(struct timer_list *t)
|
||||
{
|
||||
struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
|
||||
unsigned long next, tupdate;
|
||||
struct Qdisc *sch = q->sch;
|
||||
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
|
||||
u32 idx;
|
||||
int max_cnt, i;
|
||||
|
||||
rcu_read_lock();
|
||||
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
|
||||
spin_lock(root_lock);
|
||||
|
||||
for (idx = 0; idx < q->flows_cnt; idx++)
|
||||
pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
|
||||
q->flows[idx].backlog);
|
||||
|
||||
/* reset the timer to fire after 'tupdate' jiffies. */
|
||||
if (q->p_params.tupdate)
|
||||
mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
|
||||
/* Limit this expensive loop to 2048 flows per round. */
|
||||
max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
|
||||
for (i = 0; i < max_cnt; i++) {
|
||||
pie_calculate_probability(&q->p_params,
|
||||
&q->flows[q->flows_cursor].vars,
|
||||
q->flows[q->flows_cursor].backlog);
|
||||
q->flows_cursor++;
|
||||
}
|
||||
|
||||
tupdate = q->p_params.tupdate;
|
||||
next = 0;
|
||||
if (q->flows_cursor >= q->flows_cnt) {
|
||||
q->flows_cursor = 0;
|
||||
next = tupdate;
|
||||
}
|
||||
if (tupdate)
|
||||
mod_timer(&q->adapt_timer, jiffies + next);
|
||||
spin_unlock(root_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
|
||||
.priv_size = sizeof(struct plug_sched_data),
|
||||
.enqueue = plug_enqueue,
|
||||
.dequeue = plug_dequeue,
|
||||
.peek = qdisc_peek_head,
|
||||
.peek = qdisc_peek_dequeued,
|
||||
.init = plug_init,
|
||||
.change = plug_change,
|
||||
.reset = qdisc_reset_queue,
|
||||
|
@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
|
||||
}
|
||||
|
||||
/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
|
||||
static void agg_dequeue(struct qfq_aggregate *agg,
|
||||
struct qfq_class *cl, unsigned int len)
|
||||
static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
|
||||
struct qfq_class *cl, unsigned int len)
|
||||
{
|
||||
qdisc_dequeue_peeked(cl->qdisc);
|
||||
struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
|
||||
|
||||
if (!skb)
|
||||
return NULL;
|
||||
|
||||
cl->deficit -= (int) len;
|
||||
|
||||
@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
|
||||
cl->deficit += agg->lmax;
|
||||
list_move_tail(&cl->alist, &agg->active);
|
||||
}
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
|
||||
@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
|
||||
if (!skb)
|
||||
return NULL;
|
||||
|
||||
qdisc_qstats_backlog_dec(sch, skb);
|
||||
sch->q.qlen--;
|
||||
|
||||
skb = agg_dequeue(in_serv_agg, cl, len);
|
||||
|
||||
if (!skb) {
|
||||
sch->q.qlen++;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
qdisc_qstats_backlog_dec(sch, skb);
|
||||
qdisc_bstats_update(sch, skb);
|
||||
|
||||
agg_dequeue(in_serv_agg, cl, len);
|
||||
/* If lmax is lowered, through qfq_change_class, for a class
|
||||
* owning pending packets with larger size than the new value
|
||||
* of lmax, then the following condition may hold.
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user