mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "146 patches. Subsystems affected by this patch series: kthread, ia64, scripts, ntfs, squashfs, ocfs2, vfs, and mm (slab-generic, slab, kmemleak, dax, kasan, debug, pagecache, gup, shmem, frontswap, memremap, memcg, selftests, pagemap, dma, vmalloc, memory-failure, hugetlb, userfaultfd, vmscan, mempolicy, oom-kill, hugetlbfs, migration, thp, ksm, page-poison, percpu, rmap, zswap, zram, cleanups, hmm, and damon)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (146 commits) mm/damon: hide kernel pointer from tracepoint event mm/damon/vaddr: hide kernel pointer from damon_va_three_regions() failure log mm/damon/vaddr: use pr_debug() for damon_va_three_regions() failure logging mm/damon/dbgfs: remove an unnecessary variable mm/damon: move the implementation of damon_insert_region to damon.h mm/damon: add access checking for hugetlb pages Docs/admin-guide/mm/damon/usage: update for schemes statistics mm/damon/dbgfs: support all DAMOS stats Docs/admin-guide/mm/damon/reclaim: document statistics parameters mm/damon/reclaim: provide reclamation statistics mm/damon/schemes: account how many times quota limit has exceeded mm/damon/schemes: account scheme actions that successfully applied mm/damon: remove a mistakenly added comment for a future feature Docs/admin-guide/mm/damon/usage: update for kdamond_pid and (mk|rm)_contexts Docs/admin-guide/mm/damon/usage: mention tracepoint at the beginning Docs/admin-guide/mm/damon/usage: remove redundant information Docs/admin-guide/mm/damon/usage: update for scheme quotas and watermarks mm/damon: convert macro functions to static inline functions mm/damon: modify damon_rand() macro to static inline function mm/damon: move damon_rand() definition into damon.h ...
This commit is contained in:
commit
f56caedaf9
@ -29,12 +29,14 @@ Brief summary of control files::
|
|||||||
hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
|
hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
|
||||||
hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
|
hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
|
||||||
hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB usage limit
|
hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB usage limit
|
||||||
|
hugetlb.<hugepagesize>.numa_stat # show the numa information of the hugetlb memory charged to this cgroup
|
||||||
|
|
||||||
For a system supporting three hugepage sizes (64k, 32M and 1G), the control
|
For a system supporting three hugepage sizes (64k, 32M and 1G), the control
|
||||||
files include::
|
files include::
|
||||||
|
|
||||||
hugetlb.1GB.limit_in_bytes
|
hugetlb.1GB.limit_in_bytes
|
||||||
hugetlb.1GB.max_usage_in_bytes
|
hugetlb.1GB.max_usage_in_bytes
|
||||||
|
hugetlb.1GB.numa_stat
|
||||||
hugetlb.1GB.usage_in_bytes
|
hugetlb.1GB.usage_in_bytes
|
||||||
hugetlb.1GB.failcnt
|
hugetlb.1GB.failcnt
|
||||||
hugetlb.1GB.rsvd.limit_in_bytes
|
hugetlb.1GB.rsvd.limit_in_bytes
|
||||||
@ -43,6 +45,7 @@ files include::
|
|||||||
hugetlb.1GB.rsvd.failcnt
|
hugetlb.1GB.rsvd.failcnt
|
||||||
hugetlb.64KB.limit_in_bytes
|
hugetlb.64KB.limit_in_bytes
|
||||||
hugetlb.64KB.max_usage_in_bytes
|
hugetlb.64KB.max_usage_in_bytes
|
||||||
|
hugetlb.64KB.numa_stat
|
||||||
hugetlb.64KB.usage_in_bytes
|
hugetlb.64KB.usage_in_bytes
|
||||||
hugetlb.64KB.failcnt
|
hugetlb.64KB.failcnt
|
||||||
hugetlb.64KB.rsvd.limit_in_bytes
|
hugetlb.64KB.rsvd.limit_in_bytes
|
||||||
@ -51,6 +54,7 @@ files include::
|
|||||||
hugetlb.64KB.rsvd.failcnt
|
hugetlb.64KB.rsvd.failcnt
|
||||||
hugetlb.32MB.limit_in_bytes
|
hugetlb.32MB.limit_in_bytes
|
||||||
hugetlb.32MB.max_usage_in_bytes
|
hugetlb.32MB.max_usage_in_bytes
|
||||||
|
hugetlb.32MB.numa_stat
|
||||||
hugetlb.32MB.usage_in_bytes
|
hugetlb.32MB.usage_in_bytes
|
||||||
hugetlb.32MB.failcnt
|
hugetlb.32MB.failcnt
|
||||||
hugetlb.32MB.rsvd.limit_in_bytes
|
hugetlb.32MB.rsvd.limit_in_bytes
|
||||||
|
@ -1268,6 +1268,9 @@ PAGE_SIZE multiple when read back.
|
|||||||
The number of processes belonging to this cgroup
|
The number of processes belonging to this cgroup
|
||||||
killed by any kind of OOM killer.
|
killed by any kind of OOM killer.
|
||||||
|
|
||||||
|
oom_group_kill
|
||||||
|
The number of times a group OOM has occurred.
|
||||||
|
|
||||||
memory.events.local
|
memory.events.local
|
||||||
Similar to memory.events but the fields in the file are local
|
Similar to memory.events but the fields in the file are local
|
||||||
to the cgroup i.e. not hierarchical. The file modified event
|
to the cgroup i.e. not hierarchical. The file modified event
|
||||||
@ -1311,6 +1314,9 @@ PAGE_SIZE multiple when read back.
|
|||||||
sock (npn)
|
sock (npn)
|
||||||
Amount of memory used in network transmission buffers
|
Amount of memory used in network transmission buffers
|
||||||
|
|
||||||
|
vmalloc (npn)
|
||||||
|
Amount of memory used for vmap backed memory.
|
||||||
|
|
||||||
shmem
|
shmem
|
||||||
Amount of cached filesystem data that is swap-backed,
|
Amount of cached filesystem data that is swap-backed,
|
||||||
such as tmpfs, shm segments, shared anonymous mmap()s
|
such as tmpfs, shm segments, shared anonymous mmap()s
|
||||||
@ -2260,6 +2266,11 @@ HugeTLB Interface Files
|
|||||||
are local to the cgroup i.e. not hierarchical. The file modified event
|
are local to the cgroup i.e. not hierarchical. The file modified event
|
||||||
generated on this file reflects only the local events.
|
generated on this file reflects only the local events.
|
||||||
|
|
||||||
|
hugetlb.<hugepagesize>.numa_stat
|
||||||
|
Similar to memory.numa_stat, it shows the numa information of the
|
||||||
|
hugetlb pages of <hugepagesize> in this cgroup. Only hugetlb pages
|
||||||
|
that are actively in use are included. The per-node values are in bytes.
|
||||||
|
|
||||||
Misc
|
Misc
|
||||||
----
|
----
|
||||||
|
|
||||||
|
@ -208,6 +208,31 @@ PID of the DAMON thread.
|
|||||||
If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. Else,
|
If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. Else,
|
||||||
-1.
|
-1.
|
||||||
|
|
||||||
|
nr_reclaim_tried_regions
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
Number of memory regions that DAMON_RECLAIM tried to reclaim.
|
||||||
|
|
||||||
|
bytes_reclaim_tried_regions
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
Total bytes of memory regions that DAMON_RECLAIM tried to reclaim.
|
||||||
|
|
||||||
|
nr_reclaimed_regions
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Number of memory regions that were successfully reclaimed by DAMON_RECLAIM.
|
||||||
|
|
||||||
|
bytes_reclaimed_regions
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Total bytes of memory regions that were successfully reclaimed by DAMON_RECLAIM.
|
||||||
|
|
||||||
|
nr_quota_exceeds
|
||||||
|
----------------
|
||||||
|
|
||||||
|
Number of times that the time/space quota limits have been exceeded.
|
||||||
|
|
||||||
Example
|
Example
|
||||||
=======
|
=======
|
||||||
|
|
||||||
|
@ -7,37 +7,40 @@ Detailed Usages
|
|||||||
DAMON provides below three interfaces for different users.
|
DAMON provides below three interfaces for different users.
|
||||||
|
|
||||||
- *DAMON user space tool.*
|
- *DAMON user space tool.*
|
||||||
This is for privileged people such as system administrators who want a
|
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
||||||
just-working human-friendly interface. Using this, users can use the DAMON’s
|
system administrators who want a just-working human-friendly interface.
|
||||||
major features in a human-friendly way. It may not be highly tuned for
|
Using this, users can use the DAMON’s major features in a human-friendly way.
|
||||||
special cases, though. It supports both virtual and physical address spaces
|
It may not be highly tuned for special cases, though. It supports both
|
||||||
monitoring.
|
virtual and physical address spaces monitoring. For more detail, please
|
||||||
|
refer to its `usage document
|
||||||
|
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
||||||
- *debugfs interface.*
|
- *debugfs interface.*
|
||||||
This is for privileged user space programmers who want more optimized use of
|
:ref:`This <debugfs_interface>` is for privileged user space programmers who
|
||||||
DAMON. Using this, users can use DAMON’s major features by reading
|
want more optimized use of DAMON. Using this, users can use DAMON’s major
|
||||||
from and writing to special debugfs files. Therefore, you can write and use
|
features by reading from and writing to special debugfs files. Therefore,
|
||||||
your personalized DAMON debugfs wrapper programs that reads/writes the
|
you can write and use your personalized DAMON debugfs wrapper programs that
|
||||||
debugfs files instead of you. The DAMON user space tool is also a reference
|
reads/writes the debugfs files instead of you. The `DAMON user space tool
|
||||||
implementation of such programs. It supports both virtual and physical
|
<https://github.com/awslabs/damo>`_ is one example of such programs. It
|
||||||
address spaces monitoring.
|
supports both virtual and physical address spaces monitoring. Note that this
|
||||||
|
interface provides only simple :ref:`statistics <damos_stats>` for the
|
||||||
|
monitoring results. For detailed monitoring results, DAMON provides a
|
||||||
|
:ref:`tracepoint <tracepoint>`.
|
||||||
- *Kernel Space Programming Interface.*
|
- *Kernel Space Programming Interface.*
|
||||||
This is for kernel space programmers. Using this, users can utilize every
|
:doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
|
||||||
feature of DAMON most flexibly and efficiently by writing kernel space
|
users can utilize every feature of DAMON most flexibly and efficiently by
|
||||||
DAMON application programs for you. You can even extend DAMON for various
|
writing kernel space DAMON application programs for you. You can even extend
|
||||||
address spaces.
|
DAMON for various address spaces. For detail, please refer to the interface
|
||||||
|
:doc:`document </vm/damon/api>`.
|
||||||
|
|
||||||
Nevertheless, you could write your own user space tool using the debugfs
|
|
||||||
interface. A reference implementation is available at
|
.. _debugfs_interface:
|
||||||
https://github.com/awslabs/damo. If you are a kernel programmer, you could
|
|
||||||
refer to :doc:`/vm/damon/api` for the kernel space programming interface. For
|
|
||||||
the reason, this document describes only the debugfs interface
|
|
||||||
|
|
||||||
debugfs Interface
|
debugfs Interface
|
||||||
=================
|
=================
|
||||||
|
|
||||||
DAMON exports five files, ``attrs``, ``target_ids``, ``init_regions``,
|
DAMON exports eight files, ``attrs``, ``target_ids``, ``init_regions``,
|
||||||
``schemes`` and ``monitor_on`` under its debugfs directory,
|
``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` and
|
||||||
``<debugfs>/damon/``.
|
``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
|
||||||
|
|
||||||
|
|
||||||
Attributes
|
Attributes
|
||||||
@ -131,24 +134,38 @@ Schemes
|
|||||||
|
|
||||||
For usual DAMON-based data access aware memory management optimizations, users
|
For usual DAMON-based data access aware memory management optimizations, users
|
||||||
would simply want the system to apply a memory management action to a memory
|
would simply want the system to apply a memory management action to a memory
|
||||||
region of a specific size having a specific access frequency for a specific
|
region of a specific access pattern. DAMON receives such formalized operation
|
||||||
time. DAMON receives such formalized operation schemes from the user and
|
schemes from the user and applies those to the target processes.
|
||||||
applies those to the target processes. It also counts the total number and
|
|
||||||
size of regions that each scheme is applied. This statistics can be used for
|
|
||||||
online analysis or tuning of the schemes.
|
|
||||||
|
|
||||||
Users can get and set the schemes by reading from and writing to ``schemes``
|
Users can get and set the schemes by reading from and writing to ``schemes``
|
||||||
debugfs file. Reading the file also shows the statistics of each scheme. To
|
debugfs file. Reading the file also shows the statistics of each scheme. To
|
||||||
the file, each of the schemes should be represented in each line in below form:
|
the file, each of the schemes should be represented in each line in below
|
||||||
|
form::
|
||||||
|
|
||||||
min-size max-size min-acc max-acc min-age max-age action
|
<target access pattern> <action> <quota> <watermarks>
|
||||||
|
|
||||||
Note that the ranges are closed interval. Bytes for the size of regions
|
You can disable schemes by simply writing an empty string to the file.
|
||||||
(``min-size`` and ``max-size``), number of monitored accesses per aggregate
|
|
||||||
interval for access frequency (``min-acc`` and ``max-acc``), number of
|
Target Access Pattern
|
||||||
aggregate intervals for the age of regions (``min-age`` and ``max-age``), and a
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
predefined integer for memory management actions should be used. The supported
|
|
||||||
numbers and their meanings are as below.
|
The ``<target access pattern>`` is constructed with three ranges in below
|
||||||
|
form::
|
||||||
|
|
||||||
|
min-size max-size min-acc max-acc min-age max-age
|
||||||
|
|
||||||
|
Specifically, bytes for the size of regions (``min-size`` and ``max-size``),
|
||||||
|
number of monitored accesses per aggregate interval for access frequency
|
||||||
|
(``min-acc`` and ``max-acc``), number of aggregate intervals for the age of
|
||||||
|
regions (``min-age`` and ``max-age``) are specified. Note that the ranges are
|
||||||
|
closed interval.
|
||||||
|
|
||||||
|
Action
|
||||||
|
~~~~~~
|
||||||
|
|
||||||
|
The ``<action>`` is a predefined integer for memory management actions, which
|
||||||
|
DAMON will apply to the regions having the target access pattern. The
|
||||||
|
supported numbers and their meanings are as below.
|
||||||
|
|
||||||
- 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``
|
- 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``
|
||||||
- 1: Call ``madvise()`` for the region with ``MADV_COLD``
|
- 1: Call ``madvise()`` for the region with ``MADV_COLD``
|
||||||
@ -157,20 +174,82 @@ numbers and their meanings are as below.
|
|||||||
- 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
|
- 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
|
||||||
- 5: Do nothing but count the statistics
|
- 5: Do nothing but count the statistics
|
||||||
|
|
||||||
You can disable schemes by simply writing an empty string to the file. For
|
Quota
|
||||||
example, below commands applies a scheme saying "If a memory region of size in
|
~~~~~
|
||||||
[4KiB, 8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
|
|
||||||
interval in [10, 20], page out the region", check the entered scheme again, and
|
Optimal ``target access pattern`` for each ``action`` is workload dependent, so
|
||||||
finally remove the scheme. ::
|
not easy to find. Worse yet, setting a scheme of some action too aggressive
|
||||||
|
can cause severe overhead. To avoid such overhead, users can limit time and
|
||||||
|
size quota for the scheme via the ``<quota>`` in below form::
|
||||||
|
|
||||||
|
<ms> <sz> <reset interval> <priority weights>
|
||||||
|
|
||||||
|
This makes DAMON try to use only up to ``<ms>`` milliseconds for applying
|
||||||
|
the action to memory regions of the ``target access pattern`` within the
|
||||||
|
``<reset interval>`` milliseconds, and to apply the action to only up to
|
||||||
|
``<sz>`` bytes of memory regions within the ``<reset interval>``. Setting both
|
||||||
|
``<ms>`` and ``<sz>`` zero disables the quota limits.
|
||||||
|
|
||||||
|
When the quota limit is expected to be exceeded, DAMON prioritizes found memory
|
||||||
|
regions of the ``target access pattern`` based on their size, access frequency,
|
||||||
|
and age. For personalized prioritization, users can set the weights for the
|
||||||
|
three properties in ``<priority weights>`` in below form::
|
||||||
|
|
||||||
|
<size weight> <access frequency weight> <age weight>
|
||||||
|
|
||||||
|
Watermarks
|
||||||
|
~~~~~~~~~~
|
||||||
|
|
||||||
|
Some schemes would need to run based on current value of the system's specific
|
||||||
|
metrics like free memory ratio. For such cases, users can specify watermarks
|
||||||
|
for the condition in below form::
|
||||||
|
|
||||||
|
<metric> <check interval> <high mark> <middle mark> <low mark>
|
||||||
|
|
||||||
|
``<metric>`` is a predefined integer for the metric to be checked. The
|
||||||
|
supported numbers and their meanings are as below.
|
||||||
|
|
||||||
|
- 0: Ignore the watermarks
|
||||||
|
- 1: System's free memory rate (per thousand)
|
||||||
|
|
||||||
|
The value of the metric is checked every ``<check interval>`` microseconds.
|
||||||
|
|
||||||
|
If the value is higher than ``<high mark>`` or lower than ``<low mark>``, the
|
||||||
|
scheme is deactivated. If the value is lower than ``<middle mark>``, the scheme
|
||||||
|
is activated.
|
||||||
|
|
||||||
|
.. _damos_stats:
|
||||||
|
|
||||||
|
Statistics
|
||||||
|
~~~~~~~~~~
|
||||||
|
|
||||||
|
It also counts the total number and bytes of regions that each scheme is tried
|
||||||
|
to be applied, the two numbers for the regions that each scheme is successfully
|
||||||
|
applied, and the total number of the quota limit exceeds. These statistics can
|
||||||
|
be used for online analysis or tuning of the schemes.
|
||||||
|
|
||||||
|
The statistics can be shown by reading the ``schemes`` file. Reading the file
|
||||||
|
will show each scheme you entered in each line, and the five numbers for the
|
||||||
|
statistics will be added at the end of each line.
|
||||||
|
|
||||||
|
Example
|
||||||
|
~~~~~~~
|
||||||
|
|
||||||
|
The commands below apply a scheme saying "If a memory region of size in [4KiB,
|
||||||
|
8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
|
||||||
|
interval in [10, 20], page out the region. For the paging out, use only up to
|
||||||
|
10ms per second, and also don't page out more than 1GiB per second. Under the
|
||||||
|
limitation, page out memory regions having longer age first. Also, check the
|
||||||
|
free memory rate of the system every 5 seconds, start the monitoring and paging
|
||||||
|
out when the free memory rate becomes lower than 50%, but stop it if the free
|
||||||
|
memory rate becomes larger than 60%, or lower than 30%". ::
|
||||||
|
|
||||||
# cd <debugfs>/damon
|
# cd <debugfs>/damon
|
||||||
# echo "4096 8192 0 5 10 20 2" > schemes
|
# scheme="4096 8192 0 5 10 20 2" # target access pattern and action
|
||||||
# cat schemes
|
# scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
|
||||||
4096 8192 0 5 10 20 2 0 0
|
# scheme+=" 0 0 100" # prioritization weights
|
||||||
# echo > schemes
|
# scheme+=" 1 5000000 600 500 300" # watermarks
|
||||||
|
# echo "$scheme" > schemes
|
||||||
The last two integers in the 4th line of above example is the total number and
|
|
||||||
the total size of the regions that the scheme is applied.
|
|
||||||
|
|
||||||
|
|
||||||
Turning On/Off
|
Turning On/Off
|
||||||
@ -195,6 +274,54 @@ the monitoring is turned on. If you write to the files while DAMON is running,
|
|||||||
an error code such as ``-EBUSY`` will be returned.
|
an error code such as ``-EBUSY`` will be returned.
|
||||||
|
|
||||||
|
|
||||||
|
Monitoring Thread PID
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
DAMON does requested monitoring with a kernel thread called ``kdamond``. You
|
||||||
|
can get the pid of the thread by reading the ``kdamond_pid`` file. When the
|
||||||
|
monitoring is turned off, reading the file returns ``none``. ::
|
||||||
|
|
||||||
|
# cd <debugfs>/damon
|
||||||
|
# cat monitor_on
|
||||||
|
off
|
||||||
|
# cat kdamond_pid
|
||||||
|
none
|
||||||
|
# echo on > monitor_on
|
||||||
|
# cat kdamond_pid
|
||||||
|
18594
|
||||||
|
|
||||||
|
|
||||||
|
Using Multiple Monitoring Threads
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
One ``kdamond`` thread is created for each monitoring context. You can create
|
||||||
|
and remove monitoring contexts for use cases requiring multiple ``kdamond`` threads using
|
||||||
|
the ``mk_contexts`` and ``rm_contexts`` files.
|
||||||
|
|
||||||
|
Writing the name of the new context to the ``mk_contexts`` file creates a
|
||||||
|
directory of the name on the DAMON debugfs directory. The directory will have
|
||||||
|
DAMON debugfs files for the context. ::
|
||||||
|
|
||||||
|
# cd <debugfs>/damon
|
||||||
|
# ls foo
|
||||||
|
# ls: cannot access 'foo': No such file or directory
|
||||||
|
# echo foo > mk_contexts
|
||||||
|
# ls foo
|
||||||
|
# attrs init_regions kdamond_pid schemes target_ids
|
||||||
|
|
||||||
|
If the context is not needed anymore, you can remove it and the corresponding
|
||||||
|
directory by putting the name of the context to the ``rm_contexts`` file. ::
|
||||||
|
|
||||||
|
# echo foo > rm_contexts
|
||||||
|
# ls foo
|
||||||
|
# ls: cannot access 'foo': No such file or directory
|
||||||
|
|
||||||
|
Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on`` files are in the
|
||||||
|
root directory only.
|
||||||
|
|
||||||
|
|
||||||
|
.. _tracepoint:
|
||||||
|
|
||||||
Tracepoint for Monitoring Results
|
Tracepoint for Monitoring Results
|
||||||
=================================
|
=================================
|
||||||
|
|
||||||
|
@ -408,7 +408,7 @@ follows:
|
|||||||
Memory Policy APIs
|
Memory Policy APIs
|
||||||
==================
|
==================
|
||||||
|
|
||||||
Linux supports 3 system calls for controlling memory policy. These APIS
|
Linux supports 4 system calls for controlling memory policy. These APIs
|
||||||
always affect only the calling task, the calling task's address space, or
|
always affect only the calling task, the calling task's address space, or
|
||||||
some shared object mapped into the calling task's address space.
|
some shared object mapped into the calling task's address space.
|
||||||
|
|
||||||
@ -460,6 +460,20 @@ requested via the 'flags' argument.
|
|||||||
|
|
||||||
See the mbind(2) man page for more details.
|
See the mbind(2) man page for more details.
|
||||||
|
|
||||||
|
Set home node for a Range of Task's Address Space::
|
||||||
|
|
||||||
|
long sys_set_mempolicy_home_node(unsigned long start, unsigned long len,
|
||||||
|
unsigned long home_node,
|
||||||
|
unsigned long flags);
|
||||||
|
|
||||||
|
sys_set_mempolicy_home_node sets the home node for a VMA policy present in the
|
||||||
|
task's address range. The system call updates the home node only for the existing
|
||||||
|
mempolicy range. Other address ranges are ignored. A home node is the NUMA node
|
||||||
|
closest to which page allocation will come from. Specifying the home node overrides
|
||||||
|
the default allocation policy to allocate memory close to the local node for an
|
||||||
|
executing CPU.
|
||||||
|
|
||||||
|
|
||||||
Memory Policy Command Line Interface
|
Memory Policy Command Line Interface
|
||||||
====================================
|
====================================
|
||||||
|
|
||||||
|
@ -948,7 +948,7 @@ how much memory needs to be free before kswapd goes back to sleep.
|
|||||||
|
|
||||||
The unit is in fractions of 10,000. The default value of 10 means the
|
The unit is in fractions of 10,000. The default value of 10 means the
|
||||||
distances between watermarks are 0.1% of the available memory in the
|
distances between watermarks are 0.1% of the available memory in the
|
||||||
node/system. The maximum value is 1000, or 10% of memory.
|
node/system. The maximum value is 3000, or 30% of memory.
|
||||||
|
|
||||||
A high rate of threads entering direct reclaim (allocstall) or kswapd
|
A high rate of threads entering direct reclaim (allocstall) or kswapd
|
||||||
going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
|
going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
|
||||||
|
@ -426,12 +426,14 @@ with the memory region, as the case would be with BSS (uninitialized data).
|
|||||||
The "pathname" shows the name associated file for this mapping. If the mapping
|
The "pathname" shows the name associated file for this mapping. If the mapping
|
||||||
is not associated with a file:
|
is not associated with a file:
|
||||||
|
|
||||||
======= ====================================
|
============= ====================================
|
||||||
[heap] the heap of the program
|
[heap] the heap of the program
|
||||||
[stack] the stack of the main process
|
[stack] the stack of the main process
|
||||||
[vdso] the "virtual dynamic shared object",
|
[vdso] the "virtual dynamic shared object",
|
||||||
the kernel system call handler
|
the kernel system call handler
|
||||||
======= ====================================
|
[anon:<name>] an anonymous mapping that has been
|
||||||
|
named by userspace
|
||||||
|
============= ====================================
|
||||||
|
|
||||||
or if empty, the mapping is anonymous.
|
or if empty, the mapping is anonymous.
|
||||||
|
|
||||||
|
@ -66,9 +66,11 @@ PTE Page Table Helpers
|
|||||||
+---------------------------+--------------------------------------------------+
|
+---------------------------+--------------------------------------------------+
|
||||||
| pte_mknotpresent | Invalidates a mapped PTE |
|
| pte_mknotpresent | Invalidates a mapped PTE |
|
||||||
+---------------------------+--------------------------------------------------+
|
+---------------------------+--------------------------------------------------+
|
||||||
| ptep_get_and_clear | Clears a PTE |
|
| ptep_clear | Clears a PTE |
|
||||||
+---------------------------+--------------------------------------------------+
|
+---------------------------+--------------------------------------------------+
|
||||||
| ptep_get_and_clear_full | Clears a PTE |
|
| ptep_get_and_clear | Clears and returns PTE |
|
||||||
|
+---------------------------+--------------------------------------------------+
|
||||||
|
| ptep_get_and_clear_full | Clears and returns PTE (batched PTE unmap) |
|
||||||
+---------------------------+--------------------------------------------------+
|
+---------------------------+--------------------------------------------------+
|
||||||
| ptep_test_and_clear_young | Clears young from a PTE |
|
| ptep_test_and_clear_young | Clears young from a PTE |
|
||||||
+---------------------------+--------------------------------------------------+
|
+---------------------------+--------------------------------------------------+
|
||||||
@ -247,12 +249,12 @@ SWAP Page Table Helpers
|
|||||||
| __swp_to_pmd_entry | Creates a mapped PMD from a swapped entry (arch) |
|
| __swp_to_pmd_entry | Creates a mapped PMD from a swapped entry (arch) |
|
||||||
+---------------------------+--------------------------------------------------+
|
+---------------------------+--------------------------------------------------+
|
||||||
| is_migration_entry | Tests a migration (read or write) swapped entry |
|
| is_migration_entry | Tests a migration (read or write) swapped entry |
|
||||||
+---------------------------+--------------------------------------------------+
|
+-------------------------------+----------------------------------------------+
|
||||||
| is_write_migration_entry | Tests a write migration swapped entry |
|
| is_writable_migration_entry | Tests a write migration swapped entry |
|
||||||
+---------------------------+--------------------------------------------------+
|
+-------------------------------+----------------------------------------------+
|
||||||
| make_migration_entry_read | Converts into read migration swapped entry |
|
| make_readable_migration_entry | Creates a read migration swapped entry |
|
||||||
+---------------------------+--------------------------------------------------+
|
+-------------------------------+----------------------------------------------+
|
||||||
| make_migration_entry | Creates a migration swapped entry (read or write)|
|
| make_writable_migration_entry | Creates a write migration swapped entry |
|
||||||
+---------------------------+--------------------------------------------------+
|
+-------------------------------+----------------------------------------------+
|
||||||
|
|
||||||
[1] https://lore.kernel.org/linux-mm/20181017020930.GN30832@redhat.com/
|
[1] https://lore.kernel.org/linux-mm/20181017020930.GN30832@redhat.com/
|
||||||
|
@ -31,10 +31,12 @@ algorithms. If you are looking for advice on simply allocating memory, see the
|
|||||||
page_migration
|
page_migration
|
||||||
page_frags
|
page_frags
|
||||||
page_owner
|
page_owner
|
||||||
|
page_table_check
|
||||||
remap_file_pages
|
remap_file_pages
|
||||||
slub
|
slub
|
||||||
split_page_table_lock
|
split_page_table_lock
|
||||||
transhuge
|
transhuge
|
||||||
unevictable-lru
|
unevictable-lru
|
||||||
|
vmalloced-kernel-stacks
|
||||||
z3fold
|
z3fold
|
||||||
zsmalloc
|
zsmalloc
|
||||||
|
@ -263,15 +263,15 @@ Monitoring Migration
|
|||||||
The following events (counters) can be used to monitor page migration.
|
The following events (counters) can be used to monitor page migration.
|
||||||
|
|
||||||
1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a
|
1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a
|
||||||
page was migrated. If the page was a non-THP page, then this counter is
|
page was migrated. If the page was a non-THP and non-hugetlb page, then
|
||||||
increased by one. If the page was a THP, then this counter is increased by
|
this counter is increased by one. If the page was a THP or hugetlb, then
|
||||||
the number of THP subpages. For example, migration of a single 2MB THP that
|
this counter is increased by the number of THP or hugetlb subpages.
|
||||||
has 4KB-size base pages (subpages) will cause this counter to increase by
|
For example, migration of a single 2MB THP that has 4KB-size base pages
|
||||||
512.
|
(subpages) will cause this counter to increase by 512.
|
||||||
|
|
||||||
2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for
|
2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for
|
||||||
PGMIGRATE_SUCCESS, above: this will be increased by the number of subpages,
|
PGMIGRATE_SUCCESS, above: this will be increased by the number of subpages,
|
||||||
if it was a THP.
|
if it was a THP or hugetlb.
|
||||||
|
|
||||||
3. THP_MIGRATION_SUCCESS: A THP was migrated without being split.
|
3. THP_MIGRATION_SUCCESS: A THP was migrated without being split.
|
||||||
|
|
||||||
|
56
Documentation/vm/page_table_check.rst
Normal file
56
Documentation/vm/page_table_check.rst
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
.. _page_table_check:
|
||||||
|
|
||||||
|
================
|
||||||
|
Page Table Check
|
||||||
|
================
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
============
|
||||||
|
|
||||||
|
Page table check allows hardening the kernel by ensuring that some types of
|
||||||
|
the memory corruptions are prevented.
|
||||||
|
|
||||||
|
Page table check performs extra verifications at the time when new pages become
|
||||||
|
accessible from the userspace by getting their page table entries (PTEs, PMDs,
|
||||||
|
etc.) added into the table.
|
||||||
|
|
||||||
|
In case of detected corruption, the kernel is crashed. There is a small
|
||||||
|
performance and memory overhead associated with the page table check. Therefore,
|
||||||
|
it is disabled by default, but can be optionally enabled on systems where the
|
||||||
|
extra hardening outweighs the performance costs. Also, because page table check
|
||||||
|
is synchronous, it can help with debugging double map memory corruption issues,
|
||||||
|
by crashing the kernel at the time the wrong mapping occurs instead of later, which is
|
||||||
|
often the case with memory corruption bugs.
|
||||||
|
|
||||||
|
Double mapping detection logic
|
||||||
|
==============================
|
||||||
|
|
||||||
|
+-------------------+-------------------+-------------------+------------------+
|
||||||
|
| Current Mapping | New mapping | Permissions | Rule |
|
||||||
|
+===================+===================+===================+==================+
|
||||||
|
| Anonymous | Anonymous | Read | Allow |
|
||||||
|
+-------------------+-------------------+-------------------+------------------+
|
||||||
|
| Anonymous | Anonymous | Read / Write | Prohibit |
|
||||||
|
+-------------------+-------------------+-------------------+------------------+
|
||||||
|
| Anonymous | Named | Any | Prohibit |
|
||||||
|
+-------------------+-------------------+-------------------+------------------+
|
||||||
|
| Named | Anonymous | Any | Prohibit |
|
||||||
|
+-------------------+-------------------+-------------------+------------------+
|
||||||
|
| Named | Named | Any | Allow |
|
||||||
|
+-------------------+-------------------+-------------------+------------------+
|
||||||
|
|
||||||
|
Enabling Page Table Check
|
||||||
|
=========================
|
||||||
|
|
||||||
|
Build kernel with:
|
||||||
|
|
||||||
|
- PAGE_TABLE_CHECK=y
|
||||||
|
Note, it can only be enabled on platforms where ARCH_SUPPORTS_PAGE_TABLE_CHECK
|
||||||
|
is available.
|
||||||
|
|
||||||
|
- Boot with 'page_table_check=on' kernel parameter.
|
||||||
|
|
||||||
|
Optionally, build kernel with PAGE_TABLE_CHECK_ENFORCED in order to have page
|
||||||
|
table check support without an extra kernel parameter.
|
153
Documentation/vm/vmalloced-kernel-stacks.rst
Normal file
153
Documentation/vm/vmalloced-kernel-stacks.rst
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=====================================
|
||||||
|
Virtually Mapped Kernel Stack Support
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
:Author: Shuah Khan <skhan@linuxfoundation.org>
|
||||||
|
|
||||||
|
.. contents:: :local:
|
||||||
|
|
||||||
|
Overview
|
||||||
|
--------
|
||||||
|
|
||||||
|
This is a compilation of information from the code and original patch
|
||||||
|
series that introduced the `Virtually Mapped Kernel Stacks feature
|
||||||
|
<https://lwn.net/Articles/694348/>`_
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
|
||||||
|
Kernel stack overflows are often hard to debug and make the kernel
|
||||||
|
susceptible to exploits. Problems could show up at a later time making
|
||||||
|
it difficult to isolate and root-cause.
|
||||||
|
|
||||||
|
Virtually-mapped kernel stacks with guard pages cause kernel stack
|
||||||
|
overflows to be caught immediately rather than causing difficult to
|
||||||
|
diagnose corruptions.
|
||||||
|
|
||||||
|
HAVE_ARCH_VMAP_STACK and VMAP_STACK configuration options enable
|
||||||
|
support for virtually mapped stacks with guard pages. This feature
|
||||||
|
causes reliable faults when the stack overflows. The usability of
|
||||||
|
the stack trace after overflow and response to the overflow itself
|
||||||
|
is architecture dependent.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
As of this writing, arm64, powerpc, riscv, s390, um, and x86 have
|
||||||
|
support for VMAP_STACK.
|
||||||
|
|
||||||
|
HAVE_ARCH_VMAP_STACK
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Architectures that can support Virtually Mapped Kernel Stacks should
|
||||||
|
enable this bool configuration option. The requirements are:
|
||||||
|
|
||||||
|
- vmalloc space must be large enough to hold many kernel stacks. This
|
||||||
|
may rule out many 32-bit architectures.
|
||||||
|
- Stacks in vmalloc space need to work reliably. For example, if
|
||||||
|
vmap page tables are created on demand, either this mechanism
|
||||||
|
needs to work while the stack points to a virtual address with
|
||||||
|
unpopulated page tables or arch code (switch_to() and switch_mm(),
|
||||||
|
most likely) needs to ensure that the stack's page table entries
|
||||||
|
are populated before running on a possibly unpopulated stack.
|
||||||
|
- If the stack overflows into a guard page, something reasonable
|
||||||
|
should happen. The definition of "reasonable" is flexible, but
|
||||||
|
instantly rebooting without logging anything would be unfriendly.
|
||||||
|
|
||||||
|
VMAP_STACK
|
||||||
|
----------
|
||||||
|
|
||||||
|
VMAP_STACK bool configuration option when enabled allocates virtually
|
||||||
|
mapped task stacks. This option depends on HAVE_ARCH_VMAP_STACK.
|
||||||
|
|
||||||
|
- Enable this if you want to use virtually-mapped kernel stacks
|
||||||
|
with guard pages. This causes kernel stack overflows to be caught
|
||||||
|
immediately rather than causing difficult-to-diagnose corruption.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Using this feature with KASAN requires architecture support
|
||||||
|
for backing virtual mappings with real shadow memory, and
|
||||||
|
KASAN_VMALLOC must be enabled.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
When VMAP_STACK is enabled, it is not possible to run DMA on stack
|
||||||
|
allocated data.
|
||||||
|
|
||||||
|
Kernel configuration options and dependencies keep changing. Refer to
|
||||||
|
the latest code base:
|
||||||
|
|
||||||
|
`Kconfig <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/Kconfig>`_
|
||||||
|
|
||||||
|
Allocation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
When a new kernel thread is created, thread stack is allocated from
|
||||||
|
virtually contiguous memory pages from the page level allocator. These
|
||||||
|
pages are mapped into contiguous kernel virtual space with PAGE_KERNEL
|
||||||
|
protections.
|
||||||
|
|
||||||
|
alloc_thread_stack_node() calls __vmalloc_node_range() to allocate stack
|
||||||
|
with PAGE_KERNEL protections.
|
||||||
|
|
||||||
|
- Allocated stacks are cached and later reused by new threads, so memcg
|
||||||
|
accounting is performed manually on assigning/releasing stacks to tasks.
|
||||||
|
Hence, __vmalloc_node_range is called without __GFP_ACCOUNT.
|
||||||
|
- vm_struct is cached to be able to find when thread free is initiated
|
||||||
|
in interrupt context. free_thread_stack() can be called in interrupt
|
||||||
|
context.
|
||||||
|
- On arm64, all VMAP'd stacks need to have the same alignment to ensure
|
||||||
|
that VMAP'd stack overflow detection works correctly. Arch specific
|
||||||
|
vmap stack allocator takes care of this detail.
|
||||||
|
- This does not address interrupt stacks - according to the original patch
|
||||||
|
|
||||||
|
Thread stack allocation is initiated from clone(), fork(), vfork(),
|
||||||
|
kernel_thread() via kernel_clone(). Leaving a few hints for searching
|
||||||
|
the code base to understand when and how thread stack is allocated.
|
||||||
|
|
||||||
|
Bulk of the code is in:
|
||||||
|
`kernel/fork.c <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/fork.c>`_.
|
||||||
|
|
||||||
|
stack_vm_area pointer in task_struct keeps track of the virtually allocated
|
||||||
|
stack and a non-null stack_vm_area pointer serves as an indication that the
|
||||||
|
virtually mapped kernel stacks are enabled.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
struct vm_struct *stack_vm_area;
|
||||||
|
|
||||||
|
Stack overflow handling
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Leading and trailing guard pages help detect stack overflows. When stack
|
||||||
|
overflows into the guard pages, handlers have to be careful not to overflow
|
||||||
|
the stack again. When handlers are called, it is likely that very little
|
||||||
|
stack space is left.
|
||||||
|
|
||||||
|
On x86, this is done by handling the page fault indicating the kernel
|
||||||
|
stack overflow on the double-fault stack.
|
||||||
|
|
||||||
|
Testing VMAP allocation with guard pages
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
How do we ensure that VMAP_STACK is actually allocating with a leading
|
||||||
|
and trailing guard page? The following lkdtm tests can help detect any
|
||||||
|
regressions.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
void lkdtm_STACK_GUARD_PAGE_LEADING()
|
||||||
|
void lkdtm_STACK_GUARD_PAGE_TRAILING()
|
||||||
|
|
||||||
|
Conclusions
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- A percpu cache of vmalloced stacks appears to be a bit faster than a
|
||||||
|
high-order stack allocation, at least when the cache hits.
|
||||||
|
- THREAD_INFO_IN_TASK gets rid of arch-specific thread_info entirely and
|
||||||
|
simply embeds the thread_info (containing only flags) and 'int cpu' into
|
||||||
|
task_struct.
|
||||||
|
- The thread stack can be freed as soon as the task is dead (without
|
||||||
|
waiting for RCU) and then, if vmapped stacks are in use, cache the
|
||||||
|
entire stack for reuse on the same cpu.
|
@ -14541,6 +14541,15 @@ F: include/net/page_pool.h
|
|||||||
F: include/trace/events/page_pool.h
|
F: include/trace/events/page_pool.h
|
||||||
F: net/core/page_pool.c
|
F: net/core/page_pool.c
|
||||||
|
|
||||||
|
PAGE TABLE CHECK
|
||||||
|
M: Pasha Tatashin <pasha.tatashin@soleen.com>
|
||||||
|
M: Andrew Morton <akpm@linux-foundation.org>
|
||||||
|
L: linux-mm@kvack.org
|
||||||
|
S: Maintained
|
||||||
|
F: Documentation/vm/page_table_check.rst
|
||||||
|
F: include/linux/page_table_check.h
|
||||||
|
F: mm/page_table_check.c
|
||||||
|
|
||||||
PANASONIC LAPTOP ACPI EXTRAS DRIVER
|
PANASONIC LAPTOP ACPI EXTRAS DRIVER
|
||||||
M: Kenneth Chan <kenneth.t.chan@gmail.com>
|
M: Kenneth Chan <kenneth.t.chan@gmail.com>
|
||||||
L: platform-driver-x86@vger.kernel.org
|
L: platform-driver-x86@vger.kernel.org
|
||||||
|
@ -1297,6 +1297,9 @@ config HAVE_ARCH_PFN_VALID
|
|||||||
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
||||||
bool
|
bool
|
||||||
|
|
||||||
|
config ARCH_SUPPORTS_PAGE_TABLE_CHECK
|
||||||
|
bool
|
||||||
|
|
||||||
config ARCH_SPLIT_ARG64
|
config ARCH_SPLIT_ARG64
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
|
@ -489,3 +489,4 @@
|
|||||||
# 557 reserved for memfd_secret
|
# 557 reserved for memfd_secret
|
||||||
558 common process_mrelease sys_process_mrelease
|
558 common process_mrelease sys_process_mrelease
|
||||||
559 common futex_waitv sys_futex_waitv
|
559 common futex_waitv sys_futex_waitv
|
||||||
|
560 common set_mempolicy_home_node sys_ni_syscall
|
||||||
|
@ -165,17 +165,15 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/* No need to mmap_read_unlock(mm) as we would
|
/* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -149,8 +149,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
|
|||||||
/*
|
/*
|
||||||
* Fault retry nuances, mmap_lock already relinquished by core mm
|
* Fault retry nuances, mmap_lock already relinquished by core mm
|
||||||
*/
|
*/
|
||||||
if (unlikely((fault & VM_FAULT_RETRY) &&
|
if (unlikely(fault & VM_FAULT_RETRY)) {
|
||||||
(flags & FAULT_FLAG_ALLOW_RETRY))) {
|
|
||||||
flags |= FAULT_FLAG_TRIED;
|
flags |= FAULT_FLAG_TRIED;
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
@ -322,7 +322,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (!(fault & VM_FAULT_ERROR)) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
flags |= FAULT_FLAG_TRIED;
|
flags |= FAULT_FLAG_TRIED;
|
||||||
goto retry;
|
goto retry;
|
||||||
|
@ -463,3 +463,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -38,7 +38,7 @@
|
|||||||
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
|
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
|
||||||
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
|
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
|
||||||
|
|
||||||
#define __NR_compat_syscalls 450
|
#define __NR_compat_syscalls 451
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define __ARCH_WANT_SYS_CLONE
|
#define __ARCH_WANT_SYS_CLONE
|
||||||
|
@ -905,6 +905,8 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
|
|||||||
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
|
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
|
||||||
#define __NR_futex_waitv 449
|
#define __NR_futex_waitv 449
|
||||||
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
||||||
|
#define __NR_set_mempolicy_home_node 450
|
||||||
|
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Please add new compat syscalls above this comment and update
|
* Please add new compat syscalls above this comment and update
|
||||||
|
@ -36,7 +36,7 @@ void *module_alloc(unsigned long size)
|
|||||||
module_alloc_end = MODULES_END;
|
module_alloc_end = MODULES_END;
|
||||||
|
|
||||||
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
|
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
|
||||||
module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
|
module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
|
||||||
NUMA_NO_NODE, __builtin_return_address(0));
|
NUMA_NO_NODE, __builtin_return_address(0));
|
||||||
|
|
||||||
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
|
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
|
||||||
@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
|
|||||||
PAGE_KERNEL, 0, NUMA_NO_NODE,
|
PAGE_KERNEL, 0, NUMA_NO_NODE,
|
||||||
__builtin_return_address(0));
|
__builtin_return_address(0));
|
||||||
|
|
||||||
if (p && (kasan_module_alloc(p, size) < 0)) {
|
if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
|
||||||
vfree(p);
|
vfree(p);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -608,10 +608,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (fault & VM_FAULT_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
|
mm_flags |= FAULT_FLAG_TRIED;
|
||||||
mm_flags |= FAULT_FLAG_TRIED;
|
goto retry;
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
|
||||||
|
@ -98,11 +98,9 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
|
|||||||
|
|
||||||
/* The most common case -- we are done. */
|
/* The most common case -- we are done. */
|
||||||
if (likely(!(fault & VM_FAULT_ERROR))) {
|
if (likely(!(fault & VM_FAULT_ERROR))) {
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
goto retry;
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -848,7 +848,7 @@ register_unwind_table (struct module *mod)
|
|||||||
{
|
{
|
||||||
struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
|
struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
|
||||||
struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
|
struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
|
||||||
struct unw_table_entry tmp, *e1, *e2, *core, *init;
|
struct unw_table_entry *e1, *e2, *core, *init;
|
||||||
unsigned long num_init = 0, num_core = 0;
|
unsigned long num_init = 0, num_core = 0;
|
||||||
|
|
||||||
/* First, count how many init and core unwind-table entries there are. */
|
/* First, count how many init and core unwind-table entries there are. */
|
||||||
@ -865,9 +865,7 @@ register_unwind_table (struct module *mod)
|
|||||||
for (e1 = start; e1 < end; ++e1) {
|
for (e1 = start; e1 < end; ++e1) {
|
||||||
for (e2 = e1 + 1; e2 < end; ++e2) {
|
for (e2 = e1 + 1; e2 < end; ++e2) {
|
||||||
if (e2->start_offset < e1->start_offset) {
|
if (e2->start_offset < e1->start_offset) {
|
||||||
tmp = *e1;
|
swap(*e1, *e2);
|
||||||
*e1 = *e2;
|
|
||||||
*e2 = tmp;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -208,10 +208,7 @@ sort_regions (struct rsvd_region *rsvd_region, int max)
|
|||||||
while (max--) {
|
while (max--) {
|
||||||
for (j = 0; j < max; ++j) {
|
for (j = 0; j < max; ++j) {
|
||||||
if (rsvd_region[j].start > rsvd_region[j+1].start) {
|
if (rsvd_region[j].start > rsvd_region[j+1].start) {
|
||||||
struct rsvd_region tmp;
|
swap(rsvd_region[j], rsvd_region[j + 1]);
|
||||||
tmp = rsvd_region[j];
|
|
||||||
rsvd_region[j] = rsvd_region[j + 1];
|
|
||||||
rsvd_region[j + 1] = tmp;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -370,3 +370,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -264,6 +264,7 @@ static struct attribute * cache_default_attrs[] = {
|
|||||||
&shared_cpu_map.attr,
|
&shared_cpu_map.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
ATTRIBUTE_GROUPS(cache_default);
|
||||||
|
|
||||||
#define to_object(k) container_of(k, struct cache_info, kobj)
|
#define to_object(k) container_of(k, struct cache_info, kobj)
|
||||||
#define to_attr(a) container_of(a, struct cache_attr, attr)
|
#define to_attr(a) container_of(a, struct cache_attr, attr)
|
||||||
@ -284,7 +285,7 @@ static const struct sysfs_ops cache_sysfs_ops = {
|
|||||||
|
|
||||||
static struct kobj_type cache_ktype = {
|
static struct kobj_type cache_ktype = {
|
||||||
.sysfs_ops = &cache_sysfs_ops,
|
.sysfs_ops = &cache_sysfs_ops,
|
||||||
.default_attrs = cache_default_attrs,
|
.default_groups = cache_default_groups,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct kobj_type cache_ktype_percpu_entry = {
|
static struct kobj_type cache_ktype_percpu_entry = {
|
||||||
|
@ -171,7 +171,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
|
|||||||
* @n_pages: number of contiguous pages to allocate
|
* @n_pages: number of contiguous pages to allocate
|
||||||
*
|
*
|
||||||
* Allocate the specified number of contiguous uncached pages on the
|
* Allocate the specified number of contiguous uncached pages on the
|
||||||
* the requested node. If not enough contiguous uncached pages are available
|
* requested node. If not enough contiguous uncached pages are available
|
||||||
* on the requested node, roundrobin starting with the next higher node.
|
* on the requested node, roundrobin starting with the next higher node.
|
||||||
*/
|
*/
|
||||||
unsigned long uncached_alloc_page(int starting_nid, int n_pages)
|
unsigned long uncached_alloc_page(int starting_nid, int n_pages)
|
||||||
|
@ -156,17 +156,15 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/* No need to mmap_read_unlock(mm) as we would
|
/* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -449,3 +449,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -153,18 +153,16 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to mmap_read_unlock(mm) as we would
|
* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -455,3 +455,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -232,18 +232,16 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to mmap_read_unlock(mm) as we would
|
* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -388,3 +388,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 n32 process_mrelease sys_process_mrelease
|
448 n32 process_mrelease sys_process_mrelease
|
||||||
449 n32 futex_waitv sys_futex_waitv
|
449 n32 futex_waitv sys_futex_waitv
|
||||||
|
450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -364,3 +364,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 n64 process_mrelease sys_process_mrelease
|
448 n64 process_mrelease sys_process_mrelease
|
||||||
449 n64 futex_waitv sys_futex_waitv
|
449 n64 futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -437,3 +437,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 o32 process_mrelease sys_process_mrelease
|
448 o32 process_mrelease sys_process_mrelease
|
||||||
449 o32 futex_waitv sys_futex_waitv
|
449 o32 futex_waitv sys_futex_waitv
|
||||||
|
450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -171,18 +171,17 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
|
|||||||
goto do_sigbus;
|
goto do_sigbus;
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
|
||||||
if (fault & VM_FAULT_RETRY) {
|
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/*
|
if (fault & VM_FAULT_RETRY) {
|
||||||
* No need to mmap_read_unlock(mm) as we would
|
flags |= FAULT_FLAG_TRIED;
|
||||||
* have already released it in __lock_page_or_retry
|
|
||||||
* in mm/filemap.c.
|
|
||||||
*/
|
|
||||||
|
|
||||||
goto retry;
|
/*
|
||||||
}
|
* No need to mmap_read_unlock(mm) as we would
|
||||||
|
* have already released it in __lock_page_or_retry
|
||||||
|
* in mm/filemap.c.
|
||||||
|
*/
|
||||||
|
|
||||||
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -230,16 +230,14 @@ void do_page_fault(unsigned long entry, unsigned long addr,
|
|||||||
goto bad_area;
|
goto bad_area;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/* No need to mmap_read_unlock(mm) as we would
|
/* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -149,18 +149,16 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to mmap_read_unlock(mm) as we would
|
* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -177,18 +177,16 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
/*RGD modeled on Cris */
|
||||||
/*RGD modeled on Cris */
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/* No need to mmap_read_unlock(mm) as we would
|
/* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -447,3 +447,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -324,16 +324,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
|
|||||||
goto bad_area;
|
goto bad_area;
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
/*
|
||||||
/*
|
* No need to mmap_read_unlock(mm) as we would
|
||||||
* No need to mmap_read_unlock(mm) as we would
|
* have already released it in __lock_page_or_retry
|
||||||
* have already released it in __lock_page_or_retry
|
* in mm/filemap.c.
|
||||||
* in mm/filemap.c.
|
*/
|
||||||
*/
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
goto retry;
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
return;
|
return;
|
||||||
|
@ -529,3 +529,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -517,10 +517,8 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||||||
* case.
|
* case.
|
||||||
*/
|
*/
|
||||||
if (unlikely(fault & VM_FAULT_RETRY)) {
|
if (unlikely(fault & VM_FAULT_RETRY)) {
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
goto retry;
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(current->mm);
|
mmap_read_unlock(current->mm);
|
||||||
|
@ -330,7 +330,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
|
|||||||
if (fault_signal_pending(fault, regs))
|
if (fault_signal_pending(fault, regs))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
|
if (unlikely(fault & VM_FAULT_RETRY)) {
|
||||||
flags |= FAULT_FLAG_TRIED;
|
flags |= FAULT_FLAG_TRIED;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -37,14 +37,15 @@
|
|||||||
|
|
||||||
void *module_alloc(unsigned long size)
|
void *module_alloc(unsigned long size)
|
||||||
{
|
{
|
||||||
|
gfp_t gfp_mask = GFP_KERNEL;
|
||||||
void *p;
|
void *p;
|
||||||
|
|
||||||
if (PAGE_ALIGN(size) > MODULES_LEN)
|
if (PAGE_ALIGN(size) > MODULES_LEN)
|
||||||
return NULL;
|
return NULL;
|
||||||
p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
|
p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
|
||||||
GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
|
gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
|
||||||
__builtin_return_address(0));
|
__builtin_return_address(0));
|
||||||
if (p && (kasan_module_alloc(p, size) < 0)) {
|
if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
|
||||||
vfree(p);
|
vfree(p);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -452,3 +452,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -452,21 +452,21 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
|
|||||||
if (unlikely(fault & VM_FAULT_ERROR))
|
if (unlikely(fault & VM_FAULT_ERROR))
|
||||||
goto out_up;
|
goto out_up;
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
|
||||||
if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
|
(flags & FAULT_FLAG_RETRY_NOWAIT)) {
|
||||||
(flags & FAULT_FLAG_RETRY_NOWAIT)) {
|
/*
|
||||||
/* FAULT_FLAG_RETRY_NOWAIT has been set,
|
* FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
|
||||||
* mmap_lock has not been released */
|
* not been released
|
||||||
current->thread.gmap_pfault = 1;
|
*/
|
||||||
fault = VM_FAULT_PFAULT;
|
current->thread.gmap_pfault = 1;
|
||||||
goto out_up;
|
fault = VM_FAULT_PFAULT;
|
||||||
}
|
goto out_up;
|
||||||
flags &= ~FAULT_FLAG_RETRY_NOWAIT;
|
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
mmap_read_lock(mm);
|
|
||||||
goto retry;
|
|
||||||
}
|
}
|
||||||
|
flags &= ~FAULT_FLAG_RETRY_NOWAIT;
|
||||||
|
flags |= FAULT_FLAG_TRIED;
|
||||||
|
mmap_read_lock(mm);
|
||||||
|
goto retry;
|
||||||
}
|
}
|
||||||
if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
|
if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
|
||||||
address = __gmap_link(gmap, current->thread.gmap_addr,
|
address = __gmap_link(gmap, current->thread.gmap_addr,
|
||||||
|
@ -452,3 +452,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -485,17 +485,15 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
|
|||||||
if (mm_fault_error(regs, error_code, address, fault))
|
if (mm_fault_error(regs, error_code, address, fault))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No need to mmap_read_unlock(mm) as we would
|
* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -495,3 +495,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -200,17 +200,15 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/* No need to mmap_read_unlock(mm) as we would
|
/* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -437,17 +437,15 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/* No need to mmap_read_unlock(mm) as we would
|
/* No need to mmap_read_unlock(mm) as we would
|
||||||
* have already released it in __lock_page_or_retry
|
* have already released it in __lock_page_or_retry
|
||||||
* in mm/filemap.c.
|
* in mm/filemap.c.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
|
||||||
|
@ -87,12 +87,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
|
|||||||
}
|
}
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
if (fault & VM_FAULT_RETRY) {
|
flags |= FAULT_FLAG_TRIED;
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pmd = pmd_off(mm, address);
|
pmd = pmd_off(mm, address);
|
||||||
|
@ -104,6 +104,7 @@ config X86
|
|||||||
select ARCH_SUPPORTS_ACPI
|
select ARCH_SUPPORTS_ACPI
|
||||||
select ARCH_SUPPORTS_ATOMIC_RMW
|
select ARCH_SUPPORTS_ATOMIC_RMW
|
||||||
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
||||||
|
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64
|
||||||
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
|
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
|
||||||
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
|
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
|
||||||
select ARCH_SUPPORTS_LTO_CLANG
|
select ARCH_SUPPORTS_LTO_CLANG
|
||||||
|
@ -454,3 +454,4 @@
|
|||||||
447 i386 memfd_secret sys_memfd_secret
|
447 i386 memfd_secret sys_memfd_secret
|
||||||
448 i386 process_mrelease sys_process_mrelease
|
448 i386 process_mrelease sys_process_mrelease
|
||||||
449 i386 futex_waitv sys_futex_waitv
|
449 i386 futex_waitv sys_futex_waitv
|
||||||
|
450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -371,6 +371,7 @@
|
|||||||
447 common memfd_secret sys_memfd_secret
|
447 common memfd_secret sys_memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
|
||||||
#
|
#
|
||||||
# Due to a historical design error, certain syscalls are numbered differently
|
# Due to a historical design error, certain syscalls are numbered differently
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <asm/pkru.h>
|
#include <asm/pkru.h>
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
#include <asm-generic/pgtable_uffd.h>
|
#include <asm-generic/pgtable_uffd.h>
|
||||||
|
#include <linux/page_table_check.h>
|
||||||
|
|
||||||
extern pgd_t early_top_pgt[PTRS_PER_PGD];
|
extern pgd_t early_top_pgt[PTRS_PER_PGD];
|
||||||
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
|
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
|
||||||
@ -753,7 +754,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
|
|||||||
return true;
|
return true;
|
||||||
|
|
||||||
if ((pte_flags(a) & _PAGE_PROTNONE) &&
|
if ((pte_flags(a) & _PAGE_PROTNONE) &&
|
||||||
mm_tlb_flush_pending(mm))
|
atomic_read(&mm->tlb_flush_pending))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -1007,18 +1008,21 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
|
|||||||
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
|
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||||
pte_t *ptep, pte_t pte)
|
pte_t *ptep, pte_t pte)
|
||||||
{
|
{
|
||||||
|
page_table_check_pte_set(mm, addr, ptep, pte);
|
||||||
set_pte(ptep, pte);
|
set_pte(ptep, pte);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
|
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
|
||||||
pmd_t *pmdp, pmd_t pmd)
|
pmd_t *pmdp, pmd_t pmd)
|
||||||
{
|
{
|
||||||
|
page_table_check_pmd_set(mm, addr, pmdp, pmd);
|
||||||
set_pmd(pmdp, pmd);
|
set_pmd(pmdp, pmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
|
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
|
||||||
pud_t *pudp, pud_t pud)
|
pud_t *pudp, pud_t pud)
|
||||||
{
|
{
|
||||||
|
page_table_check_pud_set(mm, addr, pudp, pud);
|
||||||
native_set_pud(pudp, pud);
|
native_set_pud(pudp, pud);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1049,6 +1053,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
|
|||||||
pte_t *ptep)
|
pte_t *ptep)
|
||||||
{
|
{
|
||||||
pte_t pte = native_ptep_get_and_clear(ptep);
|
pte_t pte = native_ptep_get_and_clear(ptep);
|
||||||
|
page_table_check_pte_clear(mm, addr, pte);
|
||||||
return pte;
|
return pte;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1064,12 +1069,23 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
|
|||||||
* care about updates and native needs no locking
|
* care about updates and native needs no locking
|
||||||
*/
|
*/
|
||||||
pte = native_local_ptep_get_and_clear(ptep);
|
pte = native_local_ptep_get_and_clear(ptep);
|
||||||
|
page_table_check_pte_clear(mm, addr, pte);
|
||||||
} else {
|
} else {
|
||||||
pte = ptep_get_and_clear(mm, addr, ptep);
|
pte = ptep_get_and_clear(mm, addr, ptep);
|
||||||
}
|
}
|
||||||
return pte;
|
return pte;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define __HAVE_ARCH_PTEP_CLEAR
|
||||||
|
static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
|
||||||
|
pte_t *ptep)
|
||||||
|
{
|
||||||
|
if (IS_ENABLED(CONFIG_PAGE_TABLE_CHECK))
|
||||||
|
ptep_get_and_clear(mm, addr, ptep);
|
||||||
|
else
|
||||||
|
pte_clear(mm, addr, ptep);
|
||||||
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
|
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
|
||||||
static inline void ptep_set_wrprotect(struct mm_struct *mm,
|
static inline void ptep_set_wrprotect(struct mm_struct *mm,
|
||||||
unsigned long addr, pte_t *ptep)
|
unsigned long addr, pte_t *ptep)
|
||||||
@ -1110,14 +1126,22 @@ static inline int pmd_write(pmd_t pmd)
|
|||||||
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
|
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
|
||||||
pmd_t *pmdp)
|
pmd_t *pmdp)
|
||||||
{
|
{
|
||||||
return native_pmdp_get_and_clear(pmdp);
|
pmd_t pmd = native_pmdp_get_and_clear(pmdp);
|
||||||
|
|
||||||
|
page_table_check_pmd_clear(mm, addr, pmd);
|
||||||
|
|
||||||
|
return pmd;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
|
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
|
||||||
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
|
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
|
||||||
unsigned long addr, pud_t *pudp)
|
unsigned long addr, pud_t *pudp)
|
||||||
{
|
{
|
||||||
return native_pudp_get_and_clear(pudp);
|
pud_t pud = native_pudp_get_and_clear(pudp);
|
||||||
|
|
||||||
|
page_table_check_pud_clear(mm, addr, pud);
|
||||||
|
|
||||||
|
return pud;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
|
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
|
||||||
@ -1138,6 +1162,7 @@ static inline int pud_write(pud_t pud)
|
|||||||
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
|
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
|
||||||
unsigned long address, pmd_t *pmdp, pmd_t pmd)
|
unsigned long address, pmd_t *pmdp, pmd_t pmd)
|
||||||
{
|
{
|
||||||
|
page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
|
||||||
if (IS_ENABLED(CONFIG_SMP)) {
|
if (IS_ENABLED(CONFIG_SMP)) {
|
||||||
return xchg(pmdp, pmd);
|
return xchg(pmdp, pmd);
|
||||||
} else {
|
} else {
|
||||||
|
@ -67,6 +67,7 @@ static unsigned long int get_module_load_offset(void)
|
|||||||
|
|
||||||
void *module_alloc(unsigned long size)
|
void *module_alloc(unsigned long size)
|
||||||
{
|
{
|
||||||
|
gfp_t gfp_mask = GFP_KERNEL;
|
||||||
void *p;
|
void *p;
|
||||||
|
|
||||||
if (PAGE_ALIGN(size) > MODULES_LEN)
|
if (PAGE_ALIGN(size) > MODULES_LEN)
|
||||||
@ -74,10 +75,10 @@ void *module_alloc(unsigned long size)
|
|||||||
|
|
||||||
p = __vmalloc_node_range(size, MODULE_ALIGN,
|
p = __vmalloc_node_range(size, MODULE_ALIGN,
|
||||||
MODULES_VADDR + get_module_load_offset(),
|
MODULES_VADDR + get_module_load_offset(),
|
||||||
MODULES_END, GFP_KERNEL,
|
MODULES_END, gfp_mask,
|
||||||
PAGE_KERNEL, 0, NUMA_NO_NODE,
|
PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
|
||||||
__builtin_return_address(0));
|
__builtin_return_address(0));
|
||||||
if (p && (kasan_module_alloc(p, size) < 0)) {
|
if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
|
||||||
vfree(p);
|
vfree(p);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1413,8 +1413,7 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||||||
* and if there is a fatal signal pending there is no guarantee
|
* and if there is a fatal signal pending there is no guarantee
|
||||||
* that we made any progress. Handle this case first.
|
* that we made any progress. Handle this case first.
|
||||||
*/
|
*/
|
||||||
if (unlikely((fault & VM_FAULT_RETRY) &&
|
if (unlikely(fault & VM_FAULT_RETRY)) {
|
||||||
(flags & FAULT_FLAG_ALLOW_RETRY))) {
|
|
||||||
flags |= FAULT_FLAG_TRIED;
|
flags |= FAULT_FLAG_TRIED;
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
@ -420,3 +420,4 @@
|
|||||||
# 447 reserved for memfd_secret
|
# 447 reserved for memfd_secret
|
||||||
448 common process_mrelease sys_process_mrelease
|
448 common process_mrelease sys_process_mrelease
|
||||||
449 common futex_waitv sys_futex_waitv
|
449 common futex_waitv sys_futex_waitv
|
||||||
|
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||||
|
@ -127,17 +127,16 @@ void do_page_fault(struct pt_regs *regs)
|
|||||||
goto do_sigbus;
|
goto do_sigbus;
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
|
||||||
if (fault & VM_FAULT_RETRY) {
|
|
||||||
flags |= FAULT_FLAG_TRIED;
|
|
||||||
|
|
||||||
/* No need to mmap_read_unlock(mm) as we would
|
if (fault & VM_FAULT_RETRY) {
|
||||||
* have already released it in __lock_page_or_retry
|
flags |= FAULT_FLAG_TRIED;
|
||||||
* in mm/filemap.c.
|
|
||||||
*/
|
|
||||||
|
|
||||||
goto retry;
|
/* No need to mmap_read_unlock(mm) as we would
|
||||||
}
|
* have already released it in __lock_page_or_retry
|
||||||
|
* in mm/filemap.c.
|
||||||
|
*/
|
||||||
|
|
||||||
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
@ -1903,14 +1903,7 @@ static struct attribute *zram_disk_attrs[] = {
|
|||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct attribute_group zram_disk_attr_group = {
|
ATTRIBUTE_GROUPS(zram_disk);
|
||||||
.attrs = zram_disk_attrs,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const struct attribute_group *zram_disk_attr_groups[] = {
|
|
||||||
&zram_disk_attr_group,
|
|
||||||
NULL,
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate and initialize new zram device. the function returns
|
* Allocate and initialize new zram device. the function returns
|
||||||
@ -1983,7 +1976,7 @@ static int zram_add(void)
|
|||||||
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
|
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
|
||||||
|
|
||||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
|
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
|
||||||
ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
|
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out_cleanup_disk;
|
goto out_cleanup_disk;
|
||||||
|
|
||||||
|
@ -127,11 +127,35 @@ ATTRIBUTE_GROUPS(dax_drv);
|
|||||||
|
|
||||||
static int dax_bus_match(struct device *dev, struct device_driver *drv);
|
static int dax_bus_match(struct device *dev, struct device_driver *drv);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Static dax regions are regions created by an external subsystem
|
||||||
|
* nvdimm where a single range is assigned. Its boundaries are by the external
|
||||||
|
* subsystem and are usually limited to one physical memory range. For example,
|
||||||
|
* for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a
|
||||||
|
* single contiguous range)
|
||||||
|
*
|
||||||
|
* On dynamic dax regions, the assigned region can be partitioned by dax core
|
||||||
|
* into multiple subdivisions. A subdivision is represented into one
|
||||||
|
* /dev/daxN.M device composed by one or more potentially discontiguous ranges.
|
||||||
|
*
|
||||||
|
* When allocating a dax region, drivers must set whether it's static
|
||||||
|
* (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned
|
||||||
|
* to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
|
||||||
|
* devices it is NULL but afterwards allocated by dax core on device ->probe().
|
||||||
|
* Care is needed to make sure that dynamic dax devices are torn down with a
|
||||||
|
* cleared @pgmap field (see kill_dev_dax()).
|
||||||
|
*/
|
||||||
static bool is_static(struct dax_region *dax_region)
|
static bool is_static(struct dax_region *dax_region)
|
||||||
{
|
{
|
||||||
return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
|
return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool static_dev_dax(struct dev_dax *dev_dax)
|
||||||
|
{
|
||||||
|
return is_static(dev_dax->region);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(static_dev_dax);
|
||||||
|
|
||||||
static u64 dev_dax_size(struct dev_dax *dev_dax)
|
static u64 dev_dax_size(struct dev_dax *dev_dax)
|
||||||
{
|
{
|
||||||
u64 size = 0;
|
u64 size = 0;
|
||||||
@ -361,6 +385,14 @@ void kill_dev_dax(struct dev_dax *dev_dax)
|
|||||||
|
|
||||||
kill_dax(dax_dev);
|
kill_dax(dax_dev);
|
||||||
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
|
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Dynamic dax region have the pgmap allocated via dev_kzalloc()
|
||||||
|
* and thus freed by devm. Clear the pgmap to not have stale pgmap
|
||||||
|
* ranges on probe() from previous reconfigurations of region devices.
|
||||||
|
*/
|
||||||
|
if (!static_dev_dax(dev_dax))
|
||||||
|
dev_dax->pgmap = NULL;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kill_dev_dax);
|
EXPORT_SYMBOL_GPL(kill_dev_dax);
|
||||||
|
|
||||||
|
@ -39,6 +39,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
|
|||||||
__dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
|
__dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
|
||||||
void dax_driver_unregister(struct dax_device_driver *dax_drv);
|
void dax_driver_unregister(struct dax_device_driver *dax_drv);
|
||||||
void kill_dev_dax(struct dev_dax *dev_dax);
|
void kill_dev_dax(struct dev_dax *dev_dax);
|
||||||
|
bool static_dev_dax(struct dev_dax *dev_dax);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* While run_dax() is potentially a generic operation that could be
|
* While run_dax() is potentially a generic operation that could be
|
||||||
|
@ -73,11 +73,39 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
|
||||||
|
unsigned long fault_size)
|
||||||
|
{
|
||||||
|
unsigned long i, nr_pages = fault_size / PAGE_SIZE;
|
||||||
|
struct file *filp = vmf->vma->vm_file;
|
||||||
|
struct dev_dax *dev_dax = filp->private_data;
|
||||||
|
pgoff_t pgoff;
|
||||||
|
|
||||||
|
/* mapping is only set on the head */
|
||||||
|
if (dev_dax->pgmap->vmemmap_shift)
|
||||||
|
nr_pages = 1;
|
||||||
|
|
||||||
|
pgoff = linear_page_index(vmf->vma,
|
||||||
|
ALIGN(vmf->address, fault_size));
|
||||||
|
|
||||||
|
for (i = 0; i < nr_pages; i++) {
|
||||||
|
struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
|
||||||
|
|
||||||
|
page = compound_head(page);
|
||||||
|
if (page->mapping)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
page->mapping = filp->f_mapping;
|
||||||
|
page->index = pgoff + i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
|
static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
|
||||||
struct vm_fault *vmf, pfn_t *pfn)
|
struct vm_fault *vmf)
|
||||||
{
|
{
|
||||||
struct device *dev = &dev_dax->dev;
|
struct device *dev = &dev_dax->dev;
|
||||||
phys_addr_t phys;
|
phys_addr_t phys;
|
||||||
|
pfn_t pfn;
|
||||||
unsigned int fault_size = PAGE_SIZE;
|
unsigned int fault_size = PAGE_SIZE;
|
||||||
|
|
||||||
if (check_vma(dev_dax, vmf->vma, __func__))
|
if (check_vma(dev_dax, vmf->vma, __func__))
|
||||||
@ -98,18 +126,21 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
|
|||||||
return VM_FAULT_SIGBUS;
|
return VM_FAULT_SIGBUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
|
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
|
||||||
|
|
||||||
return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
|
dax_set_mapping(vmf, pfn, fault_size);
|
||||||
|
|
||||||
|
return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
|
||||||
}
|
}
|
||||||
|
|
||||||
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
|
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
|
||||||
struct vm_fault *vmf, pfn_t *pfn)
|
struct vm_fault *vmf)
|
||||||
{
|
{
|
||||||
unsigned long pmd_addr = vmf->address & PMD_MASK;
|
unsigned long pmd_addr = vmf->address & PMD_MASK;
|
||||||
struct device *dev = &dev_dax->dev;
|
struct device *dev = &dev_dax->dev;
|
||||||
phys_addr_t phys;
|
phys_addr_t phys;
|
||||||
pgoff_t pgoff;
|
pgoff_t pgoff;
|
||||||
|
pfn_t pfn;
|
||||||
unsigned int fault_size = PMD_SIZE;
|
unsigned int fault_size = PMD_SIZE;
|
||||||
|
|
||||||
if (check_vma(dev_dax, vmf->vma, __func__))
|
if (check_vma(dev_dax, vmf->vma, __func__))
|
||||||
@ -138,19 +169,22 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
|
|||||||
return VM_FAULT_SIGBUS;
|
return VM_FAULT_SIGBUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
|
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
|
||||||
|
|
||||||
return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
|
dax_set_mapping(vmf, pfn, fault_size);
|
||||||
|
|
||||||
|
return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
|
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
|
||||||
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
|
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
|
||||||
struct vm_fault *vmf, pfn_t *pfn)
|
struct vm_fault *vmf)
|
||||||
{
|
{
|
||||||
unsigned long pud_addr = vmf->address & PUD_MASK;
|
unsigned long pud_addr = vmf->address & PUD_MASK;
|
||||||
struct device *dev = &dev_dax->dev;
|
struct device *dev = &dev_dax->dev;
|
||||||
phys_addr_t phys;
|
phys_addr_t phys;
|
||||||
pgoff_t pgoff;
|
pgoff_t pgoff;
|
||||||
|
pfn_t pfn;
|
||||||
unsigned int fault_size = PUD_SIZE;
|
unsigned int fault_size = PUD_SIZE;
|
||||||
|
|
||||||
|
|
||||||
@ -180,13 +214,15 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
|
|||||||
return VM_FAULT_SIGBUS;
|
return VM_FAULT_SIGBUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
|
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
|
||||||
|
|
||||||
return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
|
dax_set_mapping(vmf, pfn, fault_size);
|
||||||
|
|
||||||
|
return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
|
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
|
||||||
struct vm_fault *vmf, pfn_t *pfn)
|
struct vm_fault *vmf)
|
||||||
{
|
{
|
||||||
return VM_FAULT_FALLBACK;
|
return VM_FAULT_FALLBACK;
|
||||||
}
|
}
|
||||||
@ -196,10 +232,8 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
|
|||||||
enum page_entry_size pe_size)
|
enum page_entry_size pe_size)
|
||||||
{
|
{
|
||||||
struct file *filp = vmf->vma->vm_file;
|
struct file *filp = vmf->vma->vm_file;
|
||||||
unsigned long fault_size;
|
|
||||||
vm_fault_t rc = VM_FAULT_SIGBUS;
|
vm_fault_t rc = VM_FAULT_SIGBUS;
|
||||||
int id;
|
int id;
|
||||||
pfn_t pfn;
|
|
||||||
struct dev_dax *dev_dax = filp->private_data;
|
struct dev_dax *dev_dax = filp->private_data;
|
||||||
|
|
||||||
dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
|
dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
|
||||||
@ -209,43 +243,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
|
|||||||
id = dax_read_lock();
|
id = dax_read_lock();
|
||||||
switch (pe_size) {
|
switch (pe_size) {
|
||||||
case PE_SIZE_PTE:
|
case PE_SIZE_PTE:
|
||||||
fault_size = PAGE_SIZE;
|
rc = __dev_dax_pte_fault(dev_dax, vmf);
|
||||||
rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
|
|
||||||
break;
|
break;
|
||||||
case PE_SIZE_PMD:
|
case PE_SIZE_PMD:
|
||||||
fault_size = PMD_SIZE;
|
rc = __dev_dax_pmd_fault(dev_dax, vmf);
|
||||||
rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
|
|
||||||
break;
|
break;
|
||||||
case PE_SIZE_PUD:
|
case PE_SIZE_PUD:
|
||||||
fault_size = PUD_SIZE;
|
rc = __dev_dax_pud_fault(dev_dax, vmf);
|
||||||
rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
rc = VM_FAULT_SIGBUS;
|
rc = VM_FAULT_SIGBUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rc == VM_FAULT_NOPAGE) {
|
|
||||||
unsigned long i;
|
|
||||||
pgoff_t pgoff;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In the device-dax case the only possibility for a
|
|
||||||
* VM_FAULT_NOPAGE result is when device-dax capacity is
|
|
||||||
* mapped. No need to consider the zero page, or racing
|
|
||||||
* conflicting mappings.
|
|
||||||
*/
|
|
||||||
pgoff = linear_page_index(vmf->vma, vmf->address
|
|
||||||
& ~(fault_size - 1));
|
|
||||||
for (i = 0; i < fault_size / PAGE_SIZE; i++) {
|
|
||||||
struct page *page;
|
|
||||||
|
|
||||||
page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
|
|
||||||
if (page->mapping)
|
|
||||||
continue;
|
|
||||||
page->mapping = filp->f_mapping;
|
|
||||||
page->index = pgoff + i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dax_read_unlock(id);
|
dax_read_unlock(id);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
@ -398,17 +407,34 @@ int dev_dax_probe(struct dev_dax *dev_dax)
|
|||||||
void *addr;
|
void *addr;
|
||||||
int rc, i;
|
int rc, i;
|
||||||
|
|
||||||
pgmap = dev_dax->pgmap;
|
if (static_dev_dax(dev_dax)) {
|
||||||
if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
|
if (dev_dax->nr_range > 1) {
|
||||||
"static pgmap / multi-range device conflict\n"))
|
dev_warn(dev,
|
||||||
return -EINVAL;
|
"static pgmap / multi-range device conflict\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
if (!pgmap) {
|
pgmap = dev_dax->pgmap;
|
||||||
pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
|
} else {
|
||||||
* (dev_dax->nr_range - 1), GFP_KERNEL);
|
if (dev_dax->pgmap) {
|
||||||
|
dev_warn(dev,
|
||||||
|
"dynamic-dax with pre-populated page map\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
pgmap = devm_kzalloc(dev,
|
||||||
|
struct_size(pgmap, ranges, dev_dax->nr_range - 1),
|
||||||
|
GFP_KERNEL);
|
||||||
if (!pgmap)
|
if (!pgmap)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
pgmap->nr_range = dev_dax->nr_range;
|
pgmap->nr_range = dev_dax->nr_range;
|
||||||
|
dev_dax->pgmap = pgmap;
|
||||||
|
|
||||||
|
for (i = 0; i < dev_dax->nr_range; i++) {
|
||||||
|
struct range *range = &dev_dax->ranges[i].range;
|
||||||
|
pgmap->ranges[i] = *range;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < dev_dax->nr_range; i++) {
|
for (i = 0; i < dev_dax->nr_range; i++) {
|
||||||
@ -420,12 +446,12 @@ int dev_dax_probe(struct dev_dax *dev_dax)
|
|||||||
i, range->start, range->end);
|
i, range->start, range->end);
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
}
|
}
|
||||||
/* don't update the range for static pgmap */
|
|
||||||
if (!dev_dax->pgmap)
|
|
||||||
pgmap->ranges[i] = *range;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pgmap->type = MEMORY_DEVICE_GENERIC;
|
pgmap->type = MEMORY_DEVICE_GENERIC;
|
||||||
|
if (dev_dax->align > PAGE_SIZE)
|
||||||
|
pgmap->vmemmap_shift =
|
||||||
|
order_base_2(dev_dax->align >> PAGE_SHIFT);
|
||||||
addr = devm_memremap_pages(dev, pgmap);
|
addr = devm_memremap_pages(dev, pgmap);
|
||||||
if (IS_ERR(addr))
|
if (IS_ERR(addr))
|
||||||
return PTR_ERR(addr);
|
return PTR_ERR(addr);
|
||||||
|
@ -98,15 +98,14 @@ static int siw_create_tx_threads(void)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
siw_tx_thread[cpu] =
|
siw_tx_thread[cpu] =
|
||||||
kthread_create(siw_run_sq, (unsigned long *)(long)cpu,
|
kthread_run_on_cpu(siw_run_sq,
|
||||||
"siw_tx/%d", cpu);
|
(unsigned long *)(long)cpu,
|
||||||
|
cpu, "siw_tx/%u");
|
||||||
if (IS_ERR(siw_tx_thread[cpu])) {
|
if (IS_ERR(siw_tx_thread[cpu])) {
|
||||||
siw_tx_thread[cpu] = NULL;
|
siw_tx_thread[cpu] = NULL;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
kthread_bind(siw_tx_thread[cpu], cpu);
|
|
||||||
|
|
||||||
wake_up_process(siw_tx_thread[cpu]);
|
|
||||||
assigned++;
|
assigned++;
|
||||||
}
|
}
|
||||||
return assigned;
|
return assigned;
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include <linux/serial_core.h>
|
#include <linux/serial_core.h>
|
||||||
#include <linux/sysfs.h>
|
#include <linux/sysfs.h>
|
||||||
#include <linux/random.h>
|
#include <linux/random.h>
|
||||||
|
#include <linux/kmemleak.h>
|
||||||
|
|
||||||
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
|
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
@ -524,9 +525,12 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
|
|||||||
size = dt_mem_next_cell(dt_root_size_cells, &prop);
|
size = dt_mem_next_cell(dt_root_size_cells, &prop);
|
||||||
|
|
||||||
if (size &&
|
if (size &&
|
||||||
early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
|
early_init_dt_reserve_memory_arch(base, size, nomap) == 0) {
|
||||||
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
|
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
|
||||||
uname, &base, (unsigned long)(size / SZ_1M));
|
uname, &base, (unsigned long)(size / SZ_1M));
|
||||||
|
if (!nomap)
|
||||||
|
kmemleak_alloc_phys(base, size, 0, 0);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
|
pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
|
||||||
uname, &base, (unsigned long)(size / SZ_1M));
|
uname, &base, (unsigned long)(size / SZ_1M));
|
||||||
|
@ -27,8 +27,8 @@
|
|||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/fiemap.h>
|
#include <linux/fiemap.h>
|
||||||
#include <linux/backing-dev.h>
|
|
||||||
#include <linux/iomap.h>
|
#include <linux/iomap.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
#include "ext4_jbd2.h"
|
#include "ext4_jbd2.h"
|
||||||
#include "ext4_extents.h"
|
#include "ext4_extents.h"
|
||||||
#include "xattr.h"
|
#include "xattr.h"
|
||||||
@ -4404,8 +4404,7 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode)
|
|||||||
err = ext4_es_remove_extent(inode, last_block,
|
err = ext4_es_remove_extent(inode, last_block,
|
||||||
EXT_MAX_BLOCKS - last_block);
|
EXT_MAX_BLOCKS - last_block);
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
cond_resched();
|
memalloc_retry_wait(GFP_ATOMIC);
|
||||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
if (err)
|
if (err)
|
||||||
@ -4413,8 +4412,7 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode)
|
|||||||
retry_remove_space:
|
retry_remove_space:
|
||||||
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
|
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
cond_resched();
|
memalloc_retry_wait(GFP_ATOMIC);
|
||||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
||||||
goto retry_remove_space;
|
goto retry_remove_space;
|
||||||
}
|
}
|
||||||
return err;
|
return err;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include <linux/iomap.h>
|
#include <linux/iomap.h>
|
||||||
#include <linux/fiemap.h>
|
#include <linux/fiemap.h>
|
||||||
#include <linux/iversion.h>
|
#include <linux/iversion.h>
|
||||||
#include <linux/backing-dev.h>
|
#include <linux/sched/mm.h>
|
||||||
|
|
||||||
#include "ext4_jbd2.h"
|
#include "ext4_jbd2.h"
|
||||||
#include "ext4.h"
|
#include "ext4.h"
|
||||||
@ -1929,8 +1929,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
|
|||||||
retry:
|
retry:
|
||||||
err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
|
err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
cond_resched();
|
memalloc_retry_wait(GFP_ATOMIC);
|
||||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -24,7 +24,7 @@
|
|||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/backing-dev.h>
|
#include <linux/sched/mm.h>
|
||||||
|
|
||||||
#include "ext4_jbd2.h"
|
#include "ext4_jbd2.h"
|
||||||
#include "xattr.h"
|
#include "xattr.h"
|
||||||
@ -523,12 +523,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
|||||||
ret = PTR_ERR(bounce_page);
|
ret = PTR_ERR(bounce_page);
|
||||||
if (ret == -ENOMEM &&
|
if (ret == -ENOMEM &&
|
||||||
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
|
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
|
||||||
gfp_flags = GFP_NOFS;
|
gfp_t new_gfp_flags = GFP_NOFS;
|
||||||
if (io->io_bio)
|
if (io->io_bio)
|
||||||
ext4_io_submit(io);
|
ext4_io_submit(io);
|
||||||
else
|
else
|
||||||
gfp_flags |= __GFP_NOFAIL;
|
new_gfp_flags |= __GFP_NOFAIL;
|
||||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
memalloc_retry_wait(gfp_flags);
|
||||||
|
gfp_flags = new_gfp_flags;
|
||||||
goto retry_encrypt;
|
goto retry_encrypt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,9 +8,9 @@
|
|||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/f2fs_fs.h>
|
#include <linux/f2fs_fs.h>
|
||||||
#include <linux/buffer_head.h>
|
#include <linux/buffer_head.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
#include <linux/mpage.h>
|
#include <linux/mpage.h>
|
||||||
#include <linux/writeback.h>
|
#include <linux/writeback.h>
|
||||||
#include <linux/backing-dev.h>
|
|
||||||
#include <linux/pagevec.h>
|
#include <linux/pagevec.h>
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
#include <linux/bio.h>
|
#include <linux/bio.h>
|
||||||
@ -2542,7 +2542,7 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
|
|||||||
/* flush pending IOs and wait for a while in the ENOMEM case */
|
/* flush pending IOs and wait for a while in the ENOMEM case */
|
||||||
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
|
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
|
||||||
f2fs_flush_merged_writes(fio->sbi);
|
f2fs_flush_merged_writes(fio->sbi);
|
||||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
gfp_flags |= __GFP_NOFAIL;
|
gfp_flags |= __GFP_NOFAIL;
|
||||||
goto retry_encrypt;
|
goto retry_encrypt;
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
*/
|
*/
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/backing-dev.h>
|
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/f2fs_fs.h>
|
#include <linux/f2fs_fs.h>
|
||||||
#include <linux/kthread.h>
|
#include <linux/kthread.h>
|
||||||
@ -15,6 +14,7 @@
|
|||||||
#include <linux/freezer.h>
|
#include <linux/freezer.h>
|
||||||
#include <linux/sched/signal.h>
|
#include <linux/sched/signal.h>
|
||||||
#include <linux/random.h>
|
#include <linux/random.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
|
|
||||||
#include "f2fs.h"
|
#include "f2fs.h"
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
@ -1375,8 +1375,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
|
|||||||
if (err) {
|
if (err) {
|
||||||
clear_page_private_gcing(page);
|
clear_page_private_gcing(page);
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
congestion_wait(BLK_RW_ASYNC,
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
DEFAULT_IO_TIMEOUT);
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
if (is_dirty)
|
if (is_dirty)
|
||||||
|
@ -8,8 +8,8 @@
|
|||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/f2fs_fs.h>
|
#include <linux/f2fs_fs.h>
|
||||||
#include <linux/buffer_head.h>
|
#include <linux/buffer_head.h>
|
||||||
#include <linux/backing-dev.h>
|
|
||||||
#include <linux/writeback.h>
|
#include <linux/writeback.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
|
|
||||||
#include "f2fs.h"
|
#include "f2fs.h"
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
@ -562,7 +562,7 @@ struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino)
|
|||||||
inode = f2fs_iget(sb, ino);
|
inode = f2fs_iget(sb, ino);
|
||||||
if (IS_ERR(inode)) {
|
if (IS_ERR(inode)) {
|
||||||
if (PTR_ERR(inode) == -ENOMEM) {
|
if (PTR_ERR(inode) == -ENOMEM) {
|
||||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/f2fs_fs.h>
|
#include <linux/f2fs_fs.h>
|
||||||
#include <linux/mpage.h>
|
#include <linux/mpage.h>
|
||||||
#include <linux/backing-dev.h>
|
#include <linux/sched/mm.h>
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
#include <linux/pagevec.h>
|
#include <linux/pagevec.h>
|
||||||
#include <linux/swap.h>
|
#include <linux/swap.h>
|
||||||
@ -2750,7 +2750,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
|
|||||||
retry:
|
retry:
|
||||||
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
|
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
|
||||||
if (!ipage) {
|
if (!ipage) {
|
||||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <asm/unaligned.h>
|
#include <asm/unaligned.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/f2fs_fs.h>
|
#include <linux/f2fs_fs.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
#include "f2fs.h"
|
#include "f2fs.h"
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
#include "segment.h"
|
#include "segment.h"
|
||||||
@ -587,7 +588,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
|
|||||||
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
|
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
|
||||||
if (err) {
|
if (err) {
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
goto retry_dn;
|
goto retry_dn;
|
||||||
}
|
}
|
||||||
goto out;
|
goto out;
|
||||||
@ -670,8 +671,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
|
|||||||
err = check_index_in_prev_nodes(sbi, dest, &dn);
|
err = check_index_in_prev_nodes(sbi, dest, &dn);
|
||||||
if (err) {
|
if (err) {
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
congestion_wait(BLK_RW_ASYNC,
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
DEFAULT_IO_TIMEOUT);
|
|
||||||
goto retry_prev;
|
goto retry_prev;
|
||||||
}
|
}
|
||||||
goto err;
|
goto err;
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <linux/f2fs_fs.h>
|
#include <linux/f2fs_fs.h>
|
||||||
#include <linux/bio.h>
|
#include <linux/bio.h>
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
#include <linux/prefetch.h>
|
#include <linux/prefetch.h>
|
||||||
#include <linux/kthread.h>
|
#include <linux/kthread.h>
|
||||||
#include <linux/swap.h>
|
#include <linux/swap.h>
|
||||||
@ -245,9 +246,7 @@ static int __revoke_inmem_pages(struct inode *inode,
|
|||||||
LOOKUP_NODE);
|
LOOKUP_NODE);
|
||||||
if (err) {
|
if (err) {
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
congestion_wait(BLK_RW_ASYNC,
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
DEFAULT_IO_TIMEOUT);
|
|
||||||
cond_resched();
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
err = -EAGAIN;
|
err = -EAGAIN;
|
||||||
@ -424,9 +423,7 @@ static int __f2fs_commit_inmem_pages(struct inode *inode)
|
|||||||
err = f2fs_do_write_data_page(&fio);
|
err = f2fs_do_write_data_page(&fio);
|
||||||
if (err) {
|
if (err) {
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
congestion_wait(BLK_RW_ASYNC,
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
DEFAULT_IO_TIMEOUT);
|
|
||||||
cond_resched();
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
|
@ -8,9 +8,9 @@
|
|||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
#include <linux/statfs.h>
|
#include <linux/statfs.h>
|
||||||
#include <linux/buffer_head.h>
|
#include <linux/buffer_head.h>
|
||||||
#include <linux/backing-dev.h>
|
|
||||||
#include <linux/kthread.h>
|
#include <linux/kthread.h>
|
||||||
#include <linux/parser.h>
|
#include <linux/parser.h>
|
||||||
#include <linux/mount.h>
|
#include <linux/mount.h>
|
||||||
@ -2415,8 +2415,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
|
|||||||
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
|
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
|
||||||
if (IS_ERR(page)) {
|
if (IS_ERR(page)) {
|
||||||
if (PTR_ERR(page) == -ENOMEM) {
|
if (PTR_ERR(page) == -ENOMEM) {
|
||||||
congestion_wait(BLK_RW_ASYNC,
|
memalloc_retry_wait(GFP_NOFS);
|
||||||
DEFAULT_IO_TIMEOUT);
|
|
||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
|
||||||
|
@ -409,10 +409,11 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
|
|||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* end == 0 indicates that the entire range after
|
* end == 0 indicates that the entire range after start should be
|
||||||
* start should be unmapped.
|
* unmapped. Note, end is exclusive, whereas the interval tree takes
|
||||||
|
* an inclusive "last".
|
||||||
*/
|
*/
|
||||||
vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
|
vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
|
||||||
unsigned long v_offset;
|
unsigned long v_offset;
|
||||||
unsigned long v_end;
|
unsigned long v_end;
|
||||||
|
|
||||||
|
49
fs/inode.c
49
fs/inode.c
@ -526,6 +526,55 @@ void __remove_inode_hash(struct inode *inode)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__remove_inode_hash);
|
EXPORT_SYMBOL(__remove_inode_hash);
|
||||||
|
|
||||||
|
void dump_mapping(const struct address_space *mapping)
|
||||||
|
{
|
||||||
|
struct inode *host;
|
||||||
|
const struct address_space_operations *a_ops;
|
||||||
|
struct hlist_node *dentry_first;
|
||||||
|
struct dentry *dentry_ptr;
|
||||||
|
struct dentry dentry;
|
||||||
|
unsigned long ino;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If mapping is an invalid pointer, we don't want to crash
|
||||||
|
* accessing it, so probe everything depending on it carefully.
|
||||||
|
*/
|
||||||
|
if (get_kernel_nofault(host, &mapping->host) ||
|
||||||
|
get_kernel_nofault(a_ops, &mapping->a_ops)) {
|
||||||
|
pr_warn("invalid mapping:%px\n", mapping);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!host) {
|
||||||
|
pr_warn("aops:%ps\n", a_ops);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
|
||||||
|
get_kernel_nofault(ino, &host->i_ino)) {
|
||||||
|
pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!dentry_first) {
|
||||||
|
pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
|
||||||
|
if (get_kernel_nofault(dentry, dentry_ptr)) {
|
||||||
|
pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
|
||||||
|
a_ops, ino, dentry_ptr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if dentry is corrupted, the %pd handler may still crash,
|
||||||
|
* but it's unlikely that we reach here with a corrupt mapping
|
||||||
|
*/
|
||||||
|
pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
|
||||||
|
}
|
||||||
|
|
||||||
void clear_inode(struct inode *inode)
|
void clear_inode(struct inode *inode)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -430,7 +430,7 @@ static int ioctl_file_dedupe_range(struct file *file,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
size = offsetof(struct file_dedupe_range __user, info[count]);
|
size = offsetof(struct file_dedupe_range, info[count]);
|
||||||
if (size > PAGE_SIZE) {
|
if (size > PAGE_SIZE) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
/**
|
/*
|
||||||
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
|
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
|
* Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
|
||||||
|
@ -2040,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
|
|||||||
int i, idx;
|
int i, idx;
|
||||||
struct ocfs2_extent_list *el, *left_el, *right_el;
|
struct ocfs2_extent_list *el, *left_el, *right_el;
|
||||||
struct ocfs2_extent_rec *left_rec, *right_rec;
|
struct ocfs2_extent_rec *left_rec, *right_rec;
|
||||||
struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
|
struct buffer_head *root_bh;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update the counts and position values within all the
|
* Update the counts and position values within all the
|
||||||
|
@ -1799,20 +1799,20 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
|
|||||||
*/
|
*/
|
||||||
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
|
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
|
||||||
cluster_of_pages, mmap_page);
|
cluster_of_pages, mmap_page);
|
||||||
if (ret && ret != -EAGAIN) {
|
if (ret) {
|
||||||
mlog_errno(ret);
|
/*
|
||||||
goto out_quota;
|
* ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
|
||||||
}
|
* the target page. In this case, we exit with no error and no target
|
||||||
|
* page. This will trigger the caller, page_mkwrite(), to re-try
|
||||||
|
* the operation.
|
||||||
|
*/
|
||||||
|
if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) {
|
||||||
|
BUG_ON(wc->w_target_page);
|
||||||
|
ret = 0;
|
||||||
|
goto out_quota;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
mlog_errno(ret);
|
||||||
* ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
|
|
||||||
* the target page. In this case, we exit with no error and no target
|
|
||||||
* page. This will trigger the caller, page_mkwrite(), to re-try
|
|
||||||
* the operation.
|
|
||||||
*/
|
|
||||||
if (ret == -EAGAIN) {
|
|
||||||
BUG_ON(wc->w_target_page);
|
|
||||||
ret = 0;
|
|
||||||
goto out_quota;
|
goto out_quota;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,7 +120,8 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
|
|||||||
define_mask(KTHREAD),
|
define_mask(KTHREAD),
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
|
static struct attribute *mlog_default_attrs[MLOG_MAX_BITS] = {NULL, };
|
||||||
|
ATTRIBUTE_GROUPS(mlog_default);
|
||||||
|
|
||||||
static ssize_t mlog_show(struct kobject *obj, struct attribute *attr,
|
static ssize_t mlog_show(struct kobject *obj, struct attribute *attr,
|
||||||
char *buf)
|
char *buf)
|
||||||
@ -144,8 +145,8 @@ static const struct sysfs_ops mlog_attr_ops = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static struct kobj_type mlog_ktype = {
|
static struct kobj_type mlog_ktype = {
|
||||||
.default_attrs = mlog_attr_ptrs,
|
.default_groups = mlog_default_groups,
|
||||||
.sysfs_ops = &mlog_attr_ops,
|
.sysfs_ops = &mlog_attr_ops,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct kset mlog_kset = {
|
static struct kset mlog_kset = {
|
||||||
@ -157,10 +158,10 @@ int mlog_sys_init(struct kset *o2cb_kset)
|
|||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
while (mlog_attrs[i].attr.mode) {
|
while (mlog_attrs[i].attr.mode) {
|
||||||
mlog_attr_ptrs[i] = &mlog_attrs[i].attr;
|
mlog_default_attrs[i] = &mlog_attrs[i].attr;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
mlog_attr_ptrs[i] = NULL;
|
mlog_default_attrs[i] = NULL;
|
||||||
|
|
||||||
kobject_set_name(&mlog_kset.kobj, "logmask");
|
kobject_set_name(&mlog_kset.kobj, "logmask");
|
||||||
mlog_kset.kobj.kset = o2cb_kset;
|
mlog_kset.kobj.kset = o2cb_kset;
|
||||||
|
@ -3343,7 +3343,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
|
|||||||
struct ocfs2_dir_entry *de, *last_de = NULL;
|
struct ocfs2_dir_entry *de, *last_de = NULL;
|
||||||
char *de_buf, *limit;
|
char *de_buf, *limit;
|
||||||
unsigned long offset = 0;
|
unsigned long offset = 0;
|
||||||
unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
|
unsigned int rec_len, new_rec_len, free_space;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This calculates how many free bytes we'd have in block zero, should
|
* This calculates how many free bytes we'd have in block zero, should
|
||||||
|
@ -94,6 +94,7 @@ static struct attribute *ocfs2_filecheck_attrs[] = {
|
|||||||
&ocfs2_filecheck_attr_set.attr,
|
&ocfs2_filecheck_attr_set.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
ATTRIBUTE_GROUPS(ocfs2_filecheck);
|
||||||
|
|
||||||
static void ocfs2_filecheck_release(struct kobject *kobj)
|
static void ocfs2_filecheck_release(struct kobject *kobj)
|
||||||
{
|
{
|
||||||
@ -138,7 +139,7 @@ static const struct sysfs_ops ocfs2_filecheck_ops = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static struct kobj_type ocfs2_ktype_filecheck = {
|
static struct kobj_type ocfs2_ktype_filecheck = {
|
||||||
.default_attrs = ocfs2_filecheck_attrs,
|
.default_groups = ocfs2_filecheck_groups,
|
||||||
.sysfs_ops = &ocfs2_filecheck_ops,
|
.sysfs_ops = &ocfs2_filecheck_ops,
|
||||||
.release = ocfs2_filecheck_release,
|
.release = ocfs2_filecheck_release,
|
||||||
};
|
};
|
||||||
|
@ -1669,8 +1669,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
|
|||||||
status = jbd2_journal_load(journal);
|
status = jbd2_journal_load(journal);
|
||||||
if (status < 0) {
|
if (status < 0) {
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
if (!igrab(inode))
|
BUG_ON(!igrab(inode));
|
||||||
BUG();
|
|
||||||
jbd2_journal_destroy(journal);
|
jbd2_journal_destroy(journal);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
@ -1699,8 +1698,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
|
|||||||
if (status < 0)
|
if (status < 0)
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
|
|
||||||
if (!igrab(inode))
|
BUG_ON(!igrab(inode));
|
||||||
BUG();
|
|
||||||
|
|
||||||
jbd2_journal_destroy(journal);
|
jbd2_journal_destroy(journal);
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <linux/pagewalk.h>
|
#include <linux/pagewalk.h>
|
||||||
#include <linux/vmacache.h>
|
#include <linux/vmacache.h>
|
||||||
|
#include <linux/mm_inline.h>
|
||||||
#include <linux/hugetlb.h>
|
#include <linux/hugetlb.h>
|
||||||
#include <linux/huge_mm.h>
|
#include <linux/huge_mm.h>
|
||||||
#include <linux/mount.h>
|
#include <linux/mount.h>
|
||||||
@ -308,6 +309,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
|||||||
|
|
||||||
name = arch_vma_name(vma);
|
name = arch_vma_name(vma);
|
||||||
if (!name) {
|
if (!name) {
|
||||||
|
const char *anon_name;
|
||||||
|
|
||||||
if (!mm) {
|
if (!mm) {
|
||||||
name = "[vdso]";
|
name = "[vdso]";
|
||||||
goto done;
|
goto done;
|
||||||
@ -319,8 +322,16 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
|||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_stack(vma))
|
if (is_stack(vma)) {
|
||||||
name = "[stack]";
|
name = "[stack]";
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
anon_name = vma_anon_name(vma);
|
||||||
|
if (anon_name) {
|
||||||
|
seq_pad(m, ' ');
|
||||||
|
seq_printf(m, "[anon:%s]", anon_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/magic.h>
|
#include <linux/magic.h>
|
||||||
#include <linux/xattr.h>
|
#include <linux/xattr.h>
|
||||||
|
#include <linux/backing-dev.h>
|
||||||
|
|
||||||
#include "squashfs_fs.h"
|
#include "squashfs_fs.h"
|
||||||
#include "squashfs_fs_sb.h"
|
#include "squashfs_fs_sb.h"
|
||||||
@ -112,6 +113,24 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(
|
|||||||
return decompressor;
|
return decompressor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int squashfs_bdi_init(struct super_block *sb)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
unsigned int major = MAJOR(sb->s_dev);
|
||||||
|
unsigned int minor = MINOR(sb->s_dev);
|
||||||
|
|
||||||
|
bdi_put(sb->s_bdi);
|
||||||
|
sb->s_bdi = &noop_backing_dev_info;
|
||||||
|
|
||||||
|
err = super_setup_bdi_name(sb, "squashfs_%u_%u", major, minor);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
sb->s_bdi->ra_pages = 0;
|
||||||
|
sb->s_bdi->io_pages = 0;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
|
static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
|
||||||
{
|
{
|
||||||
@ -127,6 +146,20 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
|
|||||||
|
|
||||||
TRACE("Entered squashfs_fill_superblock\n");
|
TRACE("Entered squashfs_fill_superblock\n");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* squashfs provides 'backing_dev_info' in order to disable read-ahead. For
|
||||||
|
* squashfs, I/O is not deferred, it is done immediately in readpage,
|
||||||
|
* which means the user would always have to wait their own I/O. So the effect
|
||||||
|
* of readahead is very weak for squashfs. squashfs_bdi_init will set
|
||||||
|
* sb->s_bdi->ra_pages and sb->s_bdi->io_pages to 0 and close readahead for
|
||||||
|
* squashfs.
|
||||||
|
*/
|
||||||
|
err = squashfs_bdi_init(sb);
|
||||||
|
if (err) {
|
||||||
|
errorf(fc, "squashfs init bdi failed");
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
|
sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
|
||||||
if (sb->s_fs_info == NULL) {
|
if (sb->s_fs_info == NULL) {
|
||||||
ERROR("Failed to allocate squashfs_sb_info\n");
|
ERROR("Failed to allocate squashfs_sb_info\n");
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <linux/sched/signal.h>
|
#include <linux/sched/signal.h>
|
||||||
#include <linux/sched/mm.h>
|
#include <linux/sched/mm.h>
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
|
#include <linux/mm_inline.h>
|
||||||
#include <linux/mmu_notifier.h>
|
#include <linux/mmu_notifier.h>
|
||||||
#include <linux/poll.h>
|
#include <linux/poll.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
@ -877,7 +878,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
|||||||
new_flags, vma->anon_vma,
|
new_flags, vma->anon_vma,
|
||||||
vma->vm_file, vma->vm_pgoff,
|
vma->vm_file, vma->vm_pgoff,
|
||||||
vma_policy(vma),
|
vma_policy(vma),
|
||||||
NULL_VM_UFFD_CTX);
|
NULL_VM_UFFD_CTX, vma_anon_name(vma));
|
||||||
if (prev)
|
if (prev)
|
||||||
vma = prev;
|
vma = prev;
|
||||||
else
|
else
|
||||||
@ -1436,7 +1437,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
|
|||||||
prev = vma_merge(mm, prev, start, vma_end, new_flags,
|
prev = vma_merge(mm, prev, start, vma_end, new_flags,
|
||||||
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
||||||
vma_policy(vma),
|
vma_policy(vma),
|
||||||
((struct vm_userfaultfd_ctx){ ctx }));
|
((struct vm_userfaultfd_ctx){ ctx }),
|
||||||
|
vma_anon_name(vma));
|
||||||
if (prev) {
|
if (prev) {
|
||||||
vma = prev;
|
vma = prev;
|
||||||
goto next;
|
goto next;
|
||||||
@ -1613,7 +1615,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
|
|||||||
prev = vma_merge(mm, prev, start, vma_end, new_flags,
|
prev = vma_merge(mm, prev, start, vma_end, new_flags,
|
||||||
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
||||||
vma_policy(vma),
|
vma_policy(vma),
|
||||||
NULL_VM_UFFD_CTX);
|
NULL_VM_UFFD_CTX, vma_anon_name(vma));
|
||||||
if (prev) {
|
if (prev) {
|
||||||
vma = prev;
|
vma = prev;
|
||||||
goto next;
|
goto next;
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
* All Rights Reserved.
|
* All Rights Reserved.
|
||||||
*/
|
*/
|
||||||
#include "xfs.h"
|
#include "xfs.h"
|
||||||
#include <linux/backing-dev.h>
|
|
||||||
#include "xfs_message.h"
|
#include "xfs_message.h"
|
||||||
#include "xfs_trace.h"
|
#include "xfs_trace.h"
|
||||||
|
|
||||||
@ -26,6 +25,6 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
|
|||||||
"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
|
"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
|
||||||
current->comm, current->pid,
|
current->comm, current->pid,
|
||||||
(unsigned int)size, __func__, lflags);
|
(unsigned int)size, __func__, lflags);
|
||||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
memalloc_retry_wait(lflags);
|
||||||
} while (1);
|
} while (1);
|
||||||
}
|
}
|
||||||
|
@ -394,7 +394,7 @@ xfs_buf_alloc_pages(
|
|||||||
}
|
}
|
||||||
|
|
||||||
XFS_STATS_INC(bp->b_mount, xb_page_retries);
|
XFS_STATS_INC(bp->b_mount, xb_page_retries);
|
||||||
congestion_wait(BLK_RW_ASYNC, HZ / 50);
|
memalloc_retry_wait(gfp_mask);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -295,7 +295,6 @@ extern bool libceph_compatible(void *data);
|
|||||||
|
|
||||||
extern const char *ceph_msg_type_name(int type);
|
extern const char *ceph_msg_type_name(int type);
|
||||||
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
|
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
|
||||||
extern void *ceph_kvmalloc(size_t size, gfp_t flags);
|
|
||||||
|
|
||||||
struct fs_parameter;
|
struct fs_parameter;
|
||||||
struct fc_log;
|
struct fc_log;
|
||||||
|
@ -11,12 +11,19 @@
|
|||||||
#include <linux/mutex.h>
|
#include <linux/mutex.h>
|
||||||
#include <linux/time64.h>
|
#include <linux/time64.h>
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
#include <linux/random.h>
|
||||||
|
|
||||||
/* Minimal region size. Every damon_region is aligned by this. */
|
/* Minimal region size. Every damon_region is aligned by this. */
|
||||||
#define DAMON_MIN_REGION PAGE_SIZE
|
#define DAMON_MIN_REGION PAGE_SIZE
|
||||||
/* Max priority score for DAMON-based operation schemes */
|
/* Max priority score for DAMON-based operation schemes */
|
||||||
#define DAMOS_MAX_SCORE (99)
|
#define DAMOS_MAX_SCORE (99)
|
||||||
|
|
||||||
|
/* Get a random number in [l, r) */
|
||||||
|
static inline unsigned long damon_rand(unsigned long l, unsigned long r)
|
||||||
|
{
|
||||||
|
return l + prandom_u32_max(r - l);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct damon_addr_range - Represents an address region of [@start, @end).
|
* struct damon_addr_range - Represents an address region of [@start, @end).
|
||||||
* @start: Start address of the region (inclusive).
|
* @start: Start address of the region (inclusive).
|
||||||
@ -185,6 +192,22 @@ struct damos_watermarks {
|
|||||||
bool activated;
|
bool activated;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct damos_stat - Statistics on a given scheme.
|
||||||
|
* @nr_tried: Total number of regions that the scheme is tried to be applied.
|
||||||
|
* @sz_tried: Total size of regions that the scheme is tried to be applied.
|
||||||
|
* @nr_applied: Total number of regions that the scheme is applied.
|
||||||
|
* @sz_applied: Total size of regions that the scheme is applied.
|
||||||
|
* @qt_exceeds: Total number of times the quota of the scheme has exceeded.
|
||||||
|
*/
|
||||||
|
struct damos_stat {
|
||||||
|
unsigned long nr_tried;
|
||||||
|
unsigned long sz_tried;
|
||||||
|
unsigned long nr_applied;
|
||||||
|
unsigned long sz_applied;
|
||||||
|
unsigned long qt_exceeds;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct damos - Represents a Data Access Monitoring-based Operation Scheme.
|
* struct damos - Represents a Data Access Monitoring-based Operation Scheme.
|
||||||
* @min_sz_region: Minimum size of target regions.
|
* @min_sz_region: Minimum size of target regions.
|
||||||
@ -196,8 +219,7 @@ struct damos_watermarks {
|
|||||||
* @action: &damo_action to be applied to the target regions.
|
* @action: &damo_action to be applied to the target regions.
|
||||||
* @quota: Control the aggressiveness of this scheme.
|
* @quota: Control the aggressiveness of this scheme.
|
||||||
* @wmarks: Watermarks for automated (in)activation of this scheme.
|
* @wmarks: Watermarks for automated (in)activation of this scheme.
|
||||||
* @stat_count: Total number of regions that this scheme is applied.
|
* @stat: Statistics of this scheme.
|
||||||
* @stat_sz: Total size of regions that this scheme is applied.
|
|
||||||
* @list: List head for siblings.
|
* @list: List head for siblings.
|
||||||
*
|
*
|
||||||
* For each aggregation interval, DAMON finds regions which fit in the
|
* For each aggregation interval, DAMON finds regions which fit in the
|
||||||
@ -228,8 +250,7 @@ struct damos {
|
|||||||
enum damos_action action;
|
enum damos_action action;
|
||||||
struct damos_quota quota;
|
struct damos_quota quota;
|
||||||
struct damos_watermarks wmarks;
|
struct damos_watermarks wmarks;
|
||||||
unsigned long stat_count;
|
struct damos_stat stat;
|
||||||
unsigned long stat_sz;
|
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -274,7 +295,8 @@ struct damon_ctx;
|
|||||||
* as an integer in [0, &DAMOS_MAX_SCORE].
|
* as an integer in [0, &DAMOS_MAX_SCORE].
|
||||||
* @apply_scheme is called from @kdamond when a region for user provided
|
* @apply_scheme is called from @kdamond when a region for user provided
|
||||||
* DAMON-based operation scheme is found. It should apply the scheme's action
|
* DAMON-based operation scheme is found. It should apply the scheme's action
|
||||||
* to the region. This is not used for &DAMON_ARBITRARY_TARGET case.
|
* to the region and return bytes of the region that the action is successfully
|
||||||
|
* applied.
|
||||||
* @target_valid should check whether the target is still valid for the
|
* @target_valid should check whether the target is still valid for the
|
||||||
* monitoring.
|
* monitoring.
|
||||||
* @cleanup is called from @kdamond just before its termination.
|
* @cleanup is called from @kdamond just before its termination.
|
||||||
@ -288,8 +310,9 @@ struct damon_primitive {
|
|||||||
int (*get_scheme_score)(struct damon_ctx *context,
|
int (*get_scheme_score)(struct damon_ctx *context,
|
||||||
struct damon_target *t, struct damon_region *r,
|
struct damon_target *t, struct damon_region *r,
|
||||||
struct damos *scheme);
|
struct damos *scheme);
|
||||||
int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t,
|
unsigned long (*apply_scheme)(struct damon_ctx *context,
|
||||||
struct damon_region *r, struct damos *scheme);
|
struct damon_target *t, struct damon_region *r,
|
||||||
|
struct damos *scheme);
|
||||||
bool (*target_valid)(void *target);
|
bool (*target_valid)(void *target);
|
||||||
void (*cleanup)(struct damon_ctx *context);
|
void (*cleanup)(struct damon_ctx *context);
|
||||||
};
|
};
|
||||||
@ -392,14 +415,20 @@ struct damon_ctx {
|
|||||||
struct list_head schemes;
|
struct list_head schemes;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define damon_next_region(r) \
|
static inline struct damon_region *damon_next_region(struct damon_region *r)
|
||||||
(container_of(r->list.next, struct damon_region, list))
|
{
|
||||||
|
return container_of(r->list.next, struct damon_region, list);
|
||||||
|
}
|
||||||
|
|
||||||
#define damon_prev_region(r) \
|
static inline struct damon_region *damon_prev_region(struct damon_region *r)
|
||||||
(container_of(r->list.prev, struct damon_region, list))
|
{
|
||||||
|
return container_of(r->list.prev, struct damon_region, list);
|
||||||
|
}
|
||||||
|
|
||||||
#define damon_last_region(t) \
|
static inline struct damon_region *damon_last_region(struct damon_target *t)
|
||||||
(list_last_entry(&t->regions_list, struct damon_region, list))
|
{
|
||||||
|
return list_last_entry(&t->regions_list, struct damon_region, list);
|
||||||
|
}
|
||||||
|
|
||||||
#define damon_for_each_region(r, t) \
|
#define damon_for_each_region(r, t) \
|
||||||
list_for_each_entry(r, &t->regions_list, list)
|
list_for_each_entry(r, &t->regions_list, list)
|
||||||
@ -422,9 +451,18 @@ struct damon_ctx {
|
|||||||
#ifdef CONFIG_DAMON
|
#ifdef CONFIG_DAMON
|
||||||
|
|
||||||
struct damon_region *damon_new_region(unsigned long start, unsigned long end);
|
struct damon_region *damon_new_region(unsigned long start, unsigned long end);
|
||||||
inline void damon_insert_region(struct damon_region *r,
|
|
||||||
|
/*
|
||||||
|
* Add a region between two other regions
|
||||||
|
*/
|
||||||
|
static inline void damon_insert_region(struct damon_region *r,
|
||||||
struct damon_region *prev, struct damon_region *next,
|
struct damon_region *prev, struct damon_region *next,
|
||||||
struct damon_target *t);
|
struct damon_target *t)
|
||||||
|
{
|
||||||
|
__list_add(&r->list, &prev->list, &next->list);
|
||||||
|
t->nr_regions++;
|
||||||
|
}
|
||||||
|
|
||||||
void damon_add_region(struct damon_region *r, struct damon_target *t);
|
void damon_add_region(struct damon_region *r, struct damon_target *t);
|
||||||
void damon_destroy_region(struct damon_region *r, struct damon_target *t);
|
void damon_destroy_region(struct damon_region *r, struct damon_target *t);
|
||||||
|
|
||||||
@ -461,34 +499,13 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
|
|||||||
#endif /* CONFIG_DAMON */
|
#endif /* CONFIG_DAMON */
|
||||||
|
|
||||||
#ifdef CONFIG_DAMON_VADDR
|
#ifdef CONFIG_DAMON_VADDR
|
||||||
|
|
||||||
/* Monitoring primitives for virtual memory address spaces */
|
|
||||||
void damon_va_init(struct damon_ctx *ctx);
|
|
||||||
void damon_va_update(struct damon_ctx *ctx);
|
|
||||||
void damon_va_prepare_access_checks(struct damon_ctx *ctx);
|
|
||||||
unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
|
|
||||||
bool damon_va_target_valid(void *t);
|
bool damon_va_target_valid(void *t);
|
||||||
void damon_va_cleanup(struct damon_ctx *ctx);
|
|
||||||
int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t,
|
|
||||||
struct damon_region *r, struct damos *scheme);
|
|
||||||
int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
|
|
||||||
struct damon_region *r, struct damos *scheme);
|
|
||||||
void damon_va_set_primitives(struct damon_ctx *ctx);
|
void damon_va_set_primitives(struct damon_ctx *ctx);
|
||||||
|
|
||||||
#endif /* CONFIG_DAMON_VADDR */
|
#endif /* CONFIG_DAMON_VADDR */
|
||||||
|
|
||||||
#ifdef CONFIG_DAMON_PADDR
|
#ifdef CONFIG_DAMON_PADDR
|
||||||
|
|
||||||
/* Monitoring primitives for the physical memory address space */
|
|
||||||
void damon_pa_prepare_access_checks(struct damon_ctx *ctx);
|
|
||||||
unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
|
|
||||||
bool damon_pa_target_valid(void *t);
|
bool damon_pa_target_valid(void *t);
|
||||||
int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t,
|
|
||||||
struct damon_region *r, struct damos *scheme);
|
|
||||||
int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
|
|
||||||
struct damon_region *r, struct damos *scheme);
|
|
||||||
void damon_pa_set_primitives(struct damon_ctx *ctx);
|
void damon_pa_set_primitives(struct damon_ctx *ctx);
|
||||||
|
|
||||||
#endif /* CONFIG_DAMON_PADDR */
|
#endif /* CONFIG_DAMON_PADDR */
|
||||||
|
|
||||||
#endif /* _DAMON_H */
|
#endif /* _DAMON_H */
|
||||||
|
@ -3093,6 +3093,7 @@ extern void unlock_new_inode(struct inode *);
|
|||||||
extern void discard_new_inode(struct inode *);
|
extern void discard_new_inode(struct inode *);
|
||||||
extern unsigned int get_next_ino(void);
|
extern unsigned int get_next_ino(void);
|
||||||
extern void evict_inodes(struct super_block *sb);
|
extern void evict_inodes(struct super_block *sb);
|
||||||
|
void dump_mapping(const struct address_space *);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Userspace may rely on the inode number being non-zero. For example, glibc
|
* Userspace may rely on the inode number being non-zero. For example, glibc
|
||||||
|
@ -302,7 +302,9 @@ struct vm_area_struct;
|
|||||||
* lowest zone as a type of emergency reserve.
|
* lowest zone as a type of emergency reserve.
|
||||||
*
|
*
|
||||||
* %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
|
* %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
|
||||||
* address.
|
* address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory
|
||||||
|
* because the DMA32 kmalloc cache array is not implemented.
|
||||||
|
* (Reason: there is no such user in the kernel).
|
||||||
*
|
*
|
||||||
* %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
|
* %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
|
||||||
* do not need to be directly accessible by the kernel but that cannot
|
* do not need to be directly accessible by the kernel but that cannot
|
||||||
@ -598,9 +600,9 @@ struct page *alloc_pages(gfp_t gfp, unsigned int order);
|
|||||||
struct folio *folio_alloc(gfp_t gfp, unsigned order);
|
struct folio *folio_alloc(gfp_t gfp, unsigned order);
|
||||||
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
|
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
|
||||||
struct vm_area_struct *vma, unsigned long addr,
|
struct vm_area_struct *vma, unsigned long addr,
|
||||||
int node, bool hugepage);
|
bool hugepage);
|
||||||
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
|
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
|
||||||
alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
|
alloc_pages_vma(gfp_mask, order, vma, addr, true)
|
||||||
#else
|
#else
|
||||||
static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
|
static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
|
||||||
{
|
{
|
||||||
@ -610,14 +612,14 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
|
|||||||
{
|
{
|
||||||
return __folio_alloc_node(gfp, order, numa_node_id());
|
return __folio_alloc_node(gfp, order, numa_node_id());
|
||||||
}
|
}
|
||||||
#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
|
#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\
|
||||||
alloc_pages(gfp_mask, order)
|
alloc_pages(gfp_mask, order)
|
||||||
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
|
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
|
||||||
alloc_pages(gfp_mask, order)
|
alloc_pages(gfp_mask, order)
|
||||||
#endif
|
#endif
|
||||||
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
|
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
|
||||||
#define alloc_page_vma(gfp_mask, vma, addr) \
|
#define alloc_page_vma(gfp_mask, vma, addr) \
|
||||||
alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
|
alloc_pages_vma(gfp_mask, 0, vma, addr, false)
|
||||||
|
|
||||||
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
|
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
|
||||||
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
|
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
|
||||||
|
@ -622,8 +622,8 @@ struct hstate {
|
|||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_CGROUP_HUGETLB
|
#ifdef CONFIG_CGROUP_HUGETLB
|
||||||
/* cgroup control files */
|
/* cgroup control files */
|
||||||
struct cftype cgroup_files_dfl[7];
|
struct cftype cgroup_files_dfl[8];
|
||||||
struct cftype cgroup_files_legacy[9];
|
struct cftype cgroup_files_legacy[10];
|
||||||
#endif
|
#endif
|
||||||
char name[HSTATE_NAME_LEN];
|
char name[HSTATE_NAME_LEN];
|
||||||
};
|
};
|
||||||
|
@ -36,6 +36,11 @@ enum hugetlb_memory_event {
|
|||||||
HUGETLB_NR_MEMORY_EVENTS,
|
HUGETLB_NR_MEMORY_EVENTS,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct hugetlb_cgroup_per_node {
|
||||||
|
/* hugetlb usage in pages over all hstates. */
|
||||||
|
unsigned long usage[HUGE_MAX_HSTATE];
|
||||||
|
};
|
||||||
|
|
||||||
struct hugetlb_cgroup {
|
struct hugetlb_cgroup {
|
||||||
struct cgroup_subsys_state css;
|
struct cgroup_subsys_state css;
|
||||||
|
|
||||||
@ -57,6 +62,8 @@ struct hugetlb_cgroup {
|
|||||||
|
|
||||||
/* Handle for "hugetlb.events.local" */
|
/* Handle for "hugetlb.events.local" */
|
||||||
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
|
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
|
||||||
|
|
||||||
|
struct hugetlb_cgroup_per_node *nodeinfo[];
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct hugetlb_cgroup *
|
static inline struct hugetlb_cgroup *
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user