mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-16 18:08:20 +00:00
- Yosry Ahmed brought back some cgroup v1 stats in OOM logs.
- Yosry has also eliminated cgroup's atomic rstat flushing. - Nhat Pham adds the new cachestat() syscall. It provides userspace with the ability to query pagecache status - a similar concept to mincore() but more powerful and with improved usability. - Mel Gorman provides more optimizations for compaction, reducing the prevalence of page rescanning. - Lorenzo Stoakes has done some maintanance work on the get_user_pages() interface. - Liam Howlett continues with cleanups and maintenance work to the maple tree code. Peng Zhang also does some work on maple tree. - Johannes Weiner has done some cleanup work on the compaction code. - David Hildenbrand has contributed additional selftests for get_user_pages(). - Thomas Gleixner has contributed some maintenance and optimization work for the vmalloc code. - Baolin Wang has provided some compaction cleanups, - SeongJae Park continues maintenance work on the DAMON code. - Huang Ying has done some maintenance on the swap code's usage of device refcounting. - Christoph Hellwig has some cleanups for the filemap/directio code. - Ryan Roberts provides two patch series which yield some rationalization of the kernel's access to pte entries - use the provided APIs rather than open-coding accesses. - Lorenzo Stoakes has some fixes to the interaction between pagecache and directio access to file mappings. - John Hubbard has a series of fixes to the MM selftesting code. - ZhangPeng continues the folio conversion campaign. - Hugh Dickins has been working on the pagetable handling code, mainly with a view to reducing the load on the mmap_lock. - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment from 128 to 8. - Domenico Cerasuolo has improved the zswap reclaim mechanism by reorganizing the LRU management. - Matthew Wilcox provides some fixups to make gfs2 work better with the buffer_head code. - Vishal Moola also has done some folio conversion work. 
- Matthew Wilcox has removed the remnants of the pagevec code - their functionality is migrated over to struct folio_batch. -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZJejewAKCRDdBJ7gKXxA joggAPwKMfT9lvDBEUnJagY7dbDPky1cSYZdJKxxM2cApGa42gEA6Cl8HRAWqSOh J0qXCzqaaN8+BuEyLGDVPaXur9KirwY= =B7yQ -----END PGP SIGNATURE----- Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull mm updates from Andrew Morton: - Yosry Ahmed brought back some cgroup v1 stats in OOM logs - Yosry has also eliminated cgroup's atomic rstat flushing - Nhat Pham adds the new cachestat() syscall. It provides userspace with the ability to query pagecache status - a similar concept to mincore() but more powerful and with improved usability - Mel Gorman provides more optimizations for compaction, reducing the prevalence of page rescanning - Lorenzo Stoakes has done some maintanance work on the get_user_pages() interface - Liam Howlett continues with cleanups and maintenance work to the maple tree code. 
Peng Zhang also does some work on maple tree - Johannes Weiner has done some cleanup work on the compaction code - David Hildenbrand has contributed additional selftests for get_user_pages() - Thomas Gleixner has contributed some maintenance and optimization work for the vmalloc code - Baolin Wang has provided some compaction cleanups, - SeongJae Park continues maintenance work on the DAMON code - Huang Ying has done some maintenance on the swap code's usage of device refcounting - Christoph Hellwig has some cleanups for the filemap/directio code - Ryan Roberts provides two patch series which yield some rationalization of the kernel's access to pte entries - use the provided APIs rather than open-coding accesses - Lorenzo Stoakes has some fixes to the interaction between pagecache and directio access to file mappings - John Hubbard has a series of fixes to the MM selftesting code - ZhangPeng continues the folio conversion campaign - Hugh Dickins has been working on the pagetable handling code, mainly with a view to reducing the load on the mmap_lock - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment from 128 to 8 - Domenico Cerasuolo has improved the zswap reclaim mechanism by reorganizing the LRU management - Matthew Wilcox provides some fixups to make gfs2 work better with the buffer_head code - Vishal Moola also has done some folio conversion work - Matthew Wilcox has removed the remnants of the pagevec code - their functionality is migrated over to struct folio_batch * tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits) mm/hugetlb: remove hugetlb_set_page_subpool() mm: nommu: correct the range of mmap_sem_read_lock in task_mem() hugetlb: revert use of page_cache_next_miss() Revert "page cache: fix page_cache_next/prev_miss off by one" mm/vmscan: fix root proactive reclaim unthrottling unbalanced node mm: memcg: rename and document global_reclaim() mm: kill [add|del]_page_to_lru_list() mm: 
compaction: convert to use a folio in isolate_migratepages_block() mm: zswap: fix double invalidate with exclusive loads mm: remove unnecessary pagevec includes mm: remove references to pagevec mm: rename invalidate_mapping_pagevec to mapping_try_invalidate mm: remove struct pagevec net: convert sunrpc from pagevec to folio_batch i915: convert i915_gpu_error to use a folio_batch pagevec: rename fbatch_count() mm: remove check_move_unevictable_pages() drm: convert drm_gem_put_pages() to use a folio_batch i915: convert shmem_sg_free_table() to use a folio_batch scatterlist: add sg_set_folio() ...
This commit is contained in:
commit
6e17c6de3d
@ -297,7 +297,7 @@ Lock order is as follows::
|
||||
|
||||
Page lock (PG_locked bit of page->flags)
|
||||
mm->page_table_lock or split pte_lock
|
||||
lock_page_memcg (memcg->move_lock)
|
||||
folio_memcg_lock (memcg->move_lock)
|
||||
mapping->i_pages lock
|
||||
lruvec->lru_lock.
|
||||
|
||||
|
@ -1580,6 +1580,13 @@ PAGE_SIZE multiple when read back.
|
||||
|
||||
Healthy workloads are not expected to reach this limit.
|
||||
|
||||
memory.swap.peak
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
|
||||
The max swap usage recorded for the cgroup and its
|
||||
descendants since the creation of the cgroup.
|
||||
|
||||
memory.swap.max
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default is "max".
|
||||
|
@ -119,9 +119,9 @@ set size has chronologically changed.::
|
||||
Data Access Pattern Aware Memory Management
|
||||
===========================================
|
||||
|
||||
Below three commands make every memory region of size >=4K that doesn't
|
||||
accessed for >=60 seconds in your workload to be swapped out. ::
|
||||
Below command makes every memory region of size >=4K that has not been accessed for
|
||||
>=60 seconds in your workload to be swapped out. ::
|
||||
|
||||
$ echo "#min-size max-size min-acc max-acc min-age max-age action" > test_scheme
|
||||
$ echo "4K max 0 0 60s max pageout" >> test_scheme
|
||||
$ damo schemes -c test_scheme <pid of your workload>
|
||||
$ sudo damo schemes --damos_access_rate 0 0 --damos_sz_region 4K max \
|
||||
--damos_age 60s max --damos_action pageout \
|
||||
<pid of your workload>
|
||||
|
@ -10,9 +10,8 @@ DAMON provides below interfaces for different users.
|
||||
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
||||
system administrators who want a just-working human-friendly interface.
|
||||
Using this, users can use the DAMON’s major features in a human-friendly way.
|
||||
It may not be highly tuned for special cases, though. It supports both
|
||||
virtual and physical address spaces monitoring. For more detail, please
|
||||
refer to its `usage document
|
||||
It may not be highly tuned for special cases, though. For more detail,
|
||||
please refer to its `usage document
|
||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
||||
- *sysfs interface.*
|
||||
:ref:`This <sysfs_interface>` is for privileged user space programmers who
|
||||
@ -20,11 +19,7 @@ DAMON provides below interfaces for different users.
|
||||
features by reading from and writing to special sysfs files. Therefore,
|
||||
you can write and use your personalized DAMON sysfs wrapper programs that
|
||||
reads/writes the sysfs files instead of you. The `DAMON user space tool
|
||||
<https://github.com/awslabs/damo>`_ is one example of such programs. It
|
||||
supports both virtual and physical address spaces monitoring. Note that this
|
||||
interface provides only simple :ref:`statistics <damos_stats>` for the
|
||||
monitoring results. For detailed monitoring results, DAMON provides a
|
||||
:ref:`tracepoint <tracepoint>`.
|
||||
<https://github.com/awslabs/damo>`_ is one example of such programs.
|
||||
- *debugfs interface. (DEPRECATED!)*
|
||||
:ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
|
||||
<sysfs_interface>`. This is deprecated, so users should move to the
|
||||
@ -139,7 +134,7 @@ scheme of the kdamond. Writing ``clear_schemes_tried_regions`` to ``state``
|
||||
file clears the DAMON-based operating scheme action tried regions directory for
|
||||
each DAMON-based operation scheme of the kdamond. For details of the
|
||||
DAMON-based operation scheme action tried regions directory, please refer to
|
||||
:ref:tried_regions section <sysfs_schemes_tried_regions>`.
|
||||
:ref:`tried_regions section <sysfs_schemes_tried_regions>`.
|
||||
|
||||
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
|
||||
|
||||
@ -259,12 +254,9 @@ be equal or smaller than ``start`` of directory ``N+1``.
|
||||
contexts/<N>/schemes/
|
||||
---------------------
|
||||
|
||||
For usual DAMON-based data access aware memory management optimizations, users
|
||||
would normally want the system to apply a memory management action to a memory
|
||||
region of a specific access pattern. DAMON receives such formalized operation
|
||||
schemes from the user and applies those to the target memory regions. Users
|
||||
can get and set the schemes by reading from and writing to files under this
|
||||
directory.
|
||||
The directory for DAMON-based Operation Schemes (:ref:`DAMOS
|
||||
<damon_design_damos>`). Users can get and set the schemes by reading from and
|
||||
writing to files under this directory.
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_schemes``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
@ -277,12 +269,12 @@ In each scheme directory, five directories (``access_pattern``, ``quotas``,
|
||||
``watermarks``, ``filters``, ``stats``, and ``tried_regions``) and one file
|
||||
(``action``) exist.
|
||||
|
||||
The ``action`` file is for setting and getting what action you want to apply to
|
||||
memory regions having specific access pattern of the interest. The keywords
|
||||
that can be written to and read from the file and their meaning are as below.
|
||||
The ``action`` file is for setting and getting the scheme's :ref:`action
|
||||
<damon_design_damos_action>`. The keywords that can be written to and read
|
||||
from the file and their meaning are as below.
|
||||
|
||||
Note that support of each action depends on the running DAMON operations set
|
||||
`implementation <sysfs_contexts>`.
|
||||
:ref:`implementation <sysfs_contexts>`.
|
||||
|
||||
- ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``.
|
||||
Supported by ``vaddr`` and ``fvaddr`` operations set.
|
||||
@ -304,32 +296,21 @@ Note that support of each action depends on the running DAMON operations set
|
||||
schemes/<N>/access_pattern/
|
||||
---------------------------
|
||||
|
||||
The target access pattern of each DAMON-based operation scheme is constructed
|
||||
with three ranges including the size of the region in bytes, number of
|
||||
monitored accesses per aggregate interval, and number of aggregated intervals
|
||||
for the age of the region.
|
||||
The directory for the target access :ref:`pattern
|
||||
<damon_design_damos_access_pattern>` of the given DAMON-based operation scheme.
|
||||
|
||||
Under the ``access_pattern`` directory, three directories (``sz``,
|
||||
``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
|
||||
exist. You can set and get the access pattern for the given scheme by writing
|
||||
to and reading from the ``min`` and ``max`` files under ``sz``,
|
||||
``nr_accesses``, and ``age`` directories, respectively.
|
||||
``nr_accesses``, and ``age`` directories, respectively. Note that the ``min``
|
||||
and the ``max`` form a closed interval.
|
||||
|
||||
schemes/<N>/quotas/
|
||||
-------------------
|
||||
|
||||
Optimal ``target access pattern`` for each ``action`` is workload dependent, so
|
||||
not easy to find. Worse yet, setting a scheme of some action too aggressive
|
||||
can cause severe overhead. To avoid such overhead, users can limit time and
|
||||
size quota for each scheme. In detail, users can ask DAMON to try to use only
|
||||
up to specific time (``time quota``) for applying the action, and to apply the
|
||||
action to only up to specific amount (``size quota``) of memory regions having
|
||||
the target access pattern within a given time interval (``reset interval``).
|
||||
|
||||
When the quota limit is expected to be exceeded, DAMON prioritizes found memory
|
||||
regions of the ``target access pattern`` based on their size, access frequency,
|
||||
and age. For personalized prioritization, users can set the weights for the
|
||||
three properties.
|
||||
The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
|
||||
DAMON-based operation scheme.
|
||||
|
||||
Under ``quotas`` directory, three files (``ms``, ``bytes``,
|
||||
``reset_interval_ms``) and one directory (``weights``) having three files
|
||||
@ -337,23 +318,26 @@ Under ``quotas`` directory, three files (``ms``, ``bytes``,
|
||||
|
||||
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
|
||||
``reset interval`` in milliseconds by writing the values to the three files,
|
||||
respectively. You can also set the prioritization weights for size, access
|
||||
frequency, and age in per-thousand unit by writing the values to the three
|
||||
files under the ``weights`` directory.
|
||||
respectively. Then, DAMON tries to use only up to ``time quota`` milliseconds
|
||||
for applying the ``action`` to memory regions of the ``access_pattern``, and to
|
||||
apply the action to only up to ``bytes`` bytes of memory regions within the
|
||||
``reset_interval_ms``. Setting both ``ms`` and ``bytes`` zero disables the
|
||||
quota limits.
|
||||
|
||||
You can also set the :ref:`prioritization weights
|
||||
<damon_design_damos_quotas_prioritization>` for size, access frequency, and age
|
||||
in per-thousand unit by writing the values to the three files under the
|
||||
``weights`` directory.
|
||||
|
||||
schemes/<N>/watermarks/
|
||||
-----------------------
|
||||
|
||||
To allow easy activation and deactivation of each scheme based on system
|
||||
status, DAMON provides a feature called watermarks. The feature receives five
|
||||
values called ``metric``, ``interval``, ``high``, ``mid``, and ``low``. The
|
||||
``metric`` is the system metric such as free memory ratio that can be measured.
|
||||
If the metric value of the system is higher than the value in ``high`` or lower
|
||||
than ``low`` at the moment, the scheme is deactivated. If the value is lower
|
||||
than ``mid``, the scheme is activated.
|
||||
The directory for the :ref:`watermarks <damon_design_damos_watermarks>` of the
|
||||
given DAMON-based operation scheme.
|
||||
|
||||
Under the watermarks directory, five files (``metric``, ``interval_us``,
|
||||
``high``, ``mid``, and ``low``) for setting each value exist. You can set and
|
||||
``high``, ``mid``, and ``low``) for setting the metric, the time interval
|
||||
between checks of the metric, and the three watermarks exist. You can set and
|
||||
get the five values by writing to the files, respectively.
|
||||
|
||||
Keywords and meanings of those that can be written to the ``metric`` file are
|
||||
@ -367,12 +351,8 @@ The ``interval`` should written in microseconds unit.
|
||||
schemes/<N>/filters/
|
||||
--------------------
|
||||
|
||||
Users could know something more than the kernel for specific types of memory.
|
||||
In the case, users could do their own management for the memory and hence
|
||||
doesn't want DAMOS bothers that. Users could limit DAMOS by setting the access
|
||||
pattern of the scheme and/or the monitoring regions for the purpose, but that
|
||||
can be inefficient in some cases. In such cases, users could set non-access
|
||||
pattern driven filters using files in this directory.
|
||||
The directory for the :ref:`filters <damon_design_damos_filters>` of the given
|
||||
DAMON-based operation scheme.
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_filters``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
@ -432,13 +412,17 @@ starting from ``0`` under this directory. Each directory contains files
|
||||
exposing detailed information about each of the memory region that the
|
||||
corresponding scheme's ``action`` has tried to be applied under this directory,
|
||||
during next :ref:`aggregation interval <sysfs_monitoring_attrs>`. The
|
||||
information includes address range, ``nr_accesses``, , and ``age`` of the
|
||||
region.
|
||||
information includes address range, ``nr_accesses``, and ``age`` of the region.
|
||||
|
||||
The directories will be removed when another special keyword,
|
||||
``clear_schemes_tried_regions``, is written to the relevant
|
||||
``kdamonds/<N>/state`` file.
|
||||
|
||||
The expected usage of this directory is investigations of schemes' behaviors,
|
||||
and query-like efficient data access monitoring results retrievals. For the
|
||||
latter use case, in particular, users can set the ``action`` as ``stat`` and
|
||||
set the ``access pattern`` as their interested pattern that they want to query.
|
||||
|
||||
tried_regions/<N>/
|
||||
------------------
|
||||
|
||||
@ -600,15 +584,10 @@ update.
|
||||
Schemes
|
||||
-------
|
||||
|
||||
For usual DAMON-based data access aware memory management optimizations, users
|
||||
would simply want the system to apply a memory management action to a memory
|
||||
region of a specific access pattern. DAMON receives such formalized operation
|
||||
schemes from the user and applies those to the target processes.
|
||||
|
||||
Users can get and set the schemes by reading from and writing to ``schemes``
|
||||
debugfs file. Reading the file also shows the statistics of each scheme. To
|
||||
the file, each of the schemes should be represented in each line in below
|
||||
form::
|
||||
Users can get and set the DAMON-based operation :ref:`schemes
|
||||
<damon_design_damos>` by reading from and writing to ``schemes`` debugfs file.
|
||||
Reading the file also shows the statistics of each scheme. To the file, each
|
||||
of the schemes should be represented in each line in below form::
|
||||
|
||||
<target access pattern> <action> <quota> <watermarks>
|
||||
|
||||
@ -617,8 +596,9 @@ You can disable schemes by simply writing an empty string to the file.
|
||||
Target Access Pattern
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The ``<target access pattern>`` is constructed with three ranges in below
|
||||
form::
|
||||
The target access :ref:`pattern <damon_design_damos_access_pattern>` of the
|
||||
scheme. The ``<target access pattern>`` is constructed with three ranges in
|
||||
below form::
|
||||
|
||||
min-size max-size min-acc max-acc min-age max-age
|
||||
|
||||
@ -631,9 +611,9 @@ closed interval.
|
||||
Action
|
||||
~~~~~~
|
||||
|
||||
The ``<action>`` is a predefined integer for memory management actions, which
|
||||
DAMON will apply to the regions having the target access pattern. The
|
||||
supported numbers and their meanings are as below.
|
||||
The ``<action>`` is a predefined integer for memory management :ref:`actions
|
||||
<damon_design_damos_action>`. The supported numbers and their meanings are as
|
||||
below.
|
||||
|
||||
- 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``. Ignored if
|
||||
``target`` is ``paddr``.
|
||||
@ -649,10 +629,8 @@ supported numbers and their meanings are as below.
|
||||
Quota
|
||||
~~~~~
|
||||
|
||||
Optimal ``target access pattern`` for each ``action`` is workload dependent, so
|
||||
not easy to find. Worse yet, setting a scheme of some action too aggressive
|
||||
can cause severe overhead. To avoid such overhead, users can limit time and
|
||||
size quota for the scheme via the ``<quota>`` in below form::
|
||||
Users can set the :ref:`quotas <damon_design_damos_quotas>` of the given scheme
|
||||
via the ``<quota>`` in below form::
|
||||
|
||||
<ms> <sz> <reset interval> <priority weights>
|
||||
|
||||
@ -662,19 +640,17 @@ the action to memory regions of the ``target access pattern`` within the
|
||||
``<sz>`` bytes of memory regions within the ``<reset interval>``. Setting both
|
||||
``<ms>`` and ``<sz>`` zero disables the quota limits.
|
||||
|
||||
When the quota limit is expected to be exceeded, DAMON prioritizes found memory
|
||||
regions of the ``target access pattern`` based on their size, access frequency,
|
||||
and age. For personalized prioritization, users can set the weights for the
|
||||
three properties in ``<priority weights>`` in below form::
|
||||
For the :ref:`prioritization <damon_design_damos_quotas_prioritization>`, users
|
||||
can set the weights for the three properties in ``<priority weights>`` in below
|
||||
form::
|
||||
|
||||
<size weight> <access frequency weight> <age weight>
|
||||
|
||||
Watermarks
|
||||
~~~~~~~~~~
|
||||
|
||||
Some schemes would need to run based on current value of the system's specific
|
||||
metrics like free memory ratio. For such cases, users can specify watermarks
|
||||
for the condition.::
|
||||
Users can specify :ref:`watermarks <damon_design_damos_watermarks>` of the
|
||||
given scheme via ``<watermarks>`` in below form::
|
||||
|
||||
<metric> <check interval> <high mark> <middle mark> <low mark>
|
||||
|
||||
@ -797,10 +773,12 @@ root directory only.
|
||||
Tracepoint for Monitoring Results
|
||||
=================================
|
||||
|
||||
DAMON provides the monitoring results via a tracepoint,
|
||||
``damon:damon_aggregated``. While the monitoring is turned on, you could
|
||||
record the tracepoint events and show results using tracepoint supporting tools
|
||||
like ``perf``. For example::
|
||||
Users can get the monitoring results via the :ref:`tried_regions
|
||||
<sysfs_schemes_tried_regions>` or a tracepoint, ``damon:damon_aggregated``.
|
||||
While the tried regions directory is useful for getting a snapshot, the
|
||||
tracepoint is useful for getting a full record of the results. While the
|
||||
monitoring is turned on, you could record the tracepoint events and show
|
||||
results using tracepoint supporting tools like ``perf``. For example::
|
||||
|
||||
# echo on > monitor_on
|
||||
# perf record -e damon:damon_aggregated &
|
||||
|
@ -107,9 +107,12 @@ effectively disables ``panic_on_warn`` for KASAN reports.
|
||||
Alternatively, independent of ``panic_on_warn``, the ``kasan.fault=`` boot
|
||||
parameter can be used to control panic and reporting behaviour:
|
||||
|
||||
- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
|
||||
report or also panic the kernel (default: ``report``). The panic happens even
|
||||
if ``kasan_multi_shot`` is enabled.
|
||||
- ``kasan.fault=report``, ``=panic``, or ``=panic_on_write`` controls whether
|
||||
to only print a KASAN report, panic the kernel, or panic the kernel on
|
||||
invalid writes only (default: ``report``). The panic happens even if
|
||||
``kasan_multi_shot`` is enabled. Note that when using asynchronous mode of
|
||||
Hardware Tag-Based KASAN, ``kasan.fault=panic_on_write`` always panics on
|
||||
asynchronously checked accesses (including reads).
|
||||
|
||||
Software and Hardware Tag-Based KASAN modes (see the section about various
|
||||
modes below) support altering stack trace collection behavior:
|
||||
|
@ -36,6 +36,7 @@ Running the selftests (hotplug tests are run in limited mode)
|
||||
|
||||
To build the tests::
|
||||
|
||||
$ make headers
|
||||
$ make -C tools/testing/selftests
|
||||
|
||||
To run the tests::
|
||||
|
@ -4,31 +4,55 @@
|
||||
Design
|
||||
======
|
||||
|
||||
Configurable Layers
|
||||
===================
|
||||
|
||||
DAMON provides data access monitoring functionality while making the accuracy
|
||||
and the overhead controllable. The fundamental access monitorings require
|
||||
primitives that dependent on and optimized for the target address space. On
|
||||
the other hand, the accuracy and overhead tradeoff mechanism, which is the core
|
||||
of DAMON, is in the pure logic space. DAMON separates the two parts in
|
||||
different layers and defines its interface to allow various low level
|
||||
primitives implementations configurable with the core logic. We call the low
|
||||
level primitives implementations monitoring operations.
|
||||
Overall Architecture
|
||||
====================
|
||||
|
||||
Due to this separated design and the configurable interface, users can extend
|
||||
DAMON for any address space by configuring the core logics with appropriate
|
||||
monitoring operations. If appropriate one is not provided, users can implement
|
||||
the operations on their own.
|
||||
DAMON subsystem is configured with three layers including
|
||||
|
||||
- Operations Set: Implements fundamental operations for DAMON that depend on
|
||||
the given monitoring target address-space and available set of
|
||||
software/hardware primitives,
|
||||
- Core: Implements core logics including monitoring overhead/accuracy control
|
||||
and access-aware system operations on top of the operations set layer, and
|
||||
- Modules: Implements kernel modules for various purposes that provide
|
||||
interfaces for the user space, on top of the core layer.
|
||||
|
||||
|
||||
Configurable Operations Set
|
||||
---------------------------
|
||||
|
||||
For data access monitoring and additional low level work, DAMON needs a set of
|
||||
implementations for specific operations that are dependent on and optimized for
|
||||
the given target address space. On the other hand, the accuracy and overhead
|
||||
tradeoff mechanism, which is the core logic of DAMON, is in the pure logic
|
||||
space. DAMON separates the two parts in different layers, namely DAMON
|
||||
Operations Set and DAMON Core Logics Layers, respectively. It further defines
|
||||
the interface between the layers to allow various operations sets to be
|
||||
configured with the core logic.
|
||||
|
||||
Due to this design, users can extend DAMON for any address space by configuring
|
||||
the core logic to use the appropriate operations set. If any appropriate set
|
||||
is unavailable, users can implement one on their own.
|
||||
|
||||
For example, physical memory, virtual memory, swap space, those for specific
|
||||
processes, NUMA nodes, files, and backing memory devices would be supportable.
|
||||
Also, if some architectures or devices support special optimized access check
|
||||
primitives, those will be easily configurable.
|
||||
Also, if some architectures or devices support special optimized access
|
||||
check primitives, those will be easily configurable.
|
||||
|
||||
|
||||
Reference Implementations of Address Space Specific Monitoring Operations
|
||||
=========================================================================
|
||||
Programmable Modules
|
||||
--------------------
|
||||
|
||||
Core layer of DAMON is implemented as a framework, and exposes its application
|
||||
programming interface to all kernel space components such as subsystems and
|
||||
modules. For common use cases of DAMON, DAMON subsystem provides kernel
|
||||
modules that are built on top of the core layer using the API, which can be easily
|
||||
used by the user space end users.
|
||||
|
||||
|
||||
Operations Set Layer
|
||||
====================
|
||||
|
||||
The monitoring operations are defined in two parts:
|
||||
|
||||
@ -90,8 +114,12 @@ conflict with the reclaim logic using ``PG_idle`` and ``PG_young`` page flags,
|
||||
as Idle page tracking does.
|
||||
|
||||
|
||||
Address Space Independent Core Mechanisms
|
||||
=========================================
|
||||
Core Logics
|
||||
===========
|
||||
|
||||
|
||||
Monitoring
|
||||
----------
|
||||
|
||||
Below four sections describe each of the DAMON core mechanisms and the five
|
||||
monitoring attributes, ``sampling interval``, ``aggregation interval``,
|
||||
@ -100,7 +128,7 @@ regions``.
|
||||
|
||||
|
||||
Access Frequency Monitoring
|
||||
---------------------------
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The output of DAMON says what pages are how frequently accessed for a given
|
||||
duration. The resolution of the access frequency is controlled by setting
|
||||
@ -127,7 +155,7 @@ size of the target workload grows.
|
||||
|
||||
|
||||
Region Based Sampling
|
||||
---------------------
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To avoid the unbounded increase of the overhead, DAMON groups adjacent pages
|
||||
that are assumed to have the same access frequencies into a region. As long as the
|
||||
@ -144,7 +172,7 @@ assumption is not guaranteed.
|
||||
|
||||
|
||||
Adaptive Regions Adjustment
|
||||
---------------------------
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Even if somehow the initial monitoring target regions are well constructed to
|
||||
fulfill the assumption (pages in same region have similar access frequencies),
|
||||
@ -162,8 +190,22 @@ In this way, DAMON provides its best-effort quality and minimal overhead while
|
||||
keeping the bounds users set for their trade-off.
|
||||
|
||||
|
||||
Age Tracking
|
||||
~~~~~~~~~~~~
|
||||
|
||||
By analyzing the monitoring results, users can also find how long the current
|
||||
access pattern of a region has been maintained. That could be used for good
|
||||
understanding of the access pattern. For example, page placement algorithm
|
||||
utilizing both the frequency and the recency could be implemented using that.
|
||||
To make such access pattern maintained period analysis easier, DAMON maintains
|
||||
yet another counter called ``age`` in each region. For each ``aggregation
|
||||
interval``, DAMON checks if the region's size and access frequency
|
||||
(``nr_accesses``) have significantly changed. If so, the counter is reset to
|
||||
zero. Otherwise, the counter is increased.
|
||||
|
||||
|
||||
Dynamic Target Space Updates Handling
|
||||
-------------------------------------
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The monitoring target address range could be dynamically changed. For example,
|
||||
virtual memory could be dynamically mapped and unmapped. Physical memory could
|
||||
@ -174,3 +216,246 @@ monitoring operations to check dynamic changes including memory mapping changes
|
||||
and applies it to monitoring operations-related data structures such as the
|
||||
abstracted monitoring target memory area only for each of a user-specified time
|
||||
interval (``update interval``).
|
||||
|
||||
|
||||
.. _damon_design_damos:
|
||||
|
||||
Operation Schemes
|
||||
-----------------
|
||||
|
||||
One common purpose of data access monitoring is access-aware system efficiency
|
||||
optimizations. For example,
|
||||
|
||||
paging out memory regions that are not accessed for more than two minutes
|
||||
|
||||
or
|
||||
|
||||
using THP for memory regions that are larger than 2 MiB and showing a high
|
||||
access frequency for more than one minute.
|
||||
|
||||
One straightforward approach for such schemes would be profile-guided
|
||||
optimizations. That is, getting data access monitoring results of the
|
||||
workloads or the system using DAMON, finding memory regions of special
|
||||
characteristics by profiling the monitoring results, and making system
|
||||
operation changes for the regions. The changes could be made by modifying or
|
||||
providing advice to the software (the application and/or the kernel), or
|
||||
reconfiguring the hardware. Both offline and online approaches could be
|
||||
available.
|
||||
|
||||
Among those, providing advice to the kernel at runtime would be flexible and
|
||||
effective, and therefore be widely used. However, implementing such schemes
|
||||
could impose unnecessary redundancy and inefficiency. The profiling could be
|
||||
redundant if the type of interest is common. Exchanging the information
|
||||
including monitoring results and operation advice between kernel and user
|
||||
spaces could be inefficient.
|
||||
|
||||
To allow users to reduce such redundancy and inefficiencies by offloading the
|
||||
works, DAMON provides a feature called Data Access Monitoring-based Operation
|
||||
Schemes (DAMOS). It lets users specify their desired schemes at a high
|
||||
level. For such specifications, DAMON starts monitoring, finds regions having
|
||||
the access pattern of interest, and applies the user-desired operation actions
|
||||
to the regions as soon as found.
|
||||
|
||||
|
||||
.. _damon_design_damos_action:
|
||||
|
||||
Operation Action
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
The management action that the users desire to apply to the regions of their
|
||||
interest. For example, paging out, prioritizing for next reclamation victim
|
||||
selection, advising ``khugepaged`` to collapse or split, or doing nothing but
|
||||
collecting statistics of the regions.
|
||||
|
||||
The list of supported actions is defined in DAMOS, but the implementation of
|
||||
each action is in the DAMON operations set layer because the implementation
|
||||
normally depends on the monitoring target address space. For example, the code
|
||||
for paging specific virtual address ranges out would be different from that for
|
||||
physical address ranges. And the monitoring operations implementation sets are
|
||||
not mandated to support all actions of the list. Hence, the availability of
|
||||
specific DAMOS action depends on what operations set is selected to be used
|
||||
together.
|
||||
|
||||
Applying an action to a region is considered as changing the region's
|
||||
characteristics. Hence, DAMOS resets the age of regions when an action is
|
||||
applied to those.
|
||||
|
||||
|
||||
.. _damon_design_damos_access_pattern:
|
||||
|
||||
Target Access Pattern
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The access pattern of the schemes' interest. The patterns are constructed with
|
||||
the properties that DAMON's monitoring results provide, specifically the size,
|
||||
the access frequency, and the age. Users can describe their access pattern of
|
||||
interest by setting minimum and maximum values of the three properties. If a
|
||||
region's three properties are in the ranges, DAMOS classifies it as one of the
|
||||
regions that the scheme is having an interest in.
|
||||
|
||||
|
||||
.. _damon_design_damos_quotas:
|
||||
|
||||
Quotas
|
||||
~~~~~~
|
||||
|
||||
DAMOS upper-bound overhead control feature. DAMOS could incur high overhead if
|
||||
the target access pattern is not properly tuned. For example, if a huge memory
|
||||
region having the access pattern of interest is found, applying the scheme's
|
||||
action to all pages of the huge region could consume unacceptably large system
|
||||
resources. Preventing such issues by tuning the access pattern could be
|
||||
challenging, especially if the access patterns of the workloads are highly
|
||||
dynamic.
|
||||
|
||||
To mitigate that situation, DAMOS provides an upper-bound overhead control
|
||||
feature called quotas. It lets users specify an upper limit of time that DAMOS
|
||||
can use for applying the action, and/or a maximum number of bytes of memory regions that
|
||||
the action can be applied within a user-specified time duration.
|
||||
|
||||
|
||||
.. _damon_design_damos_quotas_prioritization:
|
||||
|
||||
Prioritization
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
A mechanism for making a good decision under the quotas. When the action
|
||||
cannot be applied to all regions of interest due to the quotas, DAMOS
|
||||
prioritizes regions and applies the action to only regions having high enough
|
||||
priorities so that it will not exceed the quotas.
|
||||
|
||||
The prioritization mechanism should be different for each action. For example,
|
||||
rarely accessed (colder) memory regions would be prioritized for page-out
|
||||
scheme action. In contrast, the colder regions would be deprioritized for huge
|
||||
page collapse scheme action. Hence, the prioritization mechanisms for each
|
||||
action are implemented in each DAMON operations set, together with the actions.
|
||||
|
||||
Though the implementation is up to the DAMON operations set, it would be common
|
||||
to calculate the priority using the access pattern properties of the regions.
|
||||
Some users would want the mechanisms to be personalized for their specific
|
||||
case. For example, some users would want the mechanism to weigh the recency
|
||||
(``age``) more than the access frequency (``nr_accesses``). DAMOS allows users
|
||||
to specify the weight of each access pattern property and passes the
|
||||
information to the underlying mechanism. Nevertheless, how and even whether
|
||||
the weight will be respected are up to the underlying prioritization mechanism
|
||||
implementation.
|
||||
|
||||
|
||||
.. _damon_design_damos_watermarks:
|
||||
|
||||
Watermarks
|
||||
~~~~~~~~~~
|
||||
|
||||
Conditional DAMOS (de)activation automation. Users might want DAMOS to run
|
||||
only under certain situations. For example, when a sufficient amount of free
|
||||
memory is guaranteed, running a scheme for proactive reclamation would only
|
||||
consume unnecessary system resources. To avoid such consumption, the user would
|
||||
need to manually monitor some metrics such as free memory ratio, and turn
|
||||
DAMON/DAMOS on or off.
|
||||
|
||||
DAMOS allows users to offload such work using three watermarks. It allows the
|
||||
users to configure the metric of their interest, and three watermark values,
|
||||
namely high, middle, and low. If the value of the metric becomes above the
|
||||
high watermark or below the low watermark, the scheme is deactivated. If the
|
||||
metric becomes below the mid watermark but above the low watermark, the scheme
|
||||
is activated. If all schemes are deactivated by the watermarks, the monitoring
|
||||
is also deactivated. In this case, the DAMON worker thread only periodically
|
||||
checks the watermarks and therefore incurs nearly zero overhead.
|
||||
|
||||
|
||||
.. _damon_design_damos_filters:
|
||||
|
||||
Filters
|
||||
~~~~~~~
|
||||
|
||||
Non-access pattern-based target memory regions filtering. If users run
|
||||
self-written programs or have good profiling tools, they could know something
|
||||
more than the kernel, such as future access patterns or some special
|
||||
requirements for specific types of memory. For example, some users may know
|
||||
only anonymous pages can impact their program's performance. They can also
|
||||
have a list of latency-critical processes.
|
||||
|
||||
To let users optimize DAMOS schemes with such special knowledge, DAMOS provides
|
||||
a feature called DAMOS filters. The feature allows users to set an arbitrary
|
||||
number of filters for each scheme. Each filter specifies the type of target
|
||||
memory, and whether it should exclude the memory of the type (filter-out), or
|
||||
all except the memory of the type (filter-in).
|
||||
|
||||
As of this writing, anonymous page type and memory cgroup type are supported by
|
||||
the feature. Some filter target types can require additional arguments. For
|
||||
example, the memory cgroup filter type asks users to specify the file path of
|
||||
the memory cgroup for the filter. Hence, users can apply specific schemes to
|
||||
only anonymous pages, non-anonymous pages, pages of specific cgroups, all pages
|
||||
excluding those of specific cgroups, and any combination of those.
|
||||
|
||||
|
||||
Application Programming Interface
|
||||
---------------------------------
|
||||
|
||||
The programming interface for kernel space data access-aware applications.
|
||||
DAMON is a framework, so it does nothing by itself. Instead, it only helps
|
||||
other kernel components such as subsystems and modules build their data
|
||||
access-aware applications using DAMON's core features. For this, DAMON exposes
|
||||
all its features to other kernel components via its application programming
|
||||
interface, namely ``include/linux/damon.h``. Please refer to the API
|
||||
:doc:`document </mm/damon/api>` for details of the interface.
|
||||
|
||||
|
||||
Modules
|
||||
=======
|
||||
|
||||
Because the core of DAMON is a framework for kernel components, it doesn't
|
||||
provide any direct interface for the user space. Such interfaces should be
|
||||
implemented by each DAMON API user kernel component, instead. DAMON subsystem
|
||||
itself implements such DAMON API user modules, which are supposed to be used
|
||||
for general purpose DAMON control and special purpose data access-aware system
|
||||
operations, and provides stable application binary interfaces (ABI) for the
|
||||
user space. The user space can build their efficient data access-aware
|
||||
applications using the interfaces.
|
||||
|
||||
|
||||
General Purpose User Interface Modules
|
||||
--------------------------------------
|
||||
|
||||
DAMON modules that provide user space ABIs for general purpose DAMON usage in
|
||||
runtime.
|
||||
|
||||
DAMON user interface modules, namely 'DAMON sysfs interface' and 'DAMON debugfs
|
||||
interface' are DAMON API user kernel modules that provide ABIs to the
|
||||
user-space. Please note that DAMON debugfs interface is currently deprecated.
|
||||
|
||||
Like many other ABIs, the modules create files on sysfs and debugfs, allow
|
||||
users to specify their requests to and get the answers from DAMON by writing to
|
||||
and reading from the files. As a response to such I/O, DAMON user interface
|
||||
modules control DAMON and retrieve the results as user requested via the DAMON
|
||||
API, and return the results to the user-space.
|
||||
|
||||
The ABIs are designed to be used for user space applications development,
|
||||
rather than human beings' fingers. Human users are recommended to use such
|
||||
user space tools. One such Python-written user space tool is available at
|
||||
GitHub (https://github.com/awslabs/damo), PyPI
|
||||
(https://pypistats.org/packages/damo), and Fedora
|
||||
(https://packages.fedoraproject.org/pkgs/python-damo/damo/).
|
||||
|
||||
Please refer to the ABI :doc:`document </admin-guide/mm/damon/usage>` for
|
||||
details of the interfaces.
|
||||
|
||||
|
||||
Special-Purpose Access-aware Kernel Modules
|
||||
-------------------------------------------
|
||||
|
||||
DAMON modules that provide user space ABI for specific purpose DAMON usage.
|
||||
|
||||
DAMON sysfs/debugfs user interfaces are for full control of all DAMON features
|
||||
in runtime. For each special-purpose system-wide data access-aware system
|
||||
operations such as proactive reclamation or LRU lists balancing, the interfaces
|
||||
could be simplified by removing unnecessary knobs for the specific purpose, and
|
||||
extended for boot-time and even compile time control. Default values of DAMON
|
||||
control parameters for the usage would also need to be optimized for the
|
||||
purpose.
|
||||
|
||||
To support such cases, yet more DAMON API user kernel modules that provide more
|
||||
simple and optimized user space interfaces are available. Currently, two
|
||||
modules for proactive reclamation and LRU lists manipulation are provided. For
|
||||
more detail, please read the usage documents for those
|
||||
(:doc:`/admin-guide/mm/damon/reclaim` and
|
||||
:doc:`/admin-guide/mm/damon/lru_sort`).
|
||||
|
@ -4,29 +4,6 @@
|
||||
Frequently Asked Questions
|
||||
==========================
|
||||
|
||||
Why a new subsystem, instead of extending perf or other user space tools?
|
||||
=========================================================================
|
||||
|
||||
First, because it needs to be lightweight as much as possible so that it can be
|
||||
used online, any unnecessary overhead such as kernel - user space context
|
||||
switching cost should be avoided. Second, DAMON aims to be used by other
|
||||
programs including the kernel. Therefore, having a dependency on specific
|
||||
tools like perf is not desirable. These are the two biggest reasons why DAMON
|
||||
is implemented in the kernel space.
|
||||
|
||||
|
||||
Can 'idle pages tracking' or 'perf mem' substitute DAMON?
|
||||
=========================================================
|
||||
|
||||
Idle page tracking is a low level primitive for access check of the physical
|
||||
address space. 'perf mem' is similar, though it can use sampling to minimize
|
||||
the overhead. On the other hand, DAMON is a higher-level framework for the
|
||||
monitoring of various address spaces. It is focused on memory management
|
||||
optimization and provides sophisticated accuracy/overhead handling mechanisms.
|
||||
Therefore, 'idle pages tracking' and 'perf mem' could provide a subset of
|
||||
DAMON's output, but cannot substitute DAMON.
|
||||
|
||||
|
||||
Does DAMON support virtual memory only?
|
||||
=======================================
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
DAMON Maintainer Entry Profile
|
||||
==============================
|
||||
|
||||
The DAMON subsystem covers the files that listed in 'DATA ACCESS MONITOR'
|
||||
The DAMON subsystem covers the files that are listed in 'DATA ACCESS MONITOR'
|
||||
section of 'MAINTAINERS' file.
|
||||
|
||||
The mailing lists for the subsystem are damon@lists.linux.dev and
|
||||
@ -15,7 +15,7 @@ SCM Trees
|
||||
|
||||
There are multiple Linux trees for DAMON development. Patches under
|
||||
development or testing are queued in damon/next [2]_ by the DAMON maintainer.
|
||||
Suffieicntly reviewed patches will be queued in mm-unstable [1]_ by the memory
|
||||
Sufficiently reviewed patches will be queued in mm-unstable [1]_ by the memory
|
||||
management subsystem maintainer. After more sufficient tests, the patches will
|
||||
be queued in mm-stable [3]_ , and finally pull-requested to the mainline by the
|
||||
memory management subsystem maintainer.
|
||||
|
@ -73,14 +73,13 @@ In kernel use of migrate_pages()
|
||||
It also prevents the swapper or other scans from encountering
|
||||
the page.
|
||||
|
||||
2. We need to have a function of type new_page_t that can be
|
||||
2. We need to have a function of type new_folio_t that can be
|
||||
passed to migrate_pages(). This function should figure out
|
||||
how to allocate the correct new page given the old page.
|
||||
how to allocate the correct new folio given the old folio.
|
||||
|
||||
3. The migrate_pages() function is called which attempts
|
||||
to do the migration. It will call the function to allocate
|
||||
the new page for each page that is considered for
|
||||
moving.
|
||||
the new folio for each folio that is considered for moving.
|
||||
|
||||
How migrate_pages() works
|
||||
=========================
|
||||
|
@ -14,15 +14,20 @@ tables. Access to higher level tables protected by mm->page_table_lock.
|
||||
There are helpers to lock/unlock a table and other accessor functions:
|
||||
|
||||
- pte_offset_map_lock()
|
||||
maps pte and takes PTE table lock, returns pointer to the taken
|
||||
lock;
|
||||
maps PTE and takes PTE table lock, returns pointer to PTE with
|
||||
pointer to its PTE table lock, or returns NULL if no PTE table;
|
||||
- pte_offset_map_nolock()
|
||||
maps PTE, returns pointer to PTE with pointer to its PTE table
|
||||
lock (not taken), or returns NULL if no PTE table;
|
||||
- pte_offset_map()
|
||||
maps PTE, returns pointer to PTE, or returns NULL if no PTE table;
|
||||
- pte_unmap()
|
||||
unmaps PTE table;
|
||||
- pte_unmap_unlock()
|
||||
unlocks and unmaps PTE table;
|
||||
- pte_alloc_map_lock()
|
||||
allocates PTE table if needed and take the lock, returns pointer
|
||||
to taken lock or NULL if allocation failed;
|
||||
- pte_lockptr()
|
||||
returns pointer to PTE table lock;
|
||||
allocates PTE table if needed and takes its lock, returns pointer to
|
||||
PTE with pointer to its lock, or returns NULL if allocation failed;
|
||||
- pmd_lock()
|
||||
takes PMD table lock, returns pointer to taken lock;
|
||||
- pmd_lockptr()
|
||||
|
@ -55,7 +55,7 @@ mbind()设置一个新的内存策略。一个进程的页面也可以通过sys_
|
||||
消失。它还可以防止交换器或其他扫描器遇到该页。
|
||||
|
||||
|
||||
2. 我们需要有一个new_page_t类型的函数,可以传递给migrate_pages()。这个函数应该计算
|
||||
2. 我们需要有一个new_folio_t类型的函数,可以传递给migrate_pages()。这个函数应该计算
|
||||
出如何在给定的旧页面中分配正确的新页面。
|
||||
|
||||
3. migrate_pages()函数被调用,它试图进行迁移。它将调用该函数为每个被考虑迁移的页面分
|
||||
|
@ -4487,6 +4487,13 @@ S: Supported
|
||||
F: Documentation/filesystems/caching/cachefiles.rst
|
||||
F: fs/cachefiles/
|
||||
|
||||
CACHESTAT: PAGE CACHE STATS FOR A FILE
|
||||
M: Nhat Pham <nphamcs@gmail.com>
|
||||
M: Johannes Weiner <hannes@cmpxchg.org>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: tools/testing/selftests/cachestat/test_cachestat.c
|
||||
|
||||
CADENCE MIPI-CSI2 BRIDGES
|
||||
M: Maxime Ripard <mripard@kernel.org>
|
||||
L: linux-media@vger.kernel.org
|
||||
|
@ -490,3 +490,4 @@
|
||||
558 common process_mrelease sys_process_mrelease
|
||||
559 common futex_waitv sys_futex_waitv
|
||||
560 common set_mempolicy_home_node sys_ni_syscall
|
||||
561 common cachestat sys_cachestat
|
||||
|
@ -74,6 +74,9 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
||||
return 0;
|
||||
|
||||
pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
|
||||
if (unlikely(!pte))
|
||||
return 0;
|
||||
|
||||
if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
|
||||
!pte_write(*pte) || !pte_dirty(*pte))) {
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
|
@ -117,8 +117,11 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
|
||||
* must use the nested version. This also means we need to
|
||||
* open-code the spin-locking.
|
||||
*/
|
||||
ptl = pte_lockptr(vma->vm_mm, pmd);
|
||||
pte = pte_offset_map(pmd, address);
|
||||
if (!pte)
|
||||
return 0;
|
||||
|
||||
ptl = pte_lockptr(vma->vm_mm, pmd);
|
||||
do_pte_lock(ptl);
|
||||
|
||||
ret = do_adjust_pte(vma, address, pfn, pte);
|
||||
|
@ -85,6 +85,9 @@ void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
|
||||
break;
|
||||
|
||||
pte = pte_offset_map(pmd, addr);
|
||||
if (!pte)
|
||||
break;
|
||||
|
||||
pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
|
||||
#ifndef CONFIG_ARM_LPAE
|
||||
pr_cont(", *ppte=%08llx",
|
||||
|
@ -464,3 +464,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -120,6 +120,7 @@ config ARM64
|
||||
select CRC32
|
||||
select DCACHE_WORD_ACCESS
|
||||
select DYNAMIC_FTRACE if FUNCTION_TRACER
|
||||
select DMA_BOUNCE_UNALIGNED_KMALLOC
|
||||
select DMA_DIRECT_REMAP
|
||||
select EDAC_SUPPORT
|
||||
select FRAME_POINTER
|
||||
|
@ -33,6 +33,7 @@
|
||||
* the CPU.
|
||||
*/
|
||||
#define ARCH_DMA_MINALIGN (128)
|
||||
#define ARCH_KMALLOC_MINALIGN (8)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
@ -90,6 +91,8 @@ static inline int cache_line_size_of_cpu(void)
|
||||
|
||||
int cache_line_size(void);
|
||||
|
||||
#define dma_get_cache_alignment cache_line_size
|
||||
|
||||
/*
|
||||
* Read the effective value of CTR_EL0.
|
||||
*
|
||||
|
@ -39,7 +39,7 @@
|
||||
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
|
||||
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
|
||||
|
||||
#define __NR_compat_syscalls 451
|
||||
#define __NR_compat_syscalls 452
|
||||
#endif
|
||||
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
|
@ -907,6 +907,8 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
|
||||
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
||||
#define __NR_set_mempolicy_home_node 450
|
||||
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
|
||||
#define __NR_cachestat 451
|
||||
__SYSCALL(__NR_cachestat, sys_cachestat)
|
||||
|
||||
/*
|
||||
* Please add new compat syscalls above this comment and update
|
||||
|
@ -416,10 +416,9 @@ long get_mte_ctrl(struct task_struct *task)
|
||||
static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
|
||||
struct iovec *kiov, unsigned int gup_flags)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
void __user *buf = kiov->iov_base;
|
||||
size_t len = kiov->iov_len;
|
||||
int ret;
|
||||
int err = 0;
|
||||
int write = gup_flags & FOLL_WRITE;
|
||||
|
||||
if (!access_ok(buf, len))
|
||||
@ -429,14 +428,16 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
|
||||
return -EIO;
|
||||
|
||||
while (len) {
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long tags, offset;
|
||||
void *maddr;
|
||||
struct page *page = NULL;
|
||||
struct page *page = get_user_page_vma_remote(mm, addr,
|
||||
gup_flags, &vma);
|
||||
|
||||
ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
|
||||
&vma, NULL);
|
||||
if (ret <= 0)
|
||||
if (IS_ERR_OR_NULL(page)) {
|
||||
err = page == NULL ? -EIO : PTR_ERR(page);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only copy tags if the page has been mapped as PROT_MTE
|
||||
@ -446,7 +447,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
|
||||
* was never mapped with PROT_MTE.
|
||||
*/
|
||||
if (!(vma->vm_flags & VM_MTE)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
err = -EOPNOTSUPP;
|
||||
put_page(page);
|
||||
break;
|
||||
}
|
||||
@ -479,7 +480,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
|
||||
kiov->iov_len = buf - kiov->iov_base;
|
||||
if (!kiov->iov_len) {
|
||||
/* check for error accessing the tracee's address space */
|
||||
if (ret <= 0)
|
||||
if (err)
|
||||
return -EIO;
|
||||
else
|
||||
return -EFAULT;
|
||||
|
@ -1103,7 +1103,7 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr)
|
||||
bool recover = esr & KASAN_ESR_RECOVER;
|
||||
bool write = esr & KASAN_ESR_WRITE;
|
||||
size_t size = KASAN_ESR_SIZE(esr);
|
||||
u64 addr = regs->regs[0];
|
||||
void *addr = (void *)regs->regs[0];
|
||||
u64 pc = regs->pc;
|
||||
|
||||
kasan_report(addr, size, write, pc);
|
||||
|
@ -188,6 +188,9 @@ static void show_pte(unsigned long addr)
|
||||
break;
|
||||
|
||||
ptep = pte_offset_map(pmdp, addr);
|
||||
if (!ptep)
|
||||
break;
|
||||
|
||||
pte = READ_ONCE(*ptep);
|
||||
pr_cont(", pte=%016llx", pte_val(pte));
|
||||
pte_unmap(ptep);
|
||||
@ -328,7 +331,7 @@ static void report_tag_fault(unsigned long addr, unsigned long esr,
|
||||
* find out access size.
|
||||
*/
|
||||
bool is_write = !!(esr & ESR_ELx_WNR);
|
||||
kasan_report(addr, 0, is_write, regs->pc);
|
||||
kasan_report((void *)addr, 0, is_write, regs->pc);
|
||||
}
|
||||
#else
|
||||
/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
|
||||
|
@ -307,14 +307,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
return NULL;
|
||||
|
||||
WARN_ON(addr & (sz - 1));
|
||||
/*
|
||||
* Note that if this code were ever ported to the
|
||||
* 32-bit arm platform then it will cause trouble in
|
||||
* the case where CONFIG_HIGHPTE is set, since there
|
||||
* will be no pte_unmap() to correspond with this
|
||||
* pte_alloc_map().
|
||||
*/
|
||||
ptep = pte_alloc_map(mm, pmdp, addr);
|
||||
ptep = pte_alloc_huge(mm, pmdp, addr);
|
||||
} else if (sz == PMD_SIZE) {
|
||||
if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
|
||||
ptep = huge_pmd_share(mm, vma, addr, pudp);
|
||||
@ -366,7 +359,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
return (pte_t *)pmdp;
|
||||
|
||||
if (sz == CONT_PTE_SIZE)
|
||||
return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
|
||||
return pte_offset_huge(pmdp, (addr & CONT_PTE_MASK));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -466,7 +466,12 @@ void __init bootmem_init(void)
|
||||
*/
|
||||
void __init mem_init(void)
|
||||
{
|
||||
swiotlb_init(max_pfn > PFN_DOWN(arm64_dma_phys_limit), SWIOTLB_VERBOSE);
|
||||
bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
|
||||
|
||||
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC))
|
||||
swiotlb = true;
|
||||
|
||||
swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
|
||||
|
||||
/* this will put all unused low memory onto the freelists */
|
||||
memblock_free_all();
|
||||
|
@ -371,3 +371,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -41,7 +41,7 @@ huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
if (pud) {
|
||||
pmd = pmd_alloc(mm, pud, taddr);
|
||||
if (pmd)
|
||||
pte = pte_alloc_map(mm, pmd, taddr);
|
||||
pte = pte_alloc_huge(mm, pmd, taddr);
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
@ -64,7 +64,7 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
||||
if (pud_present(*pud)) {
|
||||
pmd = pmd_offset(pud, taddr);
|
||||
if (pmd_present(*pmd))
|
||||
pte = pte_offset_map(pmd, taddr);
|
||||
pte = pte_offset_huge(pmd, taddr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -99,7 +99,7 @@ static inline void load_ksp_mmu(struct task_struct *task)
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
pte_t *pte = NULL;
|
||||
unsigned long mmuar;
|
||||
|
||||
local_irq_save(flags);
|
||||
@ -139,7 +139,7 @@ static inline void load_ksp_mmu(struct task_struct *task)
|
||||
|
||||
pte = (mmuar >= PAGE_OFFSET) ? pte_offset_kernel(pmd, mmuar)
|
||||
: pte_offset_map(pmd, mmuar);
|
||||
if (pte_none(*pte) || !pte_present(*pte))
|
||||
if (!pte || pte_none(*pte) || !pte_present(*pte))
|
||||
goto bug;
|
||||
|
||||
set_pte(pte, pte_mkyoung(*pte));
|
||||
@ -161,6 +161,8 @@ static inline void load_ksp_mmu(struct task_struct *task)
|
||||
bug:
|
||||
pr_info("ksp load failed: mm=0x%p ksp=0x08%lx\n", mm, mmuar);
|
||||
end:
|
||||
if (pte && mmuar < PAGE_OFFSET)
|
||||
pte_unmap(pte);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
@ -488,6 +488,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
|
||||
if (!pmd_present(*pmd))
|
||||
goto bad_access;
|
||||
pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
|
||||
if (!pte)
|
||||
goto bad_access;
|
||||
if (!pte_present(*pte) || !pte_dirty(*pte)
|
||||
|| !pte_write(*pte)) {
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
|
@ -450,3 +450,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -91,7 +91,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
pte_t *pte = NULL;
|
||||
int ret = -1;
|
||||
int asid;
|
||||
|
||||
local_irq_save(flags);
|
||||
@ -100,47 +101,33 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
|
||||
regs->pc + (extension_word * sizeof(long));
|
||||
|
||||
mm = (!user_mode(regs) && KMAPAREA(mmuar)) ? &init_mm : current->mm;
|
||||
if (!mm) {
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
if (!mm)
|
||||
goto out;
|
||||
|
||||
pgd = pgd_offset(mm, mmuar);
|
||||
if (pgd_none(*pgd)) {
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
if (pgd_none(*pgd))
|
||||
goto out;
|
||||
|
||||
p4d = p4d_offset(pgd, mmuar);
|
||||
if (p4d_none(*p4d)) {
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
if (p4d_none(*p4d))
|
||||
goto out;
|
||||
|
||||
pud = pud_offset(p4d, mmuar);
|
||||
if (pud_none(*pud)) {
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
if (pud_none(*pud))
|
||||
goto out;
|
||||
|
||||
pmd = pmd_offset(pud, mmuar);
|
||||
if (pmd_none(*pmd)) {
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
if (pmd_none(*pmd))
|
||||
goto out;
|
||||
|
||||
pte = (KMAPAREA(mmuar)) ? pte_offset_kernel(pmd, mmuar)
|
||||
: pte_offset_map(pmd, mmuar);
|
||||
if (pte_none(*pte) || !pte_present(*pte)) {
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
if (!pte || pte_none(*pte) || !pte_present(*pte))
|
||||
goto out;
|
||||
|
||||
if (write) {
|
||||
if (!pte_write(*pte)) {
|
||||
local_irq_restore(flags);
|
||||
return -1;
|
||||
}
|
||||
if (!pte_write(*pte))
|
||||
goto out;
|
||||
set_pte(pte, pte_mkdirty(*pte));
|
||||
}
|
||||
|
||||
@ -161,9 +148,12 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
|
||||
mmu_write(MMUOR, MMUOR_ACC | MMUOR_UAA);
|
||||
else
|
||||
mmu_write(MMUOR, MMUOR_ITLB | MMUOR_ACC | MMUOR_UAA);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (pte && !KMAPAREA(mmuar))
|
||||
pte_unmap(pte);
|
||||
local_irq_restore(flags);
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __init cf_bootmem_alloc(void)
|
||||
|
@ -18,4 +18,9 @@
|
||||
|
||||
#define SMP_CACHE_BYTES L1_CACHE_BYTES
|
||||
|
||||
/* MS be sure that SLAB allocates aligned objects */
|
||||
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
|
||||
|
||||
#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES
|
||||
|
||||
#endif /* _ASM_MICROBLAZE_CACHE_H */
|
||||
|
@ -30,11 +30,6 @@
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
/* MS be sure that SLAB allocates aligned objects */
|
||||
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
|
||||
|
||||
#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES
|
||||
|
||||
/*
|
||||
* PAGE_OFFSET -- the first address of the first page of memory. With MMU
|
||||
* it is set to the kernel start address (aligned on a page boundary).
|
||||
|
@ -194,7 +194,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
|
||||
|
||||
preempt_disable();
|
||||
ptep = pte_offset_map(pmdp, address);
|
||||
if (pte_present(*ptep)) {
|
||||
if (ptep && pte_present(*ptep)) {
|
||||
address = (unsigned long) page_address(pte_page(*ptep));
|
||||
/* MS: I need add offset in page */
|
||||
address += ((unsigned long)frame->tramp) & ~PAGE_MASK;
|
||||
@ -203,7 +203,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
|
||||
invalidate_icache_range(address, address + 8);
|
||||
flush_dcache_range(address, address + 8);
|
||||
}
|
||||
pte_unmap(ptep);
|
||||
if (ptep)
|
||||
pte_unmap(ptep);
|
||||
preempt_enable();
|
||||
if (err)
|
||||
return -EFAULT;
|
||||
|
@ -456,3 +456,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -389,3 +389,4 @@
|
||||
448 n32 process_mrelease sys_process_mrelease
|
||||
449 n32 futex_waitv sys_futex_waitv
|
||||
450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 n32 cachestat sys_cachestat
|
||||
|
@ -365,3 +365,4 @@
|
||||
448 n64 process_mrelease sys_process_mrelease
|
||||
449 n64 futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 n64 cachestat sys_cachestat
|
||||
|
@ -438,3 +438,4 @@
|
||||
448 o32 process_mrelease sys_process_mrelease
|
||||
449 o32 futex_waitv sys_futex_waitv
|
||||
450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 o32 cachestat sys_cachestat
|
||||
|
@ -297,7 +297,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
|
||||
p4d_t *p4dp;
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
pte_t *ptep, *ptemap = NULL;
|
||||
int idx, pid;
|
||||
|
||||
/*
|
||||
@ -344,7 +344,12 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ptep = pte_offset_map(pmdp, address);
|
||||
ptemap = ptep = pte_offset_map(pmdp, address);
|
||||
/*
|
||||
* update_mmu_cache() is called between pte_offset_map_lock()
|
||||
* and pte_unmap_unlock(), so we can assume that ptep is not
|
||||
* NULL here: and what should be done below if it were NULL?
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
|
||||
#ifdef CONFIG_XPA
|
||||
@ -373,6 +378,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
|
||||
tlbw_use_hazard();
|
||||
htw_start();
|
||||
flush_micro_tlb_vm(vma);
|
||||
|
||||
if (ptemap)
|
||||
pte_unmap(ptemap);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
@ -426,10 +426,15 @@ void flush_dcache_page(struct page *page)
|
||||
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
|
||||
addr = mpnt->vm_start + offset;
|
||||
if (parisc_requires_coherency()) {
|
||||
bool needs_flush = false;
|
||||
pte_t *ptep;
|
||||
|
||||
ptep = get_ptep(mpnt->vm_mm, addr);
|
||||
if (ptep && pte_needs_flush(*ptep))
|
||||
if (ptep) {
|
||||
needs_flush = pte_needs_flush(*ptep);
|
||||
pte_unmap(ptep);
|
||||
}
|
||||
if (needs_flush)
|
||||
flush_user_cache_page(mpnt, addr);
|
||||
} else {
|
||||
/*
|
||||
@ -561,14 +566,20 @@ EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
|
||||
static void flush_cache_page_if_present(struct vm_area_struct *vma,
|
||||
unsigned long vmaddr, unsigned long pfn)
|
||||
{
|
||||
pte_t *ptep = get_ptep(vma->vm_mm, vmaddr);
|
||||
bool needs_flush = false;
|
||||
pte_t *ptep;
|
||||
|
||||
/*
|
||||
* The pte check is racy and sometimes the flush will trigger
|
||||
* a non-access TLB miss. Hopefully, the page has already been
|
||||
* flushed.
|
||||
*/
|
||||
if (ptep && pte_needs_flush(*ptep))
|
||||
ptep = get_ptep(vma->vm_mm, vmaddr);
|
||||
if (ptep) {
|
||||
needs_flush = pte_needs_flush(*ptep);
|
||||
pte_unmap(ptep);
|
||||
}
|
||||
if (needs_flush)
|
||||
flush_cache_page(vma, vmaddr, pfn);
|
||||
}
|
||||
|
||||
@ -635,17 +646,22 @@ static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, u
|
||||
pte_t *ptep;
|
||||
|
||||
for (addr = start; addr < end; addr += PAGE_SIZE) {
|
||||
bool needs_flush = false;
|
||||
/*
|
||||
* The vma can contain pages that aren't present. Although
|
||||
* the pte search is expensive, we need the pte to find the
|
||||
* page pfn and to check whether the page should be flushed.
|
||||
*/
|
||||
ptep = get_ptep(vma->vm_mm, addr);
|
||||
if (ptep && pte_needs_flush(*ptep)) {
|
||||
if (ptep) {
|
||||
needs_flush = pte_needs_flush(*ptep);
|
||||
pfn = pte_pfn(*ptep);
|
||||
pte_unmap(ptep);
|
||||
}
|
||||
if (needs_flush) {
|
||||
if (parisc_requires_coherency()) {
|
||||
flush_user_cache_page(vma, addr);
|
||||
} else {
|
||||
pfn = pte_pfn(*ptep);
|
||||
if (WARN_ON(!pfn_valid(pfn)))
|
||||
return;
|
||||
__flush_cache_page(vma, addr, PFN_PHYS(pfn));
|
||||
|
@ -164,7 +164,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr,
|
||||
pmd_clear(pmd);
|
||||
return;
|
||||
}
|
||||
pte = pte_offset_map(pmd, vaddr);
|
||||
pte = pte_offset_kernel(pmd, vaddr);
|
||||
vaddr &= ~PMD_MASK;
|
||||
end = vaddr + size;
|
||||
if (end > PMD_SIZE)
|
||||
|
@ -448,3 +448,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -66,7 +66,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
if (pud) {
|
||||
pmd = pmd_alloc(mm, pud, addr);
|
||||
if (pmd)
|
||||
pte = pte_alloc_map(mm, pmd, addr);
|
||||
pte = pte_alloc_huge(mm, pmd, addr);
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
@ -90,7 +90,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
if (!pud_none(*pud)) {
|
||||
pmd = pmd_offset(pud, addr);
|
||||
if (!pmd_none(*pmd))
|
||||
pte = pte_offset_map(pmd, addr);
|
||||
pte = pte_offset_huge(pmd, addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -33,6 +33,10 @@
|
||||
|
||||
#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT)
|
||||
|
||||
#ifdef CONFIG_NOT_COHERENT_CACHE
|
||||
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
|
||||
#endif
|
||||
|
||||
#if !defined(__ASSEMBLY__)
|
||||
#ifdef CONFIG_PPC64
|
||||
|
||||
|
@ -12,10 +12,6 @@
|
||||
|
||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32
|
||||
|
||||
#ifdef CONFIG_NOT_COHERENT_CACHE
|
||||
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PPC_256K_PAGES) || \
|
||||
(defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
|
||||
#define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */
|
||||
|
@ -537,3 +537,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -509,7 +509,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
|
||||
} else {
|
||||
pte_t *pte;
|
||||
|
||||
pte = pte_offset_map(p, 0);
|
||||
pte = pte_offset_kernel(p, 0);
|
||||
kvmppc_unmap_free_pte(kvm, pte, full, lpid);
|
||||
pmd_clear(p);
|
||||
}
|
||||
|
@ -239,12 +239,16 @@ void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long
|
||||
local_irq_save(flags);
|
||||
arch_enter_lazy_mmu_mode();
|
||||
start_pte = pte_offset_map(pmd, addr);
|
||||
if (!start_pte)
|
||||
goto out;
|
||||
for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
|
||||
unsigned long pteval = pte_val(*pte);
|
||||
if (pteval & H_PAGE_HASHPTE)
|
||||
hpte_need_flush(mm, addr, pte, pteval, 0);
|
||||
addr += PAGE_SIZE;
|
||||
}
|
||||
pte_unmap(start_pte);
|
||||
out:
|
||||
arch_leave_lazy_mmu_mode();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
@ -105,7 +105,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
|
||||
|
||||
ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
|
||||
FOLL_WRITE | FOLL_LONGTERM,
|
||||
mem->hpages + entry, NULL);
|
||||
mem->hpages + entry);
|
||||
if (ret == n) {
|
||||
pinned += n;
|
||||
continue;
|
||||
|
@ -71,6 +71,8 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
|
||||
if (pmd_none(*pmd))
|
||||
return;
|
||||
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
if (!pte)
|
||||
return;
|
||||
arch_enter_lazy_mmu_mode();
|
||||
for (; npages > 0; --npages) {
|
||||
pte_update(mm, addr, pte, 0, 0, 0);
|
||||
|
@ -183,7 +183,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
return NULL;
|
||||
|
||||
if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
|
||||
return pte_alloc_map(mm, (pmd_t *)hpdp, addr);
|
||||
return pte_alloc_huge(mm, (pmd_t *)hpdp, addr);
|
||||
|
||||
BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
|
||||
|
||||
|
@ -3376,12 +3376,15 @@ static void show_pte(unsigned long addr)
|
||||
printf("pmdp @ 0x%px = 0x%016lx\n", pmdp, pmd_val(*pmdp));
|
||||
|
||||
ptep = pte_offset_map(pmdp, addr);
|
||||
if (pte_none(*ptep)) {
|
||||
if (!ptep || pte_none(*ptep)) {
|
||||
if (ptep)
|
||||
pte_unmap(ptep);
|
||||
printf("no valid PTE\n");
|
||||
return;
|
||||
}
|
||||
|
||||
format_pte(ptep, pte_val(*ptep));
|
||||
pte_unmap(ptep);
|
||||
|
||||
sync();
|
||||
__delay(200);
|
||||
|
@ -67,7 +67,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
|
||||
for_each_napot_order(order) {
|
||||
if (napot_cont_size(order) == sz) {
|
||||
pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order));
|
||||
pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -114,7 +114,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
|
||||
for_each_napot_order(order) {
|
||||
if (napot_cont_size(order) == sz) {
|
||||
pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order));
|
||||
pte = pte_offset_huge(pmd, addr & napot_cont_mask(order));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -453,3 +453,4 @@
|
||||
448 common process_mrelease sys_process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat sys_cachestat
|
||||
|
@ -294,6 +294,8 @@ again:
|
||||
|
||||
rc = -ENXIO;
|
||||
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
|
||||
if (!ptep)
|
||||
goto out;
|
||||
if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
|
||||
page = pte_page(*ptep);
|
||||
rc = -EAGAIN;
|
||||
|
@ -2777,7 +2777,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
|
||||
|
||||
mmap_read_lock(kvm->mm);
|
||||
get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
|
||||
&page, NULL, NULL);
|
||||
&page, NULL);
|
||||
mmap_read_unlock(kvm->mm);
|
||||
return page;
|
||||
}
|
||||
|
@ -895,12 +895,12 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
|
||||
|
||||
/**
|
||||
* gmap_pte_op_end - release the page table lock
|
||||
* @ptl: pointer to the spinlock pointer
|
||||
* @ptep: pointer to the locked pte
|
||||
* @ptl: pointer to the page table spinlock
|
||||
*/
|
||||
static void gmap_pte_op_end(spinlock_t *ptl)
|
||||
static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
|
||||
{
|
||||
if (ptl)
|
||||
spin_unlock(ptl);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1011,7 +1011,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
|
||||
{
|
||||
int rc;
|
||||
pte_t *ptep;
|
||||
spinlock_t *ptl = NULL;
|
||||
spinlock_t *ptl;
|
||||
unsigned long pbits = 0;
|
||||
|
||||
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
|
||||
@ -1025,7 +1025,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
|
||||
pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
|
||||
/* Protect and unlock. */
|
||||
rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
|
||||
gmap_pte_op_end(ptl);
|
||||
gmap_pte_op_end(ptep, ptl);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -1154,7 +1154,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
|
||||
/* Do *NOT* clear the _PAGE_INVALID bit! */
|
||||
rc = 0;
|
||||
}
|
||||
gmap_pte_op_end(ptl);
|
||||
gmap_pte_op_end(ptep, ptl);
|
||||
}
|
||||
if (!rc)
|
||||
break;
|
||||
@ -1248,7 +1248,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
|
||||
if (!rc)
|
||||
gmap_insert_rmap(sg, vmaddr, rmap);
|
||||
spin_unlock(&sg->guest_table_lock);
|
||||
gmap_pte_op_end(ptl);
|
||||
gmap_pte_op_end(ptep, ptl);
|
||||
}
|
||||
radix_tree_preload_end();
|
||||
if (rc) {
|
||||
@ -2156,7 +2156,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
|
||||
tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
|
||||
if (!tptep) {
|
||||
spin_unlock(&sg->guest_table_lock);
|
||||
gmap_pte_op_end(ptl);
|
||||
gmap_pte_op_end(sptep, ptl);
|
||||
radix_tree_preload_end();
|
||||
break;
|
||||
}
|
||||
@ -2167,7 +2167,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
|
||||
rmap = NULL;
|
||||
rc = 0;
|
||||
}
|
||||
gmap_pte_op_end(ptl);
|
||||
gmap_pte_op_end(sptep, ptl);
|
||||
spin_unlock(&sg->guest_table_lock);
|
||||
}
|
||||
radix_tree_preload_end();
|
||||
@ -2495,7 +2495,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
|
||||
continue;
|
||||
if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
|
||||
set_bit(i, bitmap);
|
||||
spin_unlock(ptl);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
}
|
||||
}
|
||||
gmap_pmd_op_end(gmap, pmdp);
|
||||
@ -2537,7 +2537,12 @@ static inline void thp_split_mm(struct mm_struct *mm)
|
||||
* Remove all empty zero pages from the mapping for lazy refaulting
|
||||
* - This must be called after mm->context.has_pgste is set, to avoid
|
||||
* future creation of zero pages
|
||||
* - This must be called after THP was enabled
|
||||
* - This must be called after THP was disabled.
|
||||
*
|
||||
* mm contracts with s390, that even if mm were to remove a page table,
|
||||
* racing with the loop below and so causing pte_offset_map_lock() to fail,
|
||||
* it will never insert a page table containing empty zero pages once
|
||||
* mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
|
||||
*/
|
||||
static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
|
||||
unsigned long end, struct mm_walk *walk)
|
||||
@ -2549,6 +2554,8 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
|
||||
spinlock_t *ptl;
|
||||
|
||||
ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
|
||||
if (!ptep)
|
||||
break;
|
||||
if (is_zero_pfn(pte_pfn(*ptep)))
|
||||
ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
|
@ -829,7 +829,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
again:
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
@ -850,6 +850,8 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (!ptep)
|
||||
goto again;
|
||||
new = old = pgste_get_lock(ptep);
|
||||
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
|
||||
PGSTE_ACC_BITS | PGSTE_FP_BIT);
|
||||
@ -938,7 +940,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
again:
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
@ -955,6 +957,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (!ptep)
|
||||
goto again;
|
||||
new = old = pgste_get_lock(ptep);
|
||||
/* Reset guest reference bit only */
|
||||
pgste_val(new) &= ~PGSTE_GR_BIT;
|
||||
@ -1000,7 +1004,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
again:
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
@ -1017,6 +1021,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (!ptep)
|
||||
goto again;
|
||||
pgste = pgste_get_lock(ptep);
|
||||
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
|
||||
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||
|
@ -14,6 +14,12 @@
|
||||
|
||||
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
|
||||
|
||||
/*
|
||||
* Some drivers need to perform DMA into kmalloc'ed buffers
|
||||
* and so we have to increase the kmalloc minalign for this.
|
||||
*/
|
||||
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
|
||||
|
||||
#define __read_mostly __section(".data..read_mostly")
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
@ -174,10 +174,4 @@ typedef struct page *pgtable_t;
|
||||
#include <asm-generic/memory_model.h>
|
||||
#include <asm-generic/getorder.h>
|
||||
|
||||
/*
|
||||
* Some drivers need to perform DMA into kmalloc'ed buffers
|
||||
* and so we have to increase the kmalloc minalign for this.
|
||||
*/
|
||||
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
|
||||
|
||||
#endif /* __ASM_SH_PAGE_H */
|
||||
|
@ -453,3 +453,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -38,7 +38,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
if (pud) {
|
||||
pmd = pmd_alloc(mm, pud, addr);
|
||||
if (pmd)
|
||||
pte = pte_alloc_map(mm, pmd, addr);
|
||||
pte = pte_alloc_huge(mm, pmd, addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -63,7 +63,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
if (pud) {
|
||||
pmd = pmd_offset(pud, addr);
|
||||
if (pmd)
|
||||
pte = pte_offset_map(pmd, addr);
|
||||
pte = pte_offset_huge(pmd, addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -328,6 +328,8 @@ static void flush_signal_insns(unsigned long address)
|
||||
goto out_irqs_on;
|
||||
|
||||
ptep = pte_offset_map(pmdp, address);
|
||||
if (!ptep)
|
||||
goto out_irqs_on;
|
||||
pte = *ptep;
|
||||
if (!pte_present(pte))
|
||||
goto out_unmap;
|
||||
|
@ -496,3 +496,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -99,6 +99,7 @@ static unsigned int get_user_insn(unsigned long tpc)
|
||||
local_irq_disable();
|
||||
|
||||
pmdp = pmd_offset(pudp, tpc);
|
||||
again:
|
||||
if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
|
||||
goto out_irq_enable;
|
||||
|
||||
@ -115,6 +116,8 @@ static unsigned int get_user_insn(unsigned long tpc)
|
||||
#endif
|
||||
{
|
||||
ptep = pte_offset_map(pmdp, tpc);
|
||||
if (!ptep)
|
||||
goto again;
|
||||
pte = *ptep;
|
||||
if (pte_present(pte)) {
|
||||
pa = (pte_pfn(pte) << PAGE_SHIFT);
|
||||
|
@ -298,7 +298,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
return NULL;
|
||||
if (sz >= PMD_SIZE)
|
||||
return (pte_t *)pmd;
|
||||
return pte_alloc_map(mm, pmd, addr);
|
||||
return pte_alloc_huge(mm, pmd, addr);
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
@ -325,7 +325,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
return NULL;
|
||||
if (is_hugetlb_pmd(*pmd))
|
||||
return (pte_t *)pmd;
|
||||
return pte_offset_map(pmd, addr);
|
||||
return pte_offset_huge(pmd, addr);
|
||||
}
|
||||
|
||||
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
|
@ -244,7 +244,7 @@ static void *iounit_alloc(struct device *dev, size_t len,
|
||||
long i;
|
||||
|
||||
pmdp = pmd_off_k(addr);
|
||||
ptep = pte_offset_map(pmdp, addr);
|
||||
ptep = pte_offset_kernel(pmdp, addr);
|
||||
|
||||
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
|
||||
|
||||
|
@ -358,7 +358,7 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
|
||||
__flush_page_to_ram(page);
|
||||
|
||||
pmdp = pmd_off_k(addr);
|
||||
ptep = pte_offset_map(pmdp, addr);
|
||||
ptep = pte_offset_kernel(pmdp, addr);
|
||||
|
||||
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
|
||||
}
|
||||
|
@ -149,6 +149,8 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
|
||||
pte_t *pte;
|
||||
|
||||
pte = pte_offset_map(&pmd, vaddr);
|
||||
if (!pte)
|
||||
return;
|
||||
end = vaddr + HPAGE_SIZE;
|
||||
while (vaddr < end) {
|
||||
if (pte_val(*pte) & _PAGE_VALID) {
|
||||
|
@ -455,3 +455,4 @@
|
||||
448 i386 process_mrelease sys_process_mrelease
|
||||
449 i386 futex_waitv sys_futex_waitv
|
||||
450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 i386 cachestat sys_cachestat
|
||||
|
@ -372,6 +372,7 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
||||
#
|
||||
# Due to a historical design error, certain syscalls are numbered differently
|
||||
|
@ -214,7 +214,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
|
||||
if (!(vma->vm_flags & VM_MAYEXEC))
|
||||
return -EACCES;
|
||||
|
||||
ret = get_user_pages(src, 1, 0, &src_page, NULL);
|
||||
ret = get_user_pages(src, 1, 0, &src_page);
|
||||
if (ret < 1)
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -367,8 +367,10 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
|
||||
|
||||
va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
|
||||
ptep = get_locked_pte(mm, va, &ptl);
|
||||
pte_clear(mm, va, ptep);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
if (!WARN_ON_ONCE(!ptep)) {
|
||||
pte_clear(mm, va, ptep);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
}
|
||||
}
|
||||
|
||||
va = (unsigned long)ldt_slot_va(ldt->slot);
|
||||
|
@ -188,7 +188,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
|
||||
if (pmd_large(*pmd))
|
||||
return;
|
||||
|
||||
pte = pte_offset_map(pmd, ppd->vaddr);
|
||||
pte = pte_offset_kernel(pmd, ppd->vaddr);
|
||||
if (pte_none(*pte))
|
||||
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
|
||||
}
|
||||
|
@ -421,3 +421,4 @@
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
|
@ -179,6 +179,7 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
unsigned int pteval;
|
||||
|
||||
if (!mm)
|
||||
mm = task->active_mm;
|
||||
@ -197,7 +198,9 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
|
||||
pte = pte_offset_map(pmd, vaddr);
|
||||
if (!pte)
|
||||
return 0;
|
||||
return pte_val(*pte);
|
||||
pteval = pte_val(*pte);
|
||||
pte_unmap(pte);
|
||||
return pteval;
|
||||
}
|
||||
|
||||
enum {
|
||||
|
18
block/fops.c
18
block/fops.c
@ -598,21 +598,9 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
goto reexpand; /* skip atime */
|
||||
|
||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
struct address_space *mapping = iocb->ki_filp->f_mapping;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (filemap_range_needs_writeback(mapping, pos,
|
||||
pos + count - 1)) {
|
||||
ret = -EAGAIN;
|
||||
goto reexpand;
|
||||
}
|
||||
} else {
|
||||
ret = filemap_write_and_wait_range(mapping, pos,
|
||||
pos + count - 1);
|
||||
if (ret < 0)
|
||||
goto reexpand;
|
||||
}
|
||||
|
||||
ret = kiocb_write_and_wait(iocb, count);
|
||||
if (ret < 0)
|
||||
goto reexpand;
|
||||
file_accessed(iocb->ki_filp);
|
||||
|
||||
ret = blkdev_direct_IO(iocb, to);
|
||||
|
@ -29,10 +29,10 @@ struct devres {
|
||||
* Some archs want to perform DMA into kmalloc caches
|
||||
* and need a guaranteed alignment larger than
|
||||
* the alignment of a 64-bit integer.
|
||||
* Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
|
||||
* buffer alignment as if it was allocated by plain kmalloc().
|
||||
* Thus we use ARCH_DMA_MINALIGN for data[] which will force the same
|
||||
* alignment for struct devres when allocated by kmalloc().
|
||||
*/
|
||||
u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
|
||||
u8 __aligned(ARCH_DMA_MINALIGN) data[];
|
||||
};
|
||||
|
||||
struct devres_group {
|
||||
|
@ -1753,7 +1753,7 @@ static ssize_t recompress_store(struct device *dev,
|
||||
}
|
||||
}
|
||||
|
||||
if (threshold >= PAGE_SIZE)
|
||||
if (threshold >= huge_class_size)
|
||||
return -EINVAL;
|
||||
|
||||
down_read(&zram->init_lock);
|
||||
|
@ -496,13 +496,13 @@ int drm_gem_create_mmap_offset(struct drm_gem_object *obj)
|
||||
EXPORT_SYMBOL(drm_gem_create_mmap_offset);
|
||||
|
||||
/*
|
||||
* Move pages to appropriate lru and release the pagevec, decrementing the
|
||||
* ref count of those pages.
|
||||
* Move folios to appropriate lru and release the folios, decrementing the
|
||||
* ref count of those folios.
|
||||
*/
|
||||
static void drm_gem_check_release_pagevec(struct pagevec *pvec)
|
||||
static void drm_gem_check_release_batch(struct folio_batch *fbatch)
|
||||
{
|
||||
check_move_unevictable_pages(pvec);
|
||||
__pagevec_release(pvec);
|
||||
check_move_unevictable_folios(fbatch);
|
||||
__folio_batch_release(fbatch);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
@ -534,10 +534,10 @@ static void drm_gem_check_release_pagevec(struct pagevec *pvec)
|
||||
struct page **drm_gem_get_pages(struct drm_gem_object *obj)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
struct page *p, **pages;
|
||||
struct pagevec pvec;
|
||||
int i, npages;
|
||||
|
||||
struct page **pages;
|
||||
struct folio *folio;
|
||||
struct folio_batch fbatch;
|
||||
int i, j, npages;
|
||||
|
||||
if (WARN_ON(!obj->filp))
|
||||
return ERR_PTR(-EINVAL);
|
||||
@ -559,11 +559,14 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
|
||||
|
||||
mapping_set_unevictable(mapping);
|
||||
|
||||
for (i = 0; i < npages; i++) {
|
||||
p = shmem_read_mapping_page(mapping, i);
|
||||
if (IS_ERR(p))
|
||||
i = 0;
|
||||
while (i < npages) {
|
||||
folio = shmem_read_folio_gfp(mapping, i,
|
||||
mapping_gfp_mask(mapping));
|
||||
if (IS_ERR(folio))
|
||||
goto fail;
|
||||
pages[i] = p;
|
||||
for (j = 0; j < folio_nr_pages(folio); j++, i++)
|
||||
pages[i] = folio_file_page(folio, i);
|
||||
|
||||
/* Make sure shmem keeps __GFP_DMA32 allocated pages in the
|
||||
* correct region during swapin. Note that this requires
|
||||
@ -571,23 +574,26 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
|
||||
* so shmem can relocate pages during swapin if required.
|
||||
*/
|
||||
BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) &&
|
||||
(page_to_pfn(p) >= 0x00100000UL));
|
||||
(folio_pfn(folio) >= 0x00100000UL));
|
||||
}
|
||||
|
||||
return pages;
|
||||
|
||||
fail:
|
||||
mapping_clear_unevictable(mapping);
|
||||
pagevec_init(&pvec);
|
||||
while (i--) {
|
||||
if (!pagevec_add(&pvec, pages[i]))
|
||||
drm_gem_check_release_pagevec(&pvec);
|
||||
folio_batch_init(&fbatch);
|
||||
j = 0;
|
||||
while (j < i) {
|
||||
struct folio *f = page_folio(pages[j]);
|
||||
if (!folio_batch_add(&fbatch, f))
|
||||
drm_gem_check_release_batch(&fbatch);
|
||||
j += folio_nr_pages(f);
|
||||
}
|
||||
if (pagevec_count(&pvec))
|
||||
drm_gem_check_release_pagevec(&pvec);
|
||||
if (fbatch.nr)
|
||||
drm_gem_check_release_batch(&fbatch);
|
||||
|
||||
kvfree(pages);
|
||||
return ERR_CAST(p);
|
||||
return ERR_CAST(folio);
|
||||
}
|
||||
EXPORT_SYMBOL(drm_gem_get_pages);
|
||||
|
||||
@ -603,7 +609,7 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
|
||||
{
|
||||
int i, npages;
|
||||
struct address_space *mapping;
|
||||
struct pagevec pvec;
|
||||
struct folio_batch fbatch;
|
||||
|
||||
mapping = file_inode(obj->filp)->i_mapping;
|
||||
mapping_clear_unevictable(mapping);
|
||||
@ -616,23 +622,27 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
|
||||
|
||||
npages = obj->size >> PAGE_SHIFT;
|
||||
|
||||
pagevec_init(&pvec);
|
||||
folio_batch_init(&fbatch);
|
||||
for (i = 0; i < npages; i++) {
|
||||
struct folio *folio;
|
||||
|
||||
if (!pages[i])
|
||||
continue;
|
||||
folio = page_folio(pages[i]);
|
||||
|
||||
if (dirty)
|
||||
set_page_dirty(pages[i]);
|
||||
folio_mark_dirty(folio);
|
||||
|
||||
if (accessed)
|
||||
mark_page_accessed(pages[i]);
|
||||
folio_mark_accessed(folio);
|
||||
|
||||
/* Undo the reference we took when populating the table */
|
||||
if (!pagevec_add(&pvec, pages[i]))
|
||||
drm_gem_check_release_pagevec(&pvec);
|
||||
if (!folio_batch_add(&fbatch, folio))
|
||||
drm_gem_check_release_batch(&fbatch);
|
||||
i += folio_nr_pages(folio) - 1;
|
||||
}
|
||||
if (pagevec_count(&pvec))
|
||||
drm_gem_check_release_pagevec(&pvec);
|
||||
if (folio_batch_count(&fbatch))
|
||||
drm_gem_check_release_batch(&fbatch);
|
||||
|
||||
kvfree(pages);
|
||||
}
|
||||
|
@ -49,10 +49,10 @@ struct drmres {
|
||||
* Some archs want to perform DMA into kmalloc caches
|
||||
* and need a guaranteed alignment larger than
|
||||
* the alignment of a 64-bit integer.
|
||||
* Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
|
||||
* buffer alignment as if it was allocated by plain kmalloc().
|
||||
* Thus we use ARCH_DMA_MINALIGN for data[] which will force the same
|
||||
* alignment for struct drmres when allocated by kmalloc().
|
||||
*/
|
||||
u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
|
||||
u8 __aligned(ARCH_DMA_MINALIGN) data[];
|
||||
};
|
||||
|
||||
static void free_dr(struct drmres *dr)
|
||||
|
@ -19,13 +19,13 @@
|
||||
#include "i915_trace.h"
|
||||
|
||||
/*
|
||||
* Move pages to appropriate lru and release the pagevec, decrementing the
|
||||
* ref count of those pages.
|
||||
* Move folios to appropriate lru and release the batch, decrementing the
|
||||
* ref count of those folios.
|
||||
*/
|
||||
static void check_release_pagevec(struct pagevec *pvec)
|
||||
static void check_release_folio_batch(struct folio_batch *fbatch)
|
||||
{
|
||||
check_move_unevictable_pages(pvec);
|
||||
__pagevec_release(pvec);
|
||||
check_move_unevictable_folios(fbatch);
|
||||
__folio_batch_release(fbatch);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
@ -33,24 +33,29 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
|
||||
bool dirty, bool backup)
|
||||
{
|
||||
struct sgt_iter sgt_iter;
|
||||
struct pagevec pvec;
|
||||
struct folio_batch fbatch;
|
||||
struct folio *last = NULL;
|
||||
struct page *page;
|
||||
|
||||
mapping_clear_unevictable(mapping);
|
||||
|
||||
pagevec_init(&pvec);
|
||||
folio_batch_init(&fbatch);
|
||||
for_each_sgt_page(page, sgt_iter, st) {
|
||||
struct folio *folio = page_folio(page);
|
||||
|
||||
if (folio == last)
|
||||
continue;
|
||||
last = folio;
|
||||
if (dirty)
|
||||
set_page_dirty(page);
|
||||
|
||||
folio_mark_dirty(folio);
|
||||
if (backup)
|
||||
mark_page_accessed(page);
|
||||
folio_mark_accessed(folio);
|
||||
|
||||
if (!pagevec_add(&pvec, page))
|
||||
check_release_pagevec(&pvec);
|
||||
if (!folio_batch_add(&fbatch, folio))
|
||||
check_release_folio_batch(&fbatch);
|
||||
}
|
||||
if (pagevec_count(&pvec))
|
||||
check_release_pagevec(&pvec);
|
||||
if (fbatch.nr)
|
||||
check_release_folio_batch(&fbatch);
|
||||
|
||||
sg_free_table(st);
|
||||
}
|
||||
@ -63,8 +68,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
|
||||
unsigned int page_count; /* restricted by sg_alloc_table */
|
||||
unsigned long i;
|
||||
struct scatterlist *sg;
|
||||
struct page *page;
|
||||
unsigned long last_pfn = 0; /* suppress gcc warning */
|
||||
unsigned long next_pfn = 0; /* suppress gcc warning */
|
||||
gfp_t noreclaim;
|
||||
int ret;
|
||||
|
||||
@ -95,6 +99,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
|
||||
sg = st->sgl;
|
||||
st->nents = 0;
|
||||
for (i = 0; i < page_count; i++) {
|
||||
struct folio *folio;
|
||||
const unsigned int shrink[] = {
|
||||
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
|
||||
0,
|
||||
@ -103,12 +108,12 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
|
||||
|
||||
do {
|
||||
cond_resched();
|
||||
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
|
||||
if (!IS_ERR(page))
|
||||
folio = shmem_read_folio_gfp(mapping, i, gfp);
|
||||
if (!IS_ERR(folio))
|
||||
break;
|
||||
|
||||
if (!*s) {
|
||||
ret = PTR_ERR(page);
|
||||
ret = PTR_ERR(folio);
|
||||
goto err_sg;
|
||||
}
|
||||
|
||||
@ -147,19 +152,21 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
|
||||
|
||||
if (!i ||
|
||||
sg->length >= max_segment ||
|
||||
page_to_pfn(page) != last_pfn + 1) {
|
||||
folio_pfn(folio) != next_pfn) {
|
||||
if (i)
|
||||
sg = sg_next(sg);
|
||||
|
||||
st->nents++;
|
||||
sg_set_page(sg, page, PAGE_SIZE, 0);
|
||||
sg_set_folio(sg, folio, folio_size(folio), 0);
|
||||
} else {
|
||||
sg->length += PAGE_SIZE;
|
||||
/* XXX: could overflow? */
|
||||
sg->length += folio_size(folio);
|
||||
}
|
||||
last_pfn = page_to_pfn(page);
|
||||
next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
|
||||
i += folio_nr_pages(folio) - 1;
|
||||
|
||||
/* Check that the i965g/gm workaround works. */
|
||||
GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
|
||||
GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
|
||||
}
|
||||
if (sg) /* loop terminated early; short sg table */
|
||||
sg_mark_end(sg);
|
||||
|
@ -1681,7 +1681,9 @@ static int igt_mmap_gpu(void *arg)
|
||||
|
||||
static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
if (!pte_present(*pte) || pte_none(*pte)) {
|
||||
pte_t ptent = ptep_get(pte);
|
||||
|
||||
if (!pte_present(ptent) || pte_none(ptent)) {
|
||||
pr_err("missing PTE:%lx\n",
|
||||
(addr - (unsigned long)data) >> PAGE_SHIFT);
|
||||
return -EINVAL;
|
||||
@ -1692,7 +1694,9 @@ static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
|
||||
|
||||
static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
if (pte_present(*pte) && !pte_none(*pte)) {
|
||||
pte_t ptent = ptep_get(pte);
|
||||
|
||||
if (pte_present(ptent) && !pte_none(ptent)) {
|
||||
pr_err("present PTE:%lx; expected to be revoked\n",
|
||||
(addr - (unsigned long)data) >> PAGE_SHIFT);
|
||||
return -EINVAL;
|
||||
|
@ -187,64 +187,64 @@ i915_error_printer(struct drm_i915_error_state_buf *e)
|
||||
}
|
||||
|
||||
/* single threaded page allocator with a reserved stash for emergencies */
|
||||
static void pool_fini(struct pagevec *pv)
|
||||
static void pool_fini(struct folio_batch *fbatch)
|
||||
{
|
||||
pagevec_release(pv);
|
||||
folio_batch_release(fbatch);
|
||||
}
|
||||
|
||||
static int pool_refill(struct pagevec *pv, gfp_t gfp)
|
||||
static int pool_refill(struct folio_batch *fbatch, gfp_t gfp)
|
||||
{
|
||||
while (pagevec_space(pv)) {
|
||||
struct page *p;
|
||||
while (folio_batch_space(fbatch)) {
|
||||
struct folio *folio;
|
||||
|
||||
p = alloc_page(gfp);
|
||||
if (!p)
|
||||
folio = folio_alloc(gfp, 0);
|
||||
if (!folio)
|
||||
return -ENOMEM;
|
||||
|
||||
pagevec_add(pv, p);
|
||||
folio_batch_add(fbatch, folio);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pool_init(struct pagevec *pv, gfp_t gfp)
|
||||
static int pool_init(struct folio_batch *fbatch, gfp_t gfp)
|
||||
{
|
||||
int err;
|
||||
|
||||
pagevec_init(pv);
|
||||
folio_batch_init(fbatch);
|
||||
|
||||
err = pool_refill(pv, gfp);
|
||||
err = pool_refill(fbatch, gfp);
|
||||
if (err)
|
||||
pool_fini(pv);
|
||||
pool_fini(fbatch);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void *pool_alloc(struct pagevec *pv, gfp_t gfp)
|
||||
static void *pool_alloc(struct folio_batch *fbatch, gfp_t gfp)
|
||||
{
|
||||
struct page *p;
|
||||
struct folio *folio;
|
||||
|
||||
p = alloc_page(gfp);
|
||||
if (!p && pagevec_count(pv))
|
||||
p = pv->pages[--pv->nr];
|
||||
folio = folio_alloc(gfp, 0);
|
||||
if (!folio && folio_batch_count(fbatch))
|
||||
folio = fbatch->folios[--fbatch->nr];
|
||||
|
||||
return p ? page_address(p) : NULL;
|
||||
return folio ? folio_address(folio) : NULL;
|
||||
}
|
||||
|
||||
static void pool_free(struct pagevec *pv, void *addr)
|
||||
static void pool_free(struct folio_batch *fbatch, void *addr)
|
||||
{
|
||||
struct page *p = virt_to_page(addr);
|
||||
struct folio *folio = virt_to_folio(addr);
|
||||
|
||||
if (pagevec_space(pv))
|
||||
pagevec_add(pv, p);
|
||||
if (folio_batch_space(fbatch))
|
||||
folio_batch_add(fbatch, folio);
|
||||
else
|
||||
__free_page(p);
|
||||
folio_put(folio);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
|
||||
|
||||
struct i915_vma_compress {
|
||||
struct pagevec pool;
|
||||
struct folio_batch pool;
|
||||
struct z_stream_s zstream;
|
||||
void *tmp;
|
||||
};
|
||||
@ -381,7 +381,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
|
||||
#else
|
||||
|
||||
struct i915_vma_compress {
|
||||
struct pagevec pool;
|
||||
struct folio_batch pool;
|
||||
};
|
||||
|
||||
static bool compress_init(struct i915_vma_compress *c)
|
||||
|
@ -359,7 +359,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm
|
||||
struct page **pages = ttm->pages + pinned;
|
||||
|
||||
r = get_user_pages(userptr, num_pages, write ? FOLL_WRITE : 0,
|
||||
pages, NULL);
|
||||
pages);
|
||||
if (r < 0)
|
||||
goto release_pages;
|
||||
|
||||
|
@ -111,7 +111,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
|
||||
ret = pin_user_pages(start_page + got * PAGE_SIZE,
|
||||
num_pages - got,
|
||||
FOLL_LONGTERM | FOLL_WRITE,
|
||||
p + got, NULL);
|
||||
p + got);
|
||||
if (ret < 0) {
|
||||
mmap_read_unlock(current->mm);
|
||||
goto bail_release;
|
||||
|
@ -140,7 +140,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
|
||||
ret = pin_user_pages(cur_base,
|
||||
min_t(unsigned long, npages,
|
||||
PAGE_SIZE / sizeof(struct page *)),
|
||||
gup_flags, page_list, NULL);
|
||||
gup_flags, page_list);
|
||||
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
@ -422,7 +422,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
|
||||
umem->page_chunk[i].plist = plist;
|
||||
while (nents) {
|
||||
rv = pin_user_pages(first_page_va, nents, foll_flags,
|
||||
plist, NULL);
|
||||
plist);
|
||||
if (rv < 0)
|
||||
goto out_sem_up;
|
||||
|
||||
|
@ -152,6 +152,7 @@ config IOMMU_DMA
|
||||
select IOMMU_IOVA
|
||||
select IRQ_MSI_IOMMU
|
||||
select NEED_SG_DMA_LENGTH
|
||||
select NEED_SG_DMA_FLAGS if SWIOTLB
|
||||
|
||||
# Shared Virtual Addressing
|
||||
config IOMMU_SVA
|
||||
|
@ -520,9 +520,38 @@ static bool dev_is_untrusted(struct device *dev)
|
||||
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
|
||||
}
|
||||
|
||||
static bool dev_use_swiotlb(struct device *dev)
|
||||
static bool dev_use_swiotlb(struct device *dev, size_t size,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
|
||||
return IS_ENABLED(CONFIG_SWIOTLB) &&
|
||||
(dev_is_untrusted(dev) ||
|
||||
dma_kmalloc_needs_bounce(dev, size, dir));
|
||||
}
|
||||
|
||||
static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
|
||||
int nents, enum dma_data_direction dir)
|
||||
{
|
||||
struct scatterlist *s;
|
||||
int i;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_SWIOTLB))
|
||||
return false;
|
||||
|
||||
if (dev_is_untrusted(dev))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* If kmalloc() buffers are not DMA-safe for this device and
|
||||
* direction, check the individual lengths in the sg list. If any
|
||||
* element is deemed unsafe, use the swiotlb for bouncing.
|
||||
*/
|
||||
if (!dma_kmalloc_safe(dev, dir)) {
|
||||
for_each_sg(sg, s, nents, i)
|
||||
if (!dma_kmalloc_size_aligned(s->length))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -922,7 +951,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
|
||||
{
|
||||
phys_addr_t phys;
|
||||
|
||||
if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
|
||||
if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
|
||||
return;
|
||||
|
||||
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
|
||||
@ -938,7 +967,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
|
||||
{
|
||||
phys_addr_t phys;
|
||||
|
||||
if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
|
||||
if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
|
||||
return;
|
||||
|
||||
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
|
||||
@ -956,7 +985,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
if (dev_use_swiotlb(dev))
|
||||
if (sg_dma_is_swiotlb(sgl))
|
||||
for_each_sg(sgl, sg, nelems, i)
|
||||
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
|
||||
sg->length, dir);
|
||||
@ -972,7 +1001,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
if (dev_use_swiotlb(dev))
|
||||
if (sg_dma_is_swiotlb(sgl))
|
||||
for_each_sg(sgl, sg, nelems, i)
|
||||
iommu_dma_sync_single_for_device(dev,
|
||||
sg_dma_address(sg),
|
||||
@ -998,7 +1027,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
|
||||
* If both the physical buffer start address and size are
|
||||
* page aligned, we don't need to use a bounce page.
|
||||
*/
|
||||
if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
|
||||
if (dev_use_swiotlb(dev, size, dir) &&
|
||||
iova_offset(iovad, phys | size)) {
|
||||
void *padding_start;
|
||||
size_t padding_size, aligned_size;
|
||||
|
||||
@ -1080,7 +1110,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
|
||||
sg_dma_address(s) = DMA_MAPPING_ERROR;
|
||||
sg_dma_len(s) = 0;
|
||||
|
||||
if (sg_is_dma_bus_address(s)) {
|
||||
if (sg_dma_is_bus_address(s)) {
|
||||
if (i > 0)
|
||||
cur = sg_next(cur);
|
||||
|
||||
@ -1136,7 +1166,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
|
||||
int i;
|
||||
|
||||
for_each_sg(sg, s, nents, i) {
|
||||
if (sg_is_dma_bus_address(s)) {
|
||||
if (sg_dma_is_bus_address(s)) {
|
||||
sg_dma_unmark_bus_address(s);
|
||||
} else {
|
||||
if (sg_dma_address(s) != DMA_MAPPING_ERROR)
|
||||
@ -1166,6 +1196,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
|
||||
struct scatterlist *s;
|
||||
int i;
|
||||
|
||||
sg_dma_mark_swiotlb(sg);
|
||||
|
||||
for_each_sg(sg, s, nents, i) {
|
||||
sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
|
||||
s->offset, s->length, dir, attrs);
|
||||
@ -1210,7 +1242,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (dev_use_swiotlb(dev))
|
||||
if (dev_use_sg_swiotlb(dev, sg, nents, dir))
|
||||
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
|
||||
|
||||
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
|
||||
@ -1315,7 +1347,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
|
||||
struct scatterlist *tmp;
|
||||
int i;
|
||||
|
||||
if (dev_use_swiotlb(dev)) {
|
||||
if (sg_dma_is_swiotlb(sg)) {
|
||||
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
|
||||
return;
|
||||
}
|
||||
@ -1329,7 +1361,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
|
||||
* just have to be determined.
|
||||
*/
|
||||
for_each_sg(sg, tmp, nents, i) {
|
||||
if (sg_is_dma_bus_address(tmp)) {
|
||||
if (sg_dma_is_bus_address(tmp)) {
|
||||
sg_dma_unmark_bus_address(tmp);
|
||||
continue;
|
||||
}
|
||||
@ -1343,7 +1375,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
|
||||
|
||||
nents -= i;
|
||||
for_each_sg(tmp, tmp, nents, i) {
|
||||
if (sg_is_dma_bus_address(tmp)) {
|
||||
if (sg_dma_is_bus_address(tmp)) {
|
||||
sg_dma_unmark_bus_address(tmp);
|
||||
continue;
|
||||
}
|
||||
|
@ -2567,7 +2567,7 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
|
||||
len = 0;
|
||||
}
|
||||
|
||||
if (sg_is_dma_bus_address(sg))
|
||||
if (sg_dma_is_bus_address(sg))
|
||||
goto next;
|
||||
|
||||
if (len) {
|
||||
|
@ -786,7 +786,7 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
|
||||
user->locked = 1;
|
||||
}
|
||||
rc = pin_user_pages_remote(pages->source_mm, uptr, npages,
|
||||
user->gup_flags, user->upages, NULL,
|
||||
user->gup_flags, user->upages,
|
||||
&user->locked);
|
||||
}
|
||||
if (rc <= 0) {
|
||||
@ -1799,7 +1799,7 @@ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index,
|
||||
rc = pin_user_pages_remote(
|
||||
pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE),
|
||||
1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? FOLL_WRITE : 0, &page,
|
||||
NULL, NULL);
|
||||
NULL);
|
||||
mmap_read_unlock(pages->source_mm);
|
||||
if (rc != 1) {
|
||||
if (WARN_ON(rc >= 0))
|
||||
|
@ -3255,7 +3255,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
||||
|
||||
cc->per_bio_data_size = ti->per_io_data_size =
|
||||
ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
|
||||
ARCH_KMALLOC_MINALIGN);
|
||||
ARCH_DMA_MINALIGN);
|
||||
|
||||
ret = mempool_init(&cc->page_pool, BIO_MAX_VECS, crypt_page_alloc, crypt_page_free, cc);
|
||||
if (ret) {
|
||||
|
@ -180,7 +180,7 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
|
||||
data, size, dma->nr_pages);
|
||||
|
||||
err = pin_user_pages(data & PAGE_MASK, dma->nr_pages, gup_flags,
|
||||
dma->pages, NULL);
|
||||
dma->pages);
|
||||
|
||||
if (err != dma->nr_pages) {
|
||||
dma->nr_pages = (err >= 0) ? err : 0;
|
||||
|
@ -185,7 +185,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
|
||||
#else
|
||||
*pageshift = PAGE_SHIFT;
|
||||
#endif
|
||||
if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
|
||||
if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page) <= 0)
|
||||
return -EFAULT;
|
||||
*paddr = page_to_phys(page);
|
||||
put_page(page);
|
||||
@ -228,7 +228,7 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
|
||||
goto err;
|
||||
#ifdef CONFIG_X86_64
|
||||
if (unlikely(pmd_large(*pmdp)))
|
||||
pte = *(pte_t *) pmdp;
|
||||
pte = ptep_get((pte_t *)pmdp);
|
||||
else
|
||||
#endif
|
||||
pte = *pte_offset_kernel(pmdp, vaddr);
|
||||
|
@ -168,6 +168,7 @@ config PCI_P2PDMA
|
||||
#
|
||||
depends on 64BIT
|
||||
select GENERIC_ALLOCATOR
|
||||
select NEED_SG_DMA_FLAGS
|
||||
help
|
||||
Enableѕ drivers to do PCI peer-to-peer transactions to and from
|
||||
BARs that are exposed in other devices that are the part of
|
||||
|
@ -237,7 +237,7 @@ static int spidev_message(struct spidev_data *spidev,
|
||||
/* Ensure that also following allocations from rx_buf/tx_buf will meet
|
||||
* DMA alignment requirements.
|
||||
*/
|
||||
unsigned int len_aligned = ALIGN(u_tmp->len, ARCH_KMALLOC_MINALIGN);
|
||||
unsigned int len_aligned = ALIGN(u_tmp->len, ARCH_DMA_MINALIGN);
|
||||
|
||||
k_tmp->len = u_tmp->len;
|
||||
|
||||
|
@ -34,13 +34,13 @@ void __init usb_init_pool_max(void)
|
||||
{
|
||||
/*
|
||||
* The pool_max values must never be smaller than
|
||||
* ARCH_KMALLOC_MINALIGN.
|
||||
* ARCH_DMA_MINALIGN.
|
||||
*/
|
||||
if (ARCH_KMALLOC_MINALIGN <= 32)
|
||||
if (ARCH_DMA_MINALIGN <= 32)
|
||||
; /* Original value is okay */
|
||||
else if (ARCH_KMALLOC_MINALIGN <= 64)
|
||||
else if (ARCH_DMA_MINALIGN <= 64)
|
||||
pool_max[0] = 64;
|
||||
else if (ARCH_KMALLOC_MINALIGN <= 128)
|
||||
else if (ARCH_DMA_MINALIGN <= 128)
|
||||
pool_max[0] = 0; /* Don't use this pool */
|
||||
else
|
||||
BUILD_BUG(); /* We don't allow this */
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user