mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
Merge drm/drm-next into drm-misc-next
We need to backmerge drm-next to fix the komeda build failure. Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
This commit is contained in:
commit
d588100baa
73
Documentation/devicetree/bindings/display/arm,komeda.txt
Normal file
73
Documentation/devicetree/bindings/display/arm,komeda.txt
Normal file
@ -0,0 +1,73 @@
|
||||
Device Tree bindings for Arm Komeda display driver
|
||||
|
||||
Required properties:
|
||||
- compatible: Should be "arm,mali-d71"
|
||||
- reg: Physical base address and length of the registers in the system
|
||||
- interrupts: the interrupt line number of the device in the system
|
||||
- clocks: A list of phandle + clock-specifier pairs, one for each entry
|
||||
in 'clock-names'
|
||||
- clock-names: A list of clock names. It should contain:
|
||||
- "mclk": for the main processor clock
|
||||
- "pclk": for the APB interface clock
|
||||
- #address-cells: Must be 1
|
||||
- #size-cells: Must be 0
|
||||
|
||||
Required properties for sub-node: pipeline@n
|
||||
Each device contains one or two pipeline sub-nodes (at least one), each
|
||||
pipeline node should provide properties:
|
||||
- reg: Zero-indexed identifier for the pipeline
|
||||
- clocks: A list of phandle + clock-specifier pairs, one for each entry
|
||||
in 'clock-names'
|
||||
- clock-names: should contain:
|
||||
- "pxclk": pixel clock
|
||||
- "aclk": AXI interface clock
|
||||
|
||||
- port: each pipeline connects to an encoder input port. The connection is
|
||||
modeled using the OF graph bindings specified in
|
||||
Documentation/devicetree/bindings/graph.txt
|
||||
|
||||
Optional properties:
|
||||
- memory-region: phandle to a node describing memory (see
|
||||
Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt)
|
||||
to be used for the framebuffer; if not present, the framebuffer may
|
||||
be located anywhere in memory.
|
||||
|
||||
Example:
|
||||
/ {
|
||||
...
|
||||
|
||||
dp0: display@c00000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
compatible = "arm,mali-d71";
|
||||
reg = <0xc00000 0x20000>;
|
||||
interrupts = <0 168 4>;
|
||||
clocks = <&dpu_mclk>, <&dpu_aclk>;
|
||||
clock-names = "mclk", "pclk";
|
||||
|
||||
dp0_pipe0: pipeline@0 {
|
||||
clocks = <&fpgaosc2>, <&dpu_aclk>;
|
||||
clock-names = "pxclk", "aclk";
|
||||
reg = <0>;
|
||||
|
||||
port {
|
||||
dp0_pipe0_out: endpoint {
|
||||
remote-endpoint = <&db_dvi0_in>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
dp0_pipe1: pipeline@1 {
|
||||
clocks = <&fpgaosc2>, <&dpu_aclk>;
|
||||
clock-names = "pxclk", "aclk";
|
||||
reg = <1>;
|
||||
|
||||
port {
|
||||
dp0_pipe1_out: endpoint {
|
||||
remote-endpoint = <&db_dvi1_in>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
...
|
||||
};
|
@ -8,6 +8,7 @@ Required properties:
|
||||
|
||||
- compatible : Shall contain one of
|
||||
- "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
|
||||
- "renesas,r8a7744-lvds" for R8A7744 (RZ/G1N) compatible LVDS encoders
|
||||
- "renesas,r8a774c0-lvds" for R8A774C0 (RZ/G2E) compatible LVDS encoders
|
||||
- "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
|
||||
- "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
|
||||
|
@ -238,6 +238,9 @@ of the following host1x client modules:
|
||||
- nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
|
||||
- nvidia,edid: supplies a binary EDID blob
|
||||
- nvidia,panel: phandle of a display panel
|
||||
- nvidia,xbar-cfg: 5 cells containing the crossbar configuration. Each lane
|
||||
of the SOR, identified by the cell's index, is mapped via the crossbar to
|
||||
the pad specified by the cell's value.
|
||||
|
||||
Optional properties when driving an eDP output:
|
||||
- nvidia,dpaux: phandle to a DisplayPort AUX interface
|
||||
|
@ -2,9 +2,10 @@
|
||||
|
||||
Required properties:
|
||||
- compatible : value should be one of the following:
|
||||
(a) "samsung,exynos4210-rotator" for Rotator IP in Exynos4210
|
||||
(b) "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412
|
||||
(c) "samsung,exynos5250-rotator" for Rotator IP in Exynos5250
|
||||
* "samsung,s5pv210-rotator" for Rotator IP in S5PV210
|
||||
* "samsung,exynos4210-rotator" for Rotator IP in Exynos4210
|
||||
* "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412
|
||||
* "samsung,exynos5250-rotator" for Rotator IP in Exynos5250
|
||||
|
||||
- reg : Physical base address of the IP registers and length of memory
|
||||
mapped region.
|
||||
|
235
Documentation/gpu/afbc.rst
Normal file
235
Documentation/gpu/afbc.rst
Normal file
@ -0,0 +1,235 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
===================================
|
||||
Arm Framebuffer Compression (AFBC)
|
||||
===================================
|
||||
|
||||
AFBC is a proprietary lossless image compression protocol and format.
|
||||
It provides fine-grained random access and minimizes the amount of
|
||||
data transferred between IP blocks.
|
||||
|
||||
AFBC can be enabled on drivers which support it via use of the AFBC
|
||||
format modifiers defined in drm_fourcc.h. See DRM_FORMAT_MOD_ARM_AFBC(*).
|
||||
|
||||
All users of the AFBC modifiers must follow the usage guidelines laid
|
||||
out in this document, to ensure compatibility across different AFBC
|
||||
producers and consumers.
|
||||
|
||||
Components and Ordering
|
||||
=======================
|
||||
|
||||
AFBC streams can contain several components - where a component
|
||||
corresponds to a color channel (i.e. R, G, B, X, A, Y, Cb, Cr).
|
||||
The assignment of input/output color channels must be consistent
|
||||
between the encoder and the decoder for correct operation, otherwise
|
||||
the consumer will interpret the decoded data incorrectly.
|
||||
|
||||
Furthermore, when the lossless colorspace transform is used
|
||||
(AFBC_FORMAT_MOD_YTR, which should be enabled for RGB buffers for
|
||||
maximum compression efficiency), the component order must be:
|
||||
|
||||
* Component 0: R
|
||||
* Component 1: G
|
||||
* Component 2: B
|
||||
|
||||
The component ordering is communicated via the fourcc code in the
|
||||
fourcc:modifier pair. In general, component '0' is considered to
|
||||
reside in the least-significant bits of the corresponding linear
|
||||
format. For example, COMP(bits):
|
||||
|
||||
* DRM_FORMAT_ABGR8888
|
||||
|
||||
* Component 0: R(8)
|
||||
* Component 1: G(8)
|
||||
* Component 2: B(8)
|
||||
* Component 3: A(8)
|
||||
|
||||
* DRM_FORMAT_BGR888
|
||||
|
||||
* Component 0: R(8)
|
||||
* Component 1: G(8)
|
||||
* Component 2: B(8)
|
||||
|
||||
* DRM_FORMAT_YUYV
|
||||
|
||||
* Component 0: Y(8)
|
||||
* Component 1: Cb(8, 2x1 subsampled)
|
||||
* Component 2: Cr(8, 2x1 subsampled)
|
||||
|
||||
In AFBC, 'X' components are not treated any differently from any other
|
||||
component. Therefore, an AFBC buffer with fourcc DRM_FORMAT_XBGR8888
|
||||
encodes with 4 components, like so:
|
||||
|
||||
* DRM_FORMAT_XBGR8888
|
||||
|
||||
* Component 0: R(8)
|
||||
* Component 1: G(8)
|
||||
* Component 2: B(8)
|
||||
* Component 3: X(8)
|
||||
|
||||
Please note, however, that the inclusion of a "wasted" 'X' channel is
|
||||
bad for compression efficiency, and so it's recommended to avoid
|
||||
formats containing 'X' bits. If a fourth component is
|
||||
required/expected by the encoder/decoder, then it is recommended to
|
||||
instead use an equivalent format with alpha, setting all alpha bits to
|
||||
'1'. If there is no requirement for a fourth component, then a format
|
||||
which doesn't include alpha can be used, e.g. DRM_FORMAT_BGR888.
|
||||
|
||||
Number of Planes
|
||||
================
|
||||
|
||||
Formats which are typically multi-planar in linear layouts (e.g. YUV
|
||||
420), can be encoded into one, or multiple, AFBC planes. As with
|
||||
component order, the encoder and decoder must agree about the number
|
||||
of planes in order to correctly decode the buffer. The fourcc code is
|
||||
used to determine the number of encoded planes in an AFBC buffer,
|
||||
matching the number of planes for the linear (unmodified) format.
|
||||
Within each plane, the component ordering also follows the fourcc
|
||||
code:
|
||||
|
||||
For example:
|
||||
|
||||
* DRM_FORMAT_YUYV: nplanes = 1
|
||||
|
||||
* Plane 0:
|
||||
|
||||
* Component 0: Y(8)
|
||||
* Component 1: Cb(8, 2x1 subsampled)
|
||||
* Component 2: Cr(8, 2x1 subsampled)
|
||||
|
||||
* DRM_FORMAT_NV12: nplanes = 2
|
||||
|
||||
* Plane 0:
|
||||
|
||||
* Component 0: Y(8)
|
||||
|
||||
* Plane 1:
|
||||
|
||||
* Component 0: Cb(8, 2x1 subsampled)
|
||||
* Component 1: Cr(8, 2x1 subsampled)
|
||||
|
||||
Cross-device interoperability
|
||||
=============================
|
||||
|
||||
For maximum compatibility across devices, the table below defines
|
||||
canonical formats for use between AFBC-enabled devices. Formats which
|
||||
are listed here must be used exactly as specified when using the AFBC
|
||||
modifiers. Formats which are not listed should be avoided.
|
||||
|
||||
.. flat-table:: AFBC formats
|
||||
|
||||
* - Fourcc code
|
||||
- Description
|
||||
- Planes/Components
|
||||
|
||||
* - DRM_FORMAT_ABGR2101010
|
||||
- 10-bit per component RGB, with 2-bit alpha
|
||||
- Plane 0: 4 components
|
||||
* Component 0: R(10)
|
||||
* Component 1: G(10)
|
||||
* Component 2: B(10)
|
||||
* Component 3: A(2)
|
||||
|
||||
* - DRM_FORMAT_ABGR8888
|
||||
- 8-bit per component RGB, with 8-bit alpha
|
||||
- Plane 0: 4 components
|
||||
* Component 0: R(8)
|
||||
* Component 1: G(8)
|
||||
* Component 2: B(8)
|
||||
* Component 3: A(8)
|
||||
|
||||
* - DRM_FORMAT_BGR888
|
||||
- 8-bit per component RGB
|
||||
- Plane 0: 3 components
|
||||
* Component 0: R(8)
|
||||
* Component 1: G(8)
|
||||
* Component 2: B(8)
|
||||
|
||||
* - DRM_FORMAT_BGR565
|
||||
- 5/6-bit per component RGB
|
||||
- Plane 0: 3 components
|
||||
* Component 0: R(5)
|
||||
* Component 1: G(6)
|
||||
* Component 2: B(5)
|
||||
|
||||
* - DRM_FORMAT_ABGR1555
|
||||
- 5-bit per component RGB, with 1-bit alpha
|
||||
- Plane 0: 4 components
|
||||
* Component 0: R(5)
|
||||
* Component 1: G(5)
|
||||
* Component 2: B(5)
|
||||
* Component 3: A(1)
|
||||
|
||||
* - DRM_FORMAT_VUY888
|
||||
- 8-bit per component YCbCr 444, single plane
|
||||
- Plane 0: 3 components
|
||||
* Component 0: Y(8)
|
||||
* Component 1: Cb(8)
|
||||
* Component 2: Cr(8)
|
||||
|
||||
* - DRM_FORMAT_VUY101010
|
||||
- 10-bit per component YCbCr 444, single plane
|
||||
- Plane 0: 3 components
|
||||
* Component 0: Y(10)
|
||||
* Component 1: Cb(10)
|
||||
* Component 2: Cr(10)
|
||||
|
||||
* - DRM_FORMAT_YUYV
|
||||
- 8-bit per component YCbCr 422, single plane
|
||||
- Plane 0: 3 components
|
||||
* Component 0: Y(8)
|
||||
* Component 1: Cb(8, 2x1 subsampled)
|
||||
* Component 2: Cr(8, 2x1 subsampled)
|
||||
|
||||
* - DRM_FORMAT_NV16
|
||||
- 8-bit per component YCbCr 422, two plane
|
||||
- Plane 0: 1 component
|
||||
* Component 0: Y(8)
|
||||
Plane 1: 2 components
|
||||
* Component 0: Cb(8, 2x1 subsampled)
|
||||
* Component 1: Cr(8, 2x1 subsampled)
|
||||
|
||||
* - DRM_FORMAT_Y210
|
||||
- 10-bit per component YCbCr 422, single plane
|
||||
- Plane 0: 3 components
|
||||
* Component 0: Y(10)
|
||||
* Component 1: Cb(10, 2x1 subsampled)
|
||||
* Component 2: Cr(10, 2x1 subsampled)
|
||||
|
||||
* - DRM_FORMAT_P210
|
||||
- 10-bit per component YCbCr 422, two plane
|
||||
- Plane 0: 1 component
|
||||
* Component 0: Y(10)
|
||||
Plane 1: 2 components
|
||||
* Component 0: Cb(10, 2x1 subsampled)
|
||||
* Component 1: Cr(10, 2x1 subsampled)
|
||||
|
||||
* - DRM_FORMAT_YUV420_8BIT
|
||||
- 8-bit per component YCbCr 420, single plane
|
||||
- Plane 0: 3 components
|
||||
* Component 0: Y(8)
|
||||
* Component 1: Cb(8, 2x2 subsampled)
|
||||
* Component 2: Cr(8, 2x2 subsampled)
|
||||
|
||||
* - DRM_FORMAT_YUV420_10BIT
|
||||
- 10-bit per component YCbCr 420, single plane
|
||||
- Plane 0: 3 components
|
||||
* Component 0: Y(10)
|
||||
* Component 1: Cb(10, 2x2 subsampled)
|
||||
* Component 2: Cr(10, 2x2 subsampled)
|
||||
|
||||
* - DRM_FORMAT_NV12
|
||||
- 8-bit per component YCbCr 420, two plane
|
||||
- Plane 0: 1 component
|
||||
* Component 0: Y(8)
|
||||
Plane 1: 2 components
|
||||
* Component 0: Cb(8, 2x2 subsampled)
|
||||
* Component 1: Cr(8, 2x2 subsampled)
|
||||
|
||||
* - DRM_FORMAT_P010
|
||||
- 10-bit per component YCbCr 420, two plane
|
||||
- Plane 0: 1 component
|
||||
* Component 0: Y(10)
|
||||
Plane 1: 2 components
|
||||
* Component 0: Cb(10, 2x2 subsampled)
|
||||
* Component 1: Cr(10, 2x2 subsampled)
|
@ -17,6 +17,8 @@ GPU Driver Documentation
|
||||
vkms
|
||||
bridge/dw-hdmi
|
||||
xen-front
|
||||
afbc
|
||||
komeda-kms
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
488
Documentation/gpu/komeda-kms.rst
Normal file
488
Documentation/gpu/komeda-kms.rst
Normal file
@ -0,0 +1,488 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============================
|
||||
drm/komeda Arm display driver
|
||||
==============================
|
||||
|
||||
The drm/komeda driver supports the Arm display processor D71 and later products,
|
||||
this document gives a brief overview of driver design: how it works and why
|
||||
it is designed that way.
|
||||
|
||||
Overview of D71 like display IPs
|
||||
================================
|
||||
|
||||
From D71, Arm display IP begins to adopt a flexible and modularized
|
||||
architecture. A display pipeline is made up of multiple individual and
|
||||
functional pipeline stages called components, and every component has some
|
||||
specific capabilities that can give the flowed pipeline pixel data a
|
||||
particular processing.
|
||||
|
||||
Typical D71 components:
|
||||
|
||||
Layer
|
||||
-----
|
||||
Layer is the first pipeline stage, which prepares the pixel data for the next
|
||||
stage. It fetches the pixel from memory, decodes it if it's AFBC, rotates the
|
||||
source image, unpacks or converts YUV pixels to the device internal RGB pixels,
|
||||
then adjusts the color_space of pixels if needed.
|
||||
|
||||
Scaler
|
||||
------
|
||||
As its name suggests, scaler takes responsibility for scaling, and D71 also
|
||||
supports image enhancements by scaler.
|
||||
The usage of scaler is very flexible and can be connected to layer output
|
||||
for layer scaling, or connected to compositor and scale the whole display
|
||||
frame and then feed the output data into wb_layer which will then write it
|
||||
into memory.
|
||||
|
||||
Compositor (compiz)
|
||||
-------------------
|
||||
Compositor blends multiple layers or pixel data flows into one single display
|
||||
frame. Its output frame can be fed into post image processor for showing it on
|
||||
the monitor or fed into wb_layer and written to memory at the same time.
|
||||
User can also insert a scaler between compositor and wb_layer to down scale
|
||||
the display frame first and then write to memory.
|
||||
|
||||
Writeback Layer (wb_layer)
|
||||
--------------------------
|
||||
Writeback layer does the opposite of Layer: it connects to compiz
|
||||
and writes the composition result to memory.
|
||||
|
||||
Post image processor (improc)
|
||||
-----------------------------
|
||||
Post image processor adjusts frame data like gamma and color space to fit the
|
||||
requirements of the monitor.
|
||||
|
||||
Timing controller (timing_ctrlr)
|
||||
--------------------------------
|
||||
Final stage of display pipeline, Timing controller is not for the pixel
|
||||
handling, but only for controlling the display timing.
|
||||
|
||||
Merger
|
||||
------
|
||||
D71 scaler mostly only has the half horizontal input/output capabilities
|
||||
compared with Layer, like if Layer supports 4K input size, the scaler only can
|
||||
support 2K input/output at the same time. To achieve the full frame scaling, D71
|
||||
introduces Layer Split, which splits the whole image to two half parts and feeds
|
||||
them to two Layers A and B, and does the scaling independently. After scaling
|
||||
the result needs to be fed to merger to merge two part images together, and then
|
||||
output merged result to compiz.
|
||||
|
||||
Splitter
|
||||
--------
|
||||
Similar to Layer Split, but Splitter is used for writeback, which splits the
|
||||
compiz result to two parts and then feeds them to two scalers.
|
||||
|
||||
Possible D71 Pipeline usage
|
||||
===========================
|
||||
|
||||
Benefitting from the modularized architecture, D71 pipelines can be easily
|
||||
adjusted to fit different usages. And D71 has two pipelines, which support two
|
||||
types of working mode:
|
||||
|
||||
- Dual display mode
|
||||
Two pipelines work independently and separately to drive two display outputs.
|
||||
|
||||
- Single display mode
|
||||
Two pipelines work together to drive only one display output.
|
||||
|
||||
In this mode, pipeline_B doesn't work independently, but outputs its
|
||||
composition result into pipeline_A, and its pixel timing is also derived from
|
||||
pipeline_A.timing_ctrlr. The pipeline_B works just like a "slave" of
|
||||
pipeline_A(master)
|
||||
|
||||
Single pipeline data flow
|
||||
-------------------------
|
||||
|
||||
.. kernel-render:: DOT
|
||||
:alt: Single pipeline digraph
|
||||
:caption: Single pipeline data flow
|
||||
|
||||
digraph single_ppl {
|
||||
rankdir=LR;
|
||||
|
||||
subgraph {
|
||||
"Memory";
|
||||
"Monitor";
|
||||
}
|
||||
|
||||
subgraph cluster_pipeline {
|
||||
style=dashed
|
||||
node [shape=box]
|
||||
{
|
||||
node [bgcolor=grey style=dashed]
|
||||
"Scaler-0";
|
||||
"Scaler-1";
|
||||
"Scaler-0/1"
|
||||
}
|
||||
|
||||
node [bgcolor=grey style=filled]
|
||||
"Layer-0" -> "Scaler-0"
|
||||
"Layer-1" -> "Scaler-0"
|
||||
"Layer-2" -> "Scaler-1"
|
||||
"Layer-3" -> "Scaler-1"
|
||||
|
||||
"Layer-0" -> "Compiz"
|
||||
"Layer-1" -> "Compiz"
|
||||
"Layer-2" -> "Compiz"
|
||||
"Layer-3" -> "Compiz"
|
||||
"Scaler-0" -> "Compiz"
|
||||
"Scaler-1" -> "Compiz"
|
||||
|
||||
"Compiz" -> "Scaler-0/1" -> "Wb_layer"
|
||||
"Compiz" -> "Improc" -> "Timing Controller"
|
||||
}
|
||||
|
||||
"Wb_layer" -> "Memory"
|
||||
"Timing Controller" -> "Monitor"
|
||||
}
|
||||
|
||||
Dual pipeline with Slave enabled
|
||||
--------------------------------
|
||||
|
||||
.. kernel-render:: DOT
|
||||
:alt: Slave pipeline digraph
|
||||
:caption: Slave pipeline enabled data flow
|
||||
|
||||
digraph slave_ppl {
|
||||
rankdir=LR;
|
||||
|
||||
subgraph {
|
||||
"Memory";
|
||||
"Monitor";
|
||||
}
|
||||
node [shape=box]
|
||||
subgraph cluster_pipeline_slave {
|
||||
style=dashed
|
||||
label="Slave Pipeline_B"
|
||||
node [shape=box]
|
||||
{
|
||||
node [bgcolor=grey style=dashed]
|
||||
"Slave.Scaler-0";
|
||||
"Slave.Scaler-1";
|
||||
}
|
||||
|
||||
node [bgcolor=grey style=filled]
|
||||
"Slave.Layer-0" -> "Slave.Scaler-0"
|
||||
"Slave.Layer-1" -> "Slave.Scaler-0"
|
||||
"Slave.Layer-2" -> "Slave.Scaler-1"
|
||||
"Slave.Layer-3" -> "Slave.Scaler-1"
|
||||
|
||||
"Slave.Layer-0" -> "Slave.Compiz"
|
||||
"Slave.Layer-1" -> "Slave.Compiz"
|
||||
"Slave.Layer-2" -> "Slave.Compiz"
|
||||
"Slave.Layer-3" -> "Slave.Compiz"
|
||||
"Slave.Scaler-0" -> "Slave.Compiz"
|
||||
"Slave.Scaler-1" -> "Slave.Compiz"
|
||||
}
|
||||
|
||||
subgraph cluster_pipeline_master {
|
||||
style=dashed
|
||||
label="Master Pipeline_A"
|
||||
node [shape=box]
|
||||
{
|
||||
node [bgcolor=grey style=dashed]
|
||||
"Scaler-0";
|
||||
"Scaler-1";
|
||||
"Scaler-0/1"
|
||||
}
|
||||
|
||||
node [bgcolor=grey style=filled]
|
||||
"Layer-0" -> "Scaler-0"
|
||||
"Layer-1" -> "Scaler-0"
|
||||
"Layer-2" -> "Scaler-1"
|
||||
"Layer-3" -> "Scaler-1"
|
||||
|
||||
"Slave.Compiz" -> "Compiz"
|
||||
"Layer-0" -> "Compiz"
|
||||
"Layer-1" -> "Compiz"
|
||||
"Layer-2" -> "Compiz"
|
||||
"Layer-3" -> "Compiz"
|
||||
"Scaler-0" -> "Compiz"
|
||||
"Scaler-1" -> "Compiz"
|
||||
|
||||
"Compiz" -> "Scaler-0/1" -> "Wb_layer"
|
||||
"Compiz" -> "Improc" -> "Timing Controller"
|
||||
}
|
||||
|
||||
"Wb_layer" -> "Memory"
|
||||
"Timing Controller" -> "Monitor"
|
||||
}
|
||||
|
||||
Sub-pipelines for input and output
|
||||
----------------------------------
|
||||
|
||||
A complete display pipeline can be easily divided into three sub-pipelines
|
||||
according to the in/out usage.
|
||||
|
||||
Layer(input) pipeline
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. kernel-render:: DOT
|
||||
:alt: Layer data digraph
|
||||
:caption: Layer (input) data flow
|
||||
|
||||
digraph layer_data_flow {
|
||||
rankdir=LR;
|
||||
node [shape=box]
|
||||
|
||||
{
|
||||
node [bgcolor=grey style=dashed]
|
||||
"Scaler-n";
|
||||
}
|
||||
|
||||
"Layer-n" -> "Scaler-n" -> "Compiz"
|
||||
}
|
||||
|
||||
.. kernel-render:: DOT
|
||||
:alt: Layer Split digraph
|
||||
:caption: Layer Split pipeline
|
||||
|
||||
digraph layer_data_flow {
|
||||
rankdir=LR;
|
||||
node [shape=box]
|
||||
|
||||
"Layer-0/1" -> "Scaler-0" -> "Merger"
|
||||
"Layer-2/3" -> "Scaler-1" -> "Merger"
|
||||
"Merger" -> "Compiz"
|
||||
}
|
||||
|
||||
Writeback(output) pipeline
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. kernel-render:: DOT
|
||||
:alt: writeback digraph
|
||||
:caption: Writeback(output) data flow
|
||||
|
||||
digraph writeback_data_flow {
|
||||
rankdir=LR;
|
||||
node [shape=box]
|
||||
|
||||
{
|
||||
node [bgcolor=grey style=dashed]
|
||||
"Scaler-n";
|
||||
}
|
||||
|
||||
"Compiz" -> "Scaler-n" -> "Wb_layer"
|
||||
}
|
||||
|
||||
.. kernel-render:: DOT
|
||||
:alt: split writeback digraph
|
||||
:caption: Writeback(output) Split data flow
|
||||
|
||||
digraph writeback_data_flow {
|
||||
rankdir=LR;
|
||||
node [shape=box]
|
||||
|
||||
"Compiz" -> "Splitter"
|
||||
"Splitter" -> "Scaler-0" -> "Merger"
|
||||
"Splitter" -> "Scaler-1" -> "Merger"
|
||||
"Merger" -> "Wb_layer"
|
||||
}
|
||||
|
||||
Display output pipeline
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. kernel-render:: DOT
|
||||
:alt: display digraph
|
||||
:caption: display output data flow
|
||||
|
||||
digraph single_ppl {
|
||||
rankdir=LR;
|
||||
node [shape=box]
|
||||
|
||||
"Compiz" -> "Improc" -> "Timing Controller"
|
||||
}
|
||||
|
||||
In the following section we'll see these three sub-pipelines will be handled
|
||||
by KMS-plane/wb_conn/crtc respectively.
|
||||
|
||||
Komeda Resource abstraction
|
||||
===========================
|
||||
|
||||
struct komeda_pipeline/component
|
||||
--------------------------------
|
||||
|
||||
To fully utilize and easily access/configure the HW, the driver side also uses
|
||||
a similar architecture: Pipeline/Component to describe the HW features and
|
||||
capabilities, and a specific component includes two parts:
|
||||
|
||||
- Data flow controlling.
|
||||
- Specific component capabilities and features.
|
||||
|
||||
So the driver defines a common header struct komeda_component to describe the
|
||||
data flow control and all specific components are a subclass of this base
|
||||
structure.
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
|
||||
:internal:
|
||||
|
||||
Resource discovery and initialization
|
||||
=====================================
|
||||
|
||||
Pipeline and component are used to describe how to handle the pixel data. We
|
||||
still need a @struct komeda_dev to describe the whole view of the device, and
|
||||
the control-abilities of the device.
|
||||
|
||||
We have &komeda_dev, &komeda_pipeline, &komeda_component. Now fill devices with
|
||||
pipelines. Since komeda is not for D71 only but also intended for later products,
|
||||
of course we’d better share as much as possible between different products. To
|
||||
achieve this, split the komeda device into two layers: CORE and CHIP.
|
||||
|
||||
- CORE: for common features and capabilities handling.
|
||||
- CHIP: for register programming and HW specific feature (limitation) handling.
|
||||
|
||||
CORE can access CHIP by three chip function structures:
|
||||
|
||||
- struct komeda_dev_funcs
|
||||
- struct komeda_pipeline_funcs
|
||||
- struct komeda_component_funcs
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_dev.h
|
||||
:internal:
|
||||
|
||||
Format handling
|
||||
===============
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h
|
||||
:internal:
|
||||
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
|
||||
:internal:
|
||||
|
||||
Attach komeda_dev to DRM-KMS
|
||||
============================
|
||||
|
||||
Komeda abstracts resources by pipeline/component, but DRM-KMS uses
|
||||
crtc/plane/connector. One KMS-obj cannot represent only one single component,
|
||||
since the requirements of a single KMS object cannot simply be achieved by a
|
||||
single component, usually that needs multiple components to fit the requirement.
|
||||
Like set mode, gamma, ctm for KMS all target on CRTC-obj, but komeda needs
|
||||
compiz, improc and timing_ctrlr to work together to fit these requirements.
|
||||
And a KMS-Plane may require multiple komeda resources: layer/scaler/compiz.
|
||||
|
||||
So, one KMS-Obj represents a sub-pipeline of komeda resources.
|
||||
|
||||
- Plane: `Layer(input) pipeline`_
|
||||
- Wb_connector: `Writeback(output) pipeline`_
|
||||
- Crtc: `Display output pipeline`_
|
||||
|
||||
So, for komeda, we treat KMS crtc/plane/connector as users of pipeline and
|
||||
component, and at any one time a pipeline/component only can be used by one
|
||||
user. And pipeline/component will be treated as private object of DRM-KMS; the
|
||||
state will be managed by drm_atomic_state as well.
|
||||
|
||||
How to map plane to Layer(input) pipeline
|
||||
-----------------------------------------
|
||||
|
||||
Komeda has multiple Layer input pipelines, see:
|
||||
- `Single pipeline data flow`_
|
||||
- `Dual pipeline with Slave enabled`_
|
||||
|
||||
The easiest way is binding a plane to a fixed Layer pipeline, but consider the
|
||||
komeda capabilities:
|
||||
|
||||
- Layer Split, See `Layer(input) pipeline`_
|
||||
|
||||
Layer_Split is quite a complicated feature, which splits a big image into two
|
||||
parts and handles it by two layers and two scalers individually. But it
|
||||
imports an edge problem or effect in the middle of the image after the split.
|
||||
To avoid such a problem, it needs a complicated Split calculation and some
|
||||
special configurations to the layer and scaler. We'd better hide such HW
|
||||
related complexity from user mode.
|
||||
|
||||
- Slave pipeline, See `Dual pipeline with Slave enabled`_
|
||||
|
||||
Since the compiz component doesn't output alpha value, the slave pipeline
|
||||
only can be used for bottom layers composition. The komeda driver wants to
|
||||
hide this limitation from the user. The way to do this is to pick a suitable
|
||||
Layer according to plane_state->zpos.
|
||||
|
||||
So for komeda, the KMS-plane doesn't represent a fixed komeda layer pipeline,
|
||||
but multiple Layers with same capabilities. Komeda will select one or more
|
||||
Layers to fit the requirement of one KMS-plane.
|
||||
|
||||
Make component/pipeline to be drm_private_obj
|
||||
---------------------------------------------
|
||||
|
||||
Add :c:type:`drm_private_obj` to :c:type:`komeda_component`, :c:type:`komeda_pipeline`
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct komeda_component {
|
||||
struct drm_private_obj obj;
|
||||
...
|
||||
}
|
||||
|
||||
struct komeda_pipeline {
|
||||
struct drm_private_obj obj;
|
||||
...
|
||||
}
|
||||
|
||||
Tracking component_state/pipeline_state by drm_atomic_state
|
||||
-----------------------------------------------------------
|
||||
|
||||
Add :c:type:`drm_private_state` and user to :c:type:`komeda_component_state`,
|
||||
:c:type:`komeda_pipeline_state`
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct komeda_component_state {
|
||||
struct drm_private_state obj;
|
||||
void *binding_user;
|
||||
...
|
||||
}
|
||||
|
||||
struct komeda_pipeline_state {
|
||||
struct drm_private_state obj;
|
||||
struct drm_crtc *crtc;
|
||||
...
|
||||
}
|
||||
|
||||
komeda component validation
|
||||
---------------------------
|
||||
|
||||
Komeda has multiple types of components, but the process of validation is
|
||||
similar, usually including the following steps:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int komeda_xxxx_validate(struct komeda_component_xxx xxx_comp,
|
||||
struct komeda_component_output *input_dflow,
|
||||
struct drm_plane/crtc/connector *user,
|
||||
struct drm_plane/crtc/connector_state *user_state)
|
||||
{
|
||||
setup 1: check if component is needed, like the scaler is optional depending
|
||||
on the user_state; if unneeded, just return, and the caller will
|
||||
put the data flow into next stage.
|
||||
Setup 2: check user_state with component features and capabilities to see
|
||||
if requirements can be met; if not, return fail.
|
||||
Setup 3: get component_state from drm_atomic_state, and try to set
|
||||
user to component; fail if component has been assigned to another
|
||||
user already.
|
||||
Setup 4: configure the component_state, like set its input component,
|
||||
convert user_state to component specific state.
|
||||
Setup 5: adjust the input_dflow and prepare it for the next stage.
|
||||
}
|
||||
|
||||
komeda_kms Abstraction
|
||||
----------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_kms.h
|
||||
:internal:
|
||||
|
||||
komeda_kms Functions
|
||||
--------------------
|
||||
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
|
||||
:internal:
|
||||
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_plane.c
|
||||
:internal:
|
||||
|
||||
Build komeda to be a Linux module driver
|
||||
========================================
|
||||
|
||||
Now we have two level devices:
|
||||
|
||||
- komeda_dev: describes the real display hardware.
|
||||
- komeda_kms_dev: attaches or connects komeda_dev to DRM-KMS.
|
||||
|
||||
All komeda operations are supplied or operated by komeda_dev or komeda_kms_dev,
|
||||
the module driver is only a simple wrapper to pass the Linux command
|
||||
(probe/remove/pm) into komeda_dev or komeda_kms_dev.
|
15
MAINTAINERS
15
MAINTAINERS
@ -1133,13 +1133,26 @@ S: Supported
|
||||
F: drivers/gpu/drm/arm/hdlcd_*
|
||||
F: Documentation/devicetree/bindings/display/arm,hdlcd.txt
|
||||
|
||||
ARM KOMEDA DRM-KMS DRIVER
|
||||
M: James (Qian) Wang <james.qian.wang@arm.com>
|
||||
M: Liviu Dudau <liviu.dudau@arm.com>
|
||||
L: Mali DP Maintainers <malidp@foss.arm.com>
|
||||
S: Supported
|
||||
T: git git://linux-arm.org/linux-ld.git for-upstream/mali-dp
|
||||
F: drivers/gpu/drm/arm/display/include/
|
||||
F: drivers/gpu/drm/arm/display/komeda/
|
||||
F: Documentation/devicetree/bindings/display/arm,komeda.txt
|
||||
F: Documentation/gpu/komeda-kms.rst
|
||||
|
||||
ARM MALI-DP DRM DRIVER
|
||||
M: Liviu Dudau <liviu.dudau@arm.com>
|
||||
M: Brian Starkey <brian.starkey@arm.com>
|
||||
M: Mali DP Maintainers <malidp@foss.arm.com>
|
||||
L: Mali DP Maintainers <malidp@foss.arm.com>
|
||||
S: Supported
|
||||
T: git git://linux-arm.org/linux-ld.git for-upstream/mali-dp
|
||||
F: drivers/gpu/drm/arm/
|
||||
F: Documentation/devicetree/bindings/display/arm,malidp.txt
|
||||
F: Documentation/gpu/afbc.rst
|
||||
|
||||
ARM MFM AND FLOPPY DRIVERS
|
||||
M: Ian Molton <spyro@f2s.com>
|
||||
|
@ -51,7 +51,7 @@ obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
|
||||
obj-$(CONFIG_DRM) += drm.o
|
||||
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
|
||||
obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
|
||||
obj-$(CONFIG_DRM_ARM) += arm/
|
||||
obj-y += arm/
|
||||
obj-$(CONFIG_DRM_TTM) += ttm/
|
||||
obj-$(CONFIG_DRM_SCHED) += scheduler/
|
||||
obj-$(CONFIG_DRM_TDFX) += tdfx/
|
||||
@ -81,7 +81,7 @@ obj-$(CONFIG_DRM_UDL) += udl/
|
||||
obj-$(CONFIG_DRM_AST) += ast/
|
||||
obj-$(CONFIG_DRM_ARMADA) += armada/
|
||||
obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/
|
||||
obj-$(CONFIG_DRM_RCAR_DU) += rcar-du/
|
||||
obj-y += rcar-du/
|
||||
obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
|
||||
obj-y += omapdrm/
|
||||
obj-$(CONFIG_DRM_SUN4I) += sun4i/
|
||||
|
@ -57,7 +57,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
|
||||
|
||||
# add asic specific block
|
||||
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
|
||||
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
|
||||
dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
|
||||
|
||||
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
|
||||
|
||||
|
@ -542,6 +542,11 @@ struct amdgpu_asic_funcs {
|
||||
bool (*need_full_reset)(struct amdgpu_device *adev);
|
||||
/* initialize doorbell layout for specific asic*/
|
||||
void (*init_doorbell_index)(struct amdgpu_device *adev);
|
||||
/* PCIe bandwidth usage */
|
||||
void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1);
|
||||
/* do we need to reset the asic at init time (e.g., kexec) */
|
||||
bool (*need_reset_on_init)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
/*
|
||||
@ -634,7 +639,7 @@ struct amdgpu_nbio_funcs {
|
||||
void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
|
||||
u32 (*get_memsize)(struct amdgpu_device *adev);
|
||||
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
|
||||
bool use_doorbell, int doorbell_index);
|
||||
bool use_doorbell, int doorbell_index, int doorbell_size);
|
||||
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
|
||||
bool enable);
|
||||
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
|
||||
@ -1042,6 +1047,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
||||
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
|
||||
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
|
||||
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
|
||||
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
|
||||
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
|
||||
|
||||
/* Common functions */
|
||||
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
||||
|
@ -28,8 +28,6 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
const struct kgd2kfd_calls *kgd2kfd;
|
||||
|
||||
static const unsigned int compute_vmid_bitmap = 0xFF00;
|
||||
|
||||
/* Total memory size in system memory and all GPU VRAM. Used to
|
||||
@ -47,12 +45,9 @@ int amdgpu_amdkfd_init(void)
|
||||
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
|
||||
|
||||
#ifdef CONFIG_HSA_AMD
|
||||
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
|
||||
if (ret)
|
||||
kgd2kfd = NULL;
|
||||
ret = kgd2kfd_init();
|
||||
amdgpu_amdkfd_gpuvm_init_mem_limits();
|
||||
#else
|
||||
kgd2kfd = NULL;
|
||||
ret = -ENOENT;
|
||||
#endif
|
||||
|
||||
@ -61,17 +56,13 @@ int amdgpu_amdkfd_init(void)
|
||||
|
||||
void amdgpu_amdkfd_fini(void)
|
||||
{
|
||||
if (kgd2kfd)
|
||||
kgd2kfd->exit();
|
||||
kgd2kfd_exit();
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
|
||||
{
|
||||
const struct kfd2kgd_calls *kfd2kgd;
|
||||
|
||||
if (!kgd2kfd)
|
||||
return;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
#ifdef CONFIG_DRM_AMDGPU_CIK
|
||||
case CHIP_KAVERI:
|
||||
@ -98,8 +89,8 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
|
||||
return;
|
||||
}
|
||||
|
||||
adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev,
|
||||
adev->pdev, kfd2kgd);
|
||||
adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
|
||||
adev->pdev, kfd2kgd);
|
||||
|
||||
if (adev->kfd.dev)
|
||||
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
|
||||
@ -182,7 +173,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
&gpu_resources.doorbell_start_offset);
|
||||
|
||||
if (adev->asic_type < CHIP_VEGA10) {
|
||||
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
|
||||
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -197,13 +188,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
* can use each doorbell assignment twice.
|
||||
*/
|
||||
gpu_resources.sdma_doorbell[0][i] =
|
||||
adev->doorbell_index.sdma_engine0 + (i >> 1);
|
||||
adev->doorbell_index.sdma_engine[0] + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[0][i+1] =
|
||||
adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1);
|
||||
adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i] =
|
||||
adev->doorbell_index.sdma_engine1 + (i >> 1);
|
||||
adev->doorbell_index.sdma_engine[1] + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i+1] =
|
||||
adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1);
|
||||
adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
|
||||
}
|
||||
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
|
||||
* SDMA, IH and VCN. So don't use them for the CP.
|
||||
@ -211,14 +202,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
gpu_resources.reserved_doorbell_mask = 0x1e0;
|
||||
gpu_resources.reserved_doorbell_val = 0x0e0;
|
||||
|
||||
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
|
||||
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->kfd.dev) {
|
||||
kgd2kfd->device_exit(adev->kfd.dev);
|
||||
kgd2kfd_device_exit(adev->kfd.dev);
|
||||
adev->kfd.dev = NULL;
|
||||
}
|
||||
}
|
||||
@ -227,13 +218,13 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
|
||||
const void *ih_ring_entry)
|
||||
{
|
||||
if (adev->kfd.dev)
|
||||
kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry);
|
||||
kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->kfd.dev)
|
||||
kgd2kfd->suspend(adev->kfd.dev);
|
||||
kgd2kfd_suspend(adev->kfd.dev);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
|
||||
@ -241,7 +232,7 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->resume(adev->kfd.dev);
|
||||
r = kgd2kfd_resume(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
@ -251,7 +242,7 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->pre_reset(adev->kfd.dev);
|
||||
r = kgd2kfd_pre_reset(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
@ -261,7 +252,7 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->post_reset(adev->kfd.dev);
|
||||
r = kgd2kfd_post_reset(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
@ -619,4 +610,47 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
|
||||
const struct kfd2kgd_calls *f2g)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
const struct kgd2kfd_shared_resources *gpu_resources)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void kgd2kfd_device_exit(struct kfd_dev *kfd)
|
||||
{
|
||||
}
|
||||
|
||||
void kgd2kfd_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
void kgd2kfd_suspend(struct kfd_dev *kfd)
|
||||
{
|
||||
}
|
||||
|
||||
int kgd2kfd_resume(struct kfd_dev *kfd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kgd2kfd_post_reset(struct kfd_dev *kfd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
@ -33,7 +33,6 @@
|
||||
#include "amdgpu_sync.h"
|
||||
#include "amdgpu_vm.h"
|
||||
|
||||
extern const struct kgd2kfd_calls *kgd2kfd;
|
||||
extern uint64_t amdgpu_amdkfd_total_mem_size;
|
||||
|
||||
struct amdgpu_device;
|
||||
@ -214,4 +213,22 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
|
||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
||||
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
|
||||
|
||||
/* KGD2KFD callbacks */
|
||||
int kgd2kfd_init(void);
|
||||
void kgd2kfd_exit(void);
|
||||
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
|
||||
const struct kfd2kgd_calls *f2g);
|
||||
bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
const struct kgd2kfd_shared_resources *gpu_resources);
|
||||
void kgd2kfd_device_exit(struct kfd_dev *kfd);
|
||||
void kgd2kfd_suspend(struct kfd_dev *kfd);
|
||||
int kgd2kfd_resume(struct kfd_dev *kfd);
|
||||
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
|
||||
int kgd2kfd_post_reset(struct kfd_dev *kfd);
|
||||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||
int kgd2kfd_quiesce_mm(struct mm_struct *mm);
|
||||
int kgd2kfd_resume_mm(struct mm_struct *mm);
|
||||
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
|
||||
struct dma_fence *fence);
|
||||
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
@ -122,7 +122,7 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
|
||||
if (dma_fence_is_signaled(f))
|
||||
return true;
|
||||
|
||||
if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
|
||||
if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -1790,7 +1790,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
|
||||
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
|
||||
if (evicted_bos == 1) {
|
||||
/* First eviction, stop the queues */
|
||||
r = kgd2kfd->quiesce_mm(mm);
|
||||
r = kgd2kfd_quiesce_mm(mm);
|
||||
if (r)
|
||||
pr_err("Failed to quiesce KFD\n");
|
||||
schedule_delayed_work(&process_info->restore_userptr_work,
|
||||
@ -2082,7 +2082,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
|
||||
evicted_bos)
|
||||
goto unlock_out;
|
||||
evicted_bos = 0;
|
||||
if (kgd2kfd->resume_mm(mm)) {
|
||||
if (kgd2kfd_resume_mm(mm)) {
|
||||
pr_err("%s: Failed to resume KFD\n", __func__);
|
||||
/* No recovery from this failure. Probably the CP is
|
||||
* hanging. No point trying again.
|
||||
|
@ -576,6 +576,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
|
||||
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
|
||||
{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
|
||||
{ 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX },
|
||||
{ 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX },
|
||||
{ 0, 0, 0, 0, 0 },
|
||||
};
|
||||
|
||||
|
@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
|
||||
case AMDGPU_CHUNK_ID_DEPENDENCIES:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
|
||||
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -1090,6 +1091,15 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
|
||||
|
||||
fence = amdgpu_ctx_get_fence(ctx, entity,
|
||||
deps[i].handle);
|
||||
|
||||
if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
|
||||
struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
|
||||
struct dma_fence *old = fence;
|
||||
|
||||
fence = dma_fence_get(&s_fence->scheduled);
|
||||
dma_fence_put(old);
|
||||
}
|
||||
|
||||
if (IS_ERR(fence)) {
|
||||
r = PTR_ERR(fence);
|
||||
amdgpu_ctx_put(ctx);
|
||||
@ -1177,7 +1187,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
|
||||
|
||||
chunk = &p->chunks[i];
|
||||
|
||||
if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
|
||||
if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
|
||||
chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
|
||||
r = amdgpu_cs_process_fence_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
|
@ -1645,7 +1645,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
if (r) {
|
||||
DRM_ERROR("sw_init of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
return r;
|
||||
goto init_failed;
|
||||
}
|
||||
adev->ip_blocks[i].status.sw = true;
|
||||
|
||||
@ -1654,17 +1654,17 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
r = amdgpu_device_vram_scratch_init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
|
||||
return r;
|
||||
goto init_failed;
|
||||
}
|
||||
r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
|
||||
if (r) {
|
||||
DRM_ERROR("hw_init %d failed %d\n", i, r);
|
||||
return r;
|
||||
goto init_failed;
|
||||
}
|
||||
r = amdgpu_device_wb_init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
|
||||
return r;
|
||||
goto init_failed;
|
||||
}
|
||||
adev->ip_blocks[i].status.hw = true;
|
||||
|
||||
@ -1675,7 +1675,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
AMDGPU_CSA_SIZE);
|
||||
if (r) {
|
||||
DRM_ERROR("allocate CSA failed %d\n", r);
|
||||
return r;
|
||||
goto init_failed;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1683,28 +1683,32 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
|
||||
r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
|
||||
if (r)
|
||||
return r;
|
||||
goto init_failed;
|
||||
|
||||
r = amdgpu_device_ip_hw_init_phase1(adev);
|
||||
if (r)
|
||||
return r;
|
||||
goto init_failed;
|
||||
|
||||
r = amdgpu_device_fw_loading(adev);
|
||||
if (r)
|
||||
return r;
|
||||
goto init_failed;
|
||||
|
||||
r = amdgpu_device_ip_hw_init_phase2(adev);
|
||||
if (r)
|
||||
return r;
|
||||
goto init_failed;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
amdgpu_xgmi_add_device(adev);
|
||||
amdgpu_amdkfd_device_init(adev);
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
init_failed:
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (!r)
|
||||
amdgpu_virt_init_data_exchange(adev);
|
||||
amdgpu_virt_release_full_gpu(adev, true);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2131,7 +2135,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
|
||||
continue;
|
||||
|
||||
r = block->version->funcs->hw_init(adev);
|
||||
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
|
||||
DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
@ -2165,7 +2169,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
|
||||
continue;
|
||||
|
||||
r = block->version->funcs->hw_init(adev);
|
||||
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
|
||||
DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
@ -2546,6 +2550,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
/* detect if we are with an SRIOV vbios */
|
||||
amdgpu_device_detect_sriov_bios(adev);
|
||||
|
||||
/* check if we need to reset the asic
|
||||
* E.g., driver was not cleanly unloaded previously, etc.
|
||||
*/
|
||||
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
|
||||
r = amdgpu_asic_reset(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "asic reset on init failed\n");
|
||||
goto failed;
|
||||
}
|
||||
}
|
||||
|
||||
/* Post card if necessary */
|
||||
if (amdgpu_device_need_post(adev)) {
|
||||
if (!adev->bios) {
|
||||
@ -2610,6 +2625,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
}
|
||||
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
|
||||
if (amdgpu_virt_request_full_gpu(adev, false))
|
||||
amdgpu_virt_release_full_gpu(adev, false);
|
||||
goto failed;
|
||||
}
|
||||
|
||||
@ -2632,9 +2649,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
goto failed;
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
amdgpu_virt_init_data_exchange(adev);
|
||||
|
||||
amdgpu_fbdev_init(adev);
|
||||
|
||||
r = amdgpu_pm_sysfs_init(adev);
|
||||
@ -2798,7 +2812,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
|
||||
struct drm_framebuffer *fb = crtc->primary->fb;
|
||||
struct amdgpu_bo *robj;
|
||||
|
||||
if (amdgpu_crtc->cursor_bo) {
|
||||
if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
|
||||
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
|
||||
r = amdgpu_bo_reserve(aobj, true);
|
||||
if (r == 0) {
|
||||
@ -2906,7 +2920,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
|
||||
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
|
||||
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
|
||||
|
||||
if (amdgpu_crtc->cursor_bo) {
|
||||
if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
|
||||
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
|
||||
r = amdgpu_bo_reserve(aobj, true);
|
||||
if (r == 0) {
|
||||
@ -3226,6 +3240,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
|
||||
r = amdgpu_ib_ring_tests(adev);
|
||||
|
||||
error:
|
||||
amdgpu_virt_init_data_exchange(adev);
|
||||
amdgpu_virt_release_full_gpu(adev, true);
|
||||
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
|
||||
atomic_inc(&adev->vram_lost_counter);
|
||||
@ -3298,17 +3313,15 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
|
||||
if (!ring || !ring->sched.thread)
|
||||
continue;
|
||||
|
||||
kthread_park(ring->sched.thread);
|
||||
|
||||
if (job && job->base.sched != &ring->sched)
|
||||
continue;
|
||||
|
||||
drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
|
||||
drm_sched_stop(&ring->sched);
|
||||
|
||||
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
|
||||
amdgpu_fence_driver_force_completion(ring);
|
||||
}
|
||||
|
||||
if(job)
|
||||
drm_sched_increase_karma(&job->base);
|
||||
|
||||
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
@ -3454,14 +3467,10 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
|
||||
if (!ring || !ring->sched.thread)
|
||||
continue;
|
||||
|
||||
/* only need recovery sched of the given job's ring
|
||||
* or all rings (in the case @job is NULL)
|
||||
* after above amdgpu_reset accomplished
|
||||
*/
|
||||
if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
|
||||
drm_sched_job_recovery(&ring->sched);
|
||||
if (!adev->asic_reset_res)
|
||||
drm_sched_resubmit_jobs(&ring->sched);
|
||||
|
||||
kthread_unpark(ring->sched.thread);
|
||||
drm_sched_start(&ring->sched, !adev->asic_reset_res);
|
||||
}
|
||||
|
||||
if (!amdgpu_device_has_dc_support(adev)) {
|
||||
@ -3521,9 +3530,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
* by different nodes. No point also since the one node already executing
|
||||
* reset will also reset all the other nodes in the hive.
|
||||
*/
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
hive = amdgpu_get_xgmi_hive(adev, 0);
|
||||
if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
!mutex_trylock(&hive->hive_lock))
|
||||
!mutex_trylock(&hive->reset_lock))
|
||||
return 0;
|
||||
|
||||
/* Start with adev pre asic reset first for soft reset check.*/
|
||||
@ -3602,13 +3611,45 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
mutex_unlock(&hive->reset_lock);
|
||||
|
||||
if (r)
|
||||
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
|
||||
enum pci_bus_speed *speed,
|
||||
enum pcie_link_width *width)
|
||||
{
|
||||
struct pci_dev *pdev = adev->pdev;
|
||||
enum pci_bus_speed cur_speed;
|
||||
enum pcie_link_width cur_width;
|
||||
|
||||
*speed = PCI_SPEED_UNKNOWN;
|
||||
*width = PCIE_LNK_WIDTH_UNKNOWN;
|
||||
|
||||
while (pdev) {
|
||||
cur_speed = pcie_get_speed_cap(pdev);
|
||||
cur_width = pcie_get_width_cap(pdev);
|
||||
|
||||
if (cur_speed != PCI_SPEED_UNKNOWN) {
|
||||
if (*speed == PCI_SPEED_UNKNOWN)
|
||||
*speed = cur_speed;
|
||||
else if (cur_speed < *speed)
|
||||
*speed = cur_speed;
|
||||
}
|
||||
|
||||
if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
|
||||
if (*width == PCIE_LNK_WIDTH_UNKNOWN)
|
||||
*width = cur_width;
|
||||
else if (cur_width < *width)
|
||||
*width = cur_width;
|
||||
}
|
||||
pdev = pci_upstream_bridge(pdev);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
|
||||
*
|
||||
@ -3621,8 +3662,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
|
||||
{
|
||||
struct pci_dev *pdev;
|
||||
enum pci_bus_speed speed_cap;
|
||||
enum pcie_link_width link_width;
|
||||
enum pci_bus_speed speed_cap, platform_speed_cap;
|
||||
enum pcie_link_width platform_link_width;
|
||||
|
||||
if (amdgpu_pcie_gen_cap)
|
||||
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
|
||||
@ -3639,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
|
||||
return;
|
||||
}
|
||||
|
||||
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
|
||||
return;
|
||||
|
||||
amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
|
||||
&platform_link_width);
|
||||
|
||||
if (adev->pm.pcie_gen_mask == 0) {
|
||||
/* asic caps */
|
||||
pdev = adev->pdev;
|
||||
@ -3664,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
|
||||
adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
|
||||
}
|
||||
/* platform caps */
|
||||
pdev = adev->ddev->pdev->bus->self;
|
||||
speed_cap = pcie_get_speed_cap(pdev);
|
||||
if (speed_cap == PCI_SPEED_UNKNOWN) {
|
||||
if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
|
||||
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
|
||||
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
|
||||
} else {
|
||||
if (speed_cap == PCIE_SPEED_16_0GT)
|
||||
if (platform_speed_cap == PCIE_SPEED_16_0GT)
|
||||
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
|
||||
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
|
||||
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
|
||||
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
|
||||
else if (speed_cap == PCIE_SPEED_8_0GT)
|
||||
else if (platform_speed_cap == PCIE_SPEED_8_0GT)
|
||||
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
|
||||
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
|
||||
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
|
||||
else if (speed_cap == PCIE_SPEED_5_0GT)
|
||||
else if (platform_speed_cap == PCIE_SPEED_5_0GT)
|
||||
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
|
||||
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
|
||||
else
|
||||
@ -3688,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
|
||||
}
|
||||
}
|
||||
if (adev->pm.pcie_mlw_mask == 0) {
|
||||
pdev = adev->ddev->pdev->bus->self;
|
||||
link_width = pcie_get_width_cap(pdev);
|
||||
if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
|
||||
if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
|
||||
adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
|
||||
} else {
|
||||
switch (link_width) {
|
||||
switch (platform_link_width) {
|
||||
case PCIE_LNK_X32:
|
||||
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
|
||||
CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
|
||||
|
@ -188,10 +188,12 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
|
||||
if (unlikely(r != 0)) {
|
||||
DRM_ERROR("failed to pin new abo buffer before flip\n");
|
||||
goto unreserve;
|
||||
if (!adev->enable_virtual_display) {
|
||||
r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
|
||||
if (unlikely(r != 0)) {
|
||||
DRM_ERROR("failed to pin new abo buffer before flip\n");
|
||||
goto unreserve;
|
||||
}
|
||||
}
|
||||
|
||||
r = amdgpu_ttm_alloc_gart(&new_abo->tbo);
|
||||
@ -211,7 +213,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
|
||||
amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);
|
||||
amdgpu_bo_unreserve(new_abo);
|
||||
|
||||
work->base = amdgpu_bo_gpu_offset(new_abo);
|
||||
if (!adev->enable_virtual_display)
|
||||
work->base = amdgpu_bo_gpu_offset(new_abo);
|
||||
work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
|
||||
amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
|
||||
|
||||
@ -242,9 +245,10 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
|
||||
goto cleanup;
|
||||
}
|
||||
unpin:
|
||||
if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) {
|
||||
DRM_ERROR("failed to unpin new abo in error path\n");
|
||||
}
|
||||
if (!adev->enable_virtual_display)
|
||||
if (unlikely(amdgpu_bo_unpin(new_abo) != 0))
|
||||
DRM_ERROR("failed to unpin new abo in error path\n");
|
||||
|
||||
unreserve:
|
||||
amdgpu_bo_unreserve(new_abo);
|
||||
|
||||
|
@ -51,14 +51,7 @@ struct amdgpu_doorbell_index {
|
||||
uint32_t userqueue_start;
|
||||
uint32_t userqueue_end;
|
||||
uint32_t gfx_ring0;
|
||||
uint32_t sdma_engine0;
|
||||
uint32_t sdma_engine1;
|
||||
uint32_t sdma_engine2;
|
||||
uint32_t sdma_engine3;
|
||||
uint32_t sdma_engine4;
|
||||
uint32_t sdma_engine5;
|
||||
uint32_t sdma_engine6;
|
||||
uint32_t sdma_engine7;
|
||||
uint32_t sdma_engine[8];
|
||||
uint32_t ih;
|
||||
union {
|
||||
struct {
|
||||
@ -79,6 +72,8 @@ struct amdgpu_doorbell_index {
|
||||
} uvd_vce;
|
||||
};
|
||||
uint32_t max_assignment;
|
||||
/* Per engine SDMA doorbell size in dword */
|
||||
uint32_t sdma_doorbell_range;
|
||||
};
|
||||
|
||||
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
|
||||
|
@ -364,6 +364,14 @@ enum amdgpu_pcie_gen {
|
||||
((adev)->powerplay.pp_funcs->enable_mgpu_fan_boost(\
|
||||
(adev)->powerplay.pp_handle))
|
||||
|
||||
#define amdgpu_dpm_get_ppfeature_status(adev, buf) \
|
||||
((adev)->powerplay.pp_funcs->get_ppfeature_status(\
|
||||
(adev)->powerplay.pp_handle, (buf)))
|
||||
|
||||
#define amdgpu_dpm_set_ppfeature_status(adev, ppfeatures) \
|
||||
((adev)->powerplay.pp_funcs->set_ppfeature_status(\
|
||||
(adev)->powerplay.pp_handle, (ppfeatures)))
|
||||
|
||||
struct amdgpu_dpm {
|
||||
struct amdgpu_ps *ps;
|
||||
/* number of valid power states */
|
||||
|
@ -71,9 +71,11 @@
|
||||
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
|
||||
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
|
||||
* - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
|
||||
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
|
||||
* - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
|
||||
*/
|
||||
#define KMS_DRIVER_MAJOR 3
|
||||
#define KMS_DRIVER_MINOR 27
|
||||
#define KMS_DRIVER_MINOR 29
|
||||
#define KMS_DRIVER_PATCHLEVEL 0
|
||||
|
||||
int amdgpu_vram_limit = 0;
|
||||
|
@ -37,6 +37,8 @@ struct amdgpu_gds {
|
||||
struct amdgpu_gds_asic_info mem;
|
||||
struct amdgpu_gds_asic_info gws;
|
||||
struct amdgpu_gds_asic_info oa;
|
||||
uint32_t gds_compute_max_wave_id;
|
||||
|
||||
/* At present, GDS, GWS and OA resources for gfx (graphics)
|
||||
* is always pre-allocated and available for graphics operation.
|
||||
* Such resource is shared between all gfx clients.
|
||||
|
@ -54,10 +54,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
|
||||
|
||||
memset(&bp, 0, sizeof(bp));
|
||||
*obj = NULL;
|
||||
/* At least align on page size */
|
||||
if (alignment < PAGE_SIZE) {
|
||||
alignment = PAGE_SIZE;
|
||||
}
|
||||
|
||||
bp.size = size;
|
||||
bp.byte_align = alignment;
|
||||
@ -244,9 +240,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
|
||||
return -EINVAL;
|
||||
}
|
||||
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
|
||||
/* GDS allocations must be DW aligned */
|
||||
if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
|
||||
size = ALIGN(size, 4);
|
||||
}
|
||||
|
||||
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
|
||||
|
@ -202,12 +202,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
||||
amdgpu_asic_flush_hdp(adev, ring);
|
||||
}
|
||||
|
||||
if (need_ctx_switch)
|
||||
status |= AMDGPU_HAVE_CTX_SWITCH;
|
||||
|
||||
skip_preamble = ring->current_ctx == fence_ctx;
|
||||
if (job && ring->funcs->emit_cntxcntl) {
|
||||
if (need_ctx_switch)
|
||||
status |= AMDGPU_HAVE_CTX_SWITCH;
|
||||
status |= job->preamble_status;
|
||||
|
||||
amdgpu_ring_emit_cntxcntl(ring, status);
|
||||
}
|
||||
|
||||
@ -221,8 +221,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
||||
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
|
||||
continue;
|
||||
|
||||
amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
|
||||
need_ctx_switch = false;
|
||||
amdgpu_ring_emit_ib(ring, job, ib, status);
|
||||
status &= ~AMDGPU_HAVE_CTX_SWITCH;
|
||||
}
|
||||
|
||||
if (ring->funcs->emit_tmz)
|
||||
|
@ -52,6 +52,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
|
||||
ih->use_bus_addr = use_bus_addr;
|
||||
|
||||
if (use_bus_addr) {
|
||||
dma_addr_t dma_addr;
|
||||
|
||||
if (ih->ring)
|
||||
return 0;
|
||||
|
||||
@ -59,21 +61,26 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
|
||||
* add them to the end of the ring allocation.
|
||||
*/
|
||||
ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
|
||||
&ih->rb_dma_addr, GFP_KERNEL);
|
||||
&dma_addr, GFP_KERNEL);
|
||||
if (ih->ring == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
memset((void *)ih->ring, 0, ih->ring_size + 8);
|
||||
ih->wptr_offs = (ih->ring_size / 4) + 0;
|
||||
ih->rptr_offs = (ih->ring_size / 4) + 1;
|
||||
ih->gpu_addr = dma_addr;
|
||||
ih->wptr_addr = dma_addr + ih->ring_size;
|
||||
ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
|
||||
ih->rptr_addr = dma_addr + ih->ring_size + 4;
|
||||
ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
|
||||
} else {
|
||||
r = amdgpu_device_wb_get(adev, &ih->wptr_offs);
|
||||
unsigned wptr_offs, rptr_offs;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &wptr_offs);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &ih->rptr_offs);
|
||||
r = amdgpu_device_wb_get(adev, &rptr_offs);
|
||||
if (r) {
|
||||
amdgpu_device_wb_free(adev, ih->wptr_offs);
|
||||
amdgpu_device_wb_free(adev, wptr_offs);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -82,10 +89,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
|
||||
&ih->ring_obj, &ih->gpu_addr,
|
||||
(void **)&ih->ring);
|
||||
if (r) {
|
||||
amdgpu_device_wb_free(adev, ih->rptr_offs);
|
||||
amdgpu_device_wb_free(adev, ih->wptr_offs);
|
||||
amdgpu_device_wb_free(adev, rptr_offs);
|
||||
amdgpu_device_wb_free(adev, wptr_offs);
|
||||
return r;
|
||||
}
|
||||
|
||||
ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
|
||||
ih->wptr_cpu = &adev->wb.wb[wptr_offs];
|
||||
ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
|
||||
ih->rptr_cpu = &adev->wb.wb[rptr_offs];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -109,13 +121,13 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
|
||||
* add them to the end of the ring allocation.
|
||||
*/
|
||||
dma_free_coherent(adev->dev, ih->ring_size + 8,
|
||||
(void *)ih->ring, ih->rb_dma_addr);
|
||||
(void *)ih->ring, ih->gpu_addr);
|
||||
ih->ring = NULL;
|
||||
} else {
|
||||
amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
|
||||
(void **)&ih->ring);
|
||||
amdgpu_device_wb_free(adev, ih->wptr_offs);
|
||||
amdgpu_device_wb_free(adev, ih->rptr_offs);
|
||||
amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
|
||||
amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -137,7 +149,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
|
||||
if (!ih->enabled || adev->shutdown)
|
||||
return IRQ_NONE;
|
||||
|
||||
wptr = amdgpu_ih_get_wptr(adev);
|
||||
wptr = amdgpu_ih_get_wptr(adev, ih);
|
||||
|
||||
restart_ih:
|
||||
/* is somebody else already processing irqs? */
|
||||
@ -154,11 +166,11 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
|
||||
ih->rptr &= ih->ptr_mask;
|
||||
}
|
||||
|
||||
amdgpu_ih_set_rptr(adev);
|
||||
amdgpu_ih_set_rptr(adev, ih);
|
||||
atomic_set(&ih->lock, 0);
|
||||
|
||||
/* make sure wptr hasn't changed while processing */
|
||||
wptr = amdgpu_ih_get_wptr(adev);
|
||||
wptr = amdgpu_ih_get_wptr(adev, ih);
|
||||
if (wptr != ih->rptr)
|
||||
goto restart_ih;
|
||||
|
||||
|
@ -31,34 +31,40 @@ struct amdgpu_iv_entry;
|
||||
* R6xx+ IH ring
|
||||
*/
|
||||
struct amdgpu_ih_ring {
|
||||
struct amdgpu_bo *ring_obj;
|
||||
volatile uint32_t *ring;
|
||||
unsigned rptr;
|
||||
unsigned ring_size;
|
||||
uint64_t gpu_addr;
|
||||
uint32_t ptr_mask;
|
||||
atomic_t lock;
|
||||
bool enabled;
|
||||
unsigned wptr_offs;
|
||||
unsigned rptr_offs;
|
||||
u32 doorbell_index;
|
||||
bool use_doorbell;
|
||||
bool use_bus_addr;
|
||||
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
|
||||
|
||||
struct amdgpu_bo *ring_obj;
|
||||
volatile uint32_t *ring;
|
||||
uint64_t gpu_addr;
|
||||
|
||||
uint64_t wptr_addr;
|
||||
volatile uint32_t *wptr_cpu;
|
||||
|
||||
uint64_t rptr_addr;
|
||||
volatile uint32_t *rptr_cpu;
|
||||
|
||||
bool enabled;
|
||||
unsigned rptr;
|
||||
atomic_t lock;
|
||||
};
|
||||
|
||||
/* provided by the ih block */
|
||||
struct amdgpu_ih_funcs {
|
||||
/* ring read/write ptr handling, called from interrupt context */
|
||||
u32 (*get_wptr)(struct amdgpu_device *adev);
|
||||
void (*decode_iv)(struct amdgpu_device *adev,
|
||||
u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
|
||||
void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
void (*set_rptr)(struct amdgpu_device *adev);
|
||||
void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
|
||||
};
|
||||
|
||||
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
|
||||
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
|
||||
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
|
||||
#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
|
||||
#define amdgpu_ih_decode_iv(adev, iv) \
|
||||
(adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
|
||||
#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
|
||||
|
||||
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
|
||||
unsigned ring_size, bool use_bus_addr);
|
||||
|
@ -148,6 +148,8 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev,
|
||||
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
|
||||
amdgpu_ih_decode_iv(adev, &entry);
|
||||
|
||||
trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
|
||||
|
||||
amdgpu_irq_dispatch(adev, &entry);
|
||||
}
|
||||
|
||||
@ -174,6 +176,36 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_irq_handle_ih1 - kick of processing for IH1
|
||||
*
|
||||
* @work: work structure in struct amdgpu_irq
|
||||
*
|
||||
* Kick of processing IH ring 1.
|
||||
*/
|
||||
static void amdgpu_irq_handle_ih1(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
|
||||
irq.ih1_work);
|
||||
|
||||
amdgpu_ih_process(adev, &adev->irq.ih1, amdgpu_irq_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_irq_handle_ih2 - kick of processing for IH2
|
||||
*
|
||||
* @work: work structure in struct amdgpu_irq
|
||||
*
|
||||
* Kick of processing IH ring 2.
|
||||
*/
|
||||
static void amdgpu_irq_handle_ih2(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
|
||||
irq.ih2_work);
|
||||
|
||||
amdgpu_ih_process(adev, &adev->irq.ih2, amdgpu_irq_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_msi_ok - check whether MSI functionality is enabled
|
||||
*
|
||||
@ -238,6 +270,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
|
||||
amdgpu_hotplug_work_func);
|
||||
}
|
||||
|
||||
INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
|
||||
INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
|
||||
|
||||
adev->irq.installed = true;
|
||||
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
|
||||
if (r) {
|
||||
@ -367,8 +402,6 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
|
||||
bool handled = false;
|
||||
int r;
|
||||
|
||||
trace_amdgpu_iv(entry);
|
||||
|
||||
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
|
||||
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
|
||||
|
||||
|
@ -87,9 +87,11 @@ struct amdgpu_irq {
|
||||
/* status, etc. */
|
||||
bool msi_enabled; /* msi enabled */
|
||||
|
||||
/* interrupt ring */
|
||||
struct amdgpu_ih_ring ih;
|
||||
const struct amdgpu_ih_funcs *ih_funcs;
|
||||
/* interrupt rings */
|
||||
struct amdgpu_ih_ring ih, ih1, ih2;
|
||||
const struct amdgpu_ih_funcs *ih_funcs;
|
||||
struct work_struct ih1_work, ih2_work;
|
||||
struct amdgpu_irq_src self_irq;
|
||||
|
||||
/* gen irq stuff */
|
||||
struct irq_domain *domain; /* GPU irq controller domain */
|
||||
|
@ -426,12 +426,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
|
||||
size_t acc_size;
|
||||
int r;
|
||||
|
||||
page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
|
||||
AMDGPU_GEM_DOMAIN_OA))
|
||||
/* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
|
||||
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
|
||||
/* GWS and OA don't need any alignment. */
|
||||
page_align = bp->byte_align;
|
||||
size <<= PAGE_SHIFT;
|
||||
else
|
||||
} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
|
||||
/* Both size and alignment must be a multiple of 4. */
|
||||
page_align = ALIGN(bp->byte_align, 4);
|
||||
size = ALIGN(size, 4) << PAGE_SHIFT;
|
||||
} else {
|
||||
/* Memory should be aligned at least to a page size. */
|
||||
page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
size = ALIGN(size, PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
|
||||
return -ENOMEM;
|
||||
|
@ -626,11 +626,71 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
|
||||
* DOC: ppfeatures
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for adjusting what powerplay
|
||||
* features to be enabled. The file ppfeatures is used for this. And
|
||||
* this is only available for Vega10 and later dGPUs.
|
||||
*
|
||||
* Reading back the file will show you the following:
|
||||
* - Current ppfeature masks
|
||||
* - List of the all supported powerplay features with their naming,
|
||||
* bitmasks and enablement status('Y'/'N' means "enabled"/"disabled").
|
||||
*
|
||||
* To manually enable or disable a specific feature, just set or clear
|
||||
* the corresponding bit from original ppfeature masks and input the
|
||||
* new ppfeature masks.
|
||||
*/
|
||||
static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
uint64_t featuremask;
|
||||
int ret;
|
||||
|
||||
ret = kstrtou64(buf, 0, &featuremask);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
pr_debug("featuremask = 0x%llx\n", featuremask);
|
||||
|
||||
if (adev->powerplay.pp_funcs->set_ppfeature_status) {
|
||||
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
|
||||
if (adev->powerplay.pp_funcs->get_ppfeature_status)
|
||||
return amdgpu_dpm_get_ppfeature_status(adev, buf);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk
|
||||
* pp_dpm_pcie
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for adjusting what power levels
|
||||
* are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
|
||||
* and pp_dpm_pcie are used for this.
|
||||
* pp_dpm_socclk, pp_dpm_fclk, pp_dpm_dcefclk and pp_dpm_pcie are used for
|
||||
* this.
|
||||
*
|
||||
* pp_dpm_socclk and pp_dpm_dcefclk interfaces are only available for
|
||||
* Vega10 and later ASICs.
|
||||
* pp_dpm_fclk interface is only available for Vega20 and later ASICs.
|
||||
*
|
||||
* Reading back the files will show you the available power levels within
|
||||
* the power state and the clock information for those levels.
|
||||
@ -640,6 +700,8 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
|
||||
* Secondly, enter a new value for each level by inputting a string that
|
||||
* contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie"
|
||||
* E.g., echo "4 5 6" > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
|
||||
*
|
||||
* NOTE: change to the dcefclk max dpm level is not supported now
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
|
||||
@ -750,6 +812,114 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
|
||||
if (adev->powerplay.pp_funcs->print_clock_levels)
|
||||
return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
|
||||
else
|
||||
return snprintf(buf, PAGE_SIZE, "\n");
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
int ret;
|
||||
uint32_t mask = 0;
|
||||
|
||||
ret = amdgpu_read_mask(buf, count, &mask);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (adev->powerplay.pp_funcs->force_clock_level)
|
||||
ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
|
||||
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
|
||||
if (adev->powerplay.pp_funcs->print_clock_levels)
|
||||
return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
|
||||
else
|
||||
return snprintf(buf, PAGE_SIZE, "\n");
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
int ret;
|
||||
uint32_t mask = 0;
|
||||
|
||||
ret = amdgpu_read_mask(buf, count, &mask);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (adev->powerplay.pp_funcs->force_clock_level)
|
||||
ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
|
||||
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
|
||||
if (adev->powerplay.pp_funcs->print_clock_levels)
|
||||
return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
|
||||
else
|
||||
return snprintf(buf, PAGE_SIZE, "\n");
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
int ret;
|
||||
uint32_t mask = 0;
|
||||
|
||||
ret = amdgpu_read_mask(buf, count, &mask);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (adev->powerplay.pp_funcs->force_clock_level)
|
||||
ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
|
||||
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
@ -990,6 +1160,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", value);
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: pcie_bw
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for estimating how much data
|
||||
* has been received and sent by the GPU in the last second through PCIe.
|
||||
* The file pcie_bw is used for this.
|
||||
* The Perf counters count the number of received and sent messages and return
|
||||
* those values, as well as the maximum payload size of a PCIe packet (mps).
|
||||
* Note that it is not possible to easily and quickly obtain the size of each
|
||||
* packet transmitted, so we output the max payload size (mps) to allow for
|
||||
* quick estimation of the PCIe bandwidth usage
|
||||
*/
|
||||
static ssize_t amdgpu_get_pcie_bw(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
uint64_t count0, count1;
|
||||
|
||||
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
|
||||
return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n",
|
||||
count0, count1, pcie_get_mps(adev->pdev));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
|
||||
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_dpm_forced_performance_level,
|
||||
@ -1008,6 +1203,15 @@ static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
|
||||
static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_pp_dpm_mclk,
|
||||
amdgpu_set_pp_dpm_mclk);
|
||||
static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_pp_dpm_socclk,
|
||||
amdgpu_set_pp_dpm_socclk);
|
||||
static DEVICE_ATTR(pp_dpm_fclk, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_pp_dpm_fclk,
|
||||
amdgpu_set_pp_dpm_fclk);
|
||||
static DEVICE_ATTR(pp_dpm_dcefclk, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_pp_dpm_dcefclk,
|
||||
amdgpu_set_pp_dpm_dcefclk);
|
||||
static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_pp_dpm_pcie,
|
||||
amdgpu_set_pp_dpm_pcie);
|
||||
@ -1025,6 +1229,10 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
|
||||
amdgpu_set_pp_od_clk_voltage);
|
||||
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
|
||||
amdgpu_get_busy_percent, NULL);
|
||||
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
|
||||
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_ppfeature_status,
|
||||
amdgpu_set_ppfeature_status);
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
@ -1516,6 +1724,75 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||
struct drm_device *ddev = adev->ddev;
|
||||
uint32_t sclk;
|
||||
int r, size = sizeof(sclk);
|
||||
|
||||
/* Can't get voltage when the card is off */
|
||||
if ((adev->flags & AMD_IS_PX) &&
|
||||
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
|
||||
return -EINVAL;
|
||||
|
||||
/* sanity check PP is enabled */
|
||||
if (!(adev->powerplay.pp_funcs &&
|
||||
adev->powerplay.pp_funcs->read_sensor))
|
||||
return -EINVAL;
|
||||
|
||||
/* get the sclk */
|
||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
|
||||
(void *)&sclk, &size);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000);
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "sclk\n");
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||
struct drm_device *ddev = adev->ddev;
|
||||
uint32_t mclk;
|
||||
int r, size = sizeof(mclk);
|
||||
|
||||
/* Can't get voltage when the card is off */
|
||||
if ((adev->flags & AMD_IS_PX) &&
|
||||
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
|
||||
return -EINVAL;
|
||||
|
||||
/* sanity check PP is enabled */
|
||||
if (!(adev->powerplay.pp_funcs &&
|
||||
adev->powerplay.pp_funcs->read_sensor))
|
||||
return -EINVAL;
|
||||
|
||||
/* get the sclk */
|
||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
|
||||
(void *)&mclk, &size);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000);
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "mclk\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: hwmon
|
||||
@ -1532,6 +1809,10 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
|
||||
*
|
||||
* - GPU fan
|
||||
*
|
||||
* - GPU gfx/compute engine clock
|
||||
*
|
||||
* - GPU memory clock (dGPU only)
|
||||
*
|
||||
* hwmon interfaces for GPU temperature:
|
||||
*
|
||||
* - temp1_input: the on die GPU temperature in millidegrees Celsius
|
||||
@ -1576,6 +1857,12 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
|
||||
*
|
||||
* - fan[1-*]_enable: Enable or disable the sensors. 1: Enable, 0: Disable
|
||||
*
|
||||
* hwmon interfaces for GPU clocks:
|
||||
*
|
||||
* - freq1_input: the gfx/compute clock in hertz
|
||||
*
|
||||
* - freq2_input: the memory clock in hertz
|
||||
*
|
||||
* You can use hwmon tools like sensors to view this information on your system.
|
||||
*
|
||||
*/
|
||||
@ -1600,6 +1887,10 @@ static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg,
|
||||
static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0);
|
||||
static SENSOR_DEVICE_ATTR(freq1_input, S_IRUGO, amdgpu_hwmon_show_sclk, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(freq1_label, S_IRUGO, amdgpu_hwmon_show_sclk_label, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(freq2_input, S_IRUGO, amdgpu_hwmon_show_mclk, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(freq2_label, S_IRUGO, amdgpu_hwmon_show_mclk_label, NULL, 0);
|
||||
|
||||
static struct attribute *hwmon_attributes[] = {
|
||||
&sensor_dev_attr_temp1_input.dev_attr.attr,
|
||||
@ -1622,6 +1913,10 @@ static struct attribute *hwmon_attributes[] = {
|
||||
&sensor_dev_attr_power1_cap_max.dev_attr.attr,
|
||||
&sensor_dev_attr_power1_cap_min.dev_attr.attr,
|
||||
&sensor_dev_attr_power1_cap.dev_attr.attr,
|
||||
&sensor_dev_attr_freq1_input.dev_attr.attr,
|
||||
&sensor_dev_attr_freq1_label.dev_attr.attr,
|
||||
&sensor_dev_attr_freq2_input.dev_attr.attr,
|
||||
&sensor_dev_attr_freq2_label.dev_attr.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -1686,7 +1981,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
|
||||
effective_mode &= ~S_IWUSR;
|
||||
|
||||
if ((adev->flags & AMD_IS_APU) &&
|
||||
(attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
|
||||
(attr == &sensor_dev_attr_power1_average.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr||
|
||||
attr == &sensor_dev_attr_power1_cap.dev_attr.attr))
|
||||
return 0;
|
||||
@ -1712,6 +2008,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
|
||||
attr == &sensor_dev_attr_in1_label.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
/* no mclk on APUs */
|
||||
if ((adev->flags & AMD_IS_APU) &&
|
||||
(attr == &sensor_dev_attr_freq2_input.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
return effective_mode;
|
||||
}
|
||||
|
||||
@ -2008,6 +2310,7 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
|
||||
|
||||
int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
|
||||
int ret;
|
||||
|
||||
if (adev->pm.sysfs_initialized)
|
||||
@ -2069,6 +2372,25 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
||||
DRM_ERROR("failed to create device file pp_dpm_mclk\n");
|
||||
return ret;
|
||||
}
|
||||
if (adev->asic_type >= CHIP_VEGA10) {
|
||||
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_socclk);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file pp_dpm_socclk\n");
|
||||
return ret;
|
||||
}
|
||||
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file pp_dpm_dcefclk\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
if (adev->asic_type >= CHIP_VEGA20) {
|
||||
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_fclk);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file pp_dpm_fclk\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file pp_dpm_pcie\n");
|
||||
@ -2091,12 +2413,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
||||
"pp_power_profile_mode\n");
|
||||
return ret;
|
||||
}
|
||||
ret = device_create_file(adev->dev,
|
||||
&dev_attr_pp_od_clk_voltage);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file "
|
||||
"pp_od_clk_voltage\n");
|
||||
return ret;
|
||||
if (hwmgr->od_enabled) {
|
||||
ret = device_create_file(adev->dev,
|
||||
&dev_attr_pp_od_clk_voltage);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file "
|
||||
"pp_od_clk_voltage\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ret = device_create_file(adev->dev,
|
||||
&dev_attr_gpu_busy_percent);
|
||||
@ -2105,12 +2429,31 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
||||
"gpu_busy_level\n");
|
||||
return ret;
|
||||
}
|
||||
/* PCIe Perf counters won't work on APU nodes */
|
||||
if (!(adev->flags & AMD_IS_APU)) {
|
||||
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file pcie_bw\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ret = amdgpu_debugfs_pm_init(adev);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to register debugfs file for dpm!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if ((adev->asic_type >= CHIP_VEGA10) &&
|
||||
!(adev->flags & AMD_IS_APU)) {
|
||||
ret = device_create_file(adev->dev,
|
||||
&dev_attr_ppfeatures);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file "
|
||||
"ppfeatures\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
adev->pm.sysfs_initialized = true;
|
||||
|
||||
return 0;
|
||||
@ -2118,6 +2461,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
||||
|
||||
void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
|
||||
|
||||
if (adev->pm.dpm_enabled == 0)
|
||||
return;
|
||||
|
||||
@ -2133,14 +2478,26 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
|
||||
|
||||
device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
|
||||
device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
|
||||
if (adev->asic_type >= CHIP_VEGA10) {
|
||||
device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk);
|
||||
device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
|
||||
}
|
||||
device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
|
||||
if (adev->asic_type >= CHIP_VEGA20)
|
||||
device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk);
|
||||
device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
|
||||
device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
|
||||
device_remove_file(adev->dev,
|
||||
&dev_attr_pp_power_profile_mode);
|
||||
device_remove_file(adev->dev,
|
||||
&dev_attr_pp_od_clk_voltage);
|
||||
if (hwmgr->od_enabled)
|
||||
device_remove_file(adev->dev,
|
||||
&dev_attr_pp_od_clk_voltage);
|
||||
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
|
||||
if (!(adev->flags & AMD_IS_APU))
|
||||
device_remove_file(adev->dev, &dev_attr_pcie_bw);
|
||||
if ((adev->asic_type >= CHIP_VEGA10) &&
|
||||
!(adev->flags & AMD_IS_APU))
|
||||
device_remove_file(adev->dev, &dev_attr_ppfeatures);
|
||||
}
|
||||
|
||||
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
|
||||
|
@ -67,9 +67,6 @@ static int psp_sw_init(void *handle)
|
||||
|
||||
psp->adev = adev;
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
ret = psp_init_microcode(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to load psp firmware!\n");
|
||||
@ -83,9 +80,6 @@ static int psp_sw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
release_firmware(adev->psp.sos_fw);
|
||||
adev->psp.sos_fw = NULL;
|
||||
release_firmware(adev->psp.asd_fw);
|
||||
@ -140,13 +134,24 @@ psp_cmd_submit_buf(struct psp_context *psp,
|
||||
while (*((unsigned int *)psp->fence_buf) != index)
|
||||
msleep(1);
|
||||
|
||||
/* the status field must be 0 after FW is loaded */
|
||||
if (ucode && psp->cmd_buf_mem->resp.status) {
|
||||
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
|
||||
psp->cmd_buf_mem->resp.status, ucode->ucode_id);
|
||||
return -EINVAL;
|
||||
/* In some cases, psp response status is not 0 even if there is no
|
||||
* problem while the command is submitted. Some version of PSP FW
|
||||
* doesn't write 0 to that field.
|
||||
* So here we would like to only print a warning instead of an error
|
||||
* during psp initialization to avoid breaking hw_init and it doesn't
|
||||
* return -EINVAL.
|
||||
*/
|
||||
if (psp->cmd_buf_mem->resp.status) {
|
||||
if (ucode)
|
||||
DRM_WARN("failed to load ucode id (%d) ",
|
||||
ucode->ucode_id);
|
||||
DRM_WARN("psp command failed and response status is (%d)\n",
|
||||
psp->cmd_buf_mem->resp.status);
|
||||
}
|
||||
|
||||
/* get xGMI session id from response buffer */
|
||||
cmd->resp.session_id = psp->cmd_buf_mem->resp.session_id;
|
||||
|
||||
if (ucode) {
|
||||
ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
|
||||
ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
|
||||
@ -495,6 +500,98 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
|
||||
enum psp_gfx_fw_type *type)
|
||||
{
|
||||
switch (ucode->ucode_id) {
|
||||
case AMDGPU_UCODE_ID_SDMA0:
|
||||
*type = GFX_FW_TYPE_SDMA0;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA1:
|
||||
*type = GFX_FW_TYPE_SDMA1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_CE:
|
||||
*type = GFX_FW_TYPE_CP_CE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_PFP:
|
||||
*type = GFX_FW_TYPE_CP_PFP;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_ME:
|
||||
*type = GFX_FW_TYPE_CP_ME;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME2;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_G:
|
||||
*type = GFX_FW_TYPE_RLC_G;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
|
||||
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
|
||||
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
|
||||
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SMC:
|
||||
*type = GFX_FW_TYPE_SMU;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_UVD:
|
||||
*type = GFX_FW_TYPE_UVD;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_UVD1:
|
||||
*type = GFX_FW_TYPE_UVD1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_VCE:
|
||||
*type = GFX_FW_TYPE_VCE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_VCN:
|
||||
*type = GFX_FW_TYPE_VCN;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_DMCU_ERAM:
|
||||
*type = GFX_FW_TYPE_DMCU_ERAM;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_DMCU_INTV:
|
||||
*type = GFX_FW_TYPE_DMCU_ISR;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_MAXIMUM:
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd)
|
||||
{
|
||||
int ret;
|
||||
uint64_t fw_mem_mc_addr = ucode->mc_addr;
|
||||
|
||||
memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
|
||||
|
||||
cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
|
||||
|
||||
ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
|
||||
if (ret)
|
||||
DRM_ERROR("Unknown firmware type\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_np_fw_load(struct psp_context *psp)
|
||||
{
|
||||
int i, ret;
|
||||
@ -516,7 +613,7 @@ static int psp_np_fw_load(struct psp_context *psp)
|
||||
/*skip ucode loading in SRIOV VF */
|
||||
continue;
|
||||
|
||||
ret = psp_prep_cmd_buf(ucode, psp->cmd);
|
||||
ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -541,7 +638,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
|
||||
struct psp_context *psp = &adev->psp;
|
||||
|
||||
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
|
||||
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
|
||||
psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
|
||||
goto skip_memalloc;
|
||||
}
|
||||
|
||||
@ -618,10 +715,6 @@ static int psp_hw_init(void *handle)
|
||||
int ret;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
mutex_lock(&adev->firmware.mutex);
|
||||
/*
|
||||
* This sequence is just used on hw_init only once, no need on
|
||||
@ -651,9 +744,6 @@ static int psp_hw_fini(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct psp_context *psp = &adev->psp;
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
psp->xgmi_context.initialized == 1)
|
||||
psp_xgmi_terminate(psp);
|
||||
@ -682,9 +772,6 @@ static int psp_suspend(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct psp_context *psp = &adev->psp;
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
psp->xgmi_context.initialized == 1) {
|
||||
ret = psp_xgmi_terminate(psp);
|
||||
@ -709,9 +796,6 @@ static int psp_resume(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct psp_context *psp = &adev->psp;
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
DRM_INFO("PSP is resuming...\n");
|
||||
|
||||
mutex_lock(&adev->firmware.mutex);
|
||||
@ -747,11 +831,6 @@ static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
|
||||
{
|
||||
struct amdgpu_firmware_info *ucode = NULL;
|
||||
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
DRM_INFO("firmware is not loaded by PSP\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!adev->firmware.fw_size)
|
||||
return false;
|
||||
|
||||
|
@ -65,8 +65,6 @@ struct psp_funcs
|
||||
int (*init_microcode)(struct psp_context *psp);
|
||||
int (*bootloader_load_sysdrv)(struct psp_context *psp);
|
||||
int (*bootloader_load_sos)(struct psp_context *psp);
|
||||
int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd);
|
||||
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
|
||||
int (*ring_create)(struct psp_context *psp,
|
||||
enum psp_ring_type ring_type);
|
||||
@ -176,7 +174,6 @@ struct psp_xgmi_topology_info {
|
||||
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
|
||||
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
|
||||
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
|
||||
#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
|
||||
|
@ -131,7 +131,7 @@ struct amdgpu_ring_funcs {
|
||||
void (*emit_ib)(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch);
|
||||
uint32_t flags);
|
||||
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
|
||||
uint64_t seq, unsigned flags);
|
||||
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
|
||||
@ -229,7 +229,7 @@ struct amdgpu_ring {
|
||||
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
|
||||
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
|
||||
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
|
||||
#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c)))
|
||||
#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags)))
|
||||
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
|
||||
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
|
||||
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
|
||||
|
@ -76,9 +76,10 @@ TRACE_EVENT(amdgpu_mm_wreg,
|
||||
);
|
||||
|
||||
TRACE_EVENT(amdgpu_iv,
|
||||
TP_PROTO(struct amdgpu_iv_entry *iv),
|
||||
TP_ARGS(iv),
|
||||
TP_PROTO(unsigned ih, struct amdgpu_iv_entry *iv),
|
||||
TP_ARGS(ih, iv),
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned, ih)
|
||||
__field(unsigned, client_id)
|
||||
__field(unsigned, src_id)
|
||||
__field(unsigned, ring_id)
|
||||
@ -90,6 +91,7 @@ TRACE_EVENT(amdgpu_iv,
|
||||
__array(unsigned, src_data, 4)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ih = ih;
|
||||
__entry->client_id = iv->client_id;
|
||||
__entry->src_id = iv->src_id;
|
||||
__entry->ring_id = iv->ring_id;
|
||||
@ -103,8 +105,9 @@ TRACE_EVENT(amdgpu_iv,
|
||||
__entry->src_data[2] = iv->src_data[2];
|
||||
__entry->src_data[3] = iv->src_data[3];
|
||||
),
|
||||
TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
|
||||
__entry->client_id, __entry->src_id,
|
||||
TP_printk("ih:%u client_id:%u src_id:%u ring:%u vmid:%u "
|
||||
"timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
|
||||
__entry->ih, __entry->client_id, __entry->src_id,
|
||||
__entry->ring_id, __entry->vmid,
|
||||
__entry->timestamp, __entry->pasid,
|
||||
__entry->src_data[0], __entry->src_data[1],
|
||||
|
@ -1546,7 +1546,8 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
|
||||
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
|
||||
.io_mem_free = &amdgpu_ttm_io_mem_free,
|
||||
.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
|
||||
.access_memory = &amdgpu_ttm_access_memory
|
||||
.access_memory = &amdgpu_ttm_access_memory,
|
||||
.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1755,7 +1756,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
|
||||
4, AMDGPU_GEM_DOMAIN_GDS,
|
||||
&adev->gds.gds_gfx_bo, NULL, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
@ -1768,7 +1769,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
|
||||
1, AMDGPU_GEM_DOMAIN_GWS,
|
||||
&adev->gds.gws_gfx_bo, NULL, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
@ -1781,7 +1782,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
|
||||
1, AMDGPU_GEM_DOMAIN_OA,
|
||||
&adev->gds.oa_gfx_bo, NULL, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
|
@ -1035,7 +1035,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
amdgpu_ring_write(ring, VCE_CMD_IB);
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
|
@ -66,7 +66,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
|
||||
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
|
||||
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib, bool ctx_switch);
|
||||
struct amdgpu_ib *ib, uint32_t flags);
|
||||
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags);
|
||||
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
|
||||
|
@ -107,14 +107,6 @@ struct amdgpu_pte_update_params {
|
||||
* DMA addresses to use for mapping, used during VM update by CPU
|
||||
*/
|
||||
dma_addr_t *pages_addr;
|
||||
|
||||
/**
|
||||
* @kptr:
|
||||
*
|
||||
* Kernel pointer of PD/PT BO that needs to be updated,
|
||||
* used during VM update by CPU
|
||||
*/
|
||||
void *kptr;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -623,6 +615,28 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
|
||||
list_add(&entry->tv.head, validated);
|
||||
}
|
||||
|
||||
/*
 * amdgpu_vm_del_from_lru_notify - clear the bulk_moveable flag of affected VMs
 * @bo: TTM buffer object the notification is about
 *
 * Registered as the .del_from_lru_notify hook in amdgpu_bo_driver.
 *
 * Returns early, without touching any VM, when @bo is not an amdgpu BO,
 * when its placement carries TTM_PL_FLAG_NO_EVICT, or when the amdgpu BO
 * has no parent. Otherwise it walks the BO's vm_bo list and sets
 * vm->bulk_moveable to false for every VM whose root page-directory BO
 * uses the same reservation object as this BO.
 */
void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
|
||||
{
|
||||
struct amdgpu_bo *abo;
|
||||
struct amdgpu_vm_bo_base *bo_base;
|
||||
|
||||
if (!amdgpu_bo_is_amdgpu_bo(bo)) /* must be checked before ttm_to_amdgpu_bo() below */
|
||||
return;
|
||||
|
||||
if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)
|
||||
return;
|
||||
|
||||
abo = ttm_to_amdgpu_bo(bo);
|
||||
if (!abo->parent)
|
||||
return;
|
||||
for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { /* one entry per VM this BO is linked into */
|
||||
struct amdgpu_vm *vm = bo_base->vm;
|
||||
|
||||
if (abo->tbo.resv == vm->root.base.bo->tbo.resv) /* BO shares the VM root PD's reservation object */
|
||||
vm->bulk_moveable = false;
|
||||
}
|
||||
|
||||
}
|
||||
/**
|
||||
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
|
||||
*
|
||||
@ -799,9 +813,16 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
||||
addr += ats_entries * 8;
|
||||
}
|
||||
|
||||
if (entries)
|
||||
if (entries) {
|
||||
uint64_t value = 0;
|
||||
|
||||
/* Workaround for fault priority problem on GMC9 */
|
||||
if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10)
|
||||
value = AMDGPU_PTE_EXECUTABLE;
|
||||
|
||||
amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
|
||||
entries, 0, 0);
|
||||
entries, 0, value);
|
||||
}
|
||||
|
||||
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
|
||||
|
||||
@ -847,9 +868,6 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
bp->size = amdgpu_vm_bo_size(adev, level);
|
||||
bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
|
||||
bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 &&
|
||||
adev->flags & AMD_IS_APU)
|
||||
bp->domain |= AMDGPU_GEM_DOMAIN_GTT;
|
||||
bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
|
||||
bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
@ -1506,20 +1524,27 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vm_update_huge - figure out parameters for PTE updates
|
||||
* amdgpu_vm_update_flags - figure out flags for PTE updates
|
||||
*
|
||||
* Make sure to set the right flags for the PTEs at the desired level.
|
||||
*/
|
||||
static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
|
||||
struct amdgpu_bo *bo, unsigned level,
|
||||
uint64_t pe, uint64_t addr,
|
||||
unsigned count, uint32_t incr,
|
||||
uint64_t flags)
|
||||
static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params,
|
||||
struct amdgpu_bo *bo, unsigned level,
|
||||
uint64_t pe, uint64_t addr,
|
||||
unsigned count, uint32_t incr,
|
||||
uint64_t flags)
|
||||
|
||||
{
|
||||
if (level != AMDGPU_VM_PTB) {
|
||||
flags |= AMDGPU_PDE_PTE;
|
||||
amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
|
||||
|
||||
} else if (params->adev->asic_type >= CHIP_VEGA10 &&
|
||||
!(flags & AMDGPU_PTE_VALID) &&
|
||||
!(flags & AMDGPU_PTE_PRT)) {
|
||||
|
||||
/* Workaround for fault priority problem on GMC9 */
|
||||
flags |= AMDGPU_PTE_EXECUTABLE;
|
||||
}
|
||||
|
||||
amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
|
||||
@ -1676,9 +1701,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
|
||||
uint64_t upd_end = min(entry_end, frag_end);
|
||||
unsigned nptes = (upd_end - frag_start) >> shift;
|
||||
|
||||
amdgpu_vm_update_huge(params, pt, cursor.level,
|
||||
pe_start, dst, nptes, incr,
|
||||
flags | AMDGPU_PTE_FRAG(frag));
|
||||
amdgpu_vm_update_flags(params, pt, cursor.level,
|
||||
pe_start, dst, nptes, incr,
|
||||
flags | AMDGPU_PTE_FRAG(frag));
|
||||
|
||||
pe_start += nptes * 8;
|
||||
dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
|
||||
@ -1756,13 +1781,20 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
||||
if (pages_addr)
|
||||
params.src = ~0;
|
||||
|
||||
/* Wait for PT BOs to be free. PTs share the same resv. object
|
||||
/* Wait for PT BOs to be idle. PTs share the same resv. object
|
||||
* as the root PD BO
|
||||
*/
|
||||
r = amdgpu_vm_wait_pd(adev, vm, owner);
|
||||
if (unlikely(r))
|
||||
return r;
|
||||
|
||||
/* Wait for any BO move to be completed */
|
||||
if (exclusive) {
|
||||
r = dma_fence_wait(exclusive, true);
|
||||
if (unlikely(r))
|
||||
return r;
|
||||
}
|
||||
|
||||
params.func = amdgpu_vm_cpu_set_ptes;
|
||||
params.pages_addr = pages_addr;
|
||||
return amdgpu_vm_update_ptes(&params, start, last + 1,
|
||||
@ -1776,13 +1808,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
||||
/*
|
||||
* reserve space for two commands every (1 << BLOCK_SIZE)
|
||||
* entries or 2k dwords (whatever is smaller)
|
||||
*
|
||||
* The second command is for the shadow pagetables.
|
||||
*/
|
||||
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
|
||||
|
||||
/* The second command is for the shadow pagetables. */
|
||||
if (vm->root.base.bo->shadow)
|
||||
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
|
||||
else
|
||||
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
|
||||
ncmds *= 2;
|
||||
|
||||
/* padding, etc. */
|
||||
ndw = 64;
|
||||
@ -1801,10 +1832,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
||||
ndw += ncmds * 10;
|
||||
|
||||
/* extra commands for begin/end fragments */
|
||||
ncmds = 2 * adev->vm_manager.fragment_size;
|
||||
if (vm->root.base.bo->shadow)
|
||||
ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
|
||||
else
|
||||
ndw += 2 * 10 * adev->vm_manager.fragment_size;
|
||||
ncmds *= 2;
|
||||
|
||||
ndw += 10 * ncmds;
|
||||
|
||||
params.func = amdgpu_vm_do_set_ptes;
|
||||
}
|
||||
@ -3006,7 +3038,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
}
|
||||
DRM_DEBUG_DRIVER("VM update mode is %s\n",
|
||||
vm->use_cpu_for_update ? "CPU" : "SDMA");
|
||||
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
|
||||
WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
|
||||
"CPU update of VM recommended only for large BAR system\n");
|
||||
vm->last_update = NULL;
|
||||
|
||||
@ -3136,7 +3168,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
|
||||
vm->pte_support_ats = pte_support_ats;
|
||||
DRM_DEBUG_DRIVER("VM update mode is %s\n",
|
||||
vm->use_cpu_for_update ? "CPU" : "SDMA");
|
||||
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
|
||||
WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
|
||||
"CPU update of VM recommended only for large BAR system\n");
|
||||
|
||||
if (vm->pasid) {
|
||||
|
@ -363,4 +363,6 @@ int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
|
||||
|
||||
void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
|
||||
|
||||
void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
|
||||
|
||||
#endif
|
||||
|
@ -40,26 +40,40 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
|
||||
return &hive->device_list;
|
||||
}
|
||||
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock)
|
||||
{
|
||||
int i;
|
||||
struct amdgpu_hive_info *tmp;
|
||||
|
||||
if (!adev->gmc.xgmi.hive_id)
|
||||
return NULL;
|
||||
|
||||
mutex_lock(&xgmi_mutex);
|
||||
|
||||
for (i = 0 ; i < hive_count; ++i) {
|
||||
tmp = &xgmi_hives[i];
|
||||
if (tmp->hive_id == adev->gmc.xgmi.hive_id)
|
||||
if (tmp->hive_id == adev->gmc.xgmi.hive_id) {
|
||||
if (lock)
|
||||
mutex_lock(&tmp->hive_lock);
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
if (i >= AMDGPU_MAX_XGMI_HIVE)
|
||||
if (i >= AMDGPU_MAX_XGMI_HIVE) {
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialize new hive if not exist */
|
||||
tmp = &xgmi_hives[hive_count++];
|
||||
tmp->hive_id = adev->gmc.xgmi.hive_id;
|
||||
INIT_LIST_HEAD(&tmp->device_list);
|
||||
mutex_init(&tmp->hive_lock);
|
||||
mutex_init(&tmp->reset_lock);
|
||||
if (lock)
|
||||
mutex_lock(&tmp->hive_lock);
|
||||
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
|
||||
return tmp;
|
||||
}
|
||||
@ -77,10 +91,6 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
|
||||
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
||||
adev->gmc.xgmi.node_id,
|
||||
adev->gmc.xgmi.hive_id, ret);
|
||||
else
|
||||
dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n",
|
||||
adev->gmc.xgmi.physical_node_id,
|
||||
adev->gmc.xgmi.hive_id);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -111,10 +121,14 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
mutex_lock(&xgmi_mutex);
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (!hive)
|
||||
hive = amdgpu_get_xgmi_hive(adev, 1);
|
||||
if (!hive) {
|
||||
ret = -EINVAL;
|
||||
dev_err(adev->dev,
|
||||
"XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",
|
||||
adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
hive_topology = &hive->topology_info;
|
||||
|
||||
@ -142,8 +156,11 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||
break;
|
||||
}
|
||||
|
||||
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
|
||||
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
|
||||
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
exit:
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -154,15 +171,14 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
|
||||
if (!adev->gmc.xgmi.supported)
|
||||
return;
|
||||
|
||||
mutex_lock(&xgmi_mutex);
|
||||
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
hive = amdgpu_get_xgmi_hive(adev, 1);
|
||||
if (!hive)
|
||||
goto exit;
|
||||
return;
|
||||
|
||||
if (!(hive->number_devices--))
|
||||
if (!(hive->number_devices--)) {
|
||||
mutex_destroy(&hive->hive_lock);
|
||||
|
||||
exit:
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
mutex_destroy(&hive->reset_lock);
|
||||
} else {
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
}
|
||||
}
|
||||
|
@ -29,10 +29,11 @@ struct amdgpu_hive_info {
|
||||
struct list_head device_list;
|
||||
struct psp_xgmi_topology_info topology_info;
|
||||
int number_devices;
|
||||
struct mutex hive_lock;
|
||||
struct mutex hive_lock,
|
||||
reset_lock;
|
||||
};
|
||||
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
|
||||
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
||||
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,349 +0,0 @@
|
||||
/*
|
||||
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef __CI_DPM_H__
|
||||
#define __CI_DPM_H__
|
||||
|
||||
#include "amdgpu_atombios.h"
|
||||
#include "ppsmc.h"
|
||||
|
||||
#define SMU__NUM_SCLK_DPM_STATE 8
|
||||
#define SMU__NUM_MCLK_DPM_LEVELS 6
|
||||
#define SMU__NUM_LCLK_DPM_LEVELS 8
|
||||
#define SMU__NUM_PCIE_DPM_LEVELS 8
|
||||
#include "smu7_discrete.h"
|
||||
|
||||
#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2
|
||||
|
||||
#define CISLANDS_UNUSED_GPIO_PIN 0x7F
|
||||
|
||||
struct ci_pl {
|
||||
u32 mclk;
|
||||
u32 sclk;
|
||||
enum amdgpu_pcie_gen pcie_gen;
|
||||
u16 pcie_lane;
|
||||
};
|
||||
|
||||
struct ci_ps {
|
||||
u16 performance_level_count;
|
||||
bool dc_compatible;
|
||||
u32 sclk_t;
|
||||
struct ci_pl performance_levels[CISLANDS_MAX_HARDWARE_POWERLEVELS];
|
||||
};
|
||||
|
||||
struct ci_dpm_level {
|
||||
bool enabled;
|
||||
u32 value;
|
||||
u32 param1;
|
||||
};
|
||||
|
||||
#define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5
|
||||
#define MAX_REGULAR_DPM_NUMBER 8
|
||||
#define CISLAND_MINIMUM_ENGINE_CLOCK 800
|
||||
|
||||
struct ci_single_dpm_table {
|
||||
u32 count;
|
||||
struct ci_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER];
|
||||
};
|
||||
|
||||
struct ci_dpm_table {
|
||||
struct ci_single_dpm_table sclk_table;
|
||||
struct ci_single_dpm_table mclk_table;
|
||||
struct ci_single_dpm_table pcie_speed_table;
|
||||
struct ci_single_dpm_table vddc_table;
|
||||
struct ci_single_dpm_table vddci_table;
|
||||
struct ci_single_dpm_table mvdd_table;
|
||||
};
|
||||
|
||||
struct ci_mc_reg_entry {
|
||||
u32 mclk_max;
|
||||
u32 mc_data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
|
||||
};
|
||||
|
||||
struct ci_mc_reg_table {
|
||||
u8 last;
|
||||
u8 num_entries;
|
||||
u16 valid_flag;
|
||||
struct ci_mc_reg_entry mc_reg_table_entry[MAX_AC_TIMING_ENTRIES];
|
||||
SMU7_Discrete_MCRegisterAddress mc_reg_address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
|
||||
};
|
||||
|
||||
struct ci_ulv_parm
|
||||
{
|
||||
bool supported;
|
||||
u32 cg_ulv_parameter;
|
||||
u32 volt_change_delay;
|
||||
struct ci_pl pl;
|
||||
};
|
||||
|
||||
#define CISLANDS_MAX_LEAKAGE_COUNT 8
|
||||
|
||||
struct ci_leakage_voltage {
|
||||
u16 count;
|
||||
u16 leakage_id[CISLANDS_MAX_LEAKAGE_COUNT];
|
||||
u16 actual_voltage[CISLANDS_MAX_LEAKAGE_COUNT];
|
||||
};
|
||||
|
||||
struct ci_dpm_level_enable_mask {
|
||||
u32 uvd_dpm_enable_mask;
|
||||
u32 vce_dpm_enable_mask;
|
||||
u32 acp_dpm_enable_mask;
|
||||
u32 samu_dpm_enable_mask;
|
||||
u32 sclk_dpm_enable_mask;
|
||||
u32 mclk_dpm_enable_mask;
|
||||
u32 pcie_dpm_enable_mask;
|
||||
};
|
||||
|
||||
struct ci_vbios_boot_state
|
||||
{
|
||||
u16 mvdd_bootup_value;
|
||||
u16 vddc_bootup_value;
|
||||
u16 vddci_bootup_value;
|
||||
u32 sclk_bootup_value;
|
||||
u32 mclk_bootup_value;
|
||||
u16 pcie_gen_bootup_value;
|
||||
u16 pcie_lane_bootup_value;
|
||||
};
|
||||
|
||||
struct ci_clock_registers {
|
||||
u32 cg_spll_func_cntl;
|
||||
u32 cg_spll_func_cntl_2;
|
||||
u32 cg_spll_func_cntl_3;
|
||||
u32 cg_spll_func_cntl_4;
|
||||
u32 cg_spll_spread_spectrum;
|
||||
u32 cg_spll_spread_spectrum_2;
|
||||
u32 dll_cntl;
|
||||
u32 mclk_pwrmgt_cntl;
|
||||
u32 mpll_ad_func_cntl;
|
||||
u32 mpll_dq_func_cntl;
|
||||
u32 mpll_func_cntl;
|
||||
u32 mpll_func_cntl_1;
|
||||
u32 mpll_func_cntl_2;
|
||||
u32 mpll_ss1;
|
||||
u32 mpll_ss2;
|
||||
};
|
||||
|
||||
struct ci_thermal_temperature_setting {
|
||||
s32 temperature_low;
|
||||
s32 temperature_high;
|
||||
s32 temperature_shutdown;
|
||||
};
|
||||
|
||||
struct ci_pcie_perf_range {
|
||||
u16 max;
|
||||
u16 min;
|
||||
};
|
||||
|
||||
enum ci_pt_config_reg_type {
|
||||
CISLANDS_CONFIGREG_MMR = 0,
|
||||
CISLANDS_CONFIGREG_SMC_IND,
|
||||
CISLANDS_CONFIGREG_DIDT_IND,
|
||||
CISLANDS_CONFIGREG_CACHE,
|
||||
CISLANDS_CONFIGREG_MAX
|
||||
};
|
||||
|
||||
#define POWERCONTAINMENT_FEATURE_BAPM 0x00000001
|
||||
#define POWERCONTAINMENT_FEATURE_TDCLimit 0x00000002
|
||||
#define POWERCONTAINMENT_FEATURE_PkgPwrLimit 0x00000004
|
||||
|
||||
struct ci_pt_config_reg {
|
||||
u32 offset;
|
||||
u32 mask;
|
||||
u32 shift;
|
||||
u32 value;
|
||||
enum ci_pt_config_reg_type type;
|
||||
};
|
||||
|
||||
struct ci_pt_defaults {
|
||||
u8 svi_load_line_en;
|
||||
u8 svi_load_line_vddc;
|
||||
u8 tdc_vddc_throttle_release_limit_perc;
|
||||
u8 tdc_mawt;
|
||||
u8 tdc_waterfall_ctl;
|
||||
u8 dte_ambient_temp_base;
|
||||
u32 display_cac;
|
||||
u32 bapm_temp_gradient;
|
||||
u16 bapmti_r[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS];
|
||||
u16 bapmti_rc[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS];
|
||||
};
|
||||
|
||||
#define DPMTABLE_OD_UPDATE_SCLK 0x00000001
|
||||
#define DPMTABLE_OD_UPDATE_MCLK 0x00000002
|
||||
#define DPMTABLE_UPDATE_SCLK 0x00000004
|
||||
#define DPMTABLE_UPDATE_MCLK 0x00000008
|
||||
|
||||
struct ci_power_info {
|
||||
struct ci_dpm_table dpm_table;
|
||||
struct ci_dpm_table golden_dpm_table;
|
||||
u32 voltage_control;
|
||||
u32 mvdd_control;
|
||||
u32 vddci_control;
|
||||
u32 active_auto_throttle_sources;
|
||||
struct ci_clock_registers clock_registers;
|
||||
u16 acpi_vddc;
|
||||
u16 acpi_vddci;
|
||||
enum amdgpu_pcie_gen force_pcie_gen;
|
||||
enum amdgpu_pcie_gen acpi_pcie_gen;
|
||||
struct ci_leakage_voltage vddc_leakage;
|
||||
struct ci_leakage_voltage vddci_leakage;
|
||||
u16 max_vddc_in_pp_table;
|
||||
u16 min_vddc_in_pp_table;
|
||||
u16 max_vddci_in_pp_table;
|
||||
u16 min_vddci_in_pp_table;
|
||||
u32 mclk_strobe_mode_threshold;
|
||||
u32 mclk_stutter_mode_threshold;
|
||||
u32 mclk_edc_enable_threshold;
|
||||
u32 mclk_edc_wr_enable_threshold;
|
||||
struct ci_vbios_boot_state vbios_boot_state;
|
||||
/* smc offsets */
|
||||
u32 sram_end;
|
||||
u32 dpm_table_start;
|
||||
u32 soft_regs_start;
|
||||
u32 mc_reg_table_start;
|
||||
u32 fan_table_start;
|
||||
u32 arb_table_start;
|
||||
/* smc tables */
|
||||
SMU7_Discrete_DpmTable smc_state_table;
|
||||
SMU7_Discrete_MCRegisters smc_mc_reg_table;
|
||||
SMU7_Discrete_PmFuses smc_powertune_table;
|
||||
/* other stuff */
|
||||
struct ci_mc_reg_table mc_reg_table;
|
||||
struct atom_voltage_table vddc_voltage_table;
|
||||
struct atom_voltage_table vddci_voltage_table;
|
||||
struct atom_voltage_table mvdd_voltage_table;
|
||||
struct ci_ulv_parm ulv;
|
||||
u32 power_containment_features;
|
||||
const struct ci_pt_defaults *powertune_defaults;
|
||||
u32 dte_tj_offset;
|
||||
bool vddc_phase_shed_control;
|
||||
struct ci_thermal_temperature_setting thermal_temp_setting;
|
||||
struct ci_dpm_level_enable_mask dpm_level_enable_mask;
|
||||
u32 need_update_smu7_dpm_table;
|
||||
u32 sclk_dpm_key_disabled;
|
||||
u32 mclk_dpm_key_disabled;
|
||||
u32 pcie_dpm_key_disabled;
|
||||
u32 thermal_sclk_dpm_enabled;
|
||||
struct ci_pcie_perf_range pcie_gen_performance;
|
||||
struct ci_pcie_perf_range pcie_lane_performance;
|
||||
struct ci_pcie_perf_range pcie_gen_powersaving;
|
||||
struct ci_pcie_perf_range pcie_lane_powersaving;
|
||||
u32 activity_target[SMU7_MAX_LEVELS_GRAPHICS];
|
||||
u32 mclk_activity_target;
|
||||
u32 low_sclk_interrupt_t;
|
||||
u32 last_mclk_dpm_enable_mask;
|
||||
u32 sys_pcie_mask;
|
||||
/* caps */
|
||||
bool caps_power_containment;
|
||||
bool caps_cac;
|
||||
bool caps_sq_ramping;
|
||||
bool caps_db_ramping;
|
||||
bool caps_td_ramping;
|
||||
bool caps_tcp_ramping;
|
||||
bool caps_fps;
|
||||
bool caps_sclk_ds;
|
||||
bool caps_sclk_ss_support;
|
||||
bool caps_mclk_ss_support;
|
||||
bool caps_uvd_dpm;
|
||||
bool caps_vce_dpm;
|
||||
bool caps_samu_dpm;
|
||||
bool caps_acp_dpm;
|
||||
bool caps_automatic_dc_transition;
|
||||
bool caps_sclk_throttle_low_notification;
|
||||
bool caps_dynamic_ac_timing;
|
||||
bool caps_od_fuzzy_fan_control_support;
|
||||
/* flags */
|
||||
bool thermal_protection;
|
||||
bool pcie_performance_request;
|
||||
bool dynamic_ss;
|
||||
bool dll_default_on;
|
||||
bool cac_enabled;
|
||||
bool uvd_enabled;
|
||||
bool battery_state;
|
||||
bool pspp_notify_required;
|
||||
bool enable_bapm_feature;
|
||||
bool enable_tdc_limit_feature;
|
||||
bool enable_pkg_pwr_tracking_feature;
|
||||
bool use_pcie_performance_levels;
|
||||
bool use_pcie_powersaving_levels;
|
||||
bool uvd_power_gated;
|
||||
/* driver states */
|
||||
struct amdgpu_ps current_rps;
|
||||
struct ci_ps current_ps;
|
||||
struct amdgpu_ps requested_rps;
|
||||
struct ci_ps requested_ps;
|
||||
/* fan control */
|
||||
bool fan_ctrl_is_in_default_mode;
|
||||
bool fan_is_controlled_by_smc;
|
||||
u32 t_min;
|
||||
u32 fan_ctrl_default_mode;
|
||||
};
|
||||
|
||||
#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0
|
||||
#define CISLANDS_VOLTAGE_CONTROL_BY_GPIO 0x1
|
||||
#define CISLANDS_VOLTAGE_CONTROL_BY_SVID2 0x2
|
||||
|
||||
#define CISLANDS_Q88_FORMAT_CONVERSION_UNIT 256
|
||||
|
||||
#define CISLANDS_VRC_DFLT0 0x3FFFC000
|
||||
#define CISLANDS_VRC_DFLT1 0x000400
|
||||
#define CISLANDS_VRC_DFLT2 0xC00080
|
||||
#define CISLANDS_VRC_DFLT3 0xC00200
|
||||
#define CISLANDS_VRC_DFLT4 0xC01680
|
||||
#define CISLANDS_VRC_DFLT5 0xC00033
|
||||
#define CISLANDS_VRC_DFLT6 0xC00033
|
||||
#define CISLANDS_VRC_DFLT7 0x3FFFC000
|
||||
|
||||
#define CISLANDS_CGULVPARAMETER_DFLT 0x00040035
|
||||
#define CISLAND_TARGETACTIVITY_DFLT 30
|
||||
#define CISLAND_MCLK_TARGETACTIVITY_DFLT 10
|
||||
|
||||
#define PCIE_PERF_REQ_REMOVE_REGISTRY 0
|
||||
#define PCIE_PERF_REQ_FORCE_LOWPOWER 1
|
||||
#define PCIE_PERF_REQ_PECI_GEN1 2
|
||||
#define PCIE_PERF_REQ_PECI_GEN2 3
|
||||
#define PCIE_PERF_REQ_PECI_GEN3 4
|
||||
|
||||
#define CISLANDS_SSTU_DFLT 0
|
||||
#define CISLANDS_SST_DFLT 0x00C8
|
||||
|
||||
/* XXX are these ok? */
|
||||
#define CISLANDS_TEMP_RANGE_MIN (90 * 1000)
|
||||
#define CISLANDS_TEMP_RANGE_MAX (120 * 1000)
|
||||
|
||||
int amdgpu_ci_copy_bytes_to_smc(struct amdgpu_device *adev,
|
||||
u32 smc_start_address,
|
||||
const u8 *src, u32 byte_count, u32 limit);
|
||||
void amdgpu_ci_start_smc(struct amdgpu_device *adev);
|
||||
void amdgpu_ci_reset_smc(struct amdgpu_device *adev);
|
||||
int amdgpu_ci_program_jump_on_start(struct amdgpu_device *adev);
|
||||
void amdgpu_ci_stop_smc_clock(struct amdgpu_device *adev);
|
||||
void amdgpu_ci_start_smc_clock(struct amdgpu_device *adev);
|
||||
bool amdgpu_ci_is_smc_running(struct amdgpu_device *adev);
|
||||
PPSMC_Result amdgpu_ci_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg);
|
||||
PPSMC_Result amdgpu_ci_wait_for_smc_inactive(struct amdgpu_device *adev);
|
||||
int amdgpu_ci_load_smc_ucode(struct amdgpu_device *adev, u32 limit);
|
||||
int amdgpu_ci_read_smc_sram_dword(struct amdgpu_device *adev,
|
||||
u32 smc_address, u32 *value, u32 limit);
|
||||
int amdgpu_ci_write_smc_sram_dword(struct amdgpu_device *adev,
|
||||
u32 smc_address, u32 value, u32 limit);
|
||||
|
||||
#endif
|
@ -1,279 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors: Alex Deucher
|
||||
*/
|
||||
|
||||
#include <linux/firmware.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "cikd.h"
|
||||
#include "ppsmc.h"
|
||||
#include "amdgpu_ucode.h"
|
||||
#include "ci_dpm.h"
|
||||
|
||||
#include "smu/smu_7_0_1_d.h"
|
||||
#include "smu/smu_7_0_1_sh_mask.h"
|
||||
|
||||
/*
 * Point the SMC indirect-access window at @smc_address.
 *
 * The address has to be dword aligned and its last byte (@smc_address + 3)
 * must not lie above @limit; otherwise -EINVAL is returned before any
 * register is written.  On success SMC_IND_INDEX_0 is loaded with the
 * address, a zero is written to the AUTO_INCREMENT_IND_0 field of
 * SMC_IND_ACCESS_CNTL, and 0 is returned.
 *
 * Callers in this file hold adev->smc_idx_lock around this call.
 */
static int ci_set_smc_sram_address(struct amdgpu_device *adev,
				   u32 smc_address, u32 limit)
{
	const u32 last_byte = smc_address + 3;

	if ((smc_address & 3) != 0)
		return -EINVAL;
	if (last_byte > limit)
		return -EINVAL;

	WREG32(mmSMC_IND_INDEX_0, smc_address);
	WREG32_P(mmSMC_IND_ACCESS_CNTL, 0,
		 ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK);

	return 0;
}
|
||||
|
||||
int amdgpu_ci_copy_bytes_to_smc(struct amdgpu_device *adev,
|
||||
u32 smc_start_address,
|
||||
const u8 *src, u32 byte_count, u32 limit)
|
||||
{
|
||||
unsigned long flags;
|
||||
u32 data, original_data;
|
||||
u32 addr;
|
||||
u32 extra_shift;
|
||||
int ret = 0;
|
||||
|
||||
if (smc_start_address & 3)
|
||||
return -EINVAL;
|
||||
if ((smc_start_address + byte_count) > limit)
|
||||
return -EINVAL;
|
||||
|
||||
addr = smc_start_address;
|
||||
|
||||
spin_lock_irqsave(&adev->smc_idx_lock, flags);
|
||||
while (byte_count >= 4) {
|
||||
/* SMC address space is BE */
|
||||
data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3];
|
||||
|
||||
ret = ci_set_smc_sram_address(adev, addr, limit);
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
WREG32(mmSMC_IND_DATA_0, data);
|
||||
|
||||
src += 4;
|
||||
byte_count -= 4;
|
||||
addr += 4;
|
||||
}
|
||||
|
||||
/* RMW for the final bytes */
|
||||
if (byte_count > 0) {
|
||||
data = 0;
|
||||
|
||||
ret = ci_set_smc_sram_address(adev, addr, limit);
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
original_data = RREG32(mmSMC_IND_DATA_0);
|
||||
|
||||
extra_shift = 8 * (4 - byte_count);
|
||||
|
||||
while (byte_count > 0) {
|
||||
data = (data << 8) + *src++;
|
||||
byte_count--;
|
||||
}
|
||||
|
||||
data <<= extra_shift;
|
||||
|
||||
data |= (original_data & ~((~0UL) << extra_shift));
|
||||
|
||||
ret = ci_set_smc_sram_address(adev, addr, limit);
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
WREG32(mmSMC_IND_DATA_0, data);
|
||||
}
|
||||
|
||||
done:
|
||||
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void amdgpu_ci_start_smc(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL);
|
||||
|
||||
tmp &= ~SMC_SYSCON_RESET_CNTL__rst_reg_MASK;
|
||||
WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, tmp);
|
||||
}
|
||||
|
||||
void amdgpu_ci_reset_smc(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL);
|
||||
|
||||
tmp |= SMC_SYSCON_RESET_CNTL__rst_reg_MASK;
|
||||
WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, tmp);
|
||||
}
|
||||
|
||||
int amdgpu_ci_program_jump_on_start(struct amdgpu_device *adev)
|
||||
{
|
||||
static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 };
|
||||
|
||||
return amdgpu_ci_copy_bytes_to_smc(adev, 0x0, data, 4, sizeof(data)+1);
|
||||
}
|
||||
|
||||
void amdgpu_ci_stop_smc_clock(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
|
||||
|
||||
tmp |= SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK;
|
||||
|
||||
WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, tmp);
|
||||
}
|
||||
|
||||
void amdgpu_ci_start_smc_clock(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
|
||||
|
||||
tmp &= ~SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK;
|
||||
|
||||
WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, tmp);
|
||||
}
|
||||
|
||||
bool amdgpu_ci_is_smc_running(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 clk = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
|
||||
u32 pc_c = RREG32_SMC(ixSMC_PC_C);
|
||||
|
||||
if (!(clk & SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK) && (0x20100 <= pc_c))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Post @msg to the SMC and return its response.
 *
 * Returns PPSMC_Result_Failed immediately when amdgpu_ci_is_smc_running()
 * says the SMC is not running.  Otherwise @msg is written to SMC_MESSAGE_0
 * and SMC_RESP_0 is polled once per microsecond, for at most
 * adev->usec_timeout iterations, until it reads non-zero.  SMC_RESP_0 is
 * then read one final time and that value is returned, so a timeout
 * surfaces as whatever the register holds at that point.
 */
PPSMC_Result amdgpu_ci_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg)
{
	u32 resp;
	int spin;

	if (!amdgpu_ci_is_smc_running(adev))
		return PPSMC_Result_Failed;

	WREG32(mmSMC_MESSAGE_0, msg);

	for (spin = 0; spin < adev->usec_timeout; spin++) {
		resp = RREG32(mmSMC_RESP_0);
		if (resp)
			break;
		udelay(1);
	}

	/* The value handed back always comes from this final read. */
	resp = RREG32(mmSMC_RESP_0);

	return (PPSMC_Result)resp;
}
|
||||
|
||||
PPSMC_Result amdgpu_ci_wait_for_smc_inactive(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp;
|
||||
int i;
|
||||
|
||||
if (!amdgpu_ci_is_smc_running(adev))
|
||||
return PPSMC_Result_OK;
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
|
||||
if ((tmp & SMC_SYSCON_CLOCK_CNTL_0__cken_MASK) == 0)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
return PPSMC_Result_OK;
|
||||
}
|
||||
|
||||
/*
 * Stream the SMC firmware image held in adev->pm.fw into SMC SRAM.
 *
 * The v1.0 SMC firmware header supplies the version (stored in
 * adev->pm.fw_version), the SRAM start address, the payload size and the
 * payload offset inside the blob.  The payload is written dword by dword,
 * most-significant byte first, with the AUTO_INCREMENT_IND_0 field set for
 * the duration of the transfer and written back to zero afterwards.
 *
 * Returns 0 on success, -EINVAL when no firmware is loaded or when the
 * payload size is not a multiple of four.
 *
 * NOTE(review): @limit is accepted but never consulted here.
 */
int amdgpu_ci_load_smc_ucode(struct amdgpu_device *adev, u32 limit)
{
	const struct smc_firmware_header_v1_0 *hdr;
	unsigned long flags;
	const u8 *bytes;
	u32 start_addr;
	u32 remaining;
	u32 word;

	if (!adev->pm.fw)
		return -EINVAL;

	hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
	amdgpu_ucode_print_smc_hdr(&hdr->header);

	adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
	start_addr = le32_to_cpu(hdr->ucode_start_addr);
	remaining = le32_to_cpu(hdr->header.ucode_size_bytes);
	bytes = (const u8 *)
		(adev->pm.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

	if (remaining & 3)
		return -EINVAL;

	spin_lock_irqsave(&adev->smc_idx_lock, flags);

	WREG32(mmSMC_IND_INDEX_0, start_addr);
	WREG32_P(mmSMC_IND_ACCESS_CNTL,
		 SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK,
		 ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK);

	for (; remaining >= 4; remaining -= 4, bytes += 4) {
		/* SMC address space is BE */
		word = (bytes[0] << 24) | (bytes[1] << 16) | (bytes[2] << 8) | bytes[3];

		WREG32(mmSMC_IND_DATA_0, word);
	}

	WREG32_P(mmSMC_IND_ACCESS_CNTL, 0,
		 ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK);

	spin_unlock_irqrestore(&adev->smc_idx_lock, flags);

	return 0;
}
|
||||
|
||||
/*
 * Read one dword of SMC SRAM at @smc_address into *@value.
 *
 * The address is validated and latched by ci_set_smc_sram_address() while
 * adev->smc_idx_lock is held.  *@value is written only on success.
 *
 * Returns 0 on success or the -EINVAL from the address check.
 */
int amdgpu_ci_read_smc_sram_dword(struct amdgpu_device *adev,
				  u32 smc_address, u32 *value, u32 limit)
{
	unsigned long irq_flags;
	int err;

	spin_lock_irqsave(&adev->smc_idx_lock, irq_flags);

	err = ci_set_smc_sram_address(adev, smc_address, limit);
	if (!err)
		*value = RREG32(mmSMC_IND_DATA_0);

	spin_unlock_irqrestore(&adev->smc_idx_lock, irq_flags);

	return err;
}
|
||||
|
||||
/*
 * Write @value to the SMC SRAM dword at @smc_address.
 *
 * The address is validated and latched by ci_set_smc_sram_address() while
 * adev->smc_idx_lock is held.  Nothing is written when validation fails.
 *
 * Returns 0 on success or the -EINVAL from the address check.
 */
int amdgpu_ci_write_smc_sram_dword(struct amdgpu_device *adev,
				   u32 smc_address, u32 value, u32 limit)
{
	unsigned long irq_flags;
	int err;

	spin_lock_irqsave(&adev->smc_idx_lock, irq_flags);

	err = ci_set_smc_sram_address(adev, smc_address, limit);
	if (!err)
		WREG32(mmSMC_IND_DATA_0, value);

	spin_unlock_irqrestore(&adev->smc_idx_lock, irq_flags);

	return err;
}
|
@ -1741,6 +1741,69 @@ static bool cik_need_full_reset(struct amdgpu_device *adev)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Sample two PCIe performance counters over a one-second window.
 *
 * Event 40 (received messages) is routed to counter 0 and event 104
 * (posted requests sent) to counter 1.  The counters are reset and started,
 * the caller sleeps for 1000 ms, then the counters are stopped and latched
 * into their shadow registers.  Each result is the 32-bit counter value
 * combined with the matching COUNTERn_UPPER overflow field shifted into
 * bits 32 and up.
 *
 * On APUs the function returns at once and leaves *@count0 and *@count1
 * untouched.
 */
static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
			       uint64_t *count1)
{
	uint32_t event_sel = 0;
	uint64_t upper0, upper1;
	int cntl;

	/* This reports 0 on APUs, so return to avoid writing/reading registers
	 * that may or may not be different from their GPU counterparts
	 */
	if (adev->flags & AMD_IS_APU)
		return;

	/* Select the two events to watch:
	 * Reg 40 is # received msgs, Reg 104 is # of posted requests sent
	 */
	event_sel = REG_SET_FIELD(event_sel, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
	event_sel = REG_SET_FIELD(event_sel, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);

	/* Program the event selection */
	WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, event_sel);

	/* Zero out and enable the perf counters
	 * Write 0x5:
	 * Bit 0 = Start all counters(1)
	 * Bit 2 = Global counter reset enable(1)
	 */
	WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);

	/* Sampling window */
	msleep(1000);

	/* Load the shadow and disable the perf counters
	 * Write 0x2:
	 * Bit 0 = Stop counters(0)
	 * Bit 1 = Load the shadow counters(1)
	 */
	WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);

	/* Pick up whatever overflowed past 32 bits */
	cntl = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
	upper0 = REG_GET_FIELD(cntl, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
	upper1 = REG_GET_FIELD(cntl, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);

	/* Combine the low 32 bits with the overflow part */
	*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (upper0 << 32);
	*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (upper1 << 32);
}
|
||||
|
||||
static bool cik_need_reset_on_init(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 clock_cntl, pc;
|
||||
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return false;
|
||||
|
||||
/* check if the SMC is already running */
|
||||
clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
|
||||
pc = RREG32_SMC(ixSMC_PC_C);
|
||||
if ((0 == REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable)) &&
|
||||
(0x20100 <= pc))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs cik_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &cik_read_disabled_bios,
|
||||
@ -1756,6 +1819,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
|
||||
.invalidate_hdp = &cik_invalidate_hdp,
|
||||
.need_full_reset = &cik_need_full_reset,
|
||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
.get_pcie_usage = &cik_get_pcie_usage,
|
||||
.need_reset_on_init = &cik_need_reset_on_init,
|
||||
};
|
||||
|
||||
static int cik_common_early_init(void *handle)
|
||||
@ -2005,10 +2070,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
|
||||
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
|
||||
if (amdgpu_dpm == -1)
|
||||
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
|
||||
else
|
||||
amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
|
||||
if (adev->enable_virtual_display)
|
||||
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
|
||||
#if defined(CONFIG_DRM_AMD_DC)
|
||||
@ -2026,10 +2088,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
|
||||
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
|
||||
if (amdgpu_dpm == -1)
|
||||
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
|
||||
else
|
||||
amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
|
||||
if (adev->enable_virtual_display)
|
||||
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
|
||||
#if defined(CONFIG_DRM_AMD_DC)
|
||||
|
@ -24,7 +24,6 @@
|
||||
#ifndef __CIK_DPM_H__
|
||||
#define __CIK_DPM_H__
|
||||
|
||||
extern const struct amdgpu_ip_block_version ci_smu_ip_block;
|
||||
extern const struct amdgpu_ip_block_version kv_smu_ip_block;
|
||||
|
||||
#endif
|
||||
|
@ -103,9 +103,9 @@ static void cik_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||
*/
|
||||
static int cik_ih_irq_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ih_ring *ih = &adev->irq.ih;
|
||||
int rb_bufsz;
|
||||
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
|
||||
u64 wptr_off;
|
||||
|
||||
/* disable irqs */
|
||||
cik_ih_disable_interrupts(adev);
|
||||
@ -131,9 +131,8 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
|
||||
ih_rb_cntl |= IH_RB_CNTL__WPTR_WRITEBACK_ENABLE_MASK;
|
||||
|
||||
/* set the writeback address whether it's enabled or not */
|
||||
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
|
||||
|
||||
WREG32(mmIH_RB_CNTL, ih_rb_cntl);
|
||||
|
||||
@ -183,11 +182,12 @@ static void cik_ih_irq_disable(struct amdgpu_device *adev)
|
||||
* Used by cik_irq_process().
|
||||
* Returns the value of the wptr.
|
||||
*/
|
||||
static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
|
||||
static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 wptr, tmp;
|
||||
|
||||
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
|
||||
wptr = le32_to_cpu(*ih->wptr_cpu);
|
||||
|
||||
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
|
||||
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
|
||||
@ -196,13 +196,13 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* this should allow us to catchup.
|
||||
*/
|
||||
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask);
|
||||
adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask;
|
||||
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
|
||||
ih->rptr = (wptr + 16) & ih->ptr_mask;
|
||||
tmp = RREG32(mmIH_RB_CNTL);
|
||||
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
|
||||
WREG32(mmIH_RB_CNTL, tmp);
|
||||
}
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
return (wptr & ih->ptr_mask);
|
||||
}
|
||||
|
||||
/* CIK IV Ring
|
||||
@ -237,16 +237,17 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* position and also advance the position.
|
||||
*/
|
||||
static void cik_ih_decode_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
/* wptr/rptr are in bytes! */
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
uint32_t dw[4];
|
||||
|
||||
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
|
||||
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
|
||||
|
||||
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
|
||||
entry->src_id = dw[0] & 0xff;
|
||||
@ -256,7 +257,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
|
||||
entry->pasid = (dw[2] >> 16) & 0xffff;
|
||||
|
||||
/* wptr/rptr are in bytes! */
|
||||
adev->irq.ih.rptr += 16;
|
||||
ih->rptr += 16;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -266,9 +267,10 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
|
||||
*
|
||||
* Set the IH ring buffer rptr.
|
||||
*/
|
||||
static void cik_ih_set_rptr(struct amdgpu_device *adev)
|
||||
static void cik_ih_set_rptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr);
|
||||
WREG32(mmIH_RB_RPTR, ih->rptr);
|
||||
}
|
||||
|
||||
static int cik_ih_early_init(void *handle)
|
||||
|
@ -220,7 +220,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 extra_bits = vmid & 0xf;
|
||||
|
@ -103,9 +103,9 @@ static void cz_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||
*/
|
||||
static int cz_ih_irq_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int rb_bufsz;
|
||||
struct amdgpu_ih_ring *ih = &adev->irq.ih;
|
||||
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
|
||||
u64 wptr_off;
|
||||
int rb_bufsz;
|
||||
|
||||
/* disable irqs */
|
||||
cz_ih_disable_interrupts(adev);
|
||||
@ -133,9 +133,8 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
|
||||
|
||||
/* set the writeback address whether it's enabled or not */
|
||||
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
|
||||
|
||||
WREG32(mmIH_RB_CNTL, ih_rb_cntl);
|
||||
|
||||
@ -185,11 +184,12 @@ static void cz_ih_irq_disable(struct amdgpu_device *adev)
|
||||
* Used by cz_irq_process(VI).
|
||||
* Returns the value of the wptr.
|
||||
*/
|
||||
static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
|
||||
static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 wptr, tmp;
|
||||
|
||||
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
|
||||
wptr = le32_to_cpu(*ih->wptr_cpu);
|
||||
|
||||
if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
@ -198,13 +198,13 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* this should allow us to catchup.
|
||||
*/
|
||||
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask);
|
||||
adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask;
|
||||
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
|
||||
ih->rptr = (wptr + 16) & ih->ptr_mask;
|
||||
tmp = RREG32(mmIH_RB_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
WREG32(mmIH_RB_CNTL, tmp);
|
||||
}
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
return (wptr & ih->ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -216,16 +216,17 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* position and also advance the position.
|
||||
*/
|
||||
static void cz_ih_decode_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
struct amdgpu_ih_ring *ih,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
/* wptr/rptr are in bytes! */
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
uint32_t dw[4];
|
||||
|
||||
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
|
||||
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
|
||||
|
||||
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
|
||||
entry->src_id = dw[0] & 0xff;
|
||||
@ -235,7 +236,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
|
||||
entry->pasid = (dw[2] >> 16) & 0xffff;
|
||||
|
||||
/* wptr/rptr are in bytes! */
|
||||
adev->irq.ih.rptr += 16;
|
||||
ih->rptr += 16;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -245,9 +246,10 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
|
||||
*
|
||||
* Set the IH ring buffer rptr.
|
||||
*/
|
||||
static void cz_ih_set_rptr(struct amdgpu_device *adev)
|
||||
static void cz_ih_set_rptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr);
|
||||
WREG32(mmIH_RB_RPTR, ih->rptr);
|
||||
}
|
||||
|
||||
static int cz_ih_early_init(void *handle)
|
||||
|
@ -167,19 +167,6 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
|
||||
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
|
||||
|
||||
dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
|
||||
if (crtc->primary->fb) {
|
||||
int r;
|
||||
struct amdgpu_bo *abo;
|
||||
|
||||
abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
|
||||
r = amdgpu_bo_reserve(abo, true);
|
||||
if (unlikely(r))
|
||||
DRM_ERROR("failed to reserve abo before unpin\n");
|
||||
else {
|
||||
amdgpu_bo_unpin(abo);
|
||||
amdgpu_bo_unreserve(abo);
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
|
||||
amdgpu_crtc->encoder = NULL;
|
||||
@ -692,7 +679,9 @@ static int dce_virtual_pageflip(struct amdgpu_device *adev,
|
||||
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
|
||||
|
||||
drm_crtc_vblank_put(&amdgpu_crtc->base);
|
||||
schedule_work(&works->unpin_work);
|
||||
amdgpu_bo_unref(&works->old_abo);
|
||||
kfree(works->shared);
|
||||
kfree(works);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1842,13 +1842,13 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
||||
static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
|
||||
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
|
||||
if (ctx_switch) {
|
||||
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
|
||||
amdgpu_ring_write(ring, 0);
|
||||
}
|
||||
|
@ -2228,13 +2228,13 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
|
||||
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
|
||||
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
|
||||
if (ctx_switch) {
|
||||
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
|
||||
amdgpu_ring_write(ring, 0);
|
||||
}
|
||||
@ -2259,11 +2259,27 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
/* Currently, there is a high possibility to get wave ID mismatch
|
||||
* between ME and GDS, leading to a hw deadlock, because ME generates
|
||||
* different wave IDs than the GDS expects. This situation happens
|
||||
* randomly when at least 5 compute pipes use GDS ordered append.
|
||||
* The wave IDs generated by ME are also wrong after suspend/resume.
|
||||
* Those are probably bugs somewhere else in the kernel driver.
|
||||
*
|
||||
* Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
|
||||
* GDS to 0 for this ring (me/pipe).
|
||||
*/
|
||||
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
|
||||
amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
amdgpu_ring_write(ring,
|
||||
#ifdef __BIG_ENDIAN
|
||||
@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
|
||||
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
|
||||
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
|
||||
7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
|
||||
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
|
||||
.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
|
||||
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
|
||||
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
|
||||
.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
|
||||
@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
|
||||
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
|
||||
adev->gds.gws.total_size = 64;
|
||||
adev->gds.oa.total_size = 16;
|
||||
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
||||
|
||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
||||
adev->gds.mem.gfx_partition_size = 4096;
|
||||
|
@ -4233,7 +4233,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
|
||||
u32 tmp;
|
||||
u32 rb_bufsz;
|
||||
u64 rb_addr, rptr_addr, wptr_gpu_addr;
|
||||
int r;
|
||||
|
||||
/* Set the write pointer delay */
|
||||
WREG32(mmCP_RB_WPTR_DELAY, 0);
|
||||
@ -4278,9 +4277,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
|
||||
amdgpu_ring_clear_ring(ring);
|
||||
gfx_v8_0_cp_gfx_start(adev);
|
||||
ring->sched.ready = true;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
|
||||
@ -4369,10 +4367,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
|
||||
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
|
||||
}
|
||||
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
if (r)
|
||||
DRM_ERROR("KCQ enable failed\n");
|
||||
return r;
|
||||
amdgpu_ring_commit(kiq_ring);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
|
||||
@ -4709,18 +4706,34 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
|
||||
if (r)
|
||||
goto done;
|
||||
|
||||
/* Test KCQs - reversing the order of rings seems to fix ring test failure
|
||||
* after GPU reset
|
||||
*/
|
||||
for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
}
|
||||
|
||||
done:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
|
||||
{
|
||||
int r, i;
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
/* collect all the ring_tests here, gfx, kiq, compute */
|
||||
ring = &adev->gfx.gfx_ring[0];
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ring = &adev->gfx.kiq.ring;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
amdgpu_ring_test_helper(ring);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
@ -4739,6 +4752,11 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
|
||||
r = gfx_v8_0_kcq_resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = gfx_v8_0_cp_test_all_rings(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
gfx_v8_0_enable_gui_idle_interrupt(adev, true);
|
||||
|
||||
return 0;
|
||||
@ -5086,6 +5104,8 @@ static int gfx_v8_0_post_soft_reset(void *handle)
|
||||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
|
||||
gfx_v8_0_cp_gfx_resume(adev);
|
||||
|
||||
gfx_v8_0_cp_test_all_rings(adev);
|
||||
|
||||
adev->gfx.rlc.funcs->start(adev);
|
||||
|
||||
return 0;
|
||||
@ -6027,7 +6047,7 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
|
||||
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
@ -6059,11 +6079,27 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
/* Currently, there is a high possibility to get wave ID mismatch
|
||||
* between ME and GDS, leading to a hw deadlock, because ME generates
|
||||
* different wave IDs than the GDS expects. This situation happens
|
||||
* randomly when at least 5 compute pipes use GDS ordered append.
|
||||
* The wave IDs generated by ME are also wrong after suspend/resume.
|
||||
* Those are probably bugs somewhere else in the kernel driver.
|
||||
*
|
||||
* Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
|
||||
* GDS to 0 for this ring (me/pipe).
|
||||
*/
|
||||
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
|
||||
amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
amdgpu_ring_write(ring,
|
||||
#ifdef __BIG_ENDIAN
|
||||
@ -6870,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
|
||||
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
|
||||
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
|
||||
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
|
||||
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
|
||||
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
|
||||
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
|
||||
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
|
||||
.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
|
||||
@ -6900,7 +6936,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
|
||||
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
|
||||
17 + /* gfx_v8_0_ring_emit_vm_flush */
|
||||
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
|
||||
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
|
||||
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
|
||||
.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
|
||||
.test_ring = gfx_v8_0_ring_test_ring,
|
||||
.insert_nop = amdgpu_ring_insert_nop,
|
||||
@ -6976,6 +7012,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
|
||||
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
|
||||
adev->gds.gws.total_size = 64;
|
||||
adev->gds.oa.total_size = 16;
|
||||
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
||||
|
||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
||||
adev->gds.mem.gfx_partition_size = 4096;
|
||||
|
@ -113,7 +113,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] =
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
|
||||
@ -135,10 +138,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
|
||||
@ -3587,6 +3587,8 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
{
|
||||
uint32_t data, def;
|
||||
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
/* It is disabled by HW by default */
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
|
||||
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
|
||||
@ -3651,6 +3653,8 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
|
||||
@ -3968,7 +3972,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
|
||||
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
@ -4001,11 +4005,27 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
/* Currently, there is a high possibility to get wave ID mismatch
|
||||
* between ME and GDS, leading to a hw deadlock, because ME generates
|
||||
* different wave IDs than the GDS expects. This situation happens
|
||||
* randomly when at least 5 compute pipes use GDS ordered append.
|
||||
* The wave IDs generated by ME are also wrong after suspend/resume.
|
||||
* Those are probably bugs somewhere else in the kernel driver.
|
||||
*
|
||||
* Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
|
||||
* GDS to 0 for this ring (me/pipe).
|
||||
*/
|
||||
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
|
||||
amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
|
||||
amdgpu_ring_write(ring,
|
||||
@ -4725,7 +4745,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
|
||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
|
||||
2 + /* gfx_v9_0_ring_emit_vm_flush */
|
||||
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
|
||||
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
|
||||
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
|
||||
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
|
||||
.emit_fence = gfx_v9_0_ring_emit_fence,
|
||||
.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
|
||||
@ -4760,7 +4780,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
|
||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
|
||||
2 + /* gfx_v9_0_ring_emit_vm_flush */
|
||||
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
|
||||
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
|
||||
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
|
||||
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
|
||||
.test_ring = gfx_v9_0_ring_test_ring,
|
||||
.insert_nop = amdgpu_ring_insert_nop,
|
||||
@ -4842,6 +4862,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
|
||||
break;
|
||||
}
|
||||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA20:
|
||||
adev->gds.gds_compute_max_wave_id = 0x7ff;
|
||||
break;
|
||||
case CHIP_VEGA12:
|
||||
adev->gds.gds_compute_max_wave_id = 0x27f;
|
||||
break;
|
||||
case CHIP_RAVEN:
|
||||
if (adev->rev_id >= 0x8)
|
||||
adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
|
||||
else
|
||||
adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
|
||||
break;
|
||||
default:
|
||||
/* this really depends on the chip */
|
||||
adev->gds.gds_compute_max_wave_id = 0x7ff;
|
||||
break;
|
||||
}
|
||||
|
||||
adev->gds.gws.total_size = 64;
|
||||
adev->gds.oa.total_size = 16;
|
||||
|
||||
|
@ -1471,8 +1471,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
|
||||
gmc_v8_0_set_fault_enable_default(adev, false);
|
||||
|
||||
if (printk_ratelimit()) {
|
||||
struct amdgpu_task_info task_info = { 0 };
|
||||
struct amdgpu_task_info task_info;
|
||||
|
||||
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
|
||||
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
|
||||
|
||||
dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
|
||||
|
@ -305,6 +305,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
|
||||
bool retry_fault = !!(entry->src_data[1] & 0x80);
|
||||
uint32_t status = 0;
|
||||
u64 addr;
|
||||
|
||||
@ -320,13 +321,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
if (printk_ratelimit()) {
|
||||
struct amdgpu_task_info task_info = { 0 };
|
||||
struct amdgpu_task_info task_info;
|
||||
|
||||
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
|
||||
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
|
||||
|
||||
dev_err(adev->dev,
|
||||
"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
|
||||
"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
|
||||
"pasid:%u, for process %s pid %d thread %s pid %d)\n",
|
||||
entry->vmid_src ? "mmhub" : "gfxhub",
|
||||
retry_fault ? "retry" : "no-retry",
|
||||
entry->src_id, entry->ring_id, entry->vmid,
|
||||
entry->pasid, task_info.process_name, task_info.tgid,
|
||||
task_info.task_name, task_info.pid);
|
||||
@ -961,7 +965,11 @@ static int gmc_v9_0_sw_init(void *handle)
|
||||
* vm size is 256TB (48bit), maximum size of Vega10,
|
||||
* block size 512 (9bit)
|
||||
*/
|
||||
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
|
||||
/* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
|
||||
else
|
||||
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -103,9 +103,9 @@ static void iceland_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||
*/
|
||||
static int iceland_ih_irq_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ih_ring *ih = &adev->irq.ih;
|
||||
int rb_bufsz;
|
||||
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
|
||||
u64 wptr_off;
|
||||
|
||||
/* disable irqs */
|
||||
iceland_ih_disable_interrupts(adev);
|
||||
@ -133,9 +133,8 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
|
||||
|
||||
/* set the writeback address whether it's enabled or not */
|
||||
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
|
||||
|
||||
WREG32(mmIH_RB_CNTL, ih_rb_cntl);
|
||||
|
||||
@ -185,11 +184,12 @@ static void iceland_ih_irq_disable(struct amdgpu_device *adev)
|
||||
* Used by cz_irq_process(VI).
|
||||
* Returns the value of the wptr.
|
||||
*/
|
||||
static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
|
||||
static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 wptr, tmp;
|
||||
|
||||
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
|
||||
wptr = le32_to_cpu(*ih->wptr_cpu);
|
||||
|
||||
if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
@ -198,13 +198,13 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* this should allow us to catchup.
|
||||
*/
|
||||
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask);
|
||||
adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask;
|
||||
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
|
||||
ih->rptr = (wptr + 16) & ih->ptr_mask;
|
||||
tmp = RREG32(mmIH_RB_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
WREG32(mmIH_RB_CNTL, tmp);
|
||||
}
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
return (wptr & ih->ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -216,16 +216,17 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* position and also advance the position.
|
||||
*/
|
||||
static void iceland_ih_decode_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
/* wptr/rptr are in bytes! */
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
uint32_t dw[4];
|
||||
|
||||
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
|
||||
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
|
||||
|
||||
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
|
||||
entry->src_id = dw[0] & 0xff;
|
||||
@ -235,7 +236,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,
|
||||
entry->pasid = (dw[2] >> 16) & 0xffff;
|
||||
|
||||
/* wptr/rptr are in bytes! */
|
||||
adev->irq.ih.rptr += 16;
|
||||
ih->rptr += 16;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -245,9 +246,10 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,
|
||||
*
|
||||
* Set the IH ring buffer rptr.
|
||||
*/
|
||||
static void iceland_ih_set_rptr(struct amdgpu_device *adev)
|
||||
static void iceland_ih_set_rptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr);
|
||||
WREG32(mmIH_RB_RPTR, ih->rptr);
|
||||
}
|
||||
|
||||
static int iceland_ih_early_init(void *handle)
|
||||
|
@ -174,7 +174,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
|
||||
return r;
|
||||
}
|
||||
/* Retrieve checksum from mailbox2 */
|
||||
if (req == IDH_REQ_GPU_INIT_ACCESS) {
|
||||
if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
|
||||
adev->virt.fw_reserve.checksum_key =
|
||||
RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
|
||||
mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
|
||||
|
@ -27,13 +27,9 @@
|
||||
#include "nbio/nbio_6_1_default.h"
|
||||
#include "nbio/nbio_6_1_offset.h"
|
||||
#include "nbio/nbio_6_1_sh_mask.h"
|
||||
#include "nbio/nbio_6_1_smn.h"
|
||||
#include "vega10_enum.h"
|
||||
|
||||
#define smnCPM_CONTROL 0x11180460
|
||||
#define smnPCIE_CNTL2 0x11180070
|
||||
#define smnPCIE_CONFIG_CNTL 0x11180044
|
||||
#define smnPCIE_CI_CNTL 0x11180080
|
||||
|
||||
static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
||||
@ -72,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
|
||||
bool use_doorbell, int doorbell_index)
|
||||
bool use_doorbell, int doorbell_index, int doorbell_size)
|
||||
{
|
||||
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
|
||||
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
|
||||
@ -81,7 +77,7 @@ static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instan
|
||||
|
||||
if (use_doorbell) {
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
|
||||
} else
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
|
||||
|
||||
|
@ -27,13 +27,11 @@
|
||||
#include "nbio/nbio_7_0_default.h"
|
||||
#include "nbio/nbio_7_0_offset.h"
|
||||
#include "nbio/nbio_7_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_smn.h"
|
||||
#include "vega10_enum.h"
|
||||
|
||||
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
|
||||
|
||||
#define smnCPM_CONTROL 0x11180460
|
||||
#define smnPCIE_CNTL2 0x11180070
|
||||
|
||||
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
||||
@ -69,7 +67,7 @@ static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
|
||||
bool use_doorbell, int doorbell_index)
|
||||
bool use_doorbell, int doorbell_index, int doorbell_size)
|
||||
{
|
||||
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
|
||||
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
|
||||
@ -78,7 +76,7 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
|
||||
|
||||
if (use_doorbell) {
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
|
||||
} else
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
|
||||
|
||||
|
@ -26,13 +26,10 @@
|
||||
|
||||
#include "nbio/nbio_7_4_offset.h"
|
||||
#include "nbio/nbio_7_4_sh_mask.h"
|
||||
#include "nbio/nbio_7_4_0_smn.h"
|
||||
|
||||
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
|
||||
|
||||
#define smnCPM_CONTROL 0x11180460
|
||||
#define smnPCIE_CNTL2 0x11180070
|
||||
#define smnPCIE_CI_CNTL 0x11180080
|
||||
|
||||
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
||||
@ -68,7 +65,7 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
|
||||
bool use_doorbell, int doorbell_index)
|
||||
bool use_doorbell, int doorbell_index, int doorbell_size)
|
||||
{
|
||||
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
|
||||
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
|
||||
@ -77,7 +74,7 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan
|
||||
|
||||
if (use_doorbell) {
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
|
||||
} else
|
||||
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
|
||||
|
||||
|
@ -191,7 +191,7 @@ enum psp_gfx_fw_type
|
||||
GFX_FW_TYPE_MMSCH = 19,
|
||||
GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
|
||||
GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
|
||||
GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22,
|
||||
GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22,
|
||||
GFX_FW_TYPE_UVD1 = 23,
|
||||
GFX_FW_TYPE_MAX = 24
|
||||
};
|
||||
|
@ -38,75 +38,6 @@ MODULE_FIRMWARE("amdgpu/raven_asd.bin");
|
||||
MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
|
||||
MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
|
||||
|
||||
static int
|
||||
psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
|
||||
{
|
||||
switch(ucode->ucode_id) {
|
||||
case AMDGPU_UCODE_ID_SDMA0:
|
||||
*type = GFX_FW_TYPE_SDMA0;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA1:
|
||||
*type = GFX_FW_TYPE_SDMA1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_CE:
|
||||
*type = GFX_FW_TYPE_CP_CE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_PFP:
|
||||
*type = GFX_FW_TYPE_CP_PFP;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_ME:
|
||||
*type = GFX_FW_TYPE_CP_ME;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME2;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_G:
|
||||
*type = GFX_FW_TYPE_RLC_G;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
|
||||
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
|
||||
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
|
||||
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SMC:
|
||||
*type = GFX_FW_TYPE_SMU;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_UVD:
|
||||
*type = GFX_FW_TYPE_UVD;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_VCE:
|
||||
*type = GFX_FW_TYPE_VCE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_VCN:
|
||||
*type = GFX_FW_TYPE_VCN;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_DMCU_ERAM:
|
||||
*type = GFX_FW_TYPE_DMCU_ERAM;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_DMCU_INTV:
|
||||
*type = GFX_FW_TYPE_DMCU_ISR;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_MAXIMUM:
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psp_v10_0_init_microcode(struct psp_context *psp)
|
||||
{
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
@ -158,26 +89,6 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd)
|
||||
{
|
||||
int ret;
|
||||
uint64_t fw_mem_mc_addr = ucode->mc_addr;
|
||||
|
||||
memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
|
||||
|
||||
cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
|
||||
|
||||
ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
|
||||
if (ret)
|
||||
DRM_ERROR("Unknown firmware type\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_v10_0_ring_init(struct psp_context *psp,
|
||||
enum psp_ring_type ring_type)
|
||||
{
|
||||
@ -454,7 +365,6 @@ static int psp_v10_0_mode1_reset(struct psp_context *psp)
|
||||
|
||||
static const struct psp_funcs psp_v10_0_funcs = {
|
||||
.init_microcode = psp_v10_0_init_microcode,
|
||||
.prep_cmd_buf = psp_v10_0_prep_cmd_buf,
|
||||
.ring_init = psp_v10_0_ring_init,
|
||||
.ring_create = psp_v10_0_ring_create,
|
||||
.ring_stop = psp_v10_0_ring_stop,
|
||||
|
@ -40,60 +40,6 @@ MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
|
||||
/* address block */
|
||||
#define smnMP1_FIRMWARE_FLAGS 0x3010024
|
||||
|
||||
static int
|
||||
psp_v11_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
|
||||
{
|
||||
switch (ucode->ucode_id) {
|
||||
case AMDGPU_UCODE_ID_SDMA0:
|
||||
*type = GFX_FW_TYPE_SDMA0;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA1:
|
||||
*type = GFX_FW_TYPE_SDMA1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_CE:
|
||||
*type = GFX_FW_TYPE_CP_CE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_PFP:
|
||||
*type = GFX_FW_TYPE_CP_PFP;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_ME:
|
||||
*type = GFX_FW_TYPE_CP_ME;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME2;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_G:
|
||||
*type = GFX_FW_TYPE_RLC_G;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SMC:
|
||||
*type = GFX_FW_TYPE_SMU;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_UVD:
|
||||
*type = GFX_FW_TYPE_UVD;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_VCE:
|
||||
*type = GFX_FW_TYPE_VCE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_UVD1:
|
||||
*type = GFX_FW_TYPE_UVD1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_MAXIMUM:
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psp_v11_0_init_microcode(struct psp_context *psp)
|
||||
{
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
@ -267,26 +213,6 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_v11_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd)
|
||||
{
|
||||
int ret;
|
||||
uint64_t fw_mem_mc_addr = ucode->mc_addr;
|
||||
|
||||
memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
|
||||
|
||||
cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
|
||||
|
||||
ret = psp_v11_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
|
||||
if (ret)
|
||||
DRM_ERROR("Unknown firmware type\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_v11_0_ring_init(struct psp_context *psp,
|
||||
enum psp_ring_type ring_type)
|
||||
{
|
||||
@ -753,7 +679,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
|
||||
.init_microcode = psp_v11_0_init_microcode,
|
||||
.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
|
||||
.bootloader_load_sos = psp_v11_0_bootloader_load_sos,
|
||||
.prep_cmd_buf = psp_v11_0_prep_cmd_buf,
|
||||
.ring_init = psp_v11_0_ring_init,
|
||||
.ring_create = psp_v11_0_ring_create,
|
||||
.ring_stop = psp_v11_0_ring_stop,
|
||||
|
@ -47,57 +47,6 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
|
||||
|
||||
static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
|
||||
|
||||
static int
|
||||
psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
|
||||
{
|
||||
switch(ucode->ucode_id) {
|
||||
case AMDGPU_UCODE_ID_SDMA0:
|
||||
*type = GFX_FW_TYPE_SDMA0;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA1:
|
||||
*type = GFX_FW_TYPE_SDMA1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_CE:
|
||||
*type = GFX_FW_TYPE_CP_CE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_PFP:
|
||||
*type = GFX_FW_TYPE_CP_PFP;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_ME:
|
||||
*type = GFX_FW_TYPE_CP_ME;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC1_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2:
|
||||
*type = GFX_FW_TYPE_CP_MEC;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_MEC2_JT:
|
||||
*type = GFX_FW_TYPE_CP_MEC_ME2;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_RLC_G:
|
||||
*type = GFX_FW_TYPE_RLC_G;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SMC:
|
||||
*type = GFX_FW_TYPE_SMU;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_UVD:
|
||||
*type = GFX_FW_TYPE_UVD;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_VCE:
|
||||
*type = GFX_FW_TYPE_VCE;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_MAXIMUM:
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psp_v3_1_init_microcode(struct psp_context *psp)
|
||||
{
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
@ -277,26 +226,6 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd)
|
||||
{
|
||||
int ret;
|
||||
uint64_t fw_mem_mc_addr = ucode->mc_addr;
|
||||
|
||||
memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
|
||||
|
||||
cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
|
||||
cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
|
||||
|
||||
ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
|
||||
if (ret)
|
||||
DRM_ERROR("Unknown firmware type\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_v3_1_ring_init(struct psp_context *psp,
|
||||
enum psp_ring_type ring_type)
|
||||
{
|
||||
@ -615,7 +544,6 @@ static const struct psp_funcs psp_v3_1_funcs = {
|
||||
.init_microcode = psp_v3_1_init_microcode,
|
||||
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
|
||||
.bootloader_load_sos = psp_v3_1_bootloader_load_sos,
|
||||
.prep_cmd_buf = psp_v3_1_prep_cmd_buf,
|
||||
.ring_init = psp_v3_1_ring_init,
|
||||
.ring_create = psp_v3_1_ring_create,
|
||||
.ring_stop = psp_v3_1_ring_stop,
|
||||
|
@ -247,7 +247,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
|
@ -421,7 +421,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
@ -1145,8 +1145,7 @@ static int sdma_v3_0_sw_init(void *handle)
|
||||
ring->ring_obj = NULL;
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = (i == 0) ?
|
||||
adev->doorbell_index.sdma_engine0 : adev->doorbell_index.sdma_engine1;
|
||||
ring->doorbell_index = adev->doorbell_index.sdma_engine[i];
|
||||
} else {
|
||||
ring->use_pollmem = true;
|
||||
}
|
||||
|
@ -78,7 +78,6 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
|
||||
@ -96,6 +95,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
|
||||
static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
|
||||
};
|
||||
@ -103,6 +103,7 @@ static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
|
||||
static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
|
||||
};
|
||||
@ -499,7 +500,7 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
@ -833,8 +834,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
|
||||
OFFSET, ring->doorbell_index);
|
||||
WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
|
||||
WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
|
||||
adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
|
||||
ring->doorbell_index);
|
||||
|
||||
sdma_v4_0_ring_set_wptr(ring);
|
||||
|
||||
@ -1521,9 +1520,7 @@ static int sdma_v4_0_sw_init(void *handle)
|
||||
ring->use_doorbell?"true":"false");
|
||||
|
||||
/* doorbell size is 2 dwords, get DWORD offset */
|
||||
ring->doorbell_index = (i == 0) ?
|
||||
(adev->doorbell_index.sdma_engine0 << 1)
|
||||
: (adev->doorbell_index.sdma_engine1 << 1);
|
||||
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
|
||||
|
||||
sprintf(ring->name, "sdma%d", i);
|
||||
r = amdgpu_ring_init(adev, ring, 1024,
|
||||
@ -1542,9 +1539,7 @@ static int sdma_v4_0_sw_init(void *handle)
|
||||
/* paging queue use same doorbell index/routing as gfx queue
|
||||
* with 0x400 (4096 dwords) offset on second doorbell page
|
||||
*/
|
||||
ring->doorbell_index = (i == 0) ?
|
||||
(adev->doorbell_index.sdma_engine0 << 1)
|
||||
: (adev->doorbell_index.sdma_engine1 << 1);
|
||||
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
|
||||
ring->doorbell_index += 0x400;
|
||||
|
||||
sprintf(ring->name, "page%d", i);
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include "dce/dce_6_0_d.h"
|
||||
#include "uvd/uvd_4_0_d.h"
|
||||
#include "bif/bif_3_0_d.h"
|
||||
#include "bif/bif_3_0_sh_mask.h"
|
||||
|
||||
static const u32 tahiti_golden_registers[] =
|
||||
{
|
||||
@ -1258,6 +1259,11 @@ static bool si_need_full_reset(struct amdgpu_device *adev)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool si_need_reset_on_init(struct amdgpu_device *adev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static int si_get_pcie_lanes(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 link_width_cntl;
|
||||
@ -1323,6 +1329,52 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
|
||||
WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
|
||||
}
|
||||
|
||||
/**
 * si_get_pcie_usage - sample two PCIe performance counters
 * @adev: device whose PCIe performance block is programmed
 * @count0: receives the counter for event 40 (number of received messages)
 * @count1: receives the counter for event 104 (number of posted requests sent)
 *
 * Selects the two events, resets and starts the counters, sleeps for
 * 1000 ms, then latches the shadow counters and reads them back. Each
 * 32-bit counter register is combined with its COUNTERn_UPPER overflow
 * field from PCIE_PERF_CNTL_TXCLK to form the 64-bit result.
 *
 * On APUs no register is touched and both outputs are set to 0, so the
 * caller never reads back uninitialised memory.
 */
static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
			      uint64_t *count1)
{
	uint32_t perfctr = 0;
	uint64_t cnt0_of, cnt1_of;
	uint32_t tmp;

	/* This reports 0 on APUs, so return to avoid writing/reading registers
	 * that may or may not be different from their GPU counterparts
	 */
	if (adev->flags & AMD_IS_APU) {
		*count0 = 0;
		*count1 = 0;
		return;
	}

	/* Select the two events that we wish to watch */
	/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);

	/* Write to enable desired perf counters */
	WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
	/* Zero out and enable the perf counters
	 * Write 0x5:
	 * Bit 0 = Start all counters(1)
	 * Bit 2 = Global counter reset enable(1)
	 */
	WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);

	/* sampling window */
	msleep(1000);

	/* Load the shadow and disable the perf counters
	 * Write 0x2:
	 * Bit 0 = Stop counters(0)
	 * Bit 1 = Load the shadow counters(1)
	 */
	WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);

	/* Read register values to get any >32bit overflow */
	tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
	cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
	cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);

	/* Get the values and add the overflow */
	*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
	*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
|
||||
|
||||
static const struct amdgpu_asic_funcs si_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &si_read_disabled_bios,
|
||||
@ -1339,6 +1391,8 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
|
||||
.flush_hdp = &si_flush_hdp,
|
||||
.invalidate_hdp = &si_invalidate_hdp,
|
||||
.need_full_reset = &si_need_full_reset,
|
||||
.get_pcie_usage = &si_get_pcie_usage,
|
||||
.need_reset_on_init = &si_need_reset_on_init,
|
||||
};
|
||||
|
||||
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
|
||||
|
@ -63,7 +63,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
|
||||
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
|
||||
|
@ -57,9 +57,9 @@ static void si_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||
|
||||
static int si_ih_irq_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ih_ring *ih = &adev->irq.ih;
|
||||
int rb_bufsz;
|
||||
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
|
||||
u64 wptr_off;
|
||||
|
||||
si_ih_disable_interrupts(adev);
|
||||
WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8);
|
||||
@ -76,9 +76,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
|
||||
(rb_bufsz << 1) |
|
||||
IH_WPTR_WRITEBACK_ENABLE;
|
||||
|
||||
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
WREG32(IH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
|
||||
WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
|
||||
WREG32(IH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
|
||||
WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
|
||||
WREG32(IH_RB_CNTL, ih_rb_cntl);
|
||||
WREG32(IH_RB_RPTR, 0);
|
||||
WREG32(IH_RB_WPTR, 0);
|
||||
@ -100,34 +99,36 @@ static void si_ih_irq_disable(struct amdgpu_device *adev)
|
||||
mdelay(1);
|
||||
}
|
||||
|
||||
static u32 si_ih_get_wptr(struct amdgpu_device *adev)
|
||||
static u32 si_ih_get_wptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 wptr, tmp;
|
||||
|
||||
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
|
||||
wptr = le32_to_cpu(*ih->wptr_cpu);
|
||||
|
||||
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
|
||||
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
|
||||
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask);
|
||||
adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask;
|
||||
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
|
||||
ih->rptr = (wptr + 16) & ih->ptr_mask;
|
||||
tmp = RREG32(IH_RB_CNTL);
|
||||
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
|
||||
WREG32(IH_RB_CNTL, tmp);
|
||||
}
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
return (wptr & ih->ptr_mask);
|
||||
}
|
||||
|
||||
static void si_ih_decode_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
struct amdgpu_ih_ring *ih,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
uint32_t dw[4];
|
||||
|
||||
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
|
||||
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
|
||||
|
||||
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
|
||||
entry->src_id = dw[0] & 0xff;
|
||||
@ -135,12 +136,13 @@ static void si_ih_decode_iv(struct amdgpu_device *adev,
|
||||
entry->ring_id = dw[2] & 0xff;
|
||||
entry->vmid = (dw[2] >> 8) & 0xff;
|
||||
|
||||
adev->irq.ih.rptr += 16;
|
||||
ih->rptr += 16;
|
||||
}
|
||||
|
||||
static void si_ih_set_rptr(struct amdgpu_device *adev)
|
||||
static void si_ih_set_rptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
WREG32(IH_RB_RPTR, adev->irq.ih.rptr);
|
||||
WREG32(IH_RB_RPTR, ih->rptr);
|
||||
}
|
||||
|
||||
static int si_ih_early_init(void *handle)
|
||||
|
@ -43,6 +43,10 @@
|
||||
#include "hdp/hdp_4_0_sh_mask.h"
|
||||
#include "smuio/smuio_9_0_offset.h"
|
||||
#include "smuio/smuio_9_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_default.h"
|
||||
#include "nbio/nbio_7_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_smn.h"
|
||||
#include "mp/mp_9_0_offset.h"
|
||||
|
||||
#include "soc15.h"
|
||||
#include "soc15_common.h"
|
||||
@ -385,14 +389,13 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
|
||||
|
||||
}
|
||||
|
||||
|
||||
static int soc15_asic_reset(struct amdgpu_device *adev)
|
||||
static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
|
||||
|
||||
dev_info(adev->dev, "GPU reset\n");
|
||||
dev_info(adev->dev, "GPU mode1 reset\n");
|
||||
|
||||
/* disable BM */
|
||||
pci_clear_master(adev->pdev);
|
||||
@ -417,6 +420,63 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * soc15_asic_get_baco_capability - ask powerplay whether BACO is available
 * @adev: device to query
 * @cap: output flag
 *
 * Forwards the query to the powerplay get_asic_baco_capability callback and
 * returns whatever that callback returns. When the powerplay function table
 * or that callback is missing, @cap is set to false and -ENOENT is returned.
 */
static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
{
	const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;

	if (funcs && funcs->get_asic_baco_capability)
		return funcs->get_asic_baco_capability(adev->powerplay.pp_handle,
						       cap);

	/* no way to ask: report "not capable" */
	*cap = false;
	return -ENOENT;
}
|
||||
|
||||
static int soc15_asic_baco_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
void *pp_handle = adev->powerplay.pp_handle;
|
||||
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
|
||||
|
||||
if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
|
||||
return -ENOENT;
|
||||
|
||||
/* enter BACO state */
|
||||
if (pp_funcs->set_asic_baco_state(pp_handle, 1))
|
||||
return -EIO;
|
||||
|
||||
/* exit BACO state */
|
||||
if (pp_funcs->set_asic_baco_state(pp_handle, 0))
|
||||
return -EIO;
|
||||
|
||||
dev_info(adev->dev, "GPU BACO reset\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int soc15_asic_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
int ret;
|
||||
bool baco_reset;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA20:
|
||||
soc15_asic_get_baco_capability(adev, &baco_reset);
|
||||
break;
|
||||
default:
|
||||
baco_reset = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (baco_reset)
|
||||
ret = soc15_asic_baco_reset(adev);
|
||||
else
|
||||
ret = soc15_asic_mode1_reset(adev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
|
||||
u32 cntl_reg, u32 status_reg)
|
||||
{
|
||||
@ -535,10 +595,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
|
||||
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||
else
|
||||
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
||||
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||
else
|
||||
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
||||
}
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
@ -560,7 +622,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
|
||||
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
|
||||
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
|
||||
amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
|
||||
@ -601,6 +664,68 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev)
|
||||
/* change this when we implement soft reset */
|
||||
return true;
|
||||
}
|
||||
/**
 * soc15_get_pcie_usage - sample two PCIe performance counters
 * @adev: device whose PCIe performance block is programmed
 * @count0: receives the counter for event 40 (number of received messages)
 * @count1: receives the counter for event 104 (number of posted requests sent)
 *
 * Selects the two events, resets and starts the counters, sleeps for
 * 1000 ms, then latches the shadow counters and reads them back. Each
 * 32-bit counter register is combined with its COUNTERn_UPPER overflow
 * field from PCIE_PERF_CNTL_TXCLK to form the 64-bit result.
 *
 * On APUs no register is touched and both outputs are set to 0, so the
 * caller never reads back uninitialised memory.
 */
static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
				 uint64_t *count1)
{
	uint32_t perfctr = 0;
	uint64_t cnt0_of, cnt1_of;
	uint32_t tmp;

	/* This reports 0 on APUs, so return to avoid writing/reading registers
	 * that may or may not be different from their GPU counterparts
	 */
	if (adev->flags & AMD_IS_APU) {
		*count0 = 0;
		*count1 = 0;
		return;
	}

	/* Select the two events that we wish to watch */
	/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);

	/* Write to enable desired perf counters */
	WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
	/* Zero out and enable the perf counters
	 * Write 0x5:
	 * Bit 0 = Start all counters(1)
	 * Bit 2 = Global counter reset enable(1)
	 */
	WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);

	/* sampling window */
	msleep(1000);

	/* Load the shadow and disable the perf counters
	 * Write 0x2:
	 * Bit 0 = Stop counters(0)
	 * Bit 1 = Load the shadow counters(1)
	 */
	WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);

	/* Read register values to get any >32bit overflow */
	tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK);
	cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
	cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);

	/* Get the values and add the overflow */
	*count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
	*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
|
||||
|
||||
static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 sol_reg;
|
||||
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return false;
|
||||
|
||||
/* Check sOS sign of life register to confirm sys driver and sOS
|
||||
* are already been loaded.
|
||||
*/
|
||||
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
|
||||
if (sol_reg)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
||||
{
|
||||
@ -617,6 +742,8 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
||||
.invalidate_hdp = &soc15_invalidate_hdp,
|
||||
.need_full_reset = &soc15_need_full_reset,
|
||||
.init_doorbell_index = &vega10_doorbell_index_init,
|
||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||
.need_reset_on_init = &soc15_need_reset_on_init,
|
||||
};
|
||||
|
||||
static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
||||
@ -634,6 +761,8 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
||||
.invalidate_hdp = &soc15_invalidate_hdp,
|
||||
.need_full_reset = &soc15_need_full_reset,
|
||||
.init_doorbell_index = &vega20_doorbell_index_init,
|
||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||
.need_reset_on_init = &soc15_need_reset_on_init,
|
||||
};
|
||||
|
||||
static int soc15_common_early_init(void *handle)
|
||||
@ -840,6 +969,22 @@ static int soc15_common_sw_fini(void *handle)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void soc15_doorbell_range_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
adev->nbio_funcs->sdma_doorbell_range(adev, i,
|
||||
ring->use_doorbell, ring->doorbell_index,
|
||||
adev->doorbell_index.sdma_doorbell_range);
|
||||
}
|
||||
|
||||
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
|
||||
adev->irq.ih.doorbell_index);
|
||||
}
|
||||
|
||||
static int soc15_common_hw_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
@ -852,6 +997,12 @@ static int soc15_common_hw_init(void *handle)
|
||||
adev->nbio_funcs->init_registers(adev);
|
||||
/* enable the doorbell aperture */
|
||||
soc15_enable_doorbell_aperture(adev, true);
|
||||
/* HW doorbell routing policy: doorbell writing not
|
||||
* in SDMA/IH/MM/ACV range will be routed to CP. So
|
||||
* we need to init SDMA/IH/MM/ACV doorbell range prior
|
||||
* to CP ip block init and ring test.
|
||||
*/
|
||||
soc15_doorbell_range_init(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -99,9 +99,9 @@ static void tonga_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||
*/
|
||||
static int tonga_ih_irq_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int rb_bufsz;
|
||||
u32 interrupt_cntl, ih_rb_cntl, ih_doorbell_rtpr;
|
||||
u64 wptr_off;
|
||||
struct amdgpu_ih_ring *ih = &adev->irq.ih;
|
||||
int rb_bufsz;
|
||||
|
||||
/* disable irqs */
|
||||
tonga_ih_disable_interrupts(adev);
|
||||
@ -118,10 +118,7 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
|
||||
WREG32(mmINTERRUPT_CNTL, interrupt_cntl);
|
||||
|
||||
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
|
||||
if (adev->irq.ih.use_bus_addr)
|
||||
WREG32(mmIH_RB_BASE, adev->irq.ih.rb_dma_addr >> 8);
|
||||
else
|
||||
WREG32(mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
|
||||
WREG32(mmIH_RB_BASE, ih->gpu_addr >> 8);
|
||||
|
||||
rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
|
||||
ih_rb_cntl = REG_SET_FIELD(0, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
@ -136,12 +133,8 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
|
||||
WREG32(mmIH_RB_CNTL, ih_rb_cntl);
|
||||
|
||||
/* set the writeback address whether it's enabled or not */
|
||||
if (adev->irq.ih.use_bus_addr)
|
||||
wptr_off = adev->irq.ih.rb_dma_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
else
|
||||
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
|
||||
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
|
||||
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32(mmIH_RB_RPTR, 0);
|
||||
@ -193,14 +186,12 @@ static void tonga_ih_irq_disable(struct amdgpu_device *adev)
|
||||
* Used by cz_irq_process(VI).
|
||||
* Returns the value of the wptr.
|
||||
*/
|
||||
static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
|
||||
static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 wptr, tmp;
|
||||
|
||||
if (adev->irq.ih.use_bus_addr)
|
||||
wptr = le32_to_cpu(adev->irq.ih.ring[adev->irq.ih.wptr_offs]);
|
||||
else
|
||||
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
|
||||
wptr = le32_to_cpu(*ih->wptr_cpu);
|
||||
|
||||
if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
@ -209,13 +200,13 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* this should allow us to catchup.
|
||||
*/
|
||||
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask);
|
||||
adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask;
|
||||
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
|
||||
ih->rptr = (wptr + 16) & ih->ptr_mask;
|
||||
tmp = RREG32(mmIH_RB_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
WREG32(mmIH_RB_CNTL, tmp);
|
||||
}
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
return (wptr & ih->ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -227,16 +218,17 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* position and also advance the position.
|
||||
*/
|
||||
static void tonga_ih_decode_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
struct amdgpu_ih_ring *ih,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
/* wptr/rptr are in bytes! */
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
uint32_t dw[4];
|
||||
|
||||
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
|
||||
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
|
||||
|
||||
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
|
||||
entry->src_id = dw[0] & 0xff;
|
||||
@ -246,7 +238,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev,
|
||||
entry->pasid = (dw[2] >> 16) & 0xffff;
|
||||
|
||||
/* wptr/rptr are in bytes! */
|
||||
adev->irq.ih.rptr += 16;
|
||||
ih->rptr += 16;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -256,17 +248,15 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev,
|
||||
*
|
||||
* Set the IH ring buffer rptr.
|
||||
*/
|
||||
static void tonga_ih_set_rptr(struct amdgpu_device *adev)
|
||||
static void tonga_ih_set_rptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
if (adev->irq.ih.use_doorbell) {
|
||||
if (ih->use_doorbell) {
|
||||
/* XXX check if swapping is necessary on BE */
|
||||
if (adev->irq.ih.use_bus_addr)
|
||||
adev->irq.ih.ring[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr;
|
||||
else
|
||||
adev->wb.wb[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr;
|
||||
WDOORBELL32(adev->irq.ih.doorbell_index, adev->irq.ih.rptr);
|
||||
*ih->rptr_cpu = ih->rptr;
|
||||
WDOORBELL32(ih->doorbell_index, ih->rptr);
|
||||
} else {
|
||||
WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr);
|
||||
WREG32(mmIH_RB_RPTR, ih->rptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -511,7 +511,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
|
||||
static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
|
||||
amdgpu_ring_write(ring, ib->gpu_addr);
|
||||
|
@ -526,7 +526,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
|
@ -977,7 +977,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
||||
static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
@ -1003,7 +1003,7 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
|
@ -1272,7 +1272,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
|
||||
static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
@ -1303,7 +1303,7 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
|
@ -834,7 +834,7 @@ static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
|
||||
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
|
@ -947,7 +947,7 @@ static int vce_v4_0_set_powergating_state(void *handle,
|
||||
#endif
|
||||
|
||||
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib, bool ctx_switch)
|
||||
struct amdgpu_ib *ib, uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
|
@ -1371,7 +1371,7 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
|
||||
static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
@ -1531,7 +1531,7 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
|
||||
static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
@ -1736,7 +1736,7 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6
|
||||
static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
uint32_t flags)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
@ -50,6 +50,22 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
adev->irq.ih.enabled = true;
|
||||
|
||||
if (adev->irq.ih1.ring_size) {
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||
RB_ENABLE, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
adev->irq.ih1.enabled = true;
|
||||
}
|
||||
|
||||
if (adev->irq.ih2.ring_size) {
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
||||
RB_ENABLE, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
adev->irq.ih2.enabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -71,6 +87,53 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
|
||||
adev->irq.ih.enabled = false;
|
||||
adev->irq.ih.rptr = 0;
|
||||
|
||||
if (adev->irq.ih1.ring_size) {
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||
RB_ENABLE, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
||||
adev->irq.ih1.enabled = false;
|
||||
adev->irq.ih1.rptr = 0;
|
||||
}
|
||||
|
||||
if (adev->irq.ih2.ring_size) {
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
||||
RB_ENABLE, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
||||
adev->irq.ih2.enabled = false;
|
||||
adev->irq.ih2.rptr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * vega10_ih_rb_cntl - build the common part of an IH_RB_CNTL value
 * @ih: ring whose size and addressing mode are encoded
 * @ih_rb_cntl: current register value to start from
 *
 * Pure helper: no register is accessed. Only the fields listed below are
 * replaced in @ih_rb_cntl; the updated value is returned.
 */
static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
{
	/* presumably ring_size is in bytes, so this is log2 of the dword count */
	const int log2_size = order_base_2(ih->ring_size / 4);
	/* MC_SPACE: 1 when the ring uses a bus address, 4 otherwise */
	const int mc_space = ih->use_bus_addr ? 1 : 4;
	uint32_t cntl = ih_rb_cntl;

	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_SPACE, mc_space);
	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, RB_SIZE, log2_size);
	/* Write pointer writeback: when enabled, the IH_RB_WPTR register
	 * value is written to memory.
	 */
	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_SNOOP, 1);
	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_RO, 0);
	cntl = REG_SET_FIELD(cntl, IH_RB_CNTL, MC_VMID, 0);

	return cntl;
}
|
||||
|
||||
/**
|
||||
@ -86,50 +149,32 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||
*/
|
||||
static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ih_ring *ih;
|
||||
int ret = 0;
|
||||
int rb_bufsz;
|
||||
u32 ih_rb_cntl, ih_doorbell_rtpr;
|
||||
u32 tmp;
|
||||
u64 wptr_off;
|
||||
|
||||
/* disable irqs */
|
||||
vega10_ih_disable_interrupts(adev);
|
||||
|
||||
adev->nbio_funcs->ih_control(adev);
|
||||
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
|
||||
ih = &adev->irq.ih;
|
||||
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
|
||||
if (adev->irq.ih.use_bus_addr) {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.rb_dma_addr >> 8);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, ((u64)adev->irq.ih.rb_dma_addr >> 40) & 0xff);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 1);
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (adev->irq.ih.gpu_addr >> 40) & 0xff);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 4);
|
||||
}
|
||||
rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
|
||||
/* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register value is written to memory */
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
|
||||
|
||||
if (adev->irq.msi_enabled)
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff);
|
||||
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
|
||||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
|
||||
!!adev->irq.msi_enabled);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
|
||||
/* set the writeback address whether it's enabled or not */
|
||||
if (adev->irq.ih.use_bus_addr)
|
||||
wptr_off = adev->irq.ih.rb_dma_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
else
|
||||
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
|
||||
lower_32_bits(ih->wptr_addr));
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI,
|
||||
upper_32_bits(ih->wptr_addr) & 0xFFFF);
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
|
||||
@ -137,17 +182,48 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
||||
|
||||
ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR);
|
||||
if (adev->irq.ih.use_doorbell) {
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR,
|
||||
OFFSET, adev->irq.ih.doorbell_index);
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR,
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
|
||||
IH_DOORBELL_RPTR, OFFSET,
|
||||
adev->irq.ih.doorbell_index);
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
|
||||
IH_DOORBELL_RPTR,
|
||||
ENABLE, 1);
|
||||
} else {
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR,
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
|
||||
IH_DOORBELL_RPTR,
|
||||
ENABLE, 0);
|
||||
}
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
|
||||
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
|
||||
adev->irq.ih.doorbell_index);
|
||||
|
||||
ih = &adev->irq.ih1;
|
||||
if (ih->ring_size) {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING1, ih->gpu_addr >> 8);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING1,
|
||||
(ih->gpu_addr >> 40) & 0xff);
|
||||
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
||||
}
|
||||
|
||||
ih = &adev->irq.ih2;
|
||||
if (ih->ring_size) {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING2, ih->gpu_addr >> 8);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING2,
|
||||
(ih->gpu_addr >> 40) & 0xff);
|
||||
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
||||
}
|
||||
|
||||
tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
|
||||
@ -191,32 +267,58 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev)
|
||||
* ring buffer overflow and deal with it.
|
||||
* Returns the value of the wptr.
|
||||
*/
|
||||
static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
|
||||
static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 wptr, tmp;
|
||||
u32 wptr, reg, tmp;
|
||||
|
||||
if (adev->irq.ih.use_bus_addr)
|
||||
wptr = le32_to_cpu(adev->irq.ih.ring[adev->irq.ih.wptr_offs]);
|
||||
wptr = le32_to_cpu(*ih->wptr_cpu);
|
||||
|
||||
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
|
||||
goto out;
|
||||
|
||||
/* Double check that the overflow wasn't already cleared. */
|
||||
|
||||
if (ih == &adev->irq.ih)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
|
||||
else if (ih == &adev->irq.ih1)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1);
|
||||
else if (ih == &adev->irq.ih2)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2);
|
||||
else
|
||||
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
|
||||
BUG();
|
||||
|
||||
if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
wptr = RREG32_NO_KIQ(reg);
|
||||
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
|
||||
goto out;
|
||||
|
||||
/* When a ring buffer overflow happen start parsing interrupt
|
||||
* from the last not overwritten vector (wptr + 32). Hopefully
|
||||
* this should allow us to catchup.
|
||||
*/
|
||||
tmp = (wptr + 32) & adev->irq.ih.ptr_mask;
|
||||
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, adev->irq.ih.rptr, tmp);
|
||||
adev->irq.ih.rptr = tmp;
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
|
||||
tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL));
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp);
|
||||
}
|
||||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
/* When a ring buffer overflow happen start parsing interrupt
|
||||
* from the last not overwritten vector (wptr + 32). Hopefully
|
||||
* this should allow us to catchup.
|
||||
*/
|
||||
tmp = (wptr + 32) & ih->ptr_mask;
|
||||
dev_warn(adev->dev, "IH ring buffer overflow "
|
||||
"(0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, ih->rptr, tmp);
|
||||
ih->rptr = tmp;
|
||||
|
||||
if (ih == &adev->irq.ih)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
|
||||
else if (ih == &adev->irq.ih1)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
else if (ih == &adev->irq.ih2)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
else
|
||||
BUG();
|
||||
|
||||
tmp = RREG32_NO_KIQ(reg);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
WREG32_NO_KIQ(reg, tmp);
|
||||
|
||||
out:
|
||||
return (wptr & ih->ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -228,20 +330,21 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
|
||||
* position and also advance the position.
|
||||
*/
|
||||
static void vega10_ih_decode_iv(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
struct amdgpu_ih_ring *ih,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
/* wptr/rptr are in bytes! */
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
uint32_t dw[8];
|
||||
|
||||
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
|
||||
dw[4] = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
|
||||
dw[5] = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
|
||||
dw[6] = le32_to_cpu(adev->irq.ih.ring[ring_index + 6]);
|
||||
dw[7] = le32_to_cpu(adev->irq.ih.ring[ring_index + 7]);
|
||||
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
|
||||
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
|
||||
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
|
||||
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
|
||||
dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
|
||||
dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
|
||||
dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
|
||||
dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
|
||||
|
||||
entry->client_id = dw[0] & 0xff;
|
||||
entry->src_id = (dw[0] >> 8) & 0xff;
|
||||
@ -257,9 +360,8 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
|
||||
entry->src_data[2] = dw[6];
|
||||
entry->src_data[3] = dw[7];
|
||||
|
||||
|
||||
/* wptr/rptr are in bytes! */
|
||||
adev->irq.ih.rptr += 32;
|
||||
ih->rptr += 32;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -269,37 +371,95 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
|
||||
*
|
||||
* Set the IH ring buffer rptr.
|
||||
*/
|
||||
static void vega10_ih_set_rptr(struct amdgpu_device *adev)
|
||||
static void vega10_ih_set_rptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
if (adev->irq.ih.use_doorbell) {
|
||||
if (ih->use_doorbell) {
|
||||
/* XXX check if swapping is necessary on BE */
|
||||
if (adev->irq.ih.use_bus_addr)
|
||||
adev->irq.ih.ring[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr;
|
||||
else
|
||||
adev->wb.wb[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr;
|
||||
WDOORBELL32(adev->irq.ih.doorbell_index, adev->irq.ih.rptr);
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, adev->irq.ih.rptr);
|
||||
*ih->rptr_cpu = ih->rptr;
|
||||
WDOORBELL32(ih->doorbell_index, ih->rptr);
|
||||
} else if (ih == &adev->irq.ih) {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr);
|
||||
} else if (ih == &adev->irq.ih1) {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, ih->rptr);
|
||||
} else if (ih == &adev->irq.ih2) {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, ih->rptr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* vega10_ih_self_irq - dispatch work for ring 1 and 2
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @source: irq source
|
||||
* @entry: IV with WPTR update
|
||||
*
|
||||
* Update the WPTR from the IV and schedule work to handle the entries.
|
||||
*/
|
||||
static int vega10_ih_self_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
uint32_t wptr = cpu_to_le32(entry->src_data[0]);
|
||||
|
||||
switch (entry->ring_id) {
|
||||
case 1:
|
||||
*adev->irq.ih1.wptr_cpu = wptr;
|
||||
schedule_work(&adev->irq.ih1_work);
|
||||
break;
|
||||
case 2:
|
||||
*adev->irq.ih2.wptr_cpu = wptr;
|
||||
schedule_work(&adev->irq.ih2_work);
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct amdgpu_irq_src_funcs vega10_ih_self_irq_funcs = {
|
||||
.process = vega10_ih_self_irq,
|
||||
};
|
||||
|
||||
static void vega10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->irq.self_irq.num_types = 0;
|
||||
adev->irq.self_irq.funcs = &vega10_ih_self_irq_funcs;
|
||||
}
|
||||
|
||||
static int vega10_ih_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
vega10_ih_set_interrupt_funcs(adev);
|
||||
vega10_ih_set_self_irq_funcs(adev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vega10_ih_sw_init(void *handle)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
int r;
|
||||
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
|
||||
&adev->irq.self_irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (adev->asic_type == CHIP_VEGA10) {
|
||||
r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* TODO add doorbell for IH1 & IH2 as well */
|
||||
adev->irq.ih.use_doorbell = true;
|
||||
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
|
||||
|
||||
@ -313,6 +473,8 @@ static int vega10_ih_sw_fini(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
amdgpu_irq_fini(adev);
|
||||
amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
|
||||
amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
|
||||
amdgpu_ih_ring_fini(adev, &adev->irq.ih);
|
||||
|
||||
return 0;
|
||||
|
@ -70,8 +70,8 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
|
||||
adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START;
|
||||
adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END;
|
||||
adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0;
|
||||
adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL64_sDMA_ENGINE0;
|
||||
adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL64_sDMA_ENGINE1;
|
||||
adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL64_sDMA_ENGINE0;
|
||||
adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL64_sDMA_ENGINE1;
|
||||
adev->doorbell_index.ih = AMDGPU_DOORBELL64_IH;
|
||||
adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_DOORBELL64_UVD_RING0_1;
|
||||
adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_DOORBELL64_UVD_RING2_3;
|
||||
@ -83,5 +83,6 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
|
||||
adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7;
|
||||
/* In unit of dword doorbell */
|
||||
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1;
|
||||
adev->doorbell_index.sdma_doorbell_range = 4;
|
||||
}
|
||||
|
||||
|
@ -68,14 +68,14 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
|
||||
adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START;
|
||||
adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END;
|
||||
adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0;
|
||||
adev->doorbell_index.sdma_engine0 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0;
|
||||
adev->doorbell_index.sdma_engine1 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1;
|
||||
adev->doorbell_index.sdma_engine2 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2;
|
||||
adev->doorbell_index.sdma_engine3 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3;
|
||||
adev->doorbell_index.sdma_engine4 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4;
|
||||
adev->doorbell_index.sdma_engine5 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5;
|
||||
adev->doorbell_index.sdma_engine6 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6;
|
||||
adev->doorbell_index.sdma_engine7 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7;
|
||||
adev->doorbell_index.sdma_engine[0] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0;
|
||||
adev->doorbell_index.sdma_engine[1] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1;
|
||||
adev->doorbell_index.sdma_engine[2] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2;
|
||||
adev->doorbell_index.sdma_engine[3] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3;
|
||||
adev->doorbell_index.sdma_engine[4] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4;
|
||||
adev->doorbell_index.sdma_engine[5] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5;
|
||||
adev->doorbell_index.sdma_engine[6] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6;
|
||||
adev->doorbell_index.sdma_engine[7] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7;
|
||||
adev->doorbell_index.ih = AMDGPU_VEGA20_DOORBELL_IH;
|
||||
adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1;
|
||||
adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3;
|
||||
@ -86,5 +86,6 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
|
||||
adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;
|
||||
adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;
|
||||
adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1;
|
||||
adev->doorbell_index.sdma_doorbell_range = 20;
|
||||
}
|
||||
|
||||
|
@ -941,6 +941,69 @@ static bool vi_need_full_reset(struct amdgpu_device *adev)
|
||||
}
|
||||
}
|
||||
|
||||
static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1)
|
||||
{
|
||||
uint32_t perfctr = 0;
|
||||
uint64_t cnt0_of, cnt1_of;
|
||||
int tmp;
|
||||
|
||||
/* This reports 0 on APUs, so return to avoid writing/reading registers
|
||||
* that may or may not be different from their GPU counterparts
|
||||
*/
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return;
|
||||
|
||||
/* Set the 2 events that we wish to watch, defined above */
|
||||
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
|
||||
|
||||
/* Write to enable desired perf counters */
|
||||
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
|
||||
/* Zero out and enable the perf counters
|
||||
* Write 0x5:
|
||||
* Bit 0 = Start all counters(1)
|
||||
* Bit 2 = Global counter reset enable(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
|
||||
|
||||
msleep(1000);
|
||||
|
||||
/* Load the shadow and disable the perf counters
|
||||
* Write 0x2:
|
||||
* Bit 0 = Stop counters(0)
|
||||
* Bit 1 = Load the shadow counters(1)
|
||||
*/
|
||||
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
|
||||
|
||||
/* Read register values to get any >32bit overflow */
|
||||
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
|
||||
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
|
||||
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
|
||||
|
||||
/* Get the values and add the overflow */
|
||||
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
|
||||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static bool vi_need_reset_on_init(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 clock_cntl, pc;
|
||||
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return false;
|
||||
|
||||
/* check if the SMC is already running */
|
||||
clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
|
||||
pc = RREG32_SMC(ixSMC_PC_C);
|
||||
if ((0 == REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable)) &&
|
||||
(0x20100 <= pc))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs vi_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &vi_read_disabled_bios,
|
||||
@ -956,6 +1019,8 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
|
||||
.invalidate_hdp = &vi_invalidate_hdp,
|
||||
.need_full_reset = &vi_need_full_reset,
|
||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
.get_pcie_usage = &vi_get_pcie_usage,
|
||||
.need_reset_on_init = &vi_need_reset_on_init,
|
||||
};
|
||||
|
||||
#define CZ_REV_BRISTOL(rev) \
|
||||
@ -1726,8 +1791,8 @@ void legacy_doorbell_index_init(struct amdgpu_device *adev)
|
||||
adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL_MEC_RING6;
|
||||
adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL_MEC_RING7;
|
||||
adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL_GFX_RING0;
|
||||
adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL_sDMA_ENGINE0;
|
||||
adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL_sDMA_ENGINE1;
|
||||
adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL_sDMA_ENGINE0;
|
||||
adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL_sDMA_ENGINE1;
|
||||
adev->doorbell_index.ih = AMDGPU_DOORBELL_IH;
|
||||
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_MAX_ASSIGNMENT;
|
||||
}
|
||||
|
@ -4,8 +4,8 @@
|
||||
|
||||
config HSA_AMD
|
||||
bool "HSA kernel driver for AMD GPU devices"
|
||||
depends on DRM_AMDGPU && X86_64
|
||||
imply AMD_IOMMU_V2
|
||||
depends on DRM_AMDGPU && (X86_64 || ARM64)
|
||||
imply AMD_IOMMU_V2 if X86_64
|
||||
select MMU_NOTIFIER
|
||||
help
|
||||
Enable this if you want to use HSA features on AMD GPU devices.
|
||||
|
@ -863,6 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if CONFIG_X86_64
|
||||
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
|
||||
uint32_t *num_entries,
|
||||
struct crat_subtype_iolink *sub_type_hdr)
|
||||
@ -905,6 +906,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
|
||||
*
|
||||
@ -920,7 +922,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
|
||||
struct crat_subtype_generic *sub_type_hdr;
|
||||
int avail_size = *size;
|
||||
int numa_node_id;
|
||||
#ifdef CONFIG_X86_64
|
||||
uint32_t entries = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
|
||||
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
|
||||
@ -982,6 +986,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
|
||||
sub_type_hdr->length);
|
||||
|
||||
/* Fill in Subtype: IO Link */
|
||||
#ifdef CONFIG_X86_64
|
||||
ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
|
||||
&entries,
|
||||
(struct crat_subtype_iolink *)sub_type_hdr);
|
||||
@ -992,6 +997,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
|
||||
|
||||
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
|
||||
sub_type_hdr->length * entries);
|
||||
#else
|
||||
pr_info("IO link not available for non x86 platforms\n");
|
||||
#endif
|
||||
|
||||
crat_table->num_domains++;
|
||||
}
|
||||
|
@ -23,22 +23,7 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/device.h>
|
||||
#include "kfd_priv.h"
|
||||
|
||||
static const struct kgd2kfd_calls kgd2kfd = {
|
||||
.exit = kgd2kfd_exit,
|
||||
.probe = kgd2kfd_probe,
|
||||
.device_init = kgd2kfd_device_init,
|
||||
.device_exit = kgd2kfd_device_exit,
|
||||
.interrupt = kgd2kfd_interrupt,
|
||||
.suspend = kgd2kfd_suspend,
|
||||
.resume = kgd2kfd_resume,
|
||||
.quiesce_mm = kgd2kfd_quiesce_mm,
|
||||
.resume_mm = kgd2kfd_resume_mm,
|
||||
.schedule_evict_and_restore_process =
|
||||
kgd2kfd_schedule_evict_and_restore_process,
|
||||
.pre_reset = kgd2kfd_pre_reset,
|
||||
.post_reset = kgd2kfd_post_reset,
|
||||
};
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
static int kfd_init(void)
|
||||
{
|
||||
@ -91,20 +76,10 @@ static void kfd_exit(void)
|
||||
kfd_chardev_exit();
|
||||
}
|
||||
|
||||
int kgd2kfd_init(unsigned int interface_version,
|
||||
const struct kgd2kfd_calls **g2f)
|
||||
int kgd2kfd_init(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = kfd_init();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
*g2f = &kgd2kfd;
|
||||
|
||||
return 0;
|
||||
return kfd_init();
|
||||
}
|
||||
EXPORT_SYMBOL(kgd2kfd_init);
|
||||
|
||||
void kgd2kfd_exit(void)
|
||||
{
|
||||
|
@ -266,14 +266,6 @@ struct kfd_dev {
|
||||
bool pci_atomic_requested;
|
||||
};
|
||||
|
||||
/* KGD2KFD callbacks */
|
||||
void kgd2kfd_exit(void);
|
||||
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
|
||||
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g);
|
||||
bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
const struct kgd2kfd_shared_resources *gpu_resources);
|
||||
void kgd2kfd_device_exit(struct kfd_dev *kfd);
|
||||
|
||||
enum kfd_mempool {
|
||||
KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
|
||||
KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
|
||||
@ -541,11 +533,6 @@ struct qcm_process_device {
|
||||
/* Approx. time before evicting the process again */
|
||||
#define PROCESS_ACTIVE_TIME_MS 10
|
||||
|
||||
int kgd2kfd_quiesce_mm(struct mm_struct *mm);
|
||||
int kgd2kfd_resume_mm(struct mm_struct *mm);
|
||||
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
|
||||
struct dma_fence *fence);
|
||||
|
||||
/* 8 byte handle containing GPU ID in the most significant 4 bytes and
|
||||
* idr_handle in the least significant 4 bytes
|
||||
*/
|
||||
@ -800,20 +787,11 @@ int kfd_numa_node_to_apic_id(int numa_node_id);
|
||||
/* Interrupts */
|
||||
int kfd_interrupt_init(struct kfd_dev *dev);
|
||||
void kfd_interrupt_exit(struct kfd_dev *dev);
|
||||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||
bool interrupt_is_wanted(struct kfd_dev *dev,
|
||||
const uint32_t *ih_ring_entry,
|
||||
uint32_t *patched_ihre, bool *flag);
|
||||
|
||||
/* Power Management */
|
||||
void kgd2kfd_suspend(struct kfd_dev *kfd);
|
||||
int kgd2kfd_resume(struct kfd_dev *kfd);
|
||||
|
||||
/* GPU reset */
|
||||
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
|
||||
int kgd2kfd_post_reset(struct kfd_dev *kfd);
|
||||
|
||||
/* amdkfd Apertures */
|
||||
int kfd_init_apertures(struct kfd_process *process);
|
||||
|
||||
|
@ -1093,8 +1093,6 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
|
||||
* the GPU device is not already present in the topology device
|
||||
* list then return NULL. This means a new topology device has to
|
||||
* be created for this GPU.
|
||||
* TODO: Rather than assigning @gpu to first topology device without
|
||||
* gpu attached, it would be better to have a more stringent check.
|
||||
*/
|
||||
static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
|
||||
{
|
||||
@ -1102,12 +1100,20 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
|
||||
struct kfd_topology_device *out_dev = NULL;
|
||||
|
||||
down_write(&topology_lock);
|
||||
list_for_each_entry(dev, &topology_device_list, list)
|
||||
list_for_each_entry(dev, &topology_device_list, list) {
|
||||
/* Discrete GPUs need their own topology device list
|
||||
* entries. Don't assign them to CPU/APU nodes.
|
||||
*/
|
||||
if (!gpu->device_info->needs_iommu_device &&
|
||||
dev->node_props.cpu_cores_count)
|
||||
continue;
|
||||
|
||||
if (!dev->gpu && (dev->node_props.simd_count > 0)) {
|
||||
dev->gpu = gpu;
|
||||
out_dev = dev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
up_write(&topology_lock);
|
||||
return out_dev;
|
||||
}
|
||||
@ -1392,7 +1398,6 @@ int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
|
||||
|
||||
static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
|
||||
{
|
||||
const struct cpuinfo_x86 *cpuinfo;
|
||||
int first_cpu_of_numa_node;
|
||||
|
||||
if (!cpumask || cpumask == cpu_none_mask)
|
||||
@ -1400,9 +1405,11 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
|
||||
first_cpu_of_numa_node = cpumask_first(cpumask);
|
||||
if (first_cpu_of_numa_node >= nr_cpu_ids)
|
||||
return -1;
|
||||
cpuinfo = &cpu_data(first_cpu_of_numa_node);
|
||||
|
||||
return cpuinfo->apicid;
|
||||
#ifdef CONFIG_X86_64
|
||||
return cpu_data(first_cpu_of_numa_node).apicid;
|
||||
#else
|
||||
return first_cpu_of_numa_node;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -64,8 +64,10 @@ amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name,
|
||||
|
||||
int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
|
||||
{
|
||||
struct amdgpu_device *adev = crtc->dev->dev_private;
|
||||
struct dm_crtc_state *crtc_state = to_dm_crtc_state(crtc->state);
|
||||
struct dc_stream_state *stream_state = crtc_state->stream;
|
||||
bool enable;
|
||||
|
||||
enum amdgpu_dm_pipe_crc_source source = dm_parse_crc_source(src_name);
|
||||
|
||||
@ -80,29 +82,33 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* When enabling CRC, we should also disable dithering. */
|
||||
if (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO) {
|
||||
if (dc_stream_configure_crc(stream_state->ctx->dc,
|
||||
stream_state,
|
||||
true, true)) {
|
||||
crtc_state->crc_enabled = true;
|
||||
dc_stream_set_dither_option(stream_state,
|
||||
DITHER_OPTION_TRUN8);
|
||||
}
|
||||
else
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (dc_stream_configure_crc(stream_state->ctx->dc,
|
||||
stream_state,
|
||||
false, false)) {
|
||||
crtc_state->crc_enabled = false;
|
||||
dc_stream_set_dither_option(stream_state,
|
||||
DITHER_OPTION_DEFAULT);
|
||||
}
|
||||
else
|
||||
return -EINVAL;
|
||||
enable = (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO);
|
||||
|
||||
mutex_lock(&adev->dm.dc_lock);
|
||||
if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state,
|
||||
enable, enable)) {
|
||||
mutex_unlock(&adev->dm.dc_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* When enabling CRC, we should also disable dithering. */
|
||||
dc_stream_set_dither_option(stream_state,
|
||||
enable ? DITHER_OPTION_TRUN8
|
||||
: DITHER_OPTION_DEFAULT);
|
||||
|
||||
mutex_unlock(&adev->dm.dc_lock);
|
||||
|
||||
/*
|
||||
* Reading the CRC requires the vblank interrupt handler to be
|
||||
* enabled. Keep a reference until CRC capture stops.
|
||||
*/
|
||||
if (!crtc_state->crc_enabled && enable)
|
||||
drm_crtc_vblank_get(crtc);
|
||||
else if (crtc_state->crc_enabled && !enable)
|
||||
drm_crtc_vblank_put(crtc);
|
||||
|
||||
crtc_state->crc_enabled = enable;
|
||||
|
||||
/* Reset crc_skipped on dm state */
|
||||
crtc_state->crc_skip_count = 0;
|
||||
return 0;
|
||||
|
@ -783,6 +783,45 @@ static ssize_t dtn_log_write(
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backlight at this moment. Read only.
|
||||
* As written to display, taking ABM and backlight lut into account.
|
||||
* Ranges from 0x0 to 0x10000 (= 100% PWM)
|
||||
*/
|
||||
static int current_backlight_read(struct seq_file *m, void *data)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)m->private;
|
||||
struct drm_device *dev = node->minor->dev;
|
||||
struct amdgpu_device *adev = dev->dev_private;
|
||||
struct dc *dc = adev->dm.dc;
|
||||
unsigned int backlight = dc_get_current_backlight_pwm(dc);
|
||||
|
||||
seq_printf(m, "0x%x\n", backlight);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backlight value that is being approached. Read only.
|
||||
* As written to display, taking ABM and backlight lut into account.
|
||||
* Ranges from 0x0 to 0x10000 (= 100% PWM)
|
||||
*/
|
||||
static int target_backlight_read(struct seq_file *m, void *data)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)m->private;
|
||||
struct drm_device *dev = node->minor->dev;
|
||||
struct amdgpu_device *adev = dev->dev_private;
|
||||
struct dc *dc = adev->dm.dc;
|
||||
unsigned int backlight = dc_get_target_backlight_pwm(dc);
|
||||
|
||||
seq_printf(m, "0x%x\n", backlight);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct drm_info_list amdgpu_dm_debugfs_list[] = {
|
||||
{"amdgpu_current_backlight_pwm", &current_backlight_read},
|
||||
{"amdgpu_target_backlight_pwm", &target_backlight_read},
|
||||
};
|
||||
|
||||
int dtn_debugfs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
static const struct file_operations dtn_log_fops = {
|
||||
@ -793,9 +832,15 @@ int dtn_debugfs_init(struct amdgpu_device *adev)
|
||||
};
|
||||
|
||||
struct drm_minor *minor = adev->ddev->primary;
|
||||
struct dentry *root = minor->debugfs_root;
|
||||
struct dentry *ent, *root = minor->debugfs_root;
|
||||
int ret;
|
||||
|
||||
struct dentry *ent = debugfs_create_file(
|
||||
ret = amdgpu_debugfs_add_files(adev, amdgpu_dm_debugfs_list,
|
||||
ARRAY_SIZE(amdgpu_dm_debugfs_list));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ent = debugfs_create_file(
|
||||
"amdgpu_dm_dtn_log",
|
||||
0644,
|
||||
root,
|
||||
|
@ -192,7 +192,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
|
||||
int bpp = 0;
|
||||
int pbn = 0;
|
||||
|
||||
aconnector = stream->sink->priv;
|
||||
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
|
||||
|
||||
if (!aconnector || !aconnector->mst_port)
|
||||
return false;
|
||||
@ -205,7 +205,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
|
||||
mst_port = aconnector->port;
|
||||
|
||||
if (enable) {
|
||||
clock = stream->timing.pix_clk_khz;
|
||||
clock = stream->timing.pix_clk_100hz / 10;
|
||||
|
||||
switch (stream->timing.display_color_depth) {
|
||||
|
||||
@ -263,6 +263,13 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* poll pending down reply before clear payload allocation table
|
||||
*/
|
||||
void dm_helpers_dp_mst_poll_pending_down_reply(
|
||||
struct dc_context *ctx,
|
||||
const struct dc_link *link)
|
||||
{}
|
||||
|
||||
/*
|
||||
* Clear payload allocation table before enable MST DP link.
|
||||
@ -284,7 +291,7 @@ bool dm_helpers_dp_mst_poll_for_allocation_change_trigger(
|
||||
struct drm_dp_mst_topology_mgr *mst_mgr;
|
||||
int ret;
|
||||
|
||||
aconnector = stream->sink->priv;
|
||||
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
|
||||
|
||||
if (!aconnector || !aconnector->mst_port)
|
||||
return false;
|
||||
@ -312,7 +319,7 @@ bool dm_helpers_dp_mst_send_payload_allocation(
|
||||
struct drm_dp_mst_port *mst_port;
|
||||
int ret;
|
||||
|
||||
aconnector = stream->sink->priv;
|
||||
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
|
||||
|
||||
if (!aconnector || !aconnector->mst_port)
|
||||
return false;
|
||||
|
@ -35,6 +35,8 @@
|
||||
|
||||
#include "dc_link_ddc.h"
|
||||
|
||||
#include "i2caux_interface.h"
|
||||
|
||||
/* #define TRACE_DPCD */
|
||||
|
||||
#ifdef TRACE_DPCD
|
||||
@ -81,80 +83,24 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
|
||||
struct drm_dp_aux_msg *msg)
|
||||
{
|
||||
ssize_t result = 0;
|
||||
enum i2caux_transaction_action action;
|
||||
enum aux_transaction_type type;
|
||||
struct aux_payload payload;
|
||||
|
||||
if (WARN_ON(msg->size > 16))
|
||||
return -E2BIG;
|
||||
|
||||
switch (msg->request & ~DP_AUX_I2C_MOT) {
|
||||
case DP_AUX_NATIVE_READ:
|
||||
type = AUX_TRANSACTION_TYPE_DP;
|
||||
action = I2CAUX_TRANSACTION_ACTION_DP_READ;
|
||||
payload.address = msg->address;
|
||||
payload.data = msg->buffer;
|
||||
payload.length = msg->size;
|
||||
payload.reply = &msg->reply;
|
||||
payload.i2c_over_aux = (msg->request & DP_AUX_NATIVE_WRITE) == 0;
|
||||
payload.write = (msg->request & DP_AUX_I2C_READ) == 0;
|
||||
payload.mot = (msg->request & DP_AUX_I2C_MOT) != 0;
|
||||
payload.defer_delay = 0;
|
||||
|
||||
result = dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service,
|
||||
msg->address,
|
||||
&msg->reply,
|
||||
msg->buffer,
|
||||
msg->size,
|
||||
type,
|
||||
action);
|
||||
break;
|
||||
case DP_AUX_NATIVE_WRITE:
|
||||
type = AUX_TRANSACTION_TYPE_DP;
|
||||
action = I2CAUX_TRANSACTION_ACTION_DP_WRITE;
|
||||
result = dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service, &payload);
|
||||
|
||||
dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service,
|
||||
msg->address,
|
||||
&msg->reply,
|
||||
msg->buffer,
|
||||
msg->size,
|
||||
type,
|
||||
action);
|
||||
if (payload.write)
|
||||
result = msg->size;
|
||||
break;
|
||||
case DP_AUX_I2C_READ:
|
||||
type = AUX_TRANSACTION_TYPE_I2C;
|
||||
if (msg->request & DP_AUX_I2C_MOT)
|
||||
action = I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT;
|
||||
else
|
||||
action = I2CAUX_TRANSACTION_ACTION_I2C_READ;
|
||||
|
||||
result = dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service,
|
||||
msg->address,
|
||||
&msg->reply,
|
||||
msg->buffer,
|
||||
msg->size,
|
||||
type,
|
||||
action);
|
||||
break;
|
||||
case DP_AUX_I2C_WRITE:
|
||||
type = AUX_TRANSACTION_TYPE_I2C;
|
||||
if (msg->request & DP_AUX_I2C_MOT)
|
||||
action = I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT;
|
||||
else
|
||||
action = I2CAUX_TRANSACTION_ACTION_I2C_WRITE;
|
||||
|
||||
dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service,
|
||||
msg->address,
|
||||
&msg->reply,
|
||||
msg->buffer,
|
||||
msg->size,
|
||||
type,
|
||||
action);
|
||||
result = msg->size;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#ifdef TRACE_DPCD
|
||||
log_dpcd(msg->request,
|
||||
msg->address,
|
||||
msg->buffer,
|
||||
msg->size,
|
||||
r == DDC_RESULT_SUCESSFULL);
|
||||
#endif
|
||||
|
||||
if (result < 0) /* DC doesn't know about kernel error codes */
|
||||
result = -EIO;
|
||||
@ -228,6 +174,11 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
|
||||
aconnector->edid = edid;
|
||||
}
|
||||
|
||||
if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) {
|
||||
dc_sink_release(aconnector->dc_sink);
|
||||
aconnector->dc_sink = NULL;
|
||||
}
|
||||
|
||||
if (!aconnector->dc_sink) {
|
||||
struct dc_sink *dc_sink;
|
||||
struct dc_sink_init_data init_params = {
|
||||
|
@ -559,6 +559,58 @@ void pp_rv_set_pme_wa_enable(struct pp_smu *pp)
|
||||
pp_funcs->notify_smu_enable_pwe(pp_handle);
|
||||
}
|
||||
|
||||
void pp_rv_set_active_display_count(struct pp_smu *pp, int count)
|
||||
{
|
||||
const struct dc_context *ctx = pp->dm;
|
||||
struct amdgpu_device *adev = ctx->driver_context;
|
||||
void *pp_handle = adev->powerplay.pp_handle;
|
||||
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
|
||||
|
||||
if (!pp_funcs || !pp_funcs->set_active_display_count)
|
||||
return;
|
||||
|
||||
pp_funcs->set_active_display_count(pp_handle, count);
|
||||
}
|
||||
|
||||
void pp_rv_set_min_deep_sleep_dcfclk(struct pp_smu *pp, int clock)
|
||||
{
|
||||
const struct dc_context *ctx = pp->dm;
|
||||
struct amdgpu_device *adev = ctx->driver_context;
|
||||
void *pp_handle = adev->powerplay.pp_handle;
|
||||
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
|
||||
|
||||
if (!pp_funcs || !pp_funcs->set_min_deep_sleep_dcefclk)
|
||||
return;
|
||||
|
||||
pp_funcs->set_min_deep_sleep_dcefclk(pp_handle, clock);
|
||||
}
|
||||
|
||||
void pp_rv_set_hard_min_dcefclk_by_freq(struct pp_smu *pp, int clock)
|
||||
{
|
||||
const struct dc_context *ctx = pp->dm;
|
||||
struct amdgpu_device *adev = ctx->driver_context;
|
||||
void *pp_handle = adev->powerplay.pp_handle;
|
||||
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
|
||||
|
||||
if (!pp_funcs || !pp_funcs->set_hard_min_dcefclk_by_freq)
|
||||
return;
|
||||
|
||||
pp_funcs->set_hard_min_dcefclk_by_freq(pp_handle, clock);
|
||||
}
|
||||
|
||||
void pp_rv_set_hard_min_fclk_by_freq(struct pp_smu *pp, int mhz)
|
||||
{
|
||||
const struct dc_context *ctx = pp->dm;
|
||||
struct amdgpu_device *adev = ctx->driver_context;
|
||||
void *pp_handle = adev->powerplay.pp_handle;
|
||||
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
|
||||
|
||||
if (!pp_funcs || !pp_funcs->set_hard_min_fclk_by_freq)
|
||||
return;
|
||||
|
||||
pp_funcs->set_hard_min_fclk_by_freq(pp_handle, mhz);
|
||||
}
|
||||
|
||||
void dm_pp_get_funcs_rv(
|
||||
struct dc_context *ctx,
|
||||
struct pp_smu_funcs_rv *funcs)
|
||||
@ -567,4 +619,9 @@ void dm_pp_get_funcs_rv(
|
||||
funcs->set_display_requirement = pp_rv_set_display_requirement;
|
||||
funcs->set_wm_ranges = pp_rv_set_wm_ranges;
|
||||
funcs->set_pme_wa_enable = pp_rv_set_pme_wa_enable;
|
||||
funcs->set_display_count = pp_rv_set_active_display_count;
|
||||
funcs->set_min_deep_sleep_dcfclk = pp_rv_set_min_deep_sleep_dcfclk;
|
||||
funcs->set_hard_min_dcfclk_by_freq = pp_rv_set_hard_min_dcefclk_by_freq;
|
||||
funcs->set_hard_min_fclk_by_freq = pp_rv_set_hard_min_fclk_by_freq;
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
# Makefile for Display Core (dc) component.
|
||||
#
|
||||
|
||||
DC_LIBS = basics bios calcs dce gpio i2caux irq virtual
|
||||
DC_LIBS = basics bios calcs dce gpio irq virtual
|
||||
|
||||
ifdef CONFIG_DRM_AMD_DC_DCN1_0
|
||||
DC_LIBS += dcn10 dml
|
||||
@ -41,7 +41,8 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
|
||||
include $(AMD_DC)
|
||||
|
||||
DISPLAY_CORE = dc.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
|
||||
dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o
|
||||
dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \
|
||||
dc_vm_helper.o
|
||||
|
||||
AMD_DISPLAY_CORE = $(addprefix $(AMDDALPATH)/dc/core/,$(DISPLAY_CORE))
|
||||
|
||||
|
@ -835,18 +835,6 @@ static enum bp_result bios_parser_enable_crtc(
|
||||
return bp->cmd_tbl.enable_crtc(bp, id, enable);
|
||||
}
|
||||
|
||||
static enum bp_result bios_parser_crtc_source_select(
|
||||
struct dc_bios *dcb,
|
||||
struct bp_crtc_source_select *bp_params)
|
||||
{
|
||||
struct bios_parser *bp = BP_FROM_DCB(dcb);
|
||||
|
||||
if (!bp->cmd_tbl.select_crtc_source)
|
||||
return BP_RESULT_FAILURE;
|
||||
|
||||
return bp->cmd_tbl.select_crtc_source(bp, bp_params);
|
||||
}
|
||||
|
||||
static enum bp_result bios_parser_enable_disp_power_gating(
|
||||
struct dc_bios *dcb,
|
||||
enum controller_id controller_id,
|
||||
@ -2842,8 +2830,6 @@ static const struct dc_vbios_funcs vbios_funcs = {
|
||||
|
||||
.program_crtc_timing = bios_parser_program_crtc_timing, /* still use. should probably retire and program directly */
|
||||
|
||||
.crtc_source_select = bios_parser_crtc_source_select, /* still use. should probably retire and program directly */
|
||||
|
||||
.program_display_engine_pll = bios_parser_program_display_engine_pll,
|
||||
|
||||
.enable_disp_power_gating = bios_parser_enable_disp_power_gating,
|
||||
|
@ -1083,18 +1083,6 @@ static enum bp_result bios_parser_enable_crtc(
|
||||
return bp->cmd_tbl.enable_crtc(bp, id, enable);
|
||||
}
|
||||
|
||||
static enum bp_result bios_parser_crtc_source_select(
|
||||
struct dc_bios *dcb,
|
||||
struct bp_crtc_source_select *bp_params)
|
||||
{
|
||||
struct bios_parser *bp = BP_FROM_DCB(dcb);
|
||||
|
||||
if (!bp->cmd_tbl.select_crtc_source)
|
||||
return BP_RESULT_FAILURE;
|
||||
|
||||
return bp->cmd_tbl.select_crtc_source(bp, bp_params);
|
||||
}
|
||||
|
||||
static enum bp_result bios_parser_enable_disp_power_gating(
|
||||
struct dc_bios *dcb,
|
||||
enum controller_id controller_id,
|
||||
@ -1899,8 +1887,6 @@ static const struct dc_vbios_funcs vbios_funcs = {
|
||||
|
||||
.is_accelerated_mode = bios_parser_is_accelerated_mode,
|
||||
|
||||
.is_active_display = bios_is_active_display,
|
||||
|
||||
.set_scratch_critical_state = bios_parser_set_scratch_critical_state,
|
||||
|
||||
|
||||
@ -1917,8 +1903,6 @@ static const struct dc_vbios_funcs vbios_funcs = {
|
||||
|
||||
.program_crtc_timing = bios_parser_program_crtc_timing,
|
||||
|
||||
.crtc_source_select = bios_parser_crtc_source_select,
|
||||
|
||||
.enable_disp_power_gating = bios_parser_enable_disp_power_gating,
|
||||
|
||||
.bios_parser_destroy = firmware_parser_destroy,
|
||||
|
@ -83,101 +83,7 @@ uint32_t bios_get_vga_enabled_displays(
|
||||
{
|
||||
uint32_t active_disp = 1;
|
||||
|
||||
if (bios->regs->BIOS_SCRATCH_3) /*follow up with other asic, todo*/
|
||||
active_disp = REG_READ(BIOS_SCRATCH_3) & 0XFFFF;
|
||||
active_disp = REG_READ(BIOS_SCRATCH_3) & 0XFFFF;
|
||||
return active_disp;
|
||||
}
|
||||
|
||||
bool bios_is_active_display(
|
||||
struct dc_bios *bios,
|
||||
enum signal_type signal,
|
||||
const struct connector_device_tag_info *device_tag)
|
||||
{
|
||||
uint32_t active = 0;
|
||||
uint32_t connected = 0;
|
||||
uint32_t bios_scratch_0 = 0;
|
||||
uint32_t bios_scratch_3 = 0;
|
||||
|
||||
switch (signal) {
|
||||
case SIGNAL_TYPE_DVI_SINGLE_LINK:
|
||||
case SIGNAL_TYPE_DVI_DUAL_LINK:
|
||||
case SIGNAL_TYPE_HDMI_TYPE_A:
|
||||
case SIGNAL_TYPE_DISPLAY_PORT:
|
||||
case SIGNAL_TYPE_DISPLAY_PORT_MST:
|
||||
{
|
||||
if (device_tag->dev_id.device_type == DEVICE_TYPE_DFP) {
|
||||
switch (device_tag->dev_id.enum_id) {
|
||||
case 1:
|
||||
{
|
||||
active = ATOM_S3_DFP1_ACTIVE;
|
||||
connected = 0x0008; //ATOM_DISPLAY_DFP1_CONNECT
|
||||
}
|
||||
break;
|
||||
|
||||
case 2:
|
||||
{
|
||||
active = ATOM_S3_DFP2_ACTIVE;
|
||||
connected = 0x0080; //ATOM_DISPLAY_DFP2_CONNECT
|
||||
}
|
||||
break;
|
||||
|
||||
case 3:
|
||||
{
|
||||
active = ATOM_S3_DFP3_ACTIVE;
|
||||
connected = 0x0200; //ATOM_DISPLAY_DFP3_CONNECT
|
||||
}
|
||||
break;
|
||||
|
||||
case 4:
|
||||
{
|
||||
active = ATOM_S3_DFP4_ACTIVE;
|
||||
connected = 0x0400; //ATOM_DISPLAY_DFP4_CONNECT
|
||||
}
|
||||
break;
|
||||
|
||||
case 5:
|
||||
{
|
||||
active = ATOM_S3_DFP5_ACTIVE;
|
||||
connected = 0x0800; //ATOM_DISPLAY_DFP5_CONNECT
|
||||
}
|
||||
break;
|
||||
|
||||
case 6:
|
||||
{
|
||||
active = ATOM_S3_DFP6_ACTIVE;
|
||||
connected = 0x0040; //ATOM_DISPLAY_DFP6_CONNECT
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case SIGNAL_TYPE_LVDS:
|
||||
case SIGNAL_TYPE_EDP:
|
||||
{
|
||||
active = ATOM_S3_LCD1_ACTIVE;
|
||||
connected = 0x0002; //ATOM_DISPLAY_LCD1_CONNECT
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (bios->regs->BIOS_SCRATCH_0) /*follow up with other asic, todo*/
|
||||
bios_scratch_0 = REG_READ(BIOS_SCRATCH_0);
|
||||
if (bios->regs->BIOS_SCRATCH_3) /*follow up with other asic, todo*/
|
||||
bios_scratch_3 = REG_READ(BIOS_SCRATCH_3);
|
||||
|
||||
bios_scratch_3 &= ATOM_S3_DEVICE_ACTIVE_MASK;
|
||||
if ((active & bios_scratch_3) && (connected & bios_scratch_0))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user