Merge branch 'drm-next' of https://gitlab.freedesktop.org/drm/kernel.git

# Conflicts:
#	drivers/gpu/drm/display/drm_bridge_connector.c
#	drivers/gpu/drm/i915/display/intel_display_driver.c
#	drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

commit 3f4ae5e4fd

281	Documentation/accel/amdxdna/amdnpu.rst (new file)
@@ -0,0 +1,281 @@
.. SPDX-License-Identifier: GPL-2.0-only

.. include:: <isonum.txt>

=========
 AMD NPU
=========

:Copyright: |copy| 2024 Advanced Micro Devices, Inc.
:Author: Sonal Santan <sonal.santan@amd.com>

Overview
========

AMD NPU (Neural Processing Unit) is a multi-user AI inference accelerator
integrated into AMD client APUs. The NPU enables efficient execution of
Machine Learning applications like CNNs and LLMs. The NPU is based on the
`AMD XDNA Architecture`_ and is managed by the **amdxdna** driver.


Hardware Description
====================

AMD NPU consists of the following hardware components:

AMD XDNA Array
--------------

AMD XDNA Array comprises a 2D array of compute and memory tiles built with
`AMD AI Engine Technology`_. Each column has 4 rows of compute tiles and 1
row of memory tiles. Each compute tile contains a VLIW processor with its own
dedicated program and data memory. The memory tile acts as L2 memory. The 2D
array can be partitioned at a column boundary, creating a spatially isolated
partition which can be bound to a workload context.

Each column also has dedicated DMA engines to move data between host DDR and
the memory tile.

AMD Phoenix and AMD Hawk Point client NPUs have a 4x5 topology, i.e., 4 rows
of compute tiles arranged into 5 columns. AMD Strix Point client APUs have a
4x8 topology, i.e., 4 rows of compute tiles arranged into 8 columns.

Shared L2 Memory
----------------

The single row of memory tiles creates a pool of software-managed on-chip L2
memory. DMA engines are used to move data between host DDR and memory tiles.
AMD Phoenix and AMD Hawk Point NPUs have a total of 2560 KB of L2 memory.
AMD Strix Point NPU has a total of 4096 KB of L2 memory.

Microcontroller
---------------

A microcontroller runs NPU Firmware, which is responsible for command
processing, XDNA Array partition setup, XDNA Array configuration, workload
context management and workload orchestration.

NPU Firmware uses a dedicated instance of an isolated non-privileged context
called ERT to service each workload context. ERT is also used to execute
user-provided ``ctrlcode`` associated with the workload context.

NPU Firmware uses a single isolated privileged context called MERT to service
management commands from the amdxdna driver.

Mailboxes
---------

The microcontroller and amdxdna driver use a privileged channel for management
tasks like context setup, telemetry, queries, error handling and user channel
setup. As mentioned before, privileged channel requests are serviced by MERT.
The privileged channel is bound to a single mailbox.

The microcontroller and amdxdna driver use a dedicated user channel per
workload context. The user channel is primarily used for submitting work to
the NPU. As mentioned before, user channel requests are serviced by an
instance of ERT. Each user channel is bound to its own dedicated mailbox.

PCIe EP
-------

The NPU is visible to the x86 host CPU as a PCIe device with multiple BARs and
several MSI-X interrupt vectors. The NPU uses a dedicated high-bandwidth
SoC-level fabric for reading from and writing to host memory. Each instance of
ERT gets its own dedicated MSI-X interrupt. MERT gets a single MSI-X interrupt.

The number of PCIe BARs varies depending on the specific device. Based on their
functions, PCIe BARs can generally be categorized into the following types.

* PSP BAR: Exposes the AMD PSP (Platform Security Processor) function
* SMU BAR: Exposes the AMD SMU (System Management Unit) function
* SRAM BAR: Exposes ring buffers for the mailbox
* Mailbox BAR: Exposes the mailbox control registers (head, tail and ISR
  registers etc.)
* Public Register BAR: Exposes public registers

On specific devices, several of the above-mentioned BAR types might be combined
into a single physical PCIe BAR, or a module might require two physical PCIe
BARs to be fully functional. For example:

* On AMD Phoenix devices, the PSP, SMU and Public Register BARs are on PCIe BAR
  index 0.
* On AMD Strix Point devices, the Mailbox and Public Register BARs are on PCIe
  BAR index 0. The PSP has some registers in PCIe BAR index 0 (Public Register
  BAR) and PCIe BAR index 4 (PSP BAR).

Process Isolation Hardware
--------------------------

As explained before, the XDNA Array can be dynamically divided into isolated
spatial partitions, each of which may have one or more columns. The spatial
partition is set up by the microcontroller programming the column isolation
registers. Each spatial partition is associated with a PASID, which is also
programmed by the microcontroller. Hence, multiple spatial partitions in the
NPU can make concurrent host accesses, each protected by its PASID.

The NPU FW itself uses isolated contexts, enforced by the microcontroller MMU,
for servicing user and privileged channel requests.


Mixed Spatial and Temporal Scheduling
=====================================

The AMD XDNA architecture supports mixed spatial and temporal (time sharing)
scheduling of the 2D array. This means that spatial partitions may be set up
and torn down dynamically to accommodate various workloads. A *spatial*
partition may be *exclusively* bound to one workload context while another
partition may be *temporally* shared by more than one workload context. The
microcontroller updates the PASID for a temporally shared partition to match
the context that has been bound to the partition at any moment.

Resource Solver
---------------

The Resource Solver component of the amdxdna driver manages the allocation
of the 2D array among various workloads. Every workload describes, in its
metadata, the number of columns required to run its NPU binary. The Resource
Solver component uses hints passed by the workload and its own heuristics to
decide the 2D array (re)partition strategy and the mapping of workloads for
spatial and temporal sharing of columns. The FW enforces the
context-to-column(s) resource binding decisions made by the Resource Solver.

AMD Phoenix and AMD Hawk Point client NPUs can support 6 concurrent workload
contexts. AMD Strix Point can support 16 concurrent workload contexts.


Application Binaries
====================

An NPU application workload comprises two separate binaries, both generated
by the NPU compiler.

1. AMD XDNA Array overlay, which is used to configure an NPU spatial
   partition. The overlay contains instructions for setting up the stream
   switch configuration and the ELF for the compute tiles. The overlay is
   loaded onto the spatial partition bound to the workload by the associated
   ERT instance. Refer to the
   `Versal Adaptive SoC AIE-ML Architecture Manual (AM020)`_ for more details.

2. ``ctrlcode``, used for orchestrating the overlay loaded on the spatial
   partition. ``ctrlcode`` is executed by the ERT running in protected mode on
   the microcontroller in the context of the workload. ``ctrlcode`` is made up
   of a sequence of opcodes named ``XAie_TxnOpcode``. Refer to the
   `AI Engine Run Time`_ for more details.


Special Host Buffers
====================

Per-context Instruction Buffer
------------------------------

Every workload context uses a host-resident 64 MB buffer which is memory
mapped into the ERT instance created to service the workload. The ``ctrlcode``
used by the workload is copied into this special memory. This buffer is
protected by PASID like all other input/output buffers used by that workload.
The instruction buffer is also mapped into the user space of the workload.

Global Privileged Buffer
------------------------

In addition, the driver also allocates a single buffer for maintenance tasks
like recording errors from MERT. This global buffer uses the global IOMMU
domain and is only accessible by MERT.


High-level Use Flow
===================

Here are the steps to run a workload on AMD NPU (a condensed userspace sketch
follows the list):

1. Compile the workload into an overlay and a ``ctrlcode`` binary.
2. Userspace opens a context in the driver and provides the overlay.
3. The driver checks with the Resource Solver for provisioning a set of columns
   for the workload.
4. The driver then asks MERT to create a context on the device with the desired
   columns.
5. MERT then creates an instance of ERT. MERT also maps the Instruction Buffer
   into ERT memory.
6. The userspace then copies the ``ctrlcode`` to the Instruction Buffer.
7. Userspace then creates a command buffer with pointers to the input, output
   and instruction buffers; it then submits the command buffer to the driver
   and goes to sleep waiting for completion.
8. The driver sends the command over the Mailbox to ERT.
9. ERT *executes* the ``ctrlcode`` in the instruction buffer.
10. Execution of the ``ctrlcode`` kicks off DMAs to and from the host DDR while
    AMD XDNA Array is running.
11. When ERT reaches the end of the ``ctrlcode``, it raises an MSI-X interrupt
    to send a completion signal to the driver, which then wakes up the waiting
    workload.
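
A condensed userspace sketch of this flow follows. The request structures,
ioctl numbers and the column-count hint are illustrative placeholders rather
than the actual amdxdna UAPI; the real interface is defined in
``include/uapi/drm/amdxdna_accel.h`` and is normally driven through the XRT
stack described below::

  /*
   * Hypothetical sketch of the use flow above. The structures and ioctl
   * numbers are placeholders, NOT the real amdxdna UAPI.
   */
  #include <fcntl.h>
  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <unistd.h>

  struct npu_create_ctx_req {       /* placeholder, not real UAPI */
          uint64_t overlay_va;      /* XDNA Array overlay (step 2) */
          uint32_t num_cols;        /* hint for the Resource Solver */
          uint32_t ctx_handle;      /* returned context handle */
  };

  struct npu_exec_req {             /* placeholder, not real UAPI */
          uint32_t ctx_handle;
          uint32_t pad;
          uint64_t cmd_bo_va;       /* command buffer (step 7) */
  };

  #define NPU_IOCTL_CREATE_CTX _IOWR('x', 1, struct npu_create_ctx_req)
  #define NPU_IOCTL_EXEC       _IOWR('x', 2, struct npu_exec_req)
  #define NPU_IOCTL_WAIT       _IOWR('x', 3, struct npu_exec_req)

  int run_workload(const void *overlay, const void *cmd_buf)
  {
          struct npu_create_ctx_req ctx = {
                  .overlay_va = (uintptr_t)overlay,
                  .num_cols = 4,          /* columns requested (step 3) */
          };
          struct npu_exec_req exec = { 0 };
          int fd = open("/dev/accel/accel0", O_RDWR);  /* accel device node */

          if (fd < 0)
                  return -1;
          ioctl(fd, NPU_IOCTL_CREATE_CTX, &ctx);  /* steps 2-5 */
          /* Step 6 would mmap() the instruction buffer and copy the
           * ctrlcode into it; elided here. */
          exec.ctx_handle = ctx.ctx_handle;
          exec.cmd_bo_va = (uintptr_t)cmd_buf;
          ioctl(fd, NPU_IOCTL_EXEC, &exec);       /* steps 7-8 */
          ioctl(fd, NPU_IOCTL_WAIT, &exec);       /* sleeps until step 11 */
          close(fd);
          return 0;
  }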


Boot Flow
=========

The amdxdna driver uses the PSP to securely load the signed NPU FW and kick
off the boot of the NPU microcontroller. The driver then waits for the alive
signal at a special location in BAR 0. The NPU is switched off during SoC
suspend and turned on after resume, at which point the NPU FW is reloaded and
the handshake is performed again.


Userspace components
====================

Compiler
--------

Peano is an LLVM-based open-source compiler for the AMD XDNA Array compute
tile, available at:
https://github.com/Xilinx/llvm-aie

The open-source IREE compiler supports graph compilation of ML models for AMD
NPU and uses Peano underneath. It is available at:
https://github.com/nod-ai/iree-amd-aie

Usermode Driver (UMD)
---------------------

The open-source XRT runtime stack interfaces with the amdxdna kernel driver.
XRT can be found at:
https://github.com/Xilinx/XRT

The open-source XRT shim for the NPU can be found at:
https://github.com/amd/xdna-driver


DMA Operation
=============

DMA operation instructions are encoded in the ``ctrlcode`` as the
``XAIE_IO_BLOCKWRITE`` opcode. When ERT executes ``XAIE_IO_BLOCKWRITE``, DMA
operations between host DDR and L2 memory are effected.


Error Handling
==============

When MERT detects an error in the AMD XDNA Array, it pauses execution for that
workload context and sends an asynchronous message to the driver over the
privileged channel. The driver then sends a buffer pointer to MERT to capture
the register states for the partition bound to the faulting workload context.
The driver then decodes the error by reading the contents of the buffer.


Telemetry
=========

MERT can report various kinds of telemetry information like the following:

* L1 interrupt counter
* DMA counter
* Deep Sleep counter
* etc.


References
==========

- `AMD XDNA Architecture <https://www.amd.com/en/technologies/xdna.html>`_
- `AMD AI Engine Technology <https://www.xilinx.com/products/technology/ai-engine.html>`_
- `Peano <https://github.com/Xilinx/llvm-aie>`_
- `Versal Adaptive SoC AIE-ML Architecture Manual (AM020) <https://docs.amd.com/r/en-US/am020-versal-aie-ml>`_
- `AI Engine Run Time <https://github.com/Xilinx/aie-rt/tree/release/main_aig>`_
11	Documentation/accel/amdxdna/index.rst (new file)
@@ -0,0 +1,11 @@
.. SPDX-License-Identifier: GPL-2.0-only

=====================================
 accel/amdxdna NPU driver
=====================================

The accel/amdxdna driver supports the AMD NPU (Neural Processing Unit).

.. toctree::

   amdnpu
@@ -8,6 +8,7 @@ Compute Accelerators
   :maxdepth: 1

   introduction
   amdxdna/index
   qaic/index

.. only:: subproject and html
@@ -64,13 +64,14 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgroup-v1>`
   5-6. Device
   5-7. RDMA
     5-7-1. RDMA Interface Files
   5-8. HugeTLB
     5.8-1. HugeTLB Interface Files
   5-9. Misc
     5.9-1 Miscellaneous cgroup Interface Files
     5.9-2 Migration and Ownership
   5-10. Others
     5-10-1. perf_event
   5-8. DMEM
   5-9. HugeTLB
     5.9-1. HugeTLB Interface Files
   5-10. Misc
     5.10-1 Miscellaneous cgroup Interface Files
     5.10-2 Migration and Ownership
   5-11. Others
     5-11-1. perf_event
   5-N. Non-normative information
     5-N-1. CPU controller root cgroup process behaviour
     5-N-2. IO controller root cgroup process behaviour
@@ -2626,6 +2627,49 @@ RDMA Interface Files
	  mlx4_0 hca_handle=1 hca_object=20
	  ocrdma1 hca_handle=1 hca_object=23

DMEM
----

The "dmem" controller regulates the distribution and accounting of
device memory regions. Because each memory region may have its own page size,
which does not have to be equal to the system page size, the units are always
bytes.

DMEM Interface Files
~~~~~~~~~~~~~~~~~~~~

  dmem.max, dmem.min, dmem.low
	A read-write nested-keyed file that exists for all cgroups
	except the root and describes the currently configured resource
	limit for a region.

	An example for xe follows::

	  drm/0000:03:00.0/vram0 1073741824
	  drm/0000:03:00.0/stolen max

	The semantics are the same as for the memory cgroup controller, and are
	calculated in the same way.

  dmem.capacity
	A read-only file that describes the maximum region capacity.
	It only exists on the root cgroup. Not all memory can be
	allocated by cgroups, as the kernel reserves some for
	internal use.

	An example for xe follows::

	  drm/0000:03:00.0/vram0 8514437120
	  drm/0000:03:00.0/stolen 67108864

  dmem.current
	A read-only file that describes current resource usage.
	It exists for all cgroups except the root.

	An example for xe follows::

	  drm/0000:03:00.0/vram0 12550144
	  drm/0000:03:00.0/stolen 8650752

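Limits are set by writing a region key and value into the nested-keyed
files, following the same conventions as other cgroup v2 controllers; a
minimal sketch, assuming the xe region names above and a cgroup named
"gpu-jobs" (the cgroup name and limit are illustrative)::

	  # Cap vram0 usage of this cgroup at 512 MiB; writing "max"
	  # removes the limit.
	  echo "drm/0000:03:00.0/vram0 536870912" > /sys/fs/cgroup/gpu-jobs/dmem.max
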
HugeTLB
-------

9	Documentation/core-api/cgroup.rst (new file)
@@ -0,0 +1,9 @@
==================
Cgroup Kernel APIs
==================

Device Memory Cgroup API (dmemcg)
=================================
.. kernel-doc:: kernel/cgroup/dmem.c
   :export:
@@ -110,6 +110,7 @@ more memory-management documentation in Documentation/mm/index.rst.
   dma-isa-lpc
   swiotlb
   mm-api
   cgroup
   genalloc
   pin_user_pages
   boot-time-mm
@@ -14,6 +14,8 @@ properties:
    enum:
      - brcm,bcm2711-hdmi0
      - brcm,bcm2711-hdmi1
      - brcm,bcm2712-hdmi0
      - brcm,bcm2712-hdmi1

  reg:
    items:
@@ -13,6 +13,7 @@ properties:
  compatible:
    enum:
      - brcm,bcm2711-hvs
      - brcm,bcm2712-hvs
      - brcm,bcm2835-hvs

  reg:
@@ -36,7 +37,9 @@ if:
  properties:
    compatible:
      contains:
        const: brcm,bcm2711-hvs
        enum:
          - brcm,bcm2711-hvs
          - brcm,bcm2712-hvs

then:
  required:
@@ -20,6 +20,9 @@ properties:
      - brcm,bcm2711-pixelvalve2
      - brcm,bcm2711-pixelvalve3
      - brcm,bcm2711-pixelvalve4
      - brcm,bcm2712-pixelvalve0
      - brcm,bcm2712-pixelvalve1
      - brcm,bcm2712-pixelvalve2

  reg:
    maxItems: 1
@@ -11,7 +11,10 @@ maintainers:

properties:
  compatible:
    const: brcm,bcm2835-txp
    enum:
      - brcm,bcm2712-mop
      - brcm,bcm2712-moplet
      - brcm,bcm2835-txp

  reg:
    maxItems: 1
@@ -18,6 +18,7 @@ properties:
  compatible:
    enum:
      - brcm,bcm2711-vc5
      - brcm,bcm2712-vc6
      - brcm,bcm2835-vc4
      - brcm,cygnus-vc4

@@ -19,6 +19,7 @@ properties:
    enum:
      - renesas,r8a779a0-dsi-csi2-tx # for V3U
      - renesas,r8a779g0-dsi-csi2-tx # for V4H
      - renesas,r8a779h0-dsi-csi2-tx # for V4M

  reg:
    maxItems: 1
@@ -80,12 +80,12 @@ properties:
          - const: 4

      port@2:
        $ref: /schemas/graph.yaml#/properties/port
        description: Video port for LVDS Channel-A output (panel or bridge).
        $ref: '#/$defs/lvds-port'

      port@3:
        $ref: /schemas/graph.yaml#/properties/port
        description: Video port for LVDS Channel-B output (panel or bridge).
        $ref: '#/$defs/lvds-port'

    required:
      - port@0
@@ -96,6 +96,36 @@ required:
  - reg
  - ports

$defs:
  lvds-port:
    $ref: /schemas/graph.yaml#/$defs/port-base
    unevaluatedProperties: false

    properties:
      endpoint:
        $ref: /schemas/media/video-interfaces.yaml#
        unevaluatedProperties: false

        properties:
          ti,lvds-termination-ohms:
            description: The value of near end differential termination in ohms.
            enum: [100, 200]
            default: 200

          ti,lvds-vod-swing-clock-microvolt:
            description: LVDS differential output voltage <min max> for clock
              lanes in microvolts.
            $ref: /schemas/types.yaml#/definitions/uint32-array
            minItems: 2
            maxItems: 2

          ti,lvds-vod-swing-data-microvolt:
            description: LVDS differential output voltage <min max> for data
              lanes in microvolts.
            $ref: /schemas/types.yaml#/definitions/uint32-array
            minItems: 2
            maxItems: 2

allOf:
  - if:
      properties:
@@ -8,6 +8,7 @@ title: MSM Display Port Controller

maintainers:
  - Kuogee Hsieh <quic_khsieh@quicinc.com>
  - Abhinav Kumar <quic_abhinavk@quicinc.com>

description: |
  Device tree bindings for DisplayPort host controller for MSM targets
@@ -30,6 +30,7 @@ properties:
          - qcom,sdm845-dsi-ctrl
          - qcom,sm6115-dsi-ctrl
          - qcom,sm6125-dsi-ctrl
          - qcom,sm6150-dsi-ctrl
          - qcom,sm6350-dsi-ctrl
          - qcom,sm6375-dsi-ctrl
          - qcom,sm7150-dsi-ctrl
@@ -349,6 +350,7 @@ allOf:
            enum:
              - qcom,sc7180-dsi-ctrl
              - qcom,sc7280-dsi-ctrl
              - qcom,sm6150-dsi-ctrl
              - qcom,sm7150-dsi-ctrl
              - qcom,sm8150-dsi-ctrl
              - qcom,sm8250-dsi-ctrl
@@ -20,6 +20,7 @@ properties:
      - qcom,dsi-phy-14nm-660
      - qcom,dsi-phy-14nm-8953
      - qcom,sm6125-dsi-phy-14nm
      - qcom,sm6150-dsi-phy-14nm

  reg:
    items:
@@ -0,0 +1,108 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,sm6150-dpu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Qualcomm SM6150 Display DPU

maintainers:
  - Abhinav Kumar <quic_abhinavk@quicinc.com>
  - Dmitry Baryshkov <dmitry.baryshkov@linaro.org>

$ref: /schemas/display/msm/dpu-common.yaml#

properties:
  compatible:
    const: qcom,sm6150-dpu

  reg:
    items:
      - description: Address offset and size for mdp register set
      - description: Address offset and size for vbif register set

  reg-names:
    items:
      - const: mdp
      - const: vbif

  clocks:
    items:
      - description: Display ahb clock
      - description: Display hf axi clock
      - description: Display core clock
      - description: Display vsync clock

  clock-names:
    items:
      - const: iface
      - const: bus
      - const: core
      - const: vsync

unevaluatedProperties: false

examples:
  - |
    #include <dt-bindings/interrupt-controller/arm-gic.h>
    #include <dt-bindings/power/qcom,rpmhpd.h>

    display-controller@ae01000 {
        compatible = "qcom,sm6150-dpu";
        reg = <0x0ae01000 0x8f000>,
              <0x0aeb0000 0x2008>;
        reg-names = "mdp", "vbif";

        clocks = <&dispcc_mdss_ahb_clk>,
                 <&gcc_disp_hf_axi_clk>,
                 <&dispcc_mdss_mdp_clk>,
                 <&dispcc_mdss_vsync_clk>;
        clock-names = "iface", "bus", "core", "vsync";

        assigned-clocks = <&dispcc_mdss_vsync_clk>;
        assigned-clock-rates = <19200000>;

        operating-points-v2 = <&mdp_opp_table>;
        power-domains = <&rpmhpd RPMHPD_CX>;

        interrupt-parent = <&mdss>;
        interrupts = <0>;

        ports {
            #address-cells = <1>;
            #size-cells = <0>;

            port@0 {
                reg = <0>;
                dpu_intf0_out: endpoint {
                };
            };

            port@1 {
                reg = <1>;
                dpu_intf1_out: endpoint {
                    remote-endpoint = <&mdss_dsi0_in>;
                };
            };
        };

        mdp_opp_table: opp-table {
            compatible = "operating-points-v2";

            opp-19200000 {
                opp-hz = /bits/ 64 <19200000>;
                required-opps = <&rpmhpd_opp_low_svs>;
            };

            opp-25600000 {
                opp-hz = /bits/ 64 <25600000>;
                required-opps = <&rpmhpd_opp_svs>;
            };

            opp-307200000 {
                opp-hz = /bits/ 64 <307200000>;
                required-opps = <&rpmhpd_opp_nom>;
            };
        };
    };
...
@@ -0,0 +1,245 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,sm6150-mdss.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Qualcomm SM6150 Display MDSS

maintainers:
  - Abhinav Kumar <quic_abhinavk@quicinc.com>
  - Dmitry Baryshkov <dmitry.baryshkov@linaro.org>

description:
  Device tree bindings for the MSM Mobile Display Subsystem (MDSS) that
  encapsulates sub-blocks like the DPU display controller, DSI and DP
  interfaces, etc. The MDSS device tree bindings are described for the
  SM6150 target.

$ref: /schemas/display/msm/mdss-common.yaml#

properties:
  compatible:
    items:
      - const: qcom,sm6150-mdss

  clocks:
    items:
      - description: Display AHB clock from gcc
      - description: Display hf axi clock
      - description: Display core clock

  clock-names:
    items:
      - const: iface
      - const: bus
      - const: core

  iommus:
    maxItems: 1

  interconnects:
    maxItems: 2

  interconnect-names:
    maxItems: 2

patternProperties:
  "^display-controller@[0-9a-f]+$":
    type: object
    additionalProperties: true
    properties:
      compatible:
        const: qcom,sm6150-dpu

  "^dsi@[0-9a-f]+$":
    type: object
    additionalProperties: true
    properties:
      compatible:
        items:
          - const: qcom,sm6150-dsi-ctrl
          - const: qcom,mdss-dsi-ctrl

  "^phy@[0-9a-f]+$":
    type: object
    additionalProperties: true
    properties:
      compatible:
        const: qcom,sm6150-dsi-phy-14nm

unevaluatedProperties: false

examples:
  - |
    #include <dt-bindings/clock/qcom,rpmh.h>
    #include <dt-bindings/interconnect/qcom,icc.h>
    #include <dt-bindings/interconnect/qcom,qcs615-rpmh.h>
    #include <dt-bindings/interrupt-controller/arm-gic.h>
    #include <dt-bindings/power/qcom,rpmhpd.h>

    display-subsystem@ae00000 {
        #address-cells = <1>;
        #size-cells = <1>;
        compatible = "qcom,sm6150-mdss";
        reg = <0x0ae00000 0x1000>;
        reg-names = "mdss";

        interconnects = <&mmss_noc MASTER_MDP0 QCOM_ICC_TAG_ALWAYS
                         &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
                        <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY
                         &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>;
        interconnect-names = "mdp0-mem", "cpu-cfg";

        power-domains = <&dispcc_mdss_gdsc>;

        clocks = <&dispcc_mdss_ahb_clk>,
                 <&gcc_disp_hf_axi_clk>,
                 <&dispcc_mdss_mdp_clk>;

        interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
        interrupt-controller;
        #interrupt-cells = <1>;

        iommus = <&apps_smmu 0x800 0x0>;

        ranges;

        display-controller@ae01000 {
            compatible = "qcom,sm6150-dpu";
            reg = <0x0ae01000 0x8f000>,
                  <0x0aeb0000 0x2008>;
            reg-names = "mdp", "vbif";

            clocks = <&dispcc_mdss_ahb_clk>,
                     <&gcc_disp_hf_axi_clk>,
                     <&dispcc_mdss_mdp_clk>,
                     <&dispcc_mdss_vsync_clk>;
            clock-names = "iface", "bus", "core", "vsync";

            assigned-clocks = <&dispcc_mdss_vsync_clk>;
            assigned-clock-rates = <19200000>;

            operating-points-v2 = <&mdp_opp_table>;
            power-domains = <&rpmhpd RPMHPD_CX>;

            interrupt-parent = <&mdss>;
            interrupts = <0>;

            ports {
                #address-cells = <1>;
                #size-cells = <0>;

                port@0 {
                    reg = <0>;
                    dpu_intf0_out: endpoint {
                    };
                };

                port@1 {
                    reg = <1>;
                    dpu_intf1_out: endpoint {
                        remote-endpoint = <&mdss_dsi0_in>;
                    };
                };
            };

            mdp_opp_table: opp-table {
                compatible = "operating-points-v2";

                opp-19200000 {
                    opp-hz = /bits/ 64 <19200000>;
                    required-opps = <&rpmhpd_opp_low_svs>;
                };

                opp-25600000 {
                    opp-hz = /bits/ 64 <25600000>;
                    required-opps = <&rpmhpd_opp_svs>;
                };

                opp-307200000 {
                    opp-hz = /bits/ 64 <307200000>;
                    required-opps = <&rpmhpd_opp_nom>;
                };
            };
        };

        dsi@ae94000 {
            compatible = "qcom,sm6150-dsi-ctrl",
                         "qcom,mdss-dsi-ctrl";
            reg = <0x0ae94000 0x400>;
            reg-names = "dsi_ctrl";

            interrupt-parent = <&mdss>;
            interrupts = <4>;

            clocks = <&dispcc_mdss_byte0_clk>,
                     <&dispcc_mdss_byte0_intf_clk>,
                     <&dispcc_mdss_pclk0_clk>,
                     <&dispcc_mdss_esc0_clk>,
                     <&dispcc_mdss_ahb_clk>,
                     <&gcc_disp_hf_axi_clk>;
            clock-names = "byte",
                          "byte_intf",
                          "pixel",
                          "core",
                          "iface",
                          "bus";

            assigned-clocks = <&dispcc_mdss_byte0_clk_src>,
                              <&dispcc_mdss_pclk0_clk_src>;
            assigned-clock-parents = <&mdss_dsi0_phy 0>,
                                     <&mdss_dsi0_phy 1>;

            operating-points-v2 = <&dsi0_opp_table>;

            phys = <&mdss_dsi0_phy>;

            #address-cells = <1>;
            #size-cells = <0>;

            ports {
                #address-cells = <1>;
                #size-cells = <0>;

                port@0 {
                    reg = <0>;
                    mdss_dsi0_in: endpoint {
                        remote-endpoint = <&dpu_intf1_out>;
                    };
                };

                port@1 {
                    reg = <1>;
                    mdss_dsi0_out: endpoint {
                    };
                };
            };

            dsi0_opp_table: opp-table {
                compatible = "operating-points-v2";

                opp-164000000 {
                    opp-hz = /bits/ 64 <164000000>;
                    required-opps = <&rpmhpd_opp_low_svs>;
                };
            };
        };

        mdss_dsi0_phy: phy@ae94400 {
            compatible = "qcom,sm6150-dsi-phy-14nm";
            reg = <0x0ae94400 0x100>,
                  <0x0ae94500 0x300>,
                  <0x0ae94800 0x188>;
            reg-names = "dsi_phy",
                        "dsi_phy_lane",
                        "dsi_pll";

            #clock-cells = <1>;
            #phy-cells = <0>;

            clocks = <&dispcc_mdss_ahb_clk>,
                     <&rpmhcc RPMH_CXO_CLK>;
            clock-names = "iface", "ref";
        };
    };
...
@@ -42,6 +42,8 @@ properties:
        # Admatec 9904379 10.1" 1024x600 LVDS panel
      - admatec,9904379
      - auo,b101ew05
        # AUO G084SN05 V9 8.4" 800x600 LVDS panel
      - auo,g084sn05
        # Chunghwa Picture Tubes Ltd. 7" WXGA (800x1280) TFT LCD LVDS panel
      - chunghwa,claa070wp03xg
        # EDT ETML0700Z9NDHA 7.0" WSVGA (1024x600) color TFT LCD LVDS panel
@@ -206,12 +206,16 @@ properties:
      - mitsubishi,aa070mc01-ca1
        # Mitsubishi AA084XE01 8.4" XGA TFT LCD panel
      - mitsubishi,aa084xe01
        # Multi-Inno Technology Co.,Ltd MI0700A2T-30 7" 800x480 TFT Resistive Touch Module
      - multi-inno,mi0700a2t-30
        # Multi-Inno Technology Co.,Ltd MI0700S4T-6 7" 800x480 TFT Resistive Touch Module
      - multi-inno,mi0700s4t-6
        # Multi-Inno Technology Co.,Ltd MI0800FT-9 8" 800x600 TFT Resistive Touch Module
      - multi-inno,mi0800ft-9
        # Multi-Inno Technology Co.,Ltd MI1010AIT-1CP 10.1" 1280x800 LVDS IPS Cap Touch Mod.
      - multi-inno,mi1010ait-1cp
        # Multi-Inno Technology Co.,Ltd MI1010Z1T-1CP11 10.1" 1024x600 TFT Resistive Touch Module
      - multi-inno,mi1010z1t-1cp11
        # NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel
      - nec,nl12880bc20-05
        # NEC LCD Technologies,Ltd. WQVGA TFT LCD panel
@@ -280,10 +284,14 @@ properties:
      - team-source-display,tst043015cmhx
        # Tianma Micro-electronics TM070JDHG30 7.0" WXGA TFT LCD panel
      - tianma,tm070jdhg30
        # Tianma Micro-electronics TM070JDHG34-00 7.0" WXGA (1280x800) LVDS TFT LCD panel
      - tianma,tm070jdhg34-00
        # Tianma Micro-electronics TM070JVHG33 7.0" WXGA TFT LCD panel
      - tianma,tm070jvhg33
        # Tianma Micro-electronics TM070RVHG71 7.0" WXGA TFT LCD panel
      - tianma,tm070rvhg71
        # Topland TIAN-G07017-01 7.0" WSVGA TFT-LCD panel with capacitive touch
      - topland,tian-g07017-01
        # Toshiba 8.9" WXGA (1280x768) TFT LCD panel
      - toshiba,lt089ac29000
        # TPK U.S.A. LLC Fusion 7" 800 x 480 (WVGA) LCD panel with capacitive touch
@@ -23,6 +23,8 @@ properties:
          - samsung,atna45af01
          # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel
          - samsung,atna45dc02
          # Samsung 15.6" 3K (2880x1620 pixels) eDP AMOLED panel
          - samsung,atna56ac03
      - const: samsung,atna33xc20

  enable-gpios: true
@@ -41,6 +41,7 @@ properties:
      - renesas,du-r8a77995 # for R-Car D3 compatible DU
      - renesas,du-r8a779a0 # for R-Car V3U compatible DU
      - renesas,du-r8a779g0 # for R-Car V4H compatible DU
      - renesas,du-r8a779h0 # for R-Car V4M compatible DU

  reg:
    maxItems: 1
@@ -69,14 +70,12 @@ properties:
        $ref: /schemas/graph.yaml#/properties/port
        unevaluatedProperties: false

    required:
      - port@0
      - port@1

    unevaluatedProperties: false

  renesas,cmms:
    $ref: /schemas/types.yaml#/definitions/phandle-array
    minItems: 1
    maxItems: 4
    items:
      maxItems: 1
    description:
@@ -85,6 +84,8 @@ properties:

  renesas,vsps:
    $ref: /schemas/types.yaml#/definitions/phandle-array
    minItems: 1
    maxItems: 4
    items:
      items:
        - description: phandle to VSP instance that serves the DU channel
@@ -489,9 +490,11 @@ allOf:

        renesas,cmms:
          minItems: 4
          maxItems: 4

        renesas,vsps:
          minItems: 4
          maxItems: 4

      required:
        - clock-names
@@ -558,9 +561,11 @@ allOf:

        renesas,cmms:
          minItems: 3
          maxItems: 3

        renesas,vsps:
          minItems: 3
          maxItems: 3

      required:
        - clock-names
@@ -627,9 +632,11 @@ allOf:

        renesas,cmms:
          minItems: 3
          maxItems: 3

        renesas,vsps:
          minItems: 3
          maxItems: 3

      required:
        - clock-names
@@ -683,7 +690,7 @@ allOf:
          - port@1

        renesas,vsps:
          minItems: 1
          maxItems: 1

      required:
        - clock-names
@@ -746,9 +753,11 @@ allOf:

        renesas,cmms:
          minItems: 2
          maxItems: 2

        renesas,vsps:
          minItems: 2
          maxItems: 2

      required:
        - clock-names
@@ -799,6 +808,54 @@ allOf:

        renesas,vsps:
          minItems: 2
          maxItems: 2

      required:
        - clock-names
        - interrupts
        - resets
        - reset-names
        - renesas,vsps

  - if:
      properties:
        compatible:
          contains:
            enum:
              - renesas,du-r8a779h0
    then:
      properties:
        clocks:
          items:
            - description: Functional clock

        clock-names:
          items:
            - const: du.0

        interrupts:
          maxItems: 1

        resets:
          maxItems: 1

        reset-names:
          items:
            - const: du.0

        ports:
          properties:
            port@0:
              description: DSI 0
            port@1: false
            port@2: false
            port@3: false

          required:
            - port@0

        renesas,vsps:
          maxItems: 1

      required:
        - clock-names
@@ -0,0 +1,120 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/rockchip/rockchip,rk3588-mipi-dsi2.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Rockchip specific extensions to the Synopsys Designware MIPI DSI2

maintainers:
  - Heiko Stuebner <heiko@sntech.de>

properties:
  compatible:
    enum:
      - rockchip,rk3588-mipi-dsi2

  reg:
    maxItems: 1

  interrupts:
    maxItems: 1

  clocks:
    maxItems: 2

  clock-names:
    items:
      - const: pclk
      - const: sys

  rockchip,grf:
    $ref: /schemas/types.yaml#/definitions/phandle
    description:
      This SoC uses GRF regs to switch between vopl/vopb.

  phys:
    maxItems: 1

  phy-names:
    const: dcphy

  power-domains:
    maxItems: 1

  resets:
    maxItems: 1

  reset-names:
    const: apb

  ports:
    $ref: /schemas/graph.yaml#/properties/ports

    properties:
      port@0:
        $ref: /schemas/graph.yaml#/properties/port
        description: Input node to receive pixel data.

      port@1:
        $ref: /schemas/graph.yaml#/properties/port
        description: DSI output node to panel.

    required:
      - port@0
      - port@1

required:
  - compatible
  - clocks
  - clock-names
  - rockchip,grf
  - phys
  - phy-names
  - ports
  - reg

allOf:
  - $ref: /schemas/display/dsi-controller.yaml#

unevaluatedProperties: false

examples:
  - |
    #include <dt-bindings/clock/rockchip,rk3588-cru.h>
    #include <dt-bindings/interrupt-controller/arm-gic.h>
    #include <dt-bindings/interrupt-controller/irq.h>
    #include <dt-bindings/phy/phy.h>
    #include <dt-bindings/power/rk3588-power.h>
    #include <dt-bindings/reset/rockchip,rk3588-cru.h>

    soc {
        #address-cells = <2>;
        #size-cells = <2>;

        dsi@fde20000 {
            compatible = "rockchip,rk3588-mipi-dsi2";
            reg = <0x0 0xfde20000 0x0 0x10000>;
            interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH 0>;
            clocks = <&cru PCLK_DSIHOST0>, <&cru CLK_DSIHOST0>;
            clock-names = "pclk", "sys";
            resets = <&cru SRST_P_DSIHOST0>;
            reset-names = "apb";
            power-domains = <&power RK3588_PD_VOP>;
            phys = <&mipidcphy0 PHY_TYPE_DPHY>;
            phy-names = "dcphy";
            rockchip,grf = <&vop_grf>;

            ports {
                #address-cells = <1>;
                #size-cells = <0>;
                dsi0_in: port@0 {
                    reg = <0>;
                };

                dsi0_out: port@1 {
                    reg = <1>;
                };
            };
        };
    };
@@ -100,12 +100,16 @@ properties:
      - description: Video layer, plane 1 (U/V or U)
      - description: Video layer, plane 2 (V)
      - description: Graphics layer
      - description: Audio channel 0
      - description: Audio channel 1
  dma-names:
    items:
      - const: vid0
      - const: vid1
      - const: vid2
      - const: gfx0
      - const: aud0
      - const: aud1

  phys:
    description: PHYs for the DP data lanes
@@ -194,11 +198,13 @@ examples:
        power-domains = <&pd_dp>;
        resets = <&reset ZYNQMP_RESET_DP>;

        dma-names = "vid0", "vid1", "vid2", "gfx0";
        dma-names = "vid0", "vid1", "vid2", "gfx0", "aud0", "aud1";
        dmas = <&xlnx_dpdma 0>,
               <&xlnx_dpdma 1>,
               <&xlnx_dpdma 2>,
               <&xlnx_dpdma 3>;
               <&xlnx_dpdma 3>,
               <&xlnx_dpdma 4>,
               <&xlnx_dpdma 5>;

        phys = <&psgtr 1 PHY_TYPE_DP 0 3>,
               <&psgtr 0 PHY_TYPE_DP 1 3>;
@@ -1528,6 +1528,8 @@ patternProperties:
    description: Topeet
  "^topic,.*":
    description: Topic Embedded Systems
  "^topland,.*":
    description: Topland Electronics (H.K) Co., Ltd.
  "^toppoly,.*":
    description: TPO (deprecated, use tpo)
    deprecated: true
54	Documentation/gpu/drm-compute.rst (new file)
@@ -0,0 +1,54 @@
==================================
Long running workloads and compute
==================================

Long running workloads (compute) are workloads that will not complete in 10
seconds (the time a user will typically wait before reaching for the power
button). This means that other techniques need to be used to manage those
workloads, techniques that cannot use fences.

Some hardware may schedule compute jobs and have no way to preempt them, or
to have their memory swapped out from under them. Or they may simply want
their workload not to be preempted or swapped out at all.

This means that it differs from what is described in driver-api/dma-buf.rst.

As with normal compute jobs, dma-fence may not be used at all, in this case
not even to force preemption. The driver is simply forced to unmap a BO from
the long-running compute job's address space on unbind immediately, without
even waiting for the workload to complete. Effectively this terminates the
workload when there is no hardware support to recover.

Since this is undesirable, there need to be mitigations to prevent a workload
from being terminated. There are several possible approaches, all with their
advantages and drawbacks.

The first approach you will likely try is to pin all buffers used by compute.
This guarantees that the job will run uninterrupted, but it also allows a
trivial denial of service attack by pinning as much memory as possible,
hogging all GPU memory, and possibly a huge chunk of CPU memory.

A second approach that will work slightly better on its own is adding an
option not to evict when creating a new job (of any kind). If all of userspace
opts in to this flag, it would prevent cooperating userspace from
force-terminating older compute jobs to start a new one.

If job preemption and recoverable pagefaults are not available, those are the
only approaches possible. So even with those, you want a separate way of
controlling resources. The standard kernel way of doing so is cgroups.

This creates a third option: using cgroups to prevent eviction. Both GPU and
driver-allocated CPU memory would be accounted to the correct cgroup, and
eviction would be made cgroup aware. This allows the GPU to be partitioned
into cgroups that will allow jobs to run next to each other without
interference.

The interface to the cgroup would be similar to the current CPU memory
interface, with similar semantics for min/low/high/max, if eviction can
be made cgroup aware.

What should be noted is that each memory region (tiled memory for example)
should have its own accounting.

The key is set to the regionid set by the driver, for example "tile0".
For the value of $card, we use drmGetUnique().
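
An entry in such a per-region interface file might then look like the
following, mirroring the dmem examples in
Documentation/admin-guide/cgroup-v2.rst (the device name and value are
illustrative)::

	drm/0000:03:00.0/tile0 1073741824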
@@ -221,6 +221,9 @@ Panel Helper Reference
.. kernel-doc:: drivers/gpu/drm/drm_panel_orientation_quirks.c
   :export:

.. kernel-doc:: drivers/gpu/drm/drm_panel_backlight_quirks.c
   :export:

Panel Self Refresh Helper Reference
===================================

@@ -145,57 +145,57 @@ both.
Memory
^^^^^^

- drm-memory-<region>: <uint> [KiB|MiB]

Each possible memory type which can be used to store buffer objects by the
GPU in question shall be given a stable and unique name to be returned as the
string here.
Each possible memory type which can be used to store buffer objects by the GPU
in question shall be given a stable and unique name to be used as the "<region>"
string.

The region name "memory" is reserved to refer to normal system memory.

Value shall reflect the amount of storage currently consumed by the buffer
The value shall reflect the amount of storage currently consumed by the buffer
objects belong to this client, in the respective memory region.

Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB'
indicating kibi- or mebi-bytes.

This key is deprecated and is an alias for drm-resident-<region>. Only one of
the two should be present in the output.
- drm-total-<region>: <uint> [KiB|MiB]

The total size of all requested buffers, including both shared and private
memory. The backing store for the buffers does not need to be currently
instantiated to count under this category. To avoid double-counting, if a buffer
has multiple regions where it can be allocated to, the implementation should
consistently select a single region for accounting purposes.

- drm-shared-<region>: <uint> [KiB|MiB]

The total size of buffers that are shared with another file (e.g., have more
than a single handle).

- drm-total-<region>: <uint> [KiB|MiB]

The total size of all created buffers including shared and private memory. The
backing store for the buffers does not have to be currently instantiated to be
counted under this category.
The total size of buffers that are shared with another file (i.e., have more
than one handle). The same requirement to avoid double-counting that applies to
drm-total-<region> also applies here.

- drm-resident-<region>: <uint> [KiB|MiB]

The total size of buffers that are resident (have their backing store present or
instantiated) in the specified region.
The total size of buffers that are resident (i.e., have their backing store
present or instantiated) in the specified region.

This is an alias for drm-memory-<region> and only one of the two should be
present in the output.
- drm-memory-<region>: <uint> [KiB|MiB]

This key is deprecated and is only printed by amdgpu; it is an alias for
drm-resident-<region>.

- drm-purgeable-<region>: <uint> [KiB|MiB]

The total size of buffers that are purgeable.
The total size of buffers that are resident and purgeable.

For example drivers which implement a form of 'madvise' like functionality can
here count buffers which have instantiated backing store, but have been marked
with an equivalent of MADV_DONTNEED.
For example, drivers that implement functionality similar to 'madvise' can count
buffers that have instantiated backing stores but have been marked with an
equivalent of MADV_DONTNEED.

- drm-active-<region>: <uint> [KiB|MiB]

The total size of buffers that are active on one or more engines.

One practical example of this can be presence of unsignaled fences in an GEM
buffer reservation object. Therefore the active category is a subset of
resident.
One practical example of this could be the presence of unsignaled fences in a
GEM buffer reservation object. Therefore, the active category is a subset of the
resident category.

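Putting the keys above together, the memory section of a client's fdinfo
output for a single region might look like the following (the region name and
sizes are illustrative, not taken from a real driver)::

	drm-total-vram0:	8192 KiB
	drm-shared-vram0:	2048 KiB
	drm-resident-vram0:	4096 KiB
	drm-purgeable-vram0:	1024 KiB
	drm-active-vram0:	512 KiB
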
Implementation Details
======================

@@ -23,4 +23,5 @@ DG2, etc is provided to prototype the driver.
   xe_firmware
   xe_tile
   xe_debugging
   xe_devcoredump
   xe-drm-usage-stats.rst
14	Documentation/gpu/xe/xe_devcoredump.rst (new file)
@@ -0,0 +1,14 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)

==================
Xe Device Coredump
==================

.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c
   :doc: Xe device coredump

Internal API
============

.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c
   :internal:
17	MAINTAINERS
@@ -1193,6 +1193,17 @@ L: linux-spi@vger.kernel.org
S:	Supported
F:	drivers/spi/spi-amd.c

AMD XDNA DRIVER
M:	Min Ma <min.ma@amd.com>
M:	Lizhi Hou <lizhi.hou@amd.com>
L:	dri-devel@lists.freedesktop.org
S:	Supported
T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
F:	Documentation/accel/amdxdna/
F:	drivers/accel/amdxdna/
F:	include/trace/events/amdxdna.h
F:	include/uapi/drm/amdxdna_accel.h

AMD XGBE DRIVER
M:	"Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
L:	netdev@vger.kernel.org
@@ -7090,7 +7101,8 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F:	drivers/gpu/drm/sun4i/sun8i*

DRM DRIVER FOR ARM PL111 CLCD
S:	Orphan
M:	Linus Walleij <linus.walleij@linaro.org>
S:	Maintained
T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
F:	drivers/gpu/drm/pl111/

@@ -7405,7 +7417,7 @@ L: virtualization@lists.linux.dev
S:	Obsolete
W:	https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
F:	drivers/gpu/drm/tiny/cirrus.c
F:	drivers/gpu/drm/tiny/cirrus-qemu.c

DRM DRIVER FOR QXL VIRTUAL GPU
M:	Dave Airlie <airlied@redhat.com>
@@ -7816,6 +7828,7 @@ F: drivers/gpu/drm/rockchip/

DRM DRIVERS FOR STI
M:	Alain Volmat <alain.volmat@foss.st.com>
M:	Raphael Gallais-Pou <rgallaispou@gmail.com>
L:	dri-devel@lists.freedesktop.org
S:	Maintained
T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
@@ -1306,11 +1306,14 @@
				      "dp_vtc_pixel_clk_in";
			power-domains = <&zynqmp_firmware PD_DP>;
			resets = <&zynqmp_reset ZYNQMP_RESET_DP>;
			dma-names = "vid0", "vid1", "vid2", "gfx0";
			dma-names = "vid0", "vid1", "vid2", "gfx0",
				    "aud0", "aud1";
			dmas = <&zynqmp_dpdma ZYNQMP_DPDMA_VIDEO0>,
			       <&zynqmp_dpdma ZYNQMP_DPDMA_VIDEO1>,
			       <&zynqmp_dpdma ZYNQMP_DPDMA_VIDEO2>,
			       <&zynqmp_dpdma ZYNQMP_DPDMA_GRAPHICS>;
			       <&zynqmp_dpdma ZYNQMP_DPDMA_GRAPHICS>,
			       <&zynqmp_dpdma ZYNQMP_DPDMA_AUDIO0>,
			       <&zynqmp_dpdma ZYNQMP_DPDMA_AUDIO1>;

			ports {
				#address-cells = <1>;
@@ -24,6 +24,7 @@ menuconfig DRM_ACCEL
	  different device files, called accel/accel* (in /dev, sysfs
	  and debugfs).

source "drivers/accel/amdxdna/Kconfig"
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
source "drivers/accel/qaic/Kconfig"
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only

obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
18	drivers/accel/amdxdna/Kconfig (new file)
@@ -0,0 +1,18 @@
# SPDX-License-Identifier: GPL-2.0-only

config DRM_ACCEL_AMDXDNA
	tristate "AMD AI Engine"
	depends on AMD_IOMMU
	depends on DRM_ACCEL
	depends on PCI && HAS_IOMEM
	depends on X86_64
	select DRM_SCHED
	select DRM_GEM_SHMEM_HELPER
	select FW_LOADER
	select HMM_MIRROR
	help
	  Choose this option to enable support for the NPU integrated into AMD
	  client CPUs like the AMD Ryzen AI 300 Series. The AMD NPU can be used
	  to accelerate machine learning applications.

	  If "M" is selected, the driver module will be amdxdna.
23	drivers/accel/amdxdna/Makefile (new file)
@@ -0,0 +1,23 @@
# SPDX-License-Identifier: GPL-2.0-only

amdxdna-y := \
	aie2_ctx.o \
	aie2_error.o \
	aie2_message.o \
	aie2_pci.o \
	aie2_pm.o \
	aie2_psp.o \
	aie2_smu.o \
	aie2_solver.o \
	amdxdna_ctx.o \
	amdxdna_gem.o \
	amdxdna_mailbox.o \
	amdxdna_mailbox_helper.o \
	amdxdna_pci_drv.o \
	amdxdna_sysfs.o \
	npu1_regs.o \
	npu2_regs.o \
	npu4_regs.o \
	npu5_regs.o \
	npu6_regs.o
obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna.o
3	drivers/accel/amdxdna/TODO (new file)
@@ -0,0 +1,3 @@
- Add import and export BO support
- Add debugfs support
- Add debug BO support
910	drivers/accel/amdxdna/aie2_ctx.c (new file)
@@ -0,0 +1,910 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
#include <linux/hmm.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <trace/events/amdxdna.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");

#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */

static void aie2_job_release(struct kref *ref)
{
	struct amdxdna_sched_job *job;

	job = container_of(ref, struct amdxdna_sched_job, refcnt);
	amdxdna_sched_job_cleanup(job);
	if (job->out_fence)
		dma_fence_put(job->out_fence);
	kfree(job);
}

static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}

/* The caller sets bad_job only from aie2_sched_job_timedout(); otherwise it is NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	aie2_destroy_context(xdna->dev_handle, hwctx);
}

static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
	int ret;

	ret = aie2_create_context(xdna->dev_handle, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
		goto out;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
		goto out;
	}

	if (hwctx->status != HWCTX_STAT_READY) {
		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
		goto out;
	}

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
		goto out;
	}

out:
	drm_sched_start(&hwctx->priv->sched, 0);
	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
	return ret;
}

void aie2_restart_ctx(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
		if (hwctx->status != HWCTX_STAT_STOP)
			continue;

		hwctx->status = hwctx->old_status;
		XDNA_DBG(xdna, "Resetting %s", hwctx->name);
		aie2_hwctx_restart(xdna, hwctx);
	}
	mutex_unlock(&client->hwctx_lock);
}

static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence, *out_fence = NULL;
	int ret;

	fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!fence)
		return NULL;

	ret = dma_fence_chain_find_seqno(&fence, seq);
	if (ret)
		goto out;

	out_fence = dma_fence_get(dma_fence_chain_contained(fence));

out:
	dma_fence_put(fence);
	return out_fence;
}

static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
{
	struct dma_fence *fence;

	fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
	if (!fence)
		return;

	dma_fence_wait(fence, false);
	dma_fence_put(fence);
}

void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * Command timeout is unlikely. But if it happens, it doesn't
	 * break the system. aie2_hwctx_stop() will destroy mailbox
	 * and abort all commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);
	hwctx->old_status = hwctx->status;
	hwctx->status = HWCTX_STAT_STOP;
}

void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	/*
	 * The resume path cannot guarantee that the mailbox channel can be
	 * regenerated. If this happens, submitting a message to this
	 * mailbox channel will return an error.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	hwctx->status = hwctx->old_status;
	aie2_hwctx_restart(xdna, hwctx);
}

static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	up(&job->hwctx->priv->job_sem);
	job->job_done = true;
	dma_fence_put(fence);
	mmput_async(job->mm);
	aie2_job_put(job);
}

static int
aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	u32 ret = 0;
	u32 status;

	cmd_abo = job->cmd_bo;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	status = *data;
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

out:
	aie2_sched_notify(job);
	return ret;
}

static int
aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	u32 ret = 0;
	u32 status;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		ret = -EINVAL;
		goto out;
	}

	status = *data;
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);

out:
	aie2_sched_notify(job);
	return ret;
}

static int
aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	struct cmd_chain_resp *resp;
	struct amdxdna_dev *xdna;
	u32 fail_cmd_status;
	u32 fail_cmd_idx;
	u32 ret = 0;

	cmd_abo = job->cmd_bo;
	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	resp = (struct cmd_chain_resp *)data;
	xdna = job->hwctx->client->xdna;
	XDNA_DBG(xdna, "Status 0x%x", resp->status);
	if (resp->status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
		goto out;
	}

	/* Slow path to handle error, read from ringbuf on BAR */
	fail_cmd_idx = resp->fail_cmd_idx;
	fail_cmd_status = resp->fail_cmd_status;
	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
		 fail_cmd_idx, fail_cmd_status);

	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}
	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);

		cc->error_index = fail_cmd_idx;
		if (cc->error_index >= cc->command_count)
			cc->error_index = 0;
	}
out:
	aie2_sched_notify(job);
	return ret;
}

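/*
 * Each submission path registers a matching mailbox response handler:
 * aie2_execbuf() uses aie2_sched_resp_handler(), aie2_sync_bo() uses
 * aie2_sched_nocmd_resp_handler() (there is no command BO to update),
 * and the command-list paths use aie2_sched_cmdlist_resp_handler().
 * All of them end in aie2_sched_notify() to signal completion.
 */
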
static struct dma_fence *
aie2_sched_job_run(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct dma_fence *fence;
	int ret;

	if (!mmget_not_zero(job->mm))
		return ERR_PTR(-ESRCH);

	kref_get(&job->refcnt);
	fence = dma_fence_get(job->fence);

	if (unlikely(!cmd_abo)) {
		ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
		goto out;
	}

	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else if (force_cmdlist)
		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else
		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);

out:
	if (ret) {
		dma_fence_put(job->fence);
		aie2_job_put(job);
		mmput(job->mm);
		fence = ERR_PTR(ret);
	}
	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);

	return fence;
}

static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}

static enum drm_gpu_sched_stat
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct amdxdna_dev *xdna;

	xdna = hwctx->client->xdna;
	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
	mutex_lock(&xdna->dev_lock);
	aie2_hwctx_stop(xdna, hwctx, sched_job);

	aie2_hwctx_restart(xdna, hwctx);
	mutex_unlock(&xdna->dev_lock);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};

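/*
 * On timeout the context is stopped with the bad job and restarted under
 * dev_lock; returning DRM_GPU_SCHED_STAT_NOMINAL signals the scheduler
 * that the device is operational again after the recovery.
 */
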
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int start, end, first, last;
	u32 width = 1, entries = 0;
	int i;

	if (!hwctx->num_tiles) {
		XDNA_ERR(xdna, "Number of tiles is zero");
		return -EINVAL;
	}

	ndev = xdna->dev_handle;
	if (unlikely(!ndev->metadata.core.row_count)) {
		XDNA_WARN(xdna, "Core tile row count is zero");
		return -EINVAL;
	}

	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
		return -EINVAL;
	}

	if (ndev->priv->col_align == COL_ALIGN_NATURE)
		width = hwctx->num_col;

	/*
	 * In the range [start, end], find the columns that are multiples
	 * of width.
	 * 'first' is the first such column,
	 * 'last' is the last such column,
	 * 'entries' is the total number of such columns.
	 */
	start = xdna->dev_info->first_col;
	end = ndev->total_col - hwctx->num_col;
	if (start > 0 && end == 0) {
		XDNA_DBG(xdna, "Force start from col 0");
		start = 0;
	}
	first = start + (width - start % width) % width;
	last = end - end % width;
	if (last >= first)
		entries = (last - first) / width + 1;
	XDNA_DBG(xdna, "start %d end %d first %d last %d",
		 start, end, first, last);

	if (unlikely(!entries)) {
		XDNA_ERR(xdna, "Start %d end %d width %d",
			 start, end, width);
		return -EINVAL;
	}

	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
	if (!hwctx->col_list)
		return -ENOMEM;

	hwctx->col_list_len = entries;
	hwctx->col_list[0] = first;
	for (i = 1; i < entries; i++)
		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;

	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
			     entries * sizeof(*hwctx->col_list), false);
	return 0;
}

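/*
 * Worked example of the alignment math above, assuming first_col 1,
 * 8 total columns, a 4-column context and natural alignment (width 4):
 *   end     = 8 - 4 = 4
 *   first   = 1 + (4 - 1 % 4) % 4 = 4
 *   last    = 4 - 4 % 4 = 4
 *   entries = (4 - 4) / 4 + 1 = 1
 * so the only legal start column is 4 and col_list = { 4 }.
 */
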
static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct alloc_requests *xrs_req;
	int ret;

	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
	if (!xrs_req)
		return -ENOMEM;

	xrs_req->cdo.start_cols = hwctx->col_list;
	xrs_req->cdo.cols_len = hwctx->col_list_len;
	xrs_req->cdo.ncols = hwctx->num_col;
	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;

	xrs_req->rqos.gops = hwctx->qos.gops;
	xrs_req->rqos.fps = hwctx->qos.fps;
	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
	xrs_req->rqos.latency = hwctx->qos.latency;
	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
	xrs_req->rqos.priority = hwctx->qos.priority;

	xrs_req->rid = (uintptr_t)hwctx;

	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
	if (ret)
		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);

	kfree(xrs_req);
	return ret;
}

static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	int ret;

	ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
	if (ret)
		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
}

static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct drm_file *filp = hwctx->client->filp;
	struct drm_syncobj *syncobj;
	u32 hdl;
	int ret;

	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;

	ret = drm_syncobj_create(&syncobj, 0, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
		return ret;
	}
	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
	if (ret) {
		drm_syncobj_put(syncobj);
		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
		return ret;
	}
	hwctx->priv->syncobj = syncobj;
	hwctx->syncobj_hdl = hdl;

	return 0;
}

static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
	/*
	 * The syncobj_hdl is owned by user space and will be cleaned up
	 * separately.
	 */
	drm_syncobj_put(hwctx->priv->syncobj);
}

int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	struct drm_gpu_scheduler *sched;
	struct amdxdna_hwctx_priv *priv;
	struct amdxdna_gem_obj *heap;
	struct amdxdna_dev_hdl *ndev;
	int i, ret;

	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	hwctx->priv = priv;

	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		XDNA_ERR(xdna, "The client dev heap object does not exist");
		mutex_unlock(&client->mm_lock);
		ret = -ENOENT;
		goto free_priv;
	}
	drm_gem_object_get(to_gobj(heap));
	mutex_unlock(&client->mm_lock);
	priv->heap = heap;
	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);

	ret = amdxdna_gem_pin(heap);
	if (ret) {
		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
		goto put_heap;
	}

	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		struct amdxdna_gem_obj *abo;
		struct amdxdna_drm_create_bo args = {
			.flags = 0,
			.type = AMDXDNA_BO_DEV,
			.vaddr = 0,
			.size = MAX_CHAIN_CMDBUF_SIZE,
		};

		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
		if (IS_ERR(abo)) {
			ret = PTR_ERR(abo);
			goto free_cmd_bufs;
		}

		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
			 i, abo->mem.dev_addr, abo->mem.size);
		priv->cmd_buf[i] = abo;
	}

	sched = &priv->sched;
	mutex_init(&priv->io_lock);

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->io_lock);
	fs_reclaim_release(GFP_KERNEL);

	ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
			     HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
			     NULL, NULL, hwctx->name, xdna->ddev.dev);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
		goto free_cmd_bufs;
	}

	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init sched entity. ret %d", ret);
		goto free_sched;
	}

	ret = aie2_hwctx_col_list(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
		goto free_entity;
	}

	ret = aie2_alloc_resource(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
		goto free_col_list;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
		goto release_resource;
	}

	ret = aie2_ctx_syncobj_create(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
		goto release_resource;
	}

	hwctx->status = HWCTX_STAT_INIT;
	ndev = xdna->dev_handle;
	ndev->hwctx_num++;

	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);

	return 0;

release_resource:
	aie2_release_resource(hwctx);
free_col_list:
	kfree(hwctx->col_list);
free_entity:
	drm_sched_entity_destroy(&priv->entity);
free_sched:
	drm_sched_fini(&priv->sched);
free_cmd_bufs:
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		if (!priv->cmd_buf[i])
			continue;
		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
	}
	amdxdna_gem_unpin(heap);
put_heap:
	drm_gem_object_put(to_gobj(heap));
free_priv:
	kfree(priv);
	return ret;
}

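/*
 * The error labels above unwind in exactly the reverse order of the
 * setup steps in aie2_hwctx_init(), so a failure at any step releases
 * only what was already acquired.
 */
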
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev_hdl *ndev;
	struct amdxdna_dev *xdna;
	int idx;

	xdna = hwctx->client->xdna;
	ndev = xdna->dev_handle;
	ndev->hwctx_num--;
	drm_sched_wqueue_stop(&hwctx->priv->sched);

	/* Now the scheduler will not send commands to the device. */
	aie2_release_resource(hwctx);

	/*
	 * All submitted commands are aborted.
	 * Restart the scheduler queues to clean up jobs. If
	 * aie2_sched_job_run() is called now, it fails with -ENODEV.
	 */
	drm_sched_wqueue_start(&hwctx->priv->sched);

	aie2_hwctx_wait_for_idle(hwctx);
	drm_sched_entity_destroy(&hwctx->priv->entity);
	drm_sched_fini(&hwctx->priv->sched);
	aie2_ctx_syncobj_destroy(hwctx);

	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);

	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
	amdxdna_gem_unpin(hwctx->priv->heap);
	drm_gem_object_put(to_gobj(hwctx->priv->heap));

	mutex_destroy(&hwctx->priv->io_lock);
	kfree(hwctx->col_list);
	kfree(hwctx->priv);
	kfree(hwctx->cus);
}

static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 total_size;
	int ret;

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->status != HWCTX_STAT_INIT) {
		XDNA_ERR(xdna, "Re-configuring CU is not supported");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CU is zero");
		return -EINVAL;
	}

	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto free_cus;
	}

	wmb(); /* To avoid locking in command submit when checking status */
	hwctx->status = HWCTX_STAT_READY;

	return 0;

free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}

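/*
 * The wmb() above orders the stores that populate hwctx->cus before the
 * store of the READY status, so the submit path can test hwctx->status
 * without taking a lock and still observe a fully populated CU config.
 */
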
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return -EOPNOTSUPP;
	default:
		XDNA_DBG(xdna, "Not supported type %d", type);
		return -EOPNOTSUPP;
	}
}

static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct mm_struct *mm = abo->mem.notifier.mm;
	struct hmm_range range = { 0 };
	unsigned long timeout;
	int ret;

	XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
		       abo->mem.userptr, abo->mem.size);
	range.notifier = &abo->mem.notifier;
	range.start = abo->mem.userptr;
	range.end = abo->mem.userptr + abo->mem.size;
	range.hmm_pfns = abo->mem.pfns;
	range.default_flags = HMM_PFN_REQ_FAULT;

	if (!mmget_not_zero(mm))
		return -EFAULT;

	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
again:
	range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&range);
	mmap_read_unlock(mm);
	if (ret) {
		if (time_after(jiffies, timeout)) {
			ret = -ETIME;
			goto put_mm;
		}

		if (ret == -EBUSY)
			goto again;

		goto put_mm;
	}

	down_read(&xdna->notifier_lock);
	if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
		up_read(&xdna->notifier_lock);
		goto again;
	}
	abo->mem.map_invalid = false;
	up_read(&xdna->notifier_lock);

put_mm:
	mmput(mm);
	return ret;
}

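/*
 * This follows the standard hmm_range_fault() retry protocol: snapshot
 * the notifier sequence with mmu_interval_read_begin(), fault the range,
 * then recheck with mmu_interval_read_retry() under notifier_lock. If an
 * invalidation raced in between, loop and fault again until the timeout
 * expires.
 */
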
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}

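/*
 * Submission order: take the per-context job semaphore, lock and reserve
 * all BO reservations, re-populate any BO whose userptr mapping was
 * invalidated, then arm and push the job and publish its finished fence
 * on the timeline syncobj under io_lock, so sequence numbers and syncobj
 * points stay in lockstep.
 */
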
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
			 unsigned long cur_seq)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct drm_gem_object *gobj = to_gobj(abo);
	long ret;

	down_write(&xdna->notifier_lock);
	abo->mem.map_invalid = true;
	mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
	up_write(&xdna->notifier_lock);
	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
				    true, MAX_SCHEDULE_TIMEOUT);
	if (!ret || ret == -ERESTARTSYS)
		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
}

360
drivers/accel/amdxdna/aie2_error.c
Normal file
@@ -0,0 +1,360 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#include <drm/drm_cache.h>
#include <drm/drm_device.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/dma-mapping.h>
#include <linux/kthread.h>
#include <linux/kernel.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

struct async_event {
	struct amdxdna_dev_hdl *ndev;
	struct async_event_msg_resp resp;
	struct workqueue_struct *wq;
	struct work_struct work;
	u8 *buf;
	dma_addr_t addr;
	u32 size;
};

struct async_events {
	struct workqueue_struct *wq;
	u8 *buf;
	dma_addr_t addr;
	u32 size;
	u32 event_cnt;
	struct async_event event[] __counted_by(event_cnt);
};

/*
 * The enum, structs and lookup tables below are ported from the XAIE
 * util header file.
 *
 * This data is defined by the AIE device and is used to decode error
 * messages from the device.
 */

enum aie_module_type {
	AIE_MEM_MOD = 0,
	AIE_CORE_MOD,
	AIE_PL_MOD,
};

enum aie_error_category {
	AIE_ERROR_SATURATION = 0,
	AIE_ERROR_FP,
	AIE_ERROR_STREAM,
	AIE_ERROR_ACCESS,
	AIE_ERROR_BUS,
	AIE_ERROR_INSTRUCTION,
	AIE_ERROR_ECC,
	AIE_ERROR_LOCK,
	AIE_ERROR_DMA,
	AIE_ERROR_MEM_PARITY,
	/* Unknown is not from XAIE, added for better categorization */
	AIE_ERROR_UNKNOWN,
};

/* Don't pack, unless the XAIE side changes */
struct aie_error {
	__u8 row;
	__u8 col;
	__u32 mod_type;
	__u8 event_id;
};

struct aie_err_info {
	u32 err_cnt;
	u32 ret_code;
	u32 rsvd;
	struct aie_error payload[] __counted_by(err_cnt);
};

struct aie_event_category {
	u8 event_id;
	enum aie_error_category category;
};

#define EVENT_CATEGORY(id, cat) { id, cat }
static const struct aie_event_category aie_ml_mem_event_cat[] = {
	EVENT_CATEGORY(88U, AIE_ERROR_ECC),
	EVENT_CATEGORY(90U, AIE_ERROR_ECC),
	EVENT_CATEGORY(91U, AIE_ERROR_MEM_PARITY),
	EVENT_CATEGORY(92U, AIE_ERROR_MEM_PARITY),
	EVENT_CATEGORY(93U, AIE_ERROR_MEM_PARITY),
	EVENT_CATEGORY(94U, AIE_ERROR_MEM_PARITY),
	EVENT_CATEGORY(95U, AIE_ERROR_MEM_PARITY),
	EVENT_CATEGORY(96U, AIE_ERROR_MEM_PARITY),
	EVENT_CATEGORY(97U, AIE_ERROR_DMA),
	EVENT_CATEGORY(98U, AIE_ERROR_DMA),
	EVENT_CATEGORY(99U, AIE_ERROR_DMA),
	EVENT_CATEGORY(100U, AIE_ERROR_DMA),
	EVENT_CATEGORY(101U, AIE_ERROR_LOCK),
};

static const struct aie_event_category aie_ml_core_event_cat[] = {
	EVENT_CATEGORY(55U, AIE_ERROR_ACCESS),
	EVENT_CATEGORY(56U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(57U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(58U, AIE_ERROR_BUS),
	EVENT_CATEGORY(59U, AIE_ERROR_INSTRUCTION),
	EVENT_CATEGORY(60U, AIE_ERROR_ACCESS),
	EVENT_CATEGORY(62U, AIE_ERROR_ECC),
	EVENT_CATEGORY(64U, AIE_ERROR_ECC),
	EVENT_CATEGORY(65U, AIE_ERROR_ACCESS),
	EVENT_CATEGORY(66U, AIE_ERROR_ACCESS),
	EVENT_CATEGORY(67U, AIE_ERROR_LOCK),
	EVENT_CATEGORY(70U, AIE_ERROR_INSTRUCTION),
	EVENT_CATEGORY(71U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(72U, AIE_ERROR_BUS),
};

static const struct aie_event_category aie_ml_mem_tile_event_cat[] = {
	EVENT_CATEGORY(130U, AIE_ERROR_ECC),
	EVENT_CATEGORY(132U, AIE_ERROR_ECC),
	EVENT_CATEGORY(133U, AIE_ERROR_DMA),
	EVENT_CATEGORY(134U, AIE_ERROR_DMA),
	EVENT_CATEGORY(135U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(136U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(137U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(138U, AIE_ERROR_BUS),
	EVENT_CATEGORY(139U, AIE_ERROR_LOCK),
};

static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
	EVENT_CATEGORY(64U, AIE_ERROR_BUS),
	EVENT_CATEGORY(65U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(66U, AIE_ERROR_STREAM),
	EVENT_CATEGORY(67U, AIE_ERROR_BUS),
	EVENT_CATEGORY(68U, AIE_ERROR_BUS),
	EVENT_CATEGORY(69U, AIE_ERROR_BUS),
	EVENT_CATEGORY(70U, AIE_ERROR_BUS),
	EVENT_CATEGORY(71U, AIE_ERROR_BUS),
	EVENT_CATEGORY(72U, AIE_ERROR_DMA),
	EVENT_CATEGORY(73U, AIE_ERROR_DMA),
	EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
};

static enum aie_error_category
aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
{
	const struct aie_event_category *lut;
	int num_entry;
	int i;

	switch (mod_type) {
	case AIE_PL_MOD:
		lut = aie_ml_shim_tile_event_cat;
		num_entry = ARRAY_SIZE(aie_ml_shim_tile_event_cat);
		break;
	case AIE_CORE_MOD:
		lut = aie_ml_core_event_cat;
		num_entry = ARRAY_SIZE(aie_ml_core_event_cat);
		break;
	case AIE_MEM_MOD:
		if (row == 1) {
			lut = aie_ml_mem_tile_event_cat;
			num_entry = ARRAY_SIZE(aie_ml_mem_tile_event_cat);
		} else {
			lut = aie_ml_mem_event_cat;
			num_entry = ARRAY_SIZE(aie_ml_mem_event_cat);
		}
		break;
	default:
		return AIE_ERROR_UNKNOWN;
	}

	for (i = 0; i < num_entry; i++) {
		if (event_id != lut[i].event_id)
			continue;

		return lut[i].category;
	}

	return AIE_ERROR_UNKNOWN;
}

static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
{
	struct aie_error *errs = err_info;
	u32 err_col = 0; /* assume that AIE has less than 32 columns */
	int i;

	/* Get err column bitmap */
	for (i = 0; i < num_err; i++) {
		struct aie_error *err = &errs[i];
		enum aie_error_category cat;

		cat = aie_get_error_category(err->row, err->event_id, err->mod_type);
		XDNA_ERR(ndev->xdna, "Row: %d, Col: %d, module %d, event ID %d, category %d",
			 err->row, err->col, err->mod_type,
			 err->event_id, cat);

		if (err->col >= 32) {
			XDNA_WARN(ndev->xdna, "Invalid column number");
			break;
		}

		err_col |= (1 << err->col);
	}

	return err_col;
}

static int aie2_error_async_cb(void *handle, const u32 *data, size_t size)
{
	struct async_event_msg_resp *resp;
	struct async_event *e = handle;

	if (data) {
		resp = (struct async_event_msg_resp *)data;
		e->resp.type = resp->type;
		wmb(); /* Update status last so no lock is needed here */
		e->resp.status = resp->status;
	}
	queue_work(e->wq, &e->work);
	return 0;
}

static int aie2_error_event_send(struct async_event *e)
{
	drm_clflush_virt_range(e->buf, e->size); /* device can access */
	return aie2_register_asyn_event_msg(e->ndev, e->addr, e->size, e,
					    aie2_error_async_cb);
}

static void aie2_error_worker(struct work_struct *err_work)
{
	struct aie_err_info *info;
	struct amdxdna_dev *xdna;
	struct async_event *e;
	u32 max_err;
	u32 err_col;

	e = container_of(err_work, struct async_event, work);

	xdna = e->ndev->xdna;

	if (e->resp.status == MAX_AIE2_STATUS_CODE)
		return;

	e->resp.status = MAX_AIE2_STATUS_CODE;

	print_hex_dump_debug("AIE error: ", DUMP_PREFIX_OFFSET, 16, 4,
			     e->buf, 0x100, false);

	info = (struct aie_err_info *)e->buf;
	XDNA_DBG(xdna, "Error count %d return code %d", info->err_cnt, info->ret_code);

	max_err = (e->size - sizeof(*info)) / sizeof(struct aie_error);
	if (unlikely(info->err_cnt > max_err)) {
		WARN_ONCE(1, "Error count too large %d\n", info->err_cnt);
		return;
	}
	err_col = aie2_error_backtrack(e->ndev, info->payload, info->err_cnt);
	if (!err_col) {
		XDNA_WARN(xdna, "Did not get error column");
		return;
	}

	mutex_lock(&xdna->dev_lock);
	/* Re-send this event to firmware */
	if (aie2_error_event_send(e))
		XDNA_WARN(xdna, "Unable to register async event");
	mutex_unlock(&xdna->dev_lock);
}

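/*
 * MAX_AIE2_STATUS_CODE doubles as a "no response yet" sentinel: events
 * are initialized to it in aie2_error_async_events_alloc(), the mailbox
 * callback overwrites it with the real status, and the worker resets it
 * before re-arming the event with the firmware.
 */
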
int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	struct async_event *e;
	int i, ret;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	for (i = 0; i < ndev->async_events->event_cnt; i++) {
		e = &ndev->async_events->event[i];
		ret = aie2_error_event_send(e);
		if (ret)
			return ret;
	}

	return 0;
}

void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	struct async_events *events;

	events = ndev->async_events;

	mutex_unlock(&xdna->dev_lock);
	destroy_workqueue(events->wq);
	mutex_lock(&xdna->dev_lock);

	dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
			     events->addr, DMA_FROM_DEVICE);
	kfree(events);
}

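/*
 * dev_lock is dropped around destroy_workqueue() because
 * aie2_error_worker() takes dev_lock when it re-arms an event; draining
 * the workqueue while holding the lock could deadlock.
 */
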
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	u32 total_col = ndev->total_col;
	u32 total_size = ASYNC_BUF_SIZE * total_col;
	struct async_events *events;
	int i, ret;

	events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL);
	if (!events)
		return -ENOMEM;

	events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr,
					    DMA_FROM_DEVICE, GFP_KERNEL);
	if (!events->buf) {
		ret = -ENOMEM;
		goto free_events;
	}
	events->size = total_size;
	events->event_cnt = total_col;

	events->wq = alloc_ordered_workqueue("async_wq", 0);
	if (!events->wq) {
		ret = -ENOMEM;
		goto free_buf;
	}

	for (i = 0; i < events->event_cnt; i++) {
		struct async_event *e = &events->event[i];
		u32 offset = i * ASYNC_BUF_SIZE;

		e->ndev = ndev;
		e->wq = events->wq;
		e->buf = &events->buf[offset];
		e->addr = events->addr + offset;
		e->size = ASYNC_BUF_SIZE;
		e->resp.status = MAX_AIE2_STATUS_CODE;
		INIT_WORK(&e->work, aie2_error_worker);
	}

	ndev->async_events = events;

	XDNA_DBG(xdna, "Async event count %d, buf total size 0x%x",
		 events->event_cnt, events->size);
	return 0;

free_buf:
	dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
			     events->addr, DMA_FROM_DEVICE);
free_events:
	kfree(events);
	return ret;
}

776
drivers/accel/amdxdna/aie2_message.c
Normal file
@@ -0,0 +1,776 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_cache.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/bitfield.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/types.h>
#include <linux/xarray.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_mailbox_helper.h"
#include "amdxdna_pci_drv.h"

#define DECLARE_AIE2_MSG(name, op) \
	DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)

static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
				   struct xdna_mailbox_msg *msg)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	struct xdna_notify *hdl = msg->handle;
	int ret;

	if (!ndev->mgmt_chann)
		return -ENODEV;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
	if (ret == -ETIME) {
		xdna_mailbox_stop_channel(ndev->mgmt_chann);
		xdna_mailbox_destroy_channel(ndev->mgmt_chann);
		ndev->mgmt_chann = NULL;
	}

	if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
		XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
			 msg->opcode, *hdl->data);
		ret = -EINVAL;
	}

	return ret;
}

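/*
 * A management command that times out tears the privileged channel down
 * and clears mgmt_chann, so every subsequent management message fails
 * fast with -ENODEV instead of waiting on a dead mailbox.
 */
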
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
{
	DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);

	return aie2_send_mgmt_msg_wait(ndev, &msg);
}

int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
{
	DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME);

	return aie2_send_mgmt_msg_wait(ndev, &msg);
}

int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
{
	DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
	int ret;

	req.type = type;
	req.value = value;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
		return ret;
	}

	return 0;
}

int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
{
	DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
	int ret;

	req.type = type;
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
		return ret;
	}

	*value = resp.value;
	return 0;
}

int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
{
	DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);

	req.pasid = pasid;

	return aie2_send_mgmt_msg_wait(ndev, &msg);
}

int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version)
{
	DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION);
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed",
		 resp.major, resp.minor);

	version->major = resp.major;
	version->minor = resp.minor;

	return 0;
}

int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
{
	DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO);
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	metadata->size = resp.info.size;
	metadata->cols = resp.info.cols;
	metadata->rows = resp.info.rows;

	metadata->version.major = resp.info.major;
	metadata->version.minor = resp.info.minor;

	metadata->core.row_count = resp.info.core_rows;
	metadata->core.row_start = resp.info.core_row_start;
	metadata->core.dma_channel_count = resp.info.core_dma_channels;
	metadata->core.lock_count = resp.info.core_locks;
	metadata->core.event_reg_count = resp.info.core_events;

	metadata->mem.row_count = resp.info.mem_rows;
	metadata->mem.row_start = resp.info.mem_row_start;
	metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
	metadata->mem.lock_count = resp.info.mem_locks;
	metadata->mem.event_reg_count = resp.info.mem_events;

	metadata->shim.row_count = resp.info.shim_rows;
	metadata->shim.row_start = resp.info.shim_row_start;
	metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
	metadata->shim.lock_count = resp.info.shim_locks;
	metadata->shim.event_reg_count = resp.info.shim_events;

	return 0;
}

int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
				struct amdxdna_fw_ver *fw_ver)
{
	DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION);
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	fw_ver->major = resp.major;
	fw_ver->minor = resp.minor;
	fw_ver->sub = resp.sub;
	fw_ver->build = resp.build;

	return 0;
}

int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
{
	DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
	struct amdxdna_dev *xdna = ndev->xdna;
	struct xdna_mailbox_chann_res x2i;
	struct xdna_mailbox_chann_res i2x;
	struct cq_pair *cq_pair;
	u32 intr_reg;
	int ret;

	req.aie_type = 1;
	req.start_col = hwctx->start_col;
	req.num_col = hwctx->num_col;
	req.num_cq_pairs_requested = 1;
	req.pasid = hwctx->client->pasid;
	req.context_priority = 2;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	hwctx->fw_ctx_id = resp.context_id;
	WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");

	cq_pair = &resp.cq_pair[0];
	x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
	x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
	x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
	x2i.rb_size = cq_pair->x2i_q.buf_size;

	i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
	i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
	i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
	i2x.rb_size = cq_pair->i2x_q.buf_size;

	ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
	if (ret == -EINVAL) {
		XDNA_ERR(xdna, "not able to get irq vector");
		goto out_destroy_context;
	}

	intr_reg = i2x.mb_head_ptr_reg + 4;
	hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
							      intr_reg, ret);
	if (!hwctx->priv->mbox_chann) {
		XDNA_ERR(xdna, "not able to create channel");
		ret = -EINVAL;
		goto out_destroy_context;
	}

	XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
		 hwctx->name, ret, resp.msix_id);
	XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
		 hwctx->fw_ctx_id, hwctx->client->pasid);

	return 0;

out_destroy_context:
	aie2_destroy_context(ndev, hwctx);
	return ret;
}

int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
{
	DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	if (hwctx->fw_ctx_id == -1)
		return 0;

	xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);

	req.context_id = hwctx->fw_ctx_id;
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);

	xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
	XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
		 hwctx->fw_ctx_id);
	hwctx->priv->mbox_chann = NULL;
	hwctx->fw_ctx_id = -1;

	return ret;
}

int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
{
	DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	req.context_id = context_id;
	req.buf_addr = addr;
	req.buf_size = size;
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
		 context_id, addr, size);

	return 0;
}

int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
		      u32 size, u32 *cols_filled)
{
	DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
	struct amdxdna_dev *xdna = ndev->xdna;
	struct amdxdna_client *client;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;
	dma_addr_t dma_addr;
	u32 aie_bitmap = 0;
	u8 *buff_addr;
	int ret, idx;

	buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
					  DMA_FROM_DEVICE, GFP_KERNEL);
	if (!buff_addr)
		return -ENOMEM;

	/* Go through each hardware context and mark the AIE columns that are active */
	list_for_each_entry(client, &xdna->client_list, node) {
		idx = srcu_read_lock(&client->hwctx_srcu);
		amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
			aie_bitmap |= amdxdna_hwctx_col_map(hwctx);
		srcu_read_unlock(&client->hwctx_srcu, idx);
	}

	*cols_filled = 0;
	req.dump_buff_addr = dma_addr;
	req.dump_buff_size = size;
	req.num_cols = hweight32(aie_bitmap);
	req.aie_bitmap = aie_bitmap;

	drm_clflush_virt_range(buff_addr, size); /* device can access */
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
		goto fail;
	}

	if (resp.status != AIE2_STATUS_SUCCESS) {
		XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
		ret = -EINVAL;
		goto fail;
	}
	XDNA_DBG(xdna, "Query NPU status completed");

	if (size < resp.size) {
		ret = -EINVAL;
		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
		goto fail;
	}

	if (copy_to_user(buf, buff_addr, resp.size)) {
		ret = -EFAULT;
		XDNA_ERR(xdna, "Failed to copy NPU status to user space");
		goto fail;
	}

	*cols_filled = aie_bitmap;

fail:
	dma_free_noncoherent(xdna->ddev.dev, size, buff_addr, dma_addr, DMA_FROM_DEVICE);
	return ret;
}

int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, const u32 *, size_t))
{
	struct async_event_msg_req req = { 0 };
	struct xdna_mailbox_msg msg = {
		.send_data = (u8 *)&req,
		.send_size = sizeof(req),
		.handle = handle,
		.opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG,
		.notify_cb = cb,
	};

	req.buf_addr = addr;
	req.buf_size = size;

	XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size);
	return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
}

int aie2_config_cu(struct amdxdna_hwctx *hwctx)
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 shift = xdna->dev_info->dev_mem_buf_shift;
	DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
	struct drm_gem_object *gobj;
	struct amdxdna_gem_obj *abo;
	int ret, i;

	if (!chann)
		return -ENODEV;

	if (hwctx->cus->num_cus > MAX_NUM_CUS) {
		XDNA_DBG(xdna, "Exceeds maximum CU count %d", MAX_NUM_CUS);
		return -EINVAL;
	}

	for (i = 0; i < hwctx->cus->num_cus; i++) {
		struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];

		if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
			return -EINVAL;

		gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
		if (!gobj) {
			XDNA_ERR(xdna, "Lookup GEM object failed");
			return -EINVAL;
		}
		abo = to_xdna_obj(gobj);

		if (abo->type != AMDXDNA_BO_DEV) {
			drm_gem_object_put(gobj);
			XDNA_ERR(xdna, "Invalid BO type");
			return -EINVAL;
		}

		req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR,
					 abo->mem.dev_addr >> shift);
		req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func);
		XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i,
			 abo->mem.dev_addr, req.cfgs[i]);
		drm_gem_object_put(gobj);
	}
	req.num_cus = hwctx->cus->num_cus;

	ret = xdna_send_msg_wait(xdna, chann, &msg);
	if (ret == -ETIME)
		aie2_destroy_context(xdna->dev_handle, hwctx);

	if (resp.status == AIE2_STATUS_SUCCESS) {
		XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
		return 0;
	}

	XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
		 msg.opcode, resp.status, ret);
	return ret;
}

int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, const u32 *, size_t))
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	union {
		struct execute_buffer_req ebuf;
		struct exec_dpu_req dpu;
	} req;
	struct xdna_mailbox_msg msg;
	u32 payload_len;
	void *payload;
	int cu_idx;
	int ret;
	u32 op;

	if (!chann)
		return -ENODEV;

	payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
	if (!payload) {
		XDNA_ERR(xdna, "Invalid command, cannot get payload");
		return -EINVAL;
	}

	cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
	if (cu_idx < 0) {
		XDNA_DBG(xdna, "Invalid cu idx");
		return -EINVAL;
	}

	op = amdxdna_cmd_get_op(cmd_abo);
	switch (op) {
	case ERT_START_CU:
		if (unlikely(payload_len > sizeof(req.ebuf.payload)))
			XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
		req.ebuf.cu_idx = cu_idx;
		memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
		msg.send_size = sizeof(req.ebuf);
		msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
		break;
	case ERT_START_NPU: {
		struct amdxdna_cmd_start_npu *sn = payload;

		if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
			XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
		req.dpu.inst_buf_addr = sn->buffer;
		req.dpu.inst_size = sn->buffer_size;
		req.dpu.inst_prop_cnt = sn->prop_count;
		req.dpu.cu_idx = cu_idx;
		memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
		msg.send_size = sizeof(req.dpu);
		msg.opcode = MSG_OP_EXEC_DPU;
		break;
	}
	default:
		XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
		return -EINVAL;
	}
	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
			     0x40, false);

	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(xdna, "Send message failed");
		return ret;
	}

	return 0;
}

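/*
 * ERT_START_CU carries the kernel arguments inline in an
 * execute_buffer_req, while ERT_START_NPU additionally points firmware
 * at an instruction buffer (address, size, property count) via an
 * exec_dpu_req; both are sent on the same hardware context mailbox
 * channel.
 */
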
static int
aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
			      struct amdxdna_gem_obj *abo, u32 *size)
{
	struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
	int cu_idx = amdxdna_cmd_get_cu_idx(abo);
	u32 payload_len;
	void *payload;

	if (cu_idx < 0)
		return -EINVAL;

	payload = amdxdna_cmd_get_payload(abo, &payload_len);
	if (!payload)
		return -EINVAL;

	if (!slot_cf_has_space(offset, payload_len))
		return -ENOSPC;

	buf->cu_idx = cu_idx;
	buf->arg_cnt = payload_len / sizeof(u32);
	memcpy(buf->args, payload, payload_len);
	/* Report the exact slot size so firmware copies only what is needed */
	*size = sizeof(*buf) + payload_len;
	return 0;
}

static int
aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
			       struct amdxdna_gem_obj *abo, u32 *size)
{
	struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
	int cu_idx = amdxdna_cmd_get_cu_idx(abo);
	struct amdxdna_cmd_start_npu *sn;
	u32 payload_len;
	void *payload;
	u32 arg_sz;

	if (cu_idx < 0)
		return -EINVAL;

	payload = amdxdna_cmd_get_payload(abo, &payload_len);
	if (!payload)
		return -EINVAL;
	sn = payload;
	arg_sz = payload_len - sizeof(*sn);
	if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
		return -EINVAL;

	if (!slot_dpu_has_space(offset, arg_sz))
		return -ENOSPC;

	buf->inst_buf_addr = sn->buffer;
	buf->inst_size = sn->buffer_size;
	buf->inst_prop_cnt = sn->prop_count;
	buf->cu_idx = cu_idx;
	buf->arg_cnt = arg_sz / sizeof(u32);
	memcpy(buf->args, sn->prop_args, arg_sz);

	/* Report the exact slot size so firmware copies only what is needed */
	*size = sizeof(*buf) + arg_sz;
	return 0;
}

static int
aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
			   struct amdxdna_gem_obj *abo, u32 *size)
{
	u32 this_op = amdxdna_cmd_get_op(abo);
	void *cmd_buf = cmdbuf_abo->mem.kva;
	int ret;

	if (this_op != op) {
		ret = -EINVAL;
		goto done;
	}

	switch (op) {
	case ERT_START_CU:
		ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
		break;
	case ERT_START_NPU:
		ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
		break;
	default:
		ret = -EOPNOTSUPP;
	}

done:
	if (ret) {
		XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
			 op, ret);
	}
	return ret;
}

static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
	int idx = get_job_idx(job->seq);

	return job->hwctx->priv->cmd_buf[idx];
}

static void
aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
			     struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
{
	req->buf_addr = cmdbuf_abo->mem.dev_addr;
	req->buf_size = size;
	req->count = cnt;
	drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
	XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
		 req->buf_addr, size, cnt);
}

static inline u32
aie2_cmd_op_to_msg_op(u32 op)
{
	switch (op) {
	case ERT_START_CU:
		return MSG_OP_CHAIN_EXEC_BUFFER_CF;
	case ERT_START_NPU:
		return MSG_OP_CHAIN_EXEC_DPU;
	default:
		return MSG_OP_MAX_OPCODE;
	}
}

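/*
 * Mapping between ERT command opcodes and chained mailbox opcodes:
 * ERT_START_CU -> MSG_OP_CHAIN_EXEC_BUFFER_CF and ERT_START_NPU ->
 * MSG_OP_CHAIN_EXEC_DPU; MSG_OP_MAX_OPCODE marks an op that cannot be
 * chained and makes the callers below bail out with -EOPNOTSUPP.
 */
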
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
			       struct amdxdna_sched_job *job,
			       int (*notify_cb)(void *, const u32 *, size_t))
{
	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_cmd_chain *payload;
	struct xdna_mailbox_msg msg;
	struct cmd_chain_req req;
	u32 payload_len;
	u32 offset = 0;
	u32 size;
	int ret;
	u32 op;
	u32 i;

	op = amdxdna_cmd_get_op(cmd_abo);
	payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
	if (op != ERT_CMD_CHAIN || !payload ||
	    payload_len < struct_size(payload, data, payload->command_count))
		return -EINVAL;

	for (i = 0; i < payload->command_count; i++) {
		u32 boh = (u32)(payload->data[i]);
		struct amdxdna_gem_obj *abo;

		abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
		if (!abo) {
			XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
			return -ENOENT;
		}

		/* All sub-commands must share the same op; use the first one. */
		if (i == 0)
			op = amdxdna_cmd_get_op(abo);

		ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
		amdxdna_gem_put_obj(abo);
		if (ret)
			return -EINVAL;

		offset += size;
	}

	/* The offset is the accumulated total size of the cmd buffer */
	aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);

	msg.opcode = aie2_cmd_op_to_msg_op(op);
	if (msg.opcode == MSG_OP_MAX_OPCODE)
		return -EOPNOTSUPP;
	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(hwctx->client->xdna, "Send message failed");
		return ret;
	}

	return 0;
}

int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
				struct amdxdna_sched_job *job,
				int (*notify_cb)(void *, const u32 *, size_t))
{
	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct xdna_mailbox_msg msg;
	struct cmd_chain_req req;
	u32 size;
	int ret;
	u32 op;

	op = amdxdna_cmd_get_op(cmd_abo);
	ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
	if (ret)
		return ret;

	aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);

	msg.opcode = aie2_cmd_op_to_msg_op(op);
	if (msg.opcode == MSG_OP_MAX_OPCODE)
		return -EOPNOTSUPP;
	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(hwctx->client->xdna, "Send message failed");
		return ret;
	}

	return 0;
}

int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct xdna_mailbox_msg msg;
|
||||
struct sync_bo_req req;
|
||||
int ret = 0;
|
||||
|
||||
req.src_addr = 0;
|
||||
req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
|
||||
req.size = abo->mem.size;
|
||||
|
||||
/* Device to Host */
|
||||
req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
|
||||
FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
|
||||
|
||||
XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
|
||||
req.size, req.src_addr, req.dst_addr);
|
||||
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
msg.send_size = sizeof(req);
|
||||
msg.opcode = MSG_OP_SYNC_BO;
|
||||
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
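
aie2_sync_bo() packs the transfer direction into the single type word with FIELD_PREP over the source and destination nibbles. A sketch of the opposite, host-to-device direction, assuming the same masks and memory-type values defined in aie2_msg_priv.h below (illustrative only, not driver code):

static u32 sync_bo_type_host_to_dev(void)
{
	/* mirror image of the device-to-host setup used above */
	return FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_HOST_MEM) |
	       FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_DEV_MEM);
}
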
370
drivers/accel/amdxdna/aie2_msg_priv.h
Normal file
@@ -0,0 +1,370 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AIE2_MSG_PRIV_H_
#define _AIE2_MSG_PRIV_H_

enum aie2_msg_opcode {
	MSG_OP_CREATE_CONTEXT = 0x2,
	MSG_OP_DESTROY_CONTEXT = 0x3,
	MSG_OP_SYNC_BO = 0x7,
	MSG_OP_EXECUTE_BUFFER_CF = 0xC,
	MSG_OP_QUERY_COL_STATUS = 0xD,
	MSG_OP_QUERY_AIE_TILE_INFO = 0xE,
	MSG_OP_QUERY_AIE_VERSION = 0xF,
	MSG_OP_EXEC_DPU = 0x10,
	MSG_OP_CONFIG_CU = 0x11,
	MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
	MSG_OP_CHAIN_EXEC_DPU = 0x13,
	MSG_OP_MAX_XRT_OPCODE,
	MSG_OP_SUSPEND = 0x101,
	MSG_OP_RESUME = 0x102,
	MSG_OP_ASSIGN_MGMT_PASID = 0x103,
	MSG_OP_INVOKE_SELF_TEST = 0x104,
	MSG_OP_MAP_HOST_BUFFER = 0x106,
	MSG_OP_GET_FIRMWARE_VERSION = 0x108,
	MSG_OP_SET_RUNTIME_CONFIG = 0x10A,
	MSG_OP_GET_RUNTIME_CONFIG = 0x10B,
	MSG_OP_REGISTER_ASYNC_EVENT_MSG = 0x10C,
	MSG_OP_MAX_DRV_OPCODE,
	MSG_OP_GET_PROTOCOL_VERSION = 0x301,
	MSG_OP_MAX_OPCODE
};

enum aie2_msg_status {
	AIE2_STATUS_SUCCESS = 0x0,
	/* AIE Error codes */
	AIE2_STATUS_AIE_SATURATION_ERROR = 0x1000001,
	AIE2_STATUS_AIE_FP_ERROR = 0x1000002,
	AIE2_STATUS_AIE_STREAM_ERROR = 0x1000003,
	AIE2_STATUS_AIE_ACCESS_ERROR = 0x1000004,
	AIE2_STATUS_AIE_BUS_ERROR = 0x1000005,
	AIE2_STATUS_AIE_INSTRUCTION_ERROR = 0x1000006,
	AIE2_STATUS_AIE_ECC_ERROR = 0x1000007,
	AIE2_STATUS_AIE_LOCK_ERROR = 0x1000008,
	AIE2_STATUS_AIE_DMA_ERROR = 0x1000009,
	AIE2_STATUS_AIE_MEM_PARITY_ERROR = 0x100000a,
	AIE2_STATUS_AIE_PWR_CFG_ERROR = 0x100000b,
	AIE2_STATUS_AIE_BACKTRACK_ERROR = 0x100000c,
	AIE2_STATUS_MAX_AIE_STATUS_CODE,
	/* MGMT ERT Error codes */
	AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE = 0x2000001,
	AIE2_STATUS_MGMT_ERT_HASH_MISMATCH,
	AIE2_STATUS_MGMT_ERT_NOAVAIL,
	AIE2_STATUS_MGMT_ERT_INVALID_PARAM,
	AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE,
	AIE2_STATUS_MGMT_ERT_BUSY,
	AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE,
	MAX_MGMT_ERT_STATUS_CODE,
	/* APP ERT Error codes */
	AIE2_STATUS_APP_ERT_FIRST_ERROR = 0x3000001,
	AIE2_STATUS_APP_INVALID_INSTR,
	AIE2_STATUS_APP_LOAD_PDI_FAIL,
	MAX_APP_ERT_STATUS_CODE,
	/* NPU RTOS Error Codes */
	AIE2_STATUS_INVALID_INPUT_BUFFER = 0x4000001,
	AIE2_STATUS_INVALID_COMMAND,
	AIE2_STATUS_INVALID_PARAM,
	AIE2_STATUS_INVALID_OPERATION = 0x4000006,
	AIE2_STATUS_ASYNC_EVENT_MSGS_FULL,
	AIE2_STATUS_MAX_RTOS_STATUS_CODE,
	MAX_AIE2_STATUS_CODE
};

struct assign_mgmt_pasid_req {
	__u16 pasid;
	__u16 reserved;
} __packed;

struct assign_mgmt_pasid_resp {
	enum aie2_msg_status status;
} __packed;

struct map_host_buffer_req {
	__u32 context_id;
	__u64 buf_addr;
	__u64 buf_size;
} __packed;

struct map_host_buffer_resp {
	enum aie2_msg_status status;
} __packed;

#define MAX_CQ_PAIRS 2
struct cq_info {
	__u32 head_addr;
	__u32 tail_addr;
	__u32 buf_addr;
	__u32 buf_size;
};

struct cq_pair {
	struct cq_info x2i_q;
	struct cq_info i2x_q;
};

struct create_ctx_req {
	__u32 aie_type;
	__u8 start_col;
	__u8 num_col;
	__u16 reserved;
	__u8 num_cq_pairs_requested;
	__u8 reserved1;
	__u16 pasid;
	__u32 pad[2];
	__u32 sec_comm_target_type;
	__u32 context_priority;
} __packed;

struct create_ctx_resp {
	enum aie2_msg_status status;
	__u32 context_id;
	__u16 msix_id;
	__u8 num_cq_pairs_allocated;
	__u8 reserved;
	struct cq_pair cq_pair[MAX_CQ_PAIRS];
} __packed;

struct destroy_ctx_req {
	__u32 context_id;
} __packed;

struct destroy_ctx_resp {
	enum aie2_msg_status status;
} __packed;

struct execute_buffer_req {
	__u32 cu_idx;
	__u32 payload[19];
} __packed;

struct exec_dpu_req {
	__u64 inst_buf_addr;
	__u32 inst_size;
	__u32 inst_prop_cnt;
	__u32 cu_idx;
	__u32 payload[35];
} __packed;

struct execute_buffer_resp {
	enum aie2_msg_status status;
} __packed;

struct aie_tile_info {
	__u32 size;
	__u16 major;
	__u16 minor;
	__u16 cols;
	__u16 rows;
	__u16 core_rows;
	__u16 mem_rows;
	__u16 shim_rows;
	__u16 core_row_start;
	__u16 mem_row_start;
	__u16 shim_row_start;
	__u16 core_dma_channels;
	__u16 mem_dma_channels;
	__u16 shim_dma_channels;
	__u16 core_locks;
	__u16 mem_locks;
	__u16 shim_locks;
	__u16 core_events;
	__u16 mem_events;
	__u16 shim_events;
	__u16 reserved;
};

struct aie_tile_info_req {
	__u32 reserved;
} __packed;

struct aie_tile_info_resp {
	enum aie2_msg_status status;
	struct aie_tile_info info;
} __packed;

struct aie_version_info_req {
	__u32 reserved;
} __packed;

struct aie_version_info_resp {
	enum aie2_msg_status status;
	__u16 major;
	__u16 minor;
} __packed;

struct aie_column_info_req {
	__u64 dump_buff_addr;
	__u32 dump_buff_size;
	__u32 num_cols;
	__u32 aie_bitmap;
} __packed;

struct aie_column_info_resp {
	enum aie2_msg_status status;
	__u32 size;
} __packed;

struct suspend_req {
	__u32 place_holder;
} __packed;

struct suspend_resp {
	enum aie2_msg_status status;
} __packed;

struct resume_req {
	__u32 place_holder;
} __packed;

struct resume_resp {
	enum aie2_msg_status status;
} __packed;

struct check_header_hash_req {
	__u64 hash_high;
	__u64 hash_low;
} __packed;

struct check_header_hash_resp {
	enum aie2_msg_status status;
} __packed;

struct query_error_req {
	__u64 buf_addr;
	__u32 buf_size;
	__u32 next_row;
	__u32 next_column;
	__u32 next_module;
} __packed;

struct query_error_resp {
	enum aie2_msg_status status;
	__u32 num_err;
	__u32 has_next_err;
	__u32 next_row;
	__u32 next_column;
	__u32 next_module;
} __packed;

struct protocol_version_req {
	__u32 reserved;
} __packed;

struct protocol_version_resp {
	enum aie2_msg_status status;
	__u32 major;
	__u32 minor;
} __packed;

struct firmware_version_req {
	__u32 reserved;
} __packed;

struct firmware_version_resp {
	enum aie2_msg_status status;
	__u32 major;
	__u32 minor;
	__u32 sub;
	__u32 build;
} __packed;

#define MAX_NUM_CUS 32
#define AIE2_MSG_CFG_CU_PDI_ADDR GENMASK(16, 0)
#define AIE2_MSG_CFG_CU_FUNC GENMASK(24, 17)
struct config_cu_req {
	__u32 num_cus;
	__u32 cfgs[MAX_NUM_CUS];
} __packed;

struct config_cu_resp {
	enum aie2_msg_status status;
} __packed;

struct set_runtime_cfg_req {
	__u32 type;
	__u64 value;
} __packed;

struct set_runtime_cfg_resp {
	enum aie2_msg_status status;
} __packed;

struct get_runtime_cfg_req {
	__u32 type;
} __packed;

struct get_runtime_cfg_resp {
	enum aie2_msg_status status;
	__u64 value;
} __packed;

enum async_event_type {
	ASYNC_EVENT_TYPE_AIE_ERROR,
	ASYNC_EVENT_TYPE_EXCEPTION,
	MAX_ASYNC_EVENT_TYPE
};

#define ASYNC_BUF_SIZE SZ_8K
struct async_event_msg_req {
	__u64 buf_addr;
	__u32 buf_size;
} __packed;

struct async_event_msg_resp {
	enum aie2_msg_status status;
	enum async_event_type type;
} __packed;

#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
#define slot_cf_has_space(offset, payload_size) \
	(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
	 offsetof(struct cmd_chain_slot_execbuf_cf, args[0]))
struct cmd_chain_slot_execbuf_cf {
	__u32 cu_idx;
	__u32 arg_cnt;
	__u32 args[] __counted_by(arg_cnt);
};

#define slot_dpu_has_space(offset, payload_size) \
	(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
	 offsetof(struct cmd_chain_slot_dpu, args[0]))
struct cmd_chain_slot_dpu {
	__u64 inst_buf_addr;
	__u32 inst_size;
	__u32 inst_prop_cnt;
	__u32 cu_idx;
	__u32 arg_cnt;
#define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32))
	__u32 args[] __counted_by(arg_cnt);
};
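
A quick worked example of the bound checks above, using only constants from this header: offsetof(struct cmd_chain_slot_dpu, args[0]) is 24 bytes (one __u64 plus four __u32 fields) and MAX_CHAIN_CMDBUF_SIZE is 4096. A slot written at offset 3968 with a 96-byte payload leaves 4096 - (3968 + 96) = 32 bytes, so slot_dpu_has_space() still accepts it (32 > 24); a payload 16 bytes larger would leave only 16 bytes and be rejected.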

struct cmd_chain_req {
	__u64 buf_addr;
	__u32 buf_size;
	__u32 count;
} __packed;

struct cmd_chain_resp {
	enum aie2_msg_status status;
	__u32 fail_cmd_idx;
	enum aie2_msg_status fail_cmd_status;
} __packed;

#define AIE2_MSG_SYNC_BO_SRC_TYPE GENMASK(3, 0)
#define AIE2_MSG_SYNC_BO_DST_TYPE GENMASK(7, 4)
struct sync_bo_req {
	__u64 src_addr;
	__u64 dst_addr;
	__u32 size;
#define SYNC_BO_DEV_MEM 0
#define SYNC_BO_HOST_MEM 2
	__u32 type;
} __packed;

struct sync_bo_resp {
	enum aie2_msg_status status;
} __packed;
#endif /* _AIE2_MSG_PRIV_H_ */
928
drivers/accel/amdxdna/aie2_pci.c
Normal file
@@ -0,0 +1,928 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/errno.h>
#include <linux/firmware.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
#include <linux/xarray.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

static int aie2_max_col = XRS_MAX_COL;
module_param(aie2_max_col, uint, 0600);
MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");

/*
 * The management mailbox channel is allocated by firmware.
 * The related register and ring buffer information is on SRAM BAR.
 * This struct is the register layout.
 */
#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
struct mgmt_mbox_chann_info {
	__u32 x2i_tail;
	__u32 x2i_head;
	__u32 x2i_buf;
	__u32 x2i_buf_sz;
	__u32 i2x_tail;
	__u32 i2x_head;
	__u32 i2x_buf;
	__u32 i2x_buf_sz;
	__u32 magic;
	__u32 msi_id;
	__u32 prot_major;
	__u32 prot_minor;
	__u32 rsvd[4];
};

static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
{
	struct amdxdna_dev *xdna = ndev->xdna;

	/*
	 * The mailbox behavior the driver supports is defined by
	 * ndev->priv->protocol_major and protocol_minor.
	 *
	 * When protocol_major and fw_major differ, the driver and
	 * firmware are incompatible.
	 */
	if (ndev->priv->protocol_major != fw_major) {
		XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
			 fw_major, fw_minor);
		return -EINVAL;
	}

	/*
	 * When protocol_minor is greater than fw_minor, the driver relies
	 * on operations the installed firmware does not support.
	 */
	if (ndev->priv->protocol_minor > fw_minor) {
		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
		return -EINVAL;
	}
	return 0;
}
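
In other words, aie2_check_protocol() enforces the usual major/minor compatibility rule. A condensed sketch of the same predicate (an illustrative helper, not driver code):

static bool aie2_proto_compatible(u32 drv_major, u32 drv_minor,
				  u32 fw_major, u32 fw_minor)
{
	/* majors must match exactly; firmware minor must cover the driver's */
	return drv_major == fw_major && drv_minor <= fw_minor;
}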

static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
{
	struct amdxdna_dev *xdna = ndev->xdna;

	XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_i2x.mb_tail_ptr_reg);
	XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_i2x.mb_head_ptr_reg);
	XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_i2x.rb_start_addr);
	XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_i2x.rb_size);
	XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_x2i.mb_tail_ptr_reg);
	XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_x2i.mb_head_ptr_reg);
	XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
	XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size);
	XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
	XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
	XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
}

static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
{
	struct mgmt_mbox_chann_info info_regs;
	struct xdna_mailbox_chann_res *i2x;
	struct xdna_mailbox_chann_res *x2i;
	u32 addr, off;
	u32 *reg;
	int ret;
	int i;

	/*
	 * Once firmware is alive, it writes the management channel
	 * information into SRAM BAR and stores the address of that
	 * information at the FW_ALIVE_OFF offset in SRAM BAR.
	 *
	 * Reading a non-zero value from FW_ALIVE_OFF implies that the
	 * firmware is alive.
	 */
	ret = readx_poll_timeout(readl, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF),
				 addr, addr, AIE2_INTERVAL, AIE2_TIMEOUT);
	if (ret || !addr)
		return -ETIME;

	off = AIE2_SRAM_OFF(ndev, addr);
	reg = (u32 *)&info_regs;
	for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
		reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));

	if (info_regs.magic != MGMT_MBOX_MAGIC) {
		XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
		ret = -EINVAL;
		goto done;
	}

	i2x = &ndev->mgmt_i2x;
	x2i = &ndev->mgmt_x2i;

	i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head);
	i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail);
	i2x->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.i2x_buf);
	i2x->rb_size = info_regs.i2x_buf_sz;

	x2i->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_head);
	x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
	x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
	x2i->rb_size = info_regs.x2i_buf_sz;

	ndev->mgmt_chan_idx = info_regs.msi_id;
	ndev->mgmt_prot_major = info_regs.prot_major;
	ndev->mgmt_prot_minor = info_regs.prot_minor;

	ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);

done:
	aie2_dump_chann_info_debug(ndev);

	/* Must clear address at FW_ALIVE_OFF */
	writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));

	return ret;
}

int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
		     enum rt_config_category category, u32 *val)
{
	const struct rt_config *cfg;
	u32 value;
	int ret;

	for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
		if (cfg->category != category)
			continue;

		value = val ? *val : cfg->value;
		ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
		if (ret) {
			XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
				 cfg->type, value);
			return ret;
		}
	}

	return 0;
}

static int aie2_xdna_reset(struct amdxdna_dev_hdl *ndev)
{
	int ret;

	ret = aie2_suspend_fw(ndev);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Suspend firmware failed");
		return ret;
	}

	ret = aie2_resume_fw(ndev);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Resume firmware failed");
		return ret;
	}

	return 0;
}

static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
{
	int ret;

	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Runtime config failed");
		return ret;
	}

	ret = aie2_assign_mgmt_pasid(ndev, 0);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Can not assign PASID");
		return ret;
	}

	ret = aie2_xdna_reset(ndev);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Reset firmware failed");
		return ret;
	}

	if (!ndev->async_events)
		return 0;

	ret = aie2_error_async_events_send(ndev);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Send async events failed");
		return ret;
	}

	return 0;
}

static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
{
	int ret;

	ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
	if (ret) {
		XDNA_ERR(ndev->xdna, "query firmware version failed");
		return ret;
	}

	ret = aie2_query_aie_version(ndev, &ndev->version);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Query AIE version failed");
		return ret;
	}

	ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Query AIE metadata failed");
		return ret;
	}

	return 0;
}

static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
{
	if (aie2_suspend_fw(ndev))
		XDNA_ERR(ndev->xdna, "Suspend_fw failed");
	XDNA_DBG(ndev->xdna, "Firmware suspended");
}

static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action)
{
	struct amdxdna_hwctx *hwctx = cb_arg;
	struct amdxdna_dev *xdna;
	int ret;

	xdna = hwctx->client->xdna;

	hwctx->start_col = action->part.start_col;
	hwctx->num_col = action->part.ncols;
	ret = aie2_create_context(xdna->dev_handle, hwctx);
	if (ret)
		XDNA_ERR(xdna, "create context failed, ret %d", ret);

	return ret;
}

static int aie2_xrs_unload(void *cb_arg)
{
	struct amdxdna_hwctx *hwctx = cb_arg;
	struct amdxdna_dev *xdna;
	int ret;

	xdna = hwctx->client->xdna;

	ret = aie2_destroy_context(xdna->dev_handle, hwctx);
	if (ret)
		XDNA_ERR(xdna, "destroy context failed, ret %d", ret);

	return ret;
}

static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
{
	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
	struct amdxdna_dev_hdl *ndev;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

	ndev = xdna->dev_handle;
	ndev->dft_dpm_level = dpm_level;
	if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
		return 0;

	return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
}

static struct xrs_action_ops aie2_xrs_actions = {
	.load = aie2_xrs_load,
	.unload = aie2_xrs_unload,
	.set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
};

static void aie2_hw_stop(struct amdxdna_dev *xdna)
{
	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;

	if (ndev->dev_status <= AIE2_DEV_INIT) {
		XDNA_ERR(xdna, "device is already stopped");
		return;
	}

	aie2_mgmt_fw_fini(ndev);
	xdna_mailbox_stop_channel(ndev->mgmt_chann);
	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
	ndev->mgmt_chann = NULL;
	drmm_kfree(&xdna->ddev, ndev->mbox);
	ndev->mbox = NULL;
	aie2_psp_stop(ndev->psp_hdl);
	aie2_smu_fini(ndev);
	pci_disable_device(pdev);

	ndev->dev_status = AIE2_DEV_INIT;
}

static int aie2_hw_start(struct amdxdna_dev *xdna)
{
	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
	struct xdna_mailbox_res mbox_res;
	u32 xdna_mailbox_intr_reg;
	int mgmt_mb_irq, ret;

	if (ndev->dev_status >= AIE2_DEV_START) {
		XDNA_INFO(xdna, "device is already started");
		return 0;
	}

	ret = pci_enable_device(pdev);
	if (ret) {
		XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
		return ret;
	}
	pci_set_master(pdev);

	ret = aie2_smu_init(ndev);
	if (ret) {
		XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
		goto disable_dev;
	}

	ret = aie2_psp_start(ndev->psp_hdl);
	if (ret) {
		XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
		goto fini_smu;
	}

	ret = aie2_get_mgmt_chann_info(ndev);
	if (ret) {
		XDNA_ERR(xdna, "firmware is not alive");
		goto stop_psp;
	}

	mbox_res.ringbuf_base = ndev->sram_base;
	mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
	mbox_res.mbox_base = ndev->mbox_base;
	mbox_res.mbox_size = MBOX_SIZE(ndev);
	mbox_res.name = "xdna_mailbox";
	ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
	if (!ndev->mbox) {
		XDNA_ERR(xdna, "failed to create mailbox device");
		ret = -ENODEV;
		goto stop_psp;
	}

	mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
	if (mgmt_mb_irq < 0) {
		ret = mgmt_mb_irq;
		XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret);
		goto stop_psp;
	}

	xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
	ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
						       &ndev->mgmt_x2i,
						       &ndev->mgmt_i2x,
						       xdna_mailbox_intr_reg,
						       mgmt_mb_irq);
	if (!ndev->mgmt_chann) {
		XDNA_ERR(xdna, "failed to create management mailbox channel");
		ret = -EINVAL;
		goto stop_psp;
	}

	ret = aie2_pm_init(ndev);
	if (ret) {
		XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
		goto destroy_mgmt_chann;
	}

	ret = aie2_mgmt_fw_init(ndev);
	if (ret) {
		XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
		goto destroy_mgmt_chann;
	}

	ndev->dev_status = AIE2_DEV_START;

	return 0;

destroy_mgmt_chann:
	xdna_mailbox_stop_channel(ndev->mgmt_chann);
	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
stop_psp:
	aie2_psp_stop(ndev->psp_hdl);
fini_smu:
	aie2_smu_fini(ndev);
disable_dev:
	pci_disable_device(pdev);

	return ret;
}
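
Note the error-handling layout of aie2_hw_start(): the goto labels unwind in exactly the reverse order of bring-up (mailbox channel, PSP, SMU, PCI device), the standard kernel goto-ladder idiom. A self-contained sketch of the idiom, with hypothetical stage names standing in for the real sub-inits:

static int enable_stage(void) { return 0; }	/* placeholder stage */
static int start_stage(void) { return 0; }	/* placeholder stage */
static void disable_stage(void) { }		/* placeholder undo */

static int bringup_sketch(void)
{
	int ret;

	ret = enable_stage();
	if (ret)
		return ret;

	ret = start_stage();
	if (ret)
		goto undo_enable;

	return 0;

undo_enable:
	disable_stage();	/* undo only what already succeeded */
	return ret;
}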

static int aie2_init(struct amdxdna_dev *xdna)
{
	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
	void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
	struct init_config xrs_cfg = { 0 };
	struct amdxdna_dev_hdl *ndev;
	struct psp_config psp_conf;
	const struct firmware *fw;
	unsigned long bars = 0;
	int i, nvec, ret;

	ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
	if (!ndev)
		return -ENOMEM;

	ndev->priv = xdna->dev_info->dev_priv;
	ndev->xdna = xdna;

	ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
	if (ret) {
		XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
			 ndev->priv->fw_path, ret);
		return ret;
	}

	ret = pcim_enable_device(pdev);
	if (ret) {
		XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
		goto release_fw;
	}

	for (i = 0; i < PSP_MAX_REGS; i++)
		set_bit(PSP_REG_BAR(ndev, i), &bars);

	set_bit(xdna->dev_info->sram_bar, &bars);
	set_bit(xdna->dev_info->smu_bar, &bars);
	set_bit(xdna->dev_info->mbox_bar, &bars);

	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
		if (!test_bit(i, &bars))
			continue;
		tbl[i] = pcim_iomap(pdev, i, 0);
		if (!tbl[i]) {
			XDNA_ERR(xdna, "map bar %d failed", i);
			ret = -ENOMEM;
			goto release_fw;
		}
	}

	ndev->sram_base = tbl[xdna->dev_info->sram_bar];
	ndev->smu_base = tbl[xdna->dev_info->smu_bar];
	ndev->mbox_base = tbl[xdna->dev_info->mbox_bar];

	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
		goto release_fw;
	}

	nvec = pci_msix_vec_count(pdev);
	if (nvec <= 0) {
		XDNA_ERR(xdna, "does not get number of interrupt vector");
		ret = -EINVAL;
		goto release_fw;
	}

	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
	if (ret < 0) {
		XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
		goto release_fw;
	}

	ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
	if (ret) {
		XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
		goto free_irq;
	}

	psp_conf.fw_size = fw->size;
	psp_conf.fw_buf = fw->data;
	for (i = 0; i < PSP_MAX_REGS; i++)
		psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
	ndev->psp_hdl = aie2m_psp_create(&xdna->ddev, &psp_conf);
	if (!ndev->psp_hdl) {
		XDNA_ERR(xdna, "failed to create psp");
		ret = -ENOMEM;
		goto disable_sva;
	}
	xdna->dev_handle = ndev;

	ret = aie2_hw_start(xdna);
	if (ret) {
		XDNA_ERR(xdna, "start npu failed, ret %d", ret);
		goto disable_sva;
	}

	ret = aie2_mgmt_fw_query(ndev);
	if (ret) {
		XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
		goto stop_hw;
	}
	ndev->total_col = min(aie2_max_col, ndev->metadata.cols);

	xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
	for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
		xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
	xrs_cfg.sys_eff_factor = 1;
	xrs_cfg.ddev = &xdna->ddev;
	xrs_cfg.actions = &aie2_xrs_actions;
	xrs_cfg.total_col = ndev->total_col;

	xdna->xrs_hdl = xrsm_init(&xrs_cfg);
	if (!xdna->xrs_hdl) {
		XDNA_ERR(xdna, "Initialize resolver failed");
		ret = -EINVAL;
		goto stop_hw;
	}

	ret = aie2_error_async_events_alloc(ndev);
	if (ret) {
		XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
		goto stop_hw;
	}

	ret = aie2_error_async_events_send(ndev);
	if (ret) {
		XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
		goto async_event_free;
	}

	/* Issue a command to make sure firmware handled async events */
	ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
	if (ret) {
		XDNA_ERR(xdna, "Re-query firmware version failed");
		goto async_event_free;
	}

	release_firmware(fw);
	return 0;

async_event_free:
	aie2_error_async_events_free(ndev);
stop_hw:
	aie2_hw_stop(xdna);
disable_sva:
	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
free_irq:
	pci_free_irq_vectors(pdev);
release_fw:
	release_firmware(fw);

	return ret;
}

static void aie2_fini(struct amdxdna_dev *xdna)
{
	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;

	aie2_hw_stop(xdna);
	aie2_error_async_events_free(ndev);
	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
	pci_free_irq_vectors(pdev);
}

static int aie2_get_aie_status(struct amdxdna_client *client,
			       struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_query_aie_status status;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int ret;

	ndev = xdna->dev_handle;
	if (copy_from_user(&status, u64_to_user_ptr(args->buffer), sizeof(status))) {
		XDNA_ERR(xdna, "Failed to copy AIE request into kernel");
		return -EFAULT;
	}

	if (ndev->metadata.cols * ndev->metadata.size < status.buffer_size) {
		XDNA_ERR(xdna, "Invalid buffer size. Given Size: %u. Need Size: %u.",
			 status.buffer_size, ndev->metadata.cols * ndev->metadata.size);
		return -EINVAL;
	}

	ret = aie2_query_status(ndev, u64_to_user_ptr(status.buffer),
				status.buffer_size, &status.cols_filled);
	if (ret) {
		XDNA_ERR(xdna, "Failed to get AIE status info. Ret: %d", ret);
		return ret;
	}

	if (copy_to_user(u64_to_user_ptr(args->buffer), &status, sizeof(status))) {
		XDNA_ERR(xdna, "Failed to copy AIE request info to user space");
		return -EFAULT;
	}

	return 0;
}

static int aie2_get_aie_metadata(struct amdxdna_client *client,
				 struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_query_aie_metadata *meta;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int ret = 0;

	ndev = xdna->dev_handle;
	meta = kzalloc(sizeof(*meta), GFP_KERNEL);
	if (!meta)
		return -ENOMEM;

	meta->col_size = ndev->metadata.size;
	meta->cols = ndev->metadata.cols;
	meta->rows = ndev->metadata.rows;

	meta->version.major = ndev->metadata.version.major;
	meta->version.minor = ndev->metadata.version.minor;

	meta->core.row_count = ndev->metadata.core.row_count;
	meta->core.row_start = ndev->metadata.core.row_start;
	meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
	meta->core.lock_count = ndev->metadata.core.lock_count;
	meta->core.event_reg_count = ndev->metadata.core.event_reg_count;

	meta->mem.row_count = ndev->metadata.mem.row_count;
	meta->mem.row_start = ndev->metadata.mem.row_start;
	meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
	meta->mem.lock_count = ndev->metadata.mem.lock_count;
	meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;

	meta->shim.row_count = ndev->metadata.shim.row_count;
	meta->shim.row_start = ndev->metadata.shim.row_start;
	meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
	meta->shim.lock_count = ndev->metadata.shim.lock_count;
	meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;

	if (copy_to_user(u64_to_user_ptr(args->buffer), meta, sizeof(*meta)))
		ret = -EFAULT;

	kfree(meta);
	return ret;
}

static int aie2_get_aie_version(struct amdxdna_client *client,
				struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_query_aie_version version;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_dev_hdl *ndev;

	ndev = xdna->dev_handle;
	version.major = ndev->version.major;
	version.minor = ndev->version.minor;

	if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
		return -EFAULT;

	return 0;
}

static int aie2_get_firmware_version(struct amdxdna_client *client,
				     struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_query_firmware_version version;
	struct amdxdna_dev *xdna = client->xdna;

	version.major = xdna->fw_ver.major;
	version.minor = xdna->fw_ver.minor;
	version.patch = xdna->fw_ver.sub;
	version.build = xdna->fw_ver.build;

	if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
		return -EFAULT;

	return 0;
}

static int aie2_get_power_mode(struct amdxdna_client *client,
			       struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_get_power_mode mode = {};
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_dev_hdl *ndev;

	ndev = xdna->dev_handle;
	mode.power_mode = ndev->pw_mode;

	if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
		return -EFAULT;

	return 0;
}

static int aie2_get_clock_metadata(struct amdxdna_client *client,
				   struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_query_clock_metadata *clock;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int ret = 0;

	ndev = xdna->dev_handle;
	clock = kzalloc(sizeof(*clock), GFP_KERNEL);
	if (!clock)
		return -ENOMEM;

	snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
		 "MP-NPU Clock");
	clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
	snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
	clock->h_clock.freq_mhz = ndev->hclk_freq;

	if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
		ret = -EFAULT;

	kfree(clock);
	return ret;
}

static int aie2_get_hwctx_status(struct amdxdna_client *client,
				 struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_query_hwctx __user *buf;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_drm_query_hwctx *tmp;
	struct amdxdna_client *tmp_client;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;
	bool overflow = false;
	u32 req_bytes = 0;
	u32 hw_i = 0;
	int ret = 0;
	int idx;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	buf = u64_to_user_ptr(args->buffer);
	list_for_each_entry(tmp_client, &xdna->client_list, node) {
		idx = srcu_read_lock(&tmp_client->hwctx_srcu);
		amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) {
			req_bytes += sizeof(*tmp);
			if (args->buffer_size < req_bytes) {
				/* Continue iterating to get the required size */
				overflow = true;
				continue;
			}

			memset(tmp, 0, sizeof(*tmp));
			tmp->pid = tmp_client->pid;
			tmp->context_id = hwctx->id;
			tmp->start_col = hwctx->start_col;
			tmp->num_col = hwctx->num_col;
			tmp->command_submissions = hwctx->priv->seq;
			tmp->command_completions = hwctx->priv->completed;

			if (copy_to_user(&buf[hw_i], tmp, sizeof(*tmp))) {
				ret = -EFAULT;
				srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
				goto out;
			}
			hw_i++;
		}
		srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
	}

	if (overflow) {
		XDNA_ERR(xdna, "Invalid buffer size. Given: %u Need: %u.",
			 args->buffer_size, req_bytes);
		ret = -EINVAL;
	}

out:
	kfree(tmp);
	args->buffer_size = req_bytes;
	return ret;
}

static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
{
	struct amdxdna_dev *xdna = client->xdna;
	int ret, idx;

	if (!drm_dev_enter(&xdna->ddev, &idx))
		return -ENODEV;

	switch (args->param) {
	case DRM_AMDXDNA_QUERY_AIE_STATUS:
		ret = aie2_get_aie_status(client, args);
		break;
	case DRM_AMDXDNA_QUERY_AIE_METADATA:
		ret = aie2_get_aie_metadata(client, args);
		break;
	case DRM_AMDXDNA_QUERY_AIE_VERSION:
		ret = aie2_get_aie_version(client, args);
		break;
	case DRM_AMDXDNA_QUERY_CLOCK_METADATA:
		ret = aie2_get_clock_metadata(client, args);
		break;
	case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
		ret = aie2_get_hwctx_status(client, args);
		break;
	case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
		ret = aie2_get_firmware_version(client, args);
		break;
	case DRM_AMDXDNA_GET_POWER_MODE:
		ret = aie2_get_power_mode(client, args);
		break;
	default:
		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
		ret = -EOPNOTSUPP;
	}
	XDNA_DBG(xdna, "Got param %d", args->param);

	drm_dev_exit(idx);
	return ret;
}

static int aie2_set_power_mode(struct amdxdna_client *client,
			       struct amdxdna_drm_set_state *args)
{
	struct amdxdna_drm_set_power_mode power_state;
	enum amdxdna_power_mode_type power_mode;
	struct amdxdna_dev *xdna = client->xdna;

	if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
			   sizeof(power_state))) {
		XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
		return -EFAULT;
	}

	if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad)))
		return -EINVAL;

	power_mode = power_state.power_mode;
	if (power_mode > POWER_MODE_TURBO) {
		XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
		return -EINVAL;
	}

	return aie2_pm_set_mode(xdna->dev_handle, power_mode);
}

static int aie2_set_state(struct amdxdna_client *client,
			  struct amdxdna_drm_set_state *args)
{
	struct amdxdna_dev *xdna = client->xdna;
	int ret, idx;

	if (!drm_dev_enter(&xdna->ddev, &idx))
		return -ENODEV;

	switch (args->param) {
	case DRM_AMDXDNA_SET_POWER_MODE:
		ret = aie2_set_power_mode(client, args);
		break;
	default:
		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
		ret = -EOPNOTSUPP;
		break;
	}

	drm_dev_exit(idx);
	return ret;
}

const struct amdxdna_dev_ops aie2_ops = {
	.init = aie2_init,
	.fini = aie2_fini,
	.resume = aie2_hw_start,
	.suspend = aie2_hw_stop,
	.get_aie_info = aie2_get_info,
	.set_aie_state = aie2_set_state,
	.hwctx_init = aie2_hwctx_init,
	.hwctx_fini = aie2_hwctx_fini,
	.hwctx_config = aie2_hwctx_config,
	.cmd_submit = aie2_cmd_submit,
	.hmm_invalidate = aie2_hmm_invalidate,
	.hwctx_suspend = aie2_hwctx_suspend,
	.hwctx_resume = aie2_hwctx_resume,
};
297
drivers/accel/amdxdna/aie2_pci.h
Normal file
@@ -0,0 +1,297 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AIE2_PCI_H_
#define _AIE2_PCI_H_

#include <drm/amdxdna_accel.h>
#include <linux/semaphore.h>

#include "amdxdna_mailbox.h"

#define AIE2_INTERVAL	20000	/* us */
#define AIE2_TIMEOUT	1000000	/* us */

/* Firmware determines device memory base address and size */
#define AIE2_DEVM_BASE	0x4000000
#define AIE2_DEVM_SIZE	SZ_64M

#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))

#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)

#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)

#define SMU_REG(ndev, idx) \
({ \
	typeof(ndev) _ndev = ndev; \
	((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
})
#define SRAM_GET_ADDR(ndev, idx) \
({ \
	typeof(ndev) _ndev = ndev; \
	((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
})

#define CHAN_SLOT_SZ SZ_8K
#define MBOX_SIZE(ndev) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
	pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})

enum aie2_smu_reg_idx {
	SMU_CMD_REG = 0,
	SMU_ARG_REG,
	SMU_INTR_REG,
	SMU_RESP_REG,
	SMU_OUT_REG,
	SMU_MAX_REGS /* Keep this at the end */
};

enum aie2_sram_reg_idx {
	MBOX_CHANN_OFF = 0,
	FW_ALIVE_OFF,
	SRAM_MAX_INDEX /* Keep this at the end */
};

enum psp_reg_idx {
	PSP_CMD_REG = 0,
	PSP_ARG0_REG,
	PSP_ARG1_REG,
	PSP_ARG2_REG,
	PSP_NUM_IN_REGS, /* number of input registers */
	PSP_INTR_REG = PSP_NUM_IN_REGS,
	PSP_STATUS_REG,
	PSP_RESP_REG,
	PSP_MAX_REGS /* Keep this at the end */
};

struct amdxdna_client;
struct amdxdna_fw_ver;
struct amdxdna_hwctx;
struct amdxdna_sched_job;

struct psp_config {
	const void *fw_buf;
	u32 fw_size;
	void __iomem *psp_regs[PSP_MAX_REGS];
};

struct aie_version {
	u16 major;
	u16 minor;
};

struct aie_tile_metadata {
	u16 row_count;
	u16 row_start;
	u16 dma_channel_count;
	u16 lock_count;
	u16 event_reg_count;
};

struct aie_metadata {
	u32 size;
	u16 cols;
	u16 rows;
	struct aie_version version;
	struct aie_tile_metadata core;
	struct aie_tile_metadata mem;
	struct aie_tile_metadata shim;
};

enum rt_config_category {
	AIE2_RT_CFG_INIT,
	AIE2_RT_CFG_CLK_GATING,
};

struct rt_config {
	u32 type;
	u32 value;
	u32 category;
};

struct dpm_clk_freq {
	u32 npuclk;
	u32 hclk;
};

/*
 * Define the maximum number of pending commands in a hardware context.
 * Must be power of 2!
 */
#define HWCTX_MAX_CMDS		4
#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
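
A quick worked example of the mask above: with HWCTX_MAX_CMDS = 4 the mask is 0x3, so monotonically increasing sequence numbers 0, 1, 2, 3, 4, 5 map to command-buffer slots 0, 1, 2, 3, 0, 1. The power-of-2 requirement in the comment is what lets this single AND stand in for a modulo as the ring wraps.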

struct amdxdna_hwctx_priv {
	struct amdxdna_gem_obj *heap;
	void *mbox_chann;

	struct drm_gpu_scheduler sched;
	struct drm_sched_entity entity;

	struct mutex io_lock; /* protect seq and cmd order */
	struct wait_queue_head job_free_wq;
	u32 num_pending;
	u64 seq;
	struct semaphore job_sem;
	bool job_done;

	/* Completed job counter */
	u64 completed;

	struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
	struct drm_syncobj *syncobj;
};

enum aie2_dev_status {
	AIE2_DEV_UNINIT,
	AIE2_DEV_INIT,
	AIE2_DEV_START,
};

struct amdxdna_dev_hdl {
	struct amdxdna_dev *xdna;
	const struct amdxdna_dev_priv *priv;
	void __iomem *sram_base;
	void __iomem *smu_base;
	void __iomem *mbox_base;
	struct psp_device *psp_hdl;

	struct xdna_mailbox_chann_res mgmt_x2i;
	struct xdna_mailbox_chann_res mgmt_i2x;
	u32 mgmt_chan_idx;
	u32 mgmt_prot_major;
	u32 mgmt_prot_minor;

	u32 total_col;
	struct aie_version version;
	struct aie_metadata metadata;

	/* power management and clock */
	enum amdxdna_power_mode_type pw_mode;
	u32 dpm_level;
	u32 dft_dpm_level;
	u32 max_dpm_level;
	u32 clk_gating;
	u32 npuclk_freq;
	u32 hclk_freq;

	/* Mailbox and the management channel */
	struct mailbox *mbox;
	struct mailbox_channel *mgmt_chann;
	struct async_events *async_events;

	enum aie2_dev_status dev_status;
	u32 hwctx_num;
};

#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
	[reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}

struct aie2_bar_off_pair {
	int bar_idx;
	u32 offset;
};

struct aie2_hw_ops {
	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};

struct amdxdna_dev_priv {
	const char *fw_path;
	u64 protocol_major;
	u64 protocol_minor;
	const struct rt_config *rt_config;
	const struct dpm_clk_freq *dpm_clk_tbl;

#define COL_ALIGN_NONE   0
#define COL_ALIGN_NATURE 1
	u32 col_align;
	u32 mbox_dev_addr;
	/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
	u32 mbox_size;
	u32 sram_dev_addr;
	struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
	struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
	struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
	struct aie2_hw_ops hw_ops;
};

extern const struct amdxdna_dev_ops aie2_ops;

int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
		     enum rt_config_category category, u32 *val);

/* aie2 npu hw config */
extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];

/* aie2_smu.c */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);

/* aie2_pm.c */
int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);

/* aie2_psp.c */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
int aie2_psp_start(struct psp_device *psp);
void aie2_psp_stop(struct psp_device *psp);

/* aie2_error.c */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);

/* aie2_message.c */
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
				struct amdxdna_fw_ver *fw_ver);
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, const u32 *, size_t));
int aie2_config_cu(struct amdxdna_hwctx *hwctx);
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, const u32 *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
				struct amdxdna_sched_job *job,
				int (*notify_cb)(void *, const u32 *, size_t));
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
			       struct amdxdna_sched_job *job,
			       int (*notify_cb)(void *, const u32 *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, const u32 *, size_t));

/* aie2_hwctx.c */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
void aie2_restart_ctx(struct amdxdna_client *client);

#endif /* _AIE2_PCI_H_ */
108
drivers/accel/amdxdna/aie2_pm.c
Normal file
@@ -0,0 +1,108 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>

#include "aie2_pci.h"
#include "amdxdna_pci_drv.h"

#define AIE2_CLK_GATING_ENABLE	1
#define AIE2_CLK_GATING_DISABLE	0

static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
{
	int ret;

	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
	if (ret)
		return ret;

	ndev->clk_gating = val;
	return 0;
}

int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
{
	int ret;

	if (ndev->dev_status != AIE2_DEV_UNINIT) {
		/* Resume device */
		ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
		if (ret)
			return ret;

		ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
		if (ret)
			return ret;

		return 0;
	}

	while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
		ndev->max_dpm_level++;
	ndev->max_dpm_level--;

	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
	if (ret)
		return ret;

	ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
	if (ret)
		return ret;

	ndev->pw_mode = POWER_MODE_DEFAULT;
	ndev->dft_dpm_level = ndev->max_dpm_level;

	return 0;
}
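
The table walk in aie2_pm_init() relies on the dpm_clk_tbl array being terminated by an entry whose hclk is 0. A standalone sketch of the same scan, compilable as ordinary C with a local copy of the struct and hypothetical table values (illustrative, not driver code):

#include <stdio.h>

struct dpm_clk_freq_sketch { unsigned int npuclk, hclk; };

int main(void)
{
	/* hypothetical zero-terminated DPM table */
	struct dpm_clk_freq_sketch tbl[] = {
		{ 400, 400 }, { 800, 800 }, { 1200, 1200 }, { 0, 0 },
	};
	unsigned int level = 0;

	while (tbl[level].hclk)
		level++;
	level--;	/* index of the last non-zero entry */

	printf("max dpm level = %u\n", level);	/* prints 2 */
	return 0;
}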

int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	u32 clk_gating, dpm_level;
	int ret;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

	if (ndev->pw_mode == target)
		return 0;

	switch (target) {
	case POWER_MODE_TURBO:
		if (ndev->hwctx_num) {
			XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
			return -EINVAL;
		}

		clk_gating = AIE2_CLK_GATING_DISABLE;
		dpm_level = ndev->max_dpm_level;
		break;
	case POWER_MODE_HIGH:
		clk_gating = AIE2_CLK_GATING_ENABLE;
		dpm_level = ndev->max_dpm_level;
		break;
	case POWER_MODE_DEFAULT:
		clk_gating = AIE2_CLK_GATING_ENABLE;
		dpm_level = ndev->dft_dpm_level;
		break;
	default:
		return -EOPNOTSUPP;
	}

	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
	if (ret)
		return ret;

	ret = aie2_pm_set_clk_gating(ndev, clk_gating);
	if (ret)
		return ret;

	ndev->pw_mode = target;

	return 0;
}
146
drivers/accel/amdxdna/aie2_psp.c
Normal file
@@ -0,0 +1,146 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#include <drm/drm_device.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/bitfield.h>
#include <linux/iopoll.h>

#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

#define PSP_STATUS_READY	BIT(31)

/* PSP commands */
#define PSP_VALIDATE		1
#define PSP_START		2
#define PSP_RELEASE_TMR		3

/* PSP special arguments */
#define PSP_START_COPY_FW	1

/* PSP response error code */
#define PSP_ERROR_CANCEL	0xFFFF0002
#define PSP_ERROR_BAD_STATE	0xFFFF0007

#define PSP_FW_ALIGN		0x10000
#define PSP_POLL_INTERVAL	20000	/* us */
#define PSP_POLL_TIMEOUT	1000000	/* us */

#define PSP_REG(p, reg) ((p)->psp_regs[reg])

struct psp_device {
	struct drm_device	*ddev;
	struct psp_config	conf;
	u32			fw_buf_sz;
	u64			fw_paddr;
	void			*fw_buffer;
	void __iomem		*psp_regs[PSP_MAX_REGS];
};

static int psp_exec(struct psp_device *psp, u32 *reg_vals)
{
	u32 resp_code;
	int ret, i;
	u32 ready;

	/* Write command and argument registers */
	for (i = 0; i < PSP_NUM_IN_REGS; i++)
		writel(reg_vals[i], PSP_REG(psp, i));

	/* clear and set PSP INTR register to kick off */
	writel(0, PSP_REG(psp, PSP_INTR_REG));
	writel(1, PSP_REG(psp, PSP_INTR_REG));

	/* PSP should be busy. Wait for ready, so we know task is done. */
	ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
				 FIELD_GET(PSP_STATUS_READY, ready),
				 PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
	if (ret) {
		drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret);
		return ret;
	}

	resp_code = readl(PSP_REG(psp, PSP_RESP_REG));
	if (resp_code) {
		drm_err(psp->ddev, "fw return error 0x%x", resp_code);
		return -EIO;
	}

	return 0;
}

void aie2_psp_stop(struct psp_device *psp)
{
	u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
	int ret;

	ret = psp_exec(psp, reg_vals);
	if (ret)
		drm_err(psp->ddev, "release tmr failed, ret %d", ret);
}

int aie2_psp_start(struct psp_device *psp)
{
	u32 reg_vals[PSP_NUM_IN_REGS];
	int ret;

	reg_vals[0] = PSP_VALIDATE;
	reg_vals[1] = lower_32_bits(psp->fw_paddr);
	reg_vals[2] = upper_32_bits(psp->fw_paddr);
	reg_vals[3] = psp->fw_buf_sz;

	ret = psp_exec(psp, reg_vals);
	if (ret) {
		drm_err(psp->ddev, "failed to validate fw, ret %d", ret);
		return ret;
	}

	memset(reg_vals, 0, sizeof(reg_vals));
	reg_vals[0] = PSP_START;
	reg_vals[1] = PSP_START_COPY_FW;
	ret = psp_exec(psp, reg_vals);
	if (ret) {
		drm_err(psp->ddev, "failed to start fw, ret %d", ret);
		return ret;
	}

	return 0;
}

struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf)
{
	struct psp_device *psp;
	u64 offset;

	psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
	if (!psp)
		return NULL;

	psp->ddev = ddev;
	memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));

	psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
	psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL);
	if (!psp->fw_buffer) {
		drm_err(ddev, "no memory for fw buffer");
		return NULL;
	}

	/*
	 * The AMD Platform Security Processor (PSP) requires a host physical
	 * address to load NPU firmware.
	 */
	psp->fw_paddr = virt_to_phys(psp->fw_buffer);
	offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
	psp->fw_paddr += offset;
	memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);

	return psp;
}
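
Read together, the intended life cycle is: aie2m_psp_create() stages the firmware image into an aligned buffer, aie2_psp_start() issues PSP_VALIDATE followed by PSP_START, and aie2_psp_stop() releases the TMR on teardown. A minimal sketch, assuming a conf already populated with register mappings and a firmware image (the function name is illustrative):

/* Hypothetical driver-init fragment; error handling trimmed for brevity. */
static int example_psp_bringup(struct drm_device *ddev, struct psp_config *conf)
{
	struct psp_device *psp;
	int ret;

	psp = aie2m_psp_create(ddev, conf);	/* copies fw into an aligned buffer */
	if (!psp)
		return -ENOMEM;

	ret = aie2_psp_start(psp);		/* PSP_VALIDATE, then PSP_START */
	if (ret)
		return ret;

	/* ... device runs; on teardown: */
	aie2_psp_stop(psp);			/* PSP_RELEASE_TMR */
	return 0;
}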
134
drivers/accel/amdxdna/aie2_smu.c
Normal file
@ -0,0 +1,134 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#include <drm/drm_device.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/iopoll.h>

#include "aie2_pci.h"
#include "amdxdna_pci_drv.h"

#define SMU_RESULT_OK		1

/* SMU commands */
#define AIE2_SMU_POWER_ON		0x3
#define AIE2_SMU_POWER_OFF		0x4
#define AIE2_SMU_SET_MPNPUCLK_FREQ	0x5
#define AIE2_SMU_SET_HCLK_FREQ		0x6
#define AIE2_SMU_SET_SOFT_DPMLEVEL	0x7
#define AIE2_SMU_SET_HARD_DPMLEVEL	0x8

static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
			 u32 reg_arg, u32 *out)
{
	u32 resp;
	int ret;

	writel(0, SMU_REG(ndev, SMU_RESP_REG));
	writel(reg_arg, SMU_REG(ndev, SMU_ARG_REG));
	writel(reg_cmd, SMU_REG(ndev, SMU_CMD_REG));

	/* Clear and set SMU_INTR_REG to kick off */
	writel(0, SMU_REG(ndev, SMU_INTR_REG));
	writel(1, SMU_REG(ndev, SMU_INTR_REG));

	ret = readx_poll_timeout(readl, SMU_REG(ndev, SMU_RESP_REG), resp,
				 resp, AIE2_INTERVAL, AIE2_TIMEOUT);
	if (ret) {
		XDNA_ERR(ndev->xdna, "smu cmd %d timed out", reg_cmd);
		return ret;
	}

	if (out)
		*out = readl(SMU_REG(ndev, SMU_OUT_REG));

	if (resp != SMU_RESULT_OK) {
		XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
		return -EINVAL;
	}

	return 0;
}

int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
	u32 freq;
	int ret;

	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
			    ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
			 ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
	}
	ndev->npuclk_freq = freq;

	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
			    ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
			 ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
	}
	ndev->hclk_freq = freq;
	ndev->dpm_level = dpm_level;

	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
		 ndev->npuclk_freq, ndev->hclk_freq);

	return 0;
}

int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
	int ret;

	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
			 dpm_level, ret);
		return ret;
	}

	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
			 dpm_level, ret);
		return ret;
	}

	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
	ndev->dpm_level = dpm_level;

	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
		 ndev->npuclk_freq, ndev->hclk_freq);

	return 0;
}

int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
{
	int ret;

	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
		return ret;
	}

	return 0;
}

void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
{
	int ret;

	ndev->priv->hw_ops.set_dpm(ndev, 0);
	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
	if (ret)
		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
}
380
drivers/accel/amdxdna/aie2_solver.c
Normal file
@ -0,0 +1,380 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#include <drm/drm_device.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/slab.h>

#include "aie2_solver.h"

struct partition_node {
	struct list_head	list;
	u32			nshared;	/* # shared requests */
	u32			start_col;	/* start column */
	u32			ncols;		/* # columns */
	bool			exclusive;	/* can not be shared if set */
};

struct solver_node {
	struct list_head	list;
	u64			rid;		/* Request ID from consumer */

	struct partition_node	*pt_node;
	void			*cb_arg;
	u32			dpm_level;
	u32			cols_len;
	u32			start_cols[] __counted_by(cols_len);
};

struct solver_rgroup {
	u32			rgid;
	u32			nnode;
	u32			npartition_node;

	DECLARE_BITMAP(resbit, XRS_MAX_COL);
	struct list_head	node_list;
	struct list_head	pt_node_list;
};

struct solver_state {
	struct solver_rgroup	rgp;
	struct init_config	cfg;
	struct xrs_action_ops	*actions;
};

static u32 calculate_gops(struct aie_qos *rqos)
{
	u32 service_rate = 0;

	if (rqos->latency)
		service_rate = (1000 / rqos->latency);

	if (rqos->fps > service_rate)
		return rqos->fps * rqos->gops;

	return service_rate * rqos->gops;
}

/*
 * qos_meet() - Check whether the QoS request can be met.
 */
static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
{
	u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor;

	if (request_gops <= cgops)
		return 0;

	return -EINVAL;
}
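
As a worked example of the arithmetic above (numbers are illustrative): a request with gops = 2, fps = 30 and latency = 100 ms gives service_rate = 1000 / 100 = 10; since fps (30) exceeds that, calculate_gops() returns 30 * 2 = 60, and qos_meet() accepts the request only if 60 * sys_eff_factor fits within the partition's capability cgops.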

/*
 * sanity_check() - Do a basic sanity check on allocation request.
 */
static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
{
	struct cdo_parts *cdop = &req->cdo;
	struct aie_qos *rqos = &req->rqos;
	u32 cu_clk_freq;

	if (cdop->ncols > xrs->cfg.total_col)
		return -EINVAL;

	/*
	 * Check that we can find at least one CDO group that meets the
	 * GOPs requirement.
	 */
	cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1];

	if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000))
		return -EINVAL;

	return 0;
}

static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
{
	/*
	 * gops is retrieved from the xmodel, so it's always set.
	 * fps and latency are the configurable params from the application.
	 */
	if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0))
		return true;

	return false;
}

static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
{
	struct solver_rgroup *rgp = &xrs->rgp;
	struct cdo_parts *cdop = &req->cdo;
	struct aie_qos *rqos = &req->rqos;
	u32 freq, max_dpm_level, level;
	struct solver_node *node;

	max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
	/* If no QoS parameters are passed, set it to the max DPM level */
	if (!is_valid_qos_dpm_params(rqos)) {
		level = max_dpm_level;
		goto set_dpm;
	}

	/* Find one CDO group that meets the GOPs requirement. */
	for (level = 0; level < max_dpm_level; level++) {
		freq = xrs->cfg.clk_list.cu_clk_list[level];
		if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
			break;
	}

	/* Set the DPM level which fits all the sessions */
	list_for_each_entry(node, &rgp->node_list, list) {
		if (node->dpm_level > level)
			level = node->dpm_level;
	}

set_dpm:
	*dpm_level = level;
	return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
}

static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
{
	struct solver_node *node;

	list_for_each_entry(node, &rgp->node_list, list) {
		if (node->rid == rid)
			return node;
	}

	return NULL;
}

static void remove_partition_node(struct solver_rgroup *rgp,
				  struct partition_node *pt_node)
{
	pt_node->nshared--;
	if (pt_node->nshared > 0)
		return;

	list_del(&pt_node->list);
	rgp->npartition_node--;

	bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols);
	kfree(pt_node);
}

static void remove_solver_node(struct solver_rgroup *rgp,
			       struct solver_node *node)
{
	list_del(&node->list);
	rgp->nnode--;

	if (node->pt_node)
		remove_partition_node(rgp, node->pt_node);

	kfree(node);
}

static int get_free_partition(struct solver_state *xrs,
			      struct solver_node *snode,
			      struct alloc_requests *req)
{
	struct partition_node *pt_node;
	u32 ncols = req->cdo.ncols;
	u32 col, i;

	for (i = 0; i < snode->cols_len; i++) {
		col = snode->start_cols[i];
		if (find_next_bit(xrs->rgp.resbit, XRS_MAX_COL, col) >= col + ncols)
			break;
	}

	if (i == snode->cols_len)
		return -ENODEV;

	pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL);
	if (!pt_node)
		return -ENOMEM;

	pt_node->nshared = 1;
	pt_node->start_col = col;
	pt_node->ncols = ncols;

	/*
	 * Always set exclusive to false for now.
	 */
	pt_node->exclusive = false;

	list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
	xrs->rgp.npartition_node++;
	bitmap_set(xrs->rgp.resbit, pt_node->start_col, pt_node->ncols);

	snode->pt_node = pt_node;

	return 0;
}

static int allocate_partition(struct solver_state *xrs,
			      struct solver_node *snode,
			      struct alloc_requests *req)
{
	struct partition_node *pt_node, *rpt_node = NULL;
	int idx, ret;

	ret = get_free_partition(xrs, snode, req);
	if (!ret)
		return ret;

	/* try to get a share-able partition */
	list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) {
		if (pt_node->exclusive)
			continue;

		if (rpt_node && pt_node->nshared >= rpt_node->nshared)
			continue;

		for (idx = 0; idx < snode->cols_len; idx++) {
			if (snode->start_cols[idx] != pt_node->start_col)
				continue;

			if (req->cdo.ncols != pt_node->ncols)
				continue;

			rpt_node = pt_node;
			break;
		}
	}

	if (!rpt_node)
		return -ENODEV;

	rpt_node->nshared++;
	snode->pt_node = rpt_node;

	return 0;
}

static struct solver_node *create_solver_node(struct solver_state *xrs,
					      struct alloc_requests *req)
{
	struct cdo_parts *cdop = &req->cdo;
	struct solver_node *node;
	int ret;

	node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL);
	if (!node)
		return ERR_PTR(-ENOMEM);

	node->rid = req->rid;
	node->cols_len = cdop->cols_len;
	memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32));

	ret = allocate_partition(xrs, node, req);
	if (ret)
		goto free_node;

	list_add_tail(&node->list, &xrs->rgp.node_list);
	xrs->rgp.nnode++;
	return node;

free_node:
	kfree(node);
	return ERR_PTR(ret);
}

static void fill_load_action(struct solver_state *xrs,
			     struct solver_node *snode,
			     struct xrs_action_load *action)
{
	action->rid = snode->rid;
	action->part.start_col = snode->pt_node->start_col;
	action->part.ncols = snode->pt_node->ncols;
}

int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
{
	struct xrs_action_load load_act;
	struct solver_node *snode;
	struct solver_state *xrs;
	u32 dpm_level;
	int ret;

	xrs = (struct solver_state *)hdl;

	ret = sanity_check(xrs, req);
	if (ret) {
		drm_err(xrs->cfg.ddev, "invalid request");
		return ret;
	}

	if (rg_search_node(&xrs->rgp, req->rid)) {
		drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid);
		return -EEXIST;
	}

	snode = create_solver_node(xrs, req);
	if (IS_ERR(snode))
		return PTR_ERR(snode);

	fill_load_action(xrs, snode, &load_act);
	ret = xrs->cfg.actions->load(cb_arg, &load_act);
	if (ret)
		goto free_node;

	ret = set_dpm_level(xrs, req, &dpm_level);
	if (ret)
		goto free_node;

	snode->dpm_level = dpm_level;
	snode->cb_arg = cb_arg;

	drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
		snode->pt_node->start_col, snode->pt_node->ncols);

	return 0;

free_node:
	remove_solver_node(&xrs->rgp, snode);

	return ret;
}

int xrs_release_resource(void *hdl, u64 rid)
{
	struct solver_state *xrs = hdl;
	struct solver_node *node;

	node = rg_search_node(&xrs->rgp, rid);
	if (!node) {
		drm_err(xrs->cfg.ddev, "node not exist");
		return -ENODEV;
	}

	xrs->cfg.actions->unload(node->cb_arg);
	remove_solver_node(&xrs->rgp, node);

	return 0;
}

void *xrsm_init(struct init_config *cfg)
{
	struct solver_rgroup *rgp;
	struct solver_state *xrs;

	xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL);
	if (!xrs)
		return NULL;

	memcpy(&xrs->cfg, cfg, sizeof(*cfg));

	rgp = &xrs->rgp;
	INIT_LIST_HEAD(&rgp->node_list);
	INIT_LIST_HEAD(&rgp->pt_node_list);

	return xrs;
}
155
drivers/accel/amdxdna/aie2_solver.h
Normal file
@ -0,0 +1,155 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AIE2_SOLVER_H
#define _AIE2_SOLVER_H

#define XRS_MAX_COL 128

/*
 * Structure used to describe a partition. A partition is a column-based
 * allocation unit described by its start column and number of columns.
 */
struct aie_part {
	u32	start_col;
	u32	ncols;
};

/*
 * The QoS capabilities of a given AIE partition.
 */
struct aie_qos_cap {
	u32	opc;		/* operations per cycle */
	u32	dma_bw;		/* DMA bandwidth */
};

/*
 * QoS requirement of a resource allocation.
 */
struct aie_qos {
	u32	gops;		/* Giga operations */
	u32	fps;		/* Frames per second */
	u32	dma_bw;		/* DMA bandwidth */
	u32	latency;	/* Frame response latency */
	u32	exec_time;	/* Frame execution time */
	u32	priority;	/* Request priority */
};

/*
 * Structure used to describe a relocatable CDO (Configuration Data Object).
 */
struct cdo_parts {
	u32			*start_cols;	/* Start column array */
	u32			cols_len;	/* Length of start column array */
	u32			ncols;		/* # of columns */
	struct aie_qos_cap	qos_cap;	/* CDO QoS capabilities */
};

/*
 * Structure used to describe a request to allocate.
 */
struct alloc_requests {
	u64			rid;
	struct cdo_parts	cdo;
	struct aie_qos		rqos;		/* Requested QoS */
};

/*
 * Load callback argument
 */
struct xrs_action_load {
	u32			rid;
	struct aie_part		part;
};

/*
 * Define the power levels available.
 *
 * POWER_LEVEL_MIN:
 *     Lowest power level. Usually set when all actions are unloaded.
 *
 * POWER_LEVEL_n:
 *     Power levels 0 - n; each level is a step increase in system frequencies.
 */
enum power_level {
	POWER_LEVEL_MIN = 0x0,
	POWER_LEVEL_0   = 0x1,
	POWER_LEVEL_1   = 0x2,
	POWER_LEVEL_2   = 0x3,
	POWER_LEVEL_3   = 0x4,
	POWER_LEVEL_4   = 0x5,
	POWER_LEVEL_5   = 0x6,
	POWER_LEVEL_6   = 0x7,
	POWER_LEVEL_7   = 0x8,
	POWER_LEVEL_NUM,
};

/*
 * Structure used to describe the frequency table.
 * The resource solver chooses a frequency from the table
 * to meet the QoS requirements.
 */
struct clk_list_info {
	u32	num_levels;			/* available power levels */
	u32	cu_clk_list[POWER_LEVEL_NUM];	/* available AIE clock frequencies in MHz */
};

struct xrs_action_ops {
	int (*load)(void *cb_arg, struct xrs_action_load *action);
	int (*unload)(void *cb_arg);
	int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
};

/*
 * Structure used to describe information for solver during initialization.
 */
struct init_config {
	u32			total_col;
	u32			sys_eff_factor;	/* system efficiency factor */
	u32			latency_adj;	/* latency adjustment in ms */
	struct clk_list_info	clk_list;	/* List of frequencies available in system */
	struct drm_device	*ddev;
	struct xrs_action_ops	*actions;
};

/*
 * xrsm_init() - Register the resource solver. A resource solver client needs
 *               to call this function to register itself.
 *
 * @cfg:	The system metrics for the resource solver to use
 *
 * Return:	A resource solver handle
 *
 * Note: We should only create one handle per AIE array to be managed.
 */
void *xrsm_init(struct init_config *cfg);

/*
 * xrs_allocate_resource() - Request to allocate resources for a given context
 *                           and partition metadata. (See struct part_meta)
 *
 * @hdl:	Resource solver handle obtained from xrsm_init()
 * @req:	Input to the resource solver, including the request id
 *		and partition metadata
 * @cb_arg:	Callback argument pointer
 *
 * Return:	0 on success, or a standard error number on failure
 *
 * Note:
 *      There is no locking inside the resource solver, so it is the
 *      caller's responsibility to lock down XCLBINs and grab the
 *      necessary locks.
 */
int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg);

/*
 * xrs_release_resource() - Request to free resources for a given context.
 *
 * @hdl:	Resource solver handle obtained from xrsm_init()
 * @rid:	The request ID identifying the requesting context
 */
int xrs_release_resource(void *hdl, u64 rid);
#endif /* _AIE2_SOLVER_H */
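
To make the contract above concrete, here is a minimal sketch of a hypothetical client driving the solver. The callback names (my_load, my_unload, my_set_dpm), the request values, and the wrapper function are illustrative only, not part of this patch:

/* Hypothetical solver client; all names and values are illustrative. */
static int my_load(void *cb_arg, struct xrs_action_load *act) { return 0; }
static int my_unload(void *cb_arg) { return 0; }
static int my_set_dpm(struct drm_device *ddev, u32 level) { return 0; }

static struct xrs_action_ops my_ops = {
	.load = my_load,
	.unload = my_unload,
	.set_dft_dpm_level = my_set_dpm,
};

static int example_solver_usage(struct drm_device *ddev)
{
	u32 start_cols[] = { 0, 1, 2, 3 };	/* candidate start columns */
	struct init_config cfg = {
		.total_col = 4,
		.sys_eff_factor = 1,
		.clk_list = { .num_levels = 2, .cu_clk_list = { 800, 1000 } },
		.ddev = ddev,
		.actions = &my_ops,
	};
	struct alloc_requests req = {
		.rid = 1,
		.cdo = { .start_cols = start_cols, .cols_len = 4,
			 .ncols = 1, .qos_cap = { .opc = 2048 } },
		.rqos = { .gops = 2, .fps = 30 },
	};
	void *hdl;
	int ret;

	hdl = xrsm_init(&cfg);
	if (!hdl)
		return -ENOMEM;

	ret = xrs_allocate_resource(hdl, &req, NULL);	/* invokes my_load() */
	if (ret)
		return ret;

	return xrs_release_resource(hdl, req.rid);	/* invokes my_unload() */
}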
550
drivers/accel/amdxdna/amdxdna_ctx.c
Normal file
@ -0,0 +1,550 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/xarray.h>
#include <trace/events/amdxdna.h>

#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"

#define MAX_HWCTX_ID		255
#define MAX_ARG_COUNT		4095

struct amdxdna_fence {
	struct dma_fence	base;
	spinlock_t		lock; /* for base */
	struct amdxdna_hwctx	*hwctx;
};

static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
{
	return KBUILD_MODNAME;
}

static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
{
	struct amdxdna_fence *xdna_fence;

	xdna_fence = container_of(fence, struct amdxdna_fence, base);

	return xdna_fence->hwctx->name;
}

static const struct dma_fence_ops fence_ops = {
	.get_driver_name = amdxdna_fence_get_driver_name,
	.get_timeline_name = amdxdna_fence_get_timeline_name,
};

static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->hwctx = hwctx;
	spin_lock_init(&fence->lock);
	dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0);
	return &fence->base;
}

void amdxdna_hwctx_suspend(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
		xdna->dev_info->ops->hwctx_suspend(hwctx);
	mutex_unlock(&client->hwctx_lock);
}

void amdxdna_hwctx_resume(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
		xdna->dev_info->ops->hwctx_resume(hwctx);
	mutex_unlock(&client->hwctx_lock);
}

static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
				      struct srcu_struct *ss)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	synchronize_srcu(ss);

	/* At this point, user is not able to submit new commands */
	mutex_lock(&xdna->dev_lock);
	xdna->dev_info->ops->hwctx_fini(hwctx);
	mutex_unlock(&xdna->dev_lock);

	kfree(hwctx->name);
	kfree(hwctx);
}

void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;
	u32 num_masks, count;

	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
		num_masks = 0;
	else
		num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);

	if (size) {
		count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
		if (unlikely(count <= num_masks)) {
			*size = 0;
			return NULL;
		}
		*size = (count - num_masks) * sizeof(u32);
	}
	return &cmd->data[num_masks];
}

int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;
	u32 num_masks, i;
	u32 *cu_mask;

	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
		return -1;

	num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
	cu_mask = cmd->data;
	for (i = 0; i < num_masks; i++) {
		if (cu_mask[i])
			return ffs(cu_mask[i]) - 1;
	}

	return -1;
}

/*
 * This should be called in close() and remove(). DO NOT call in other syscalls.
 * This guarantees that the hwctx and its resources are released even if the
 * user never calls amdxdna_drm_destroy_hwctx_ioctl.
 */
void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
{
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
		XDNA_DBG(client->xdna, "PID %d close HW context %d",
			 client->pid, hwctx->id);
		xa_erase(&client->hwctx_xa, hwctx->id);
		mutex_unlock(&client->hwctx_lock);
		amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
		mutex_lock(&client->hwctx_lock);
	}
	mutex_unlock(&client->hwctx_lock);
}

int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_create_hwctx *args = data;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_hwctx *hwctx;
	int ret, idx;

	if (args->ext || args->ext_flags)
		return -EINVAL;

	if (!drm_dev_enter(dev, &idx))
		return -ENODEV;

	hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
	if (!hwctx) {
		ret = -ENOMEM;
		goto exit;
	}

	if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
		XDNA_ERR(xdna, "Access QoS info failed");
		ret = -EFAULT;
		goto free_hwctx;
	}

	hwctx->client = client;
	hwctx->fw_ctx_id = -1;
	hwctx->num_tiles = args->num_tiles;
	hwctx->mem_size = args->mem_size;
	hwctx->max_opc = args->max_opc;
	ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
			      XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
			      &client->next_hwctxid, GFP_KERNEL);
	if (ret < 0) {
		XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
		goto free_hwctx;
	}

	hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
	if (!hwctx->name) {
		ret = -ENOMEM;
		goto rm_id;
	}

	mutex_lock(&xdna->dev_lock);
	ret = xdna->dev_info->ops->hwctx_init(hwctx);
	if (ret) {
		mutex_unlock(&xdna->dev_lock);
		XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
		goto free_name;
	}
	args->handle = hwctx->id;
	args->syncobj_handle = hwctx->syncobj_hdl;
	mutex_unlock(&xdna->dev_lock);

	XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
	drm_dev_exit(idx);
	return 0;

free_name:
	kfree(hwctx->name);
rm_id:
	xa_erase(&client->hwctx_xa, hwctx->id);
free_hwctx:
	kfree(hwctx);
exit:
	drm_dev_exit(idx);
	return ret;
}

int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_destroy_hwctx *args = data;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_hwctx *hwctx;
	int ret = 0, idx;

	if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
		return -EINVAL;

	if (!drm_dev_enter(dev, &idx))
		return -ENODEV;

	hwctx = xa_erase(&client->hwctx_xa, args->handle);
	if (!hwctx) {
		ret = -EINVAL;
		XDNA_DBG(xdna, "PID %d HW context %d not exist",
			 client->pid, args->handle);
		goto out;
	}

	/*
	 * The pushed jobs are handled by DRM scheduler during destroy.
	 * SRCU to synchronize with exec command ioctls.
	 */
	amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);

	XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
out:
	drm_dev_exit(idx);
	return ret;
}

int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_config_hwctx *args = data;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_hwctx *hwctx;
	int ret, idx;
	u32 buf_size;
	void *buf;
	u64 val;

	if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
		return -EINVAL;

	if (!xdna->dev_info->ops->hwctx_config)
		return -EOPNOTSUPP;

	val = args->param_val;
	buf_size = args->param_val_size;

	switch (args->param_type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		/* For those types that param_val is pointer */
		if (buf_size > PAGE_SIZE) {
			XDNA_ERR(xdna, "Config CU param buffer too large");
			return -E2BIG;
		}

		/* Hwctx needs to keep buf */
		buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) {
			kfree(buf);
			return -EFAULT;
		}

		break;
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		/* For those types that param_val is a value */
		buf = NULL;
		buf_size = 0;
		break;
	default:
		XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type);
		return -EINVAL;
	}

	mutex_lock(&xdna->dev_lock);
	idx = srcu_read_lock(&client->hwctx_srcu);
	hwctx = xa_load(&client->hwctx_xa, args->handle);
	if (!hwctx) {
		XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
		ret = -EINVAL;
		goto unlock_srcu;
	}

	ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);

unlock_srcu:
	srcu_read_unlock(&client->hwctx_srcu, idx);
	mutex_unlock(&xdna->dev_lock);
	kfree(buf);
	return ret;
}

static void
amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
{
	int i;

	for (i = 0; i < job->bo_cnt; i++) {
		if (!job->bos[i])
			break;
		drm_gem_object_put(job->bos[i]);
	}
}

static int
amdxdna_arg_bos_lookup(struct amdxdna_client *client,
		       struct amdxdna_sched_job *job,
		       u32 *bo_hdls, u32 bo_cnt)
{
	struct drm_gem_object *gobj;
	int i, ret;

	job->bo_cnt = bo_cnt;
	for (i = 0; i < job->bo_cnt; i++) {
		struct amdxdna_gem_obj *abo;

		gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]);
		if (!gobj) {
			ret = -ENOENT;
			goto put_shmem_bo;
		}
		abo = to_xdna_obj(gobj);

		mutex_lock(&abo->lock);
		if (abo->pinned) {
			mutex_unlock(&abo->lock);
			job->bos[i] = gobj;
			continue;
		}

		ret = amdxdna_gem_pin_nolock(abo);
		if (ret) {
			mutex_unlock(&abo->lock);
			drm_gem_object_put(gobj);
			goto put_shmem_bo;
		}
		abo->pinned = true;
		mutex_unlock(&abo->lock);

		job->bos[i] = gobj;
	}

	return 0;

put_shmem_bo:
	amdxdna_arg_bos_put(job);
	return ret;
}

void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
{
	trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
	amdxdna_arg_bos_put(job);
	amdxdna_gem_put_obj(job->cmd_bo);
}

int amdxdna_cmd_submit(struct amdxdna_client *client,
		       u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
		       u32 hwctx_hdl, u64 *seq)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_sched_job *job;
	struct amdxdna_hwctx *hwctx;
	int ret, idx;

	XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt);
	job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
		job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
		if (!job->cmd_bo) {
			XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl);
			ret = -EINVAL;
			goto free_job;
		}
	} else {
		job->cmd_bo = NULL;
	}

	ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
	if (ret) {
		XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret);
		goto cmd_put;
	}

	idx = srcu_read_lock(&client->hwctx_srcu);
	hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
	if (!hwctx) {
		XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
			 client->pid, hwctx_hdl);
		ret = -EINVAL;
		goto unlock_srcu;
	}

	if (hwctx->status != HWCTX_STAT_READY) {
		XDNA_ERR(xdna, "HW Context is not ready");
		ret = -EINVAL;
		goto unlock_srcu;
	}

	job->hwctx = hwctx;
	job->mm = current->mm;

	job->fence = amdxdna_fence_create(hwctx);
	if (!job->fence) {
		XDNA_ERR(xdna, "Failed to create fence");
		ret = -ENOMEM;
		goto unlock_srcu;
	}
	kref_init(&job->refcnt);

	ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq);
	if (ret)
		goto put_fence;

	/*
	 * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated
	 * resource after synchronize_srcu(). The submitted jobs should be
	 * handled by the queue, for example DRM scheduler, in device layer.
	 * For here we can unlock SRCU.
	 */
	srcu_read_unlock(&client->hwctx_srcu, idx);
	trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed");

	return 0;

put_fence:
	dma_fence_put(job->fence);
unlock_srcu:
	srcu_read_unlock(&client->hwctx_srcu, idx);
	amdxdna_arg_bos_put(job);
cmd_put:
	amdxdna_gem_put_obj(job->cmd_bo);
free_job:
	kfree(job);
	return ret;
}

/*
 * The submit command ioctl submits a command to firmware. One firmware command
 * may contain multiple command BOs for processing as a whole.
 * The command sequence number is returned, which can be used with the wait
 * command ioctl.
 */
static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
				      struct amdxdna_drm_exec_cmd *args)
{
	struct amdxdna_dev *xdna = client->xdna;
	u32 *arg_bo_hdls;
	u32 cmd_bo_hdl;
	int ret;

	if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
		XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
		return -EINVAL;
	}

	/* Only support single command for now. */
	if (args->cmd_count != 1) {
		XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count);
		return -EINVAL;
	}

	cmd_bo_hdl = (u32)args->cmd_handles;
	arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
	if (!arg_bo_hdls)
		return -ENOMEM;
	ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
			     args->arg_count * sizeof(u32));
	if (ret) {
		ret = -EFAULT;
		goto free_cmd_bo_hdls;
	}

	ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
				 args->arg_count, args->hwctx, &args->seq);
	if (ret)
		XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);

free_cmd_bo_hdls:
	kfree(arg_bo_hdls);
	if (!ret)
		XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq);
	return ret;
}

int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_exec_cmd *args = data;

	if (args->ext || args->ext_flags)
		return -EINVAL;

	switch (args->type) {
	case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
		return amdxdna_drm_submit_execbuf(client, args);
	}

	XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
	return -EINVAL;
}
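
From userspace, the flow described above is: fill a struct amdxdna_drm_exec_cmd with one command BO handle and an array of argument BO handles, issue the exec ioctl, then wait on the returned sequence number. A minimal sketch, assuming the uapi ioctl macro DRM_IOCTL_AMDXDNA_EXEC_CMD from <drm/amdxdna_accel.h>, an already-open device fd, and valid BO and hwctx handles:

/* Hypothetical userspace fragment; handles are assumed valid. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/amdxdna_accel.h>

static int example_submit(int fd, __u32 hwctx, __u32 cmd_bo, __u32 *arg_bos,
			  __u32 arg_cnt, __u64 *seq_out)
{
	struct amdxdna_drm_exec_cmd exec = {
		.hwctx = hwctx,
		.type = AMDXDNA_CMD_SUBMIT_EXEC_BUF,
		.cmd_handles = cmd_bo,			/* single command BO */
		.args = (__u64)(uintptr_t)arg_bos,
		.cmd_count = 1,
		.arg_count = arg_cnt,
	};

	if (ioctl(fd, DRM_IOCTL_AMDXDNA_EXEC_CMD, &exec))
		return -1;

	*seq_out = exec.seq;	/* usable with the wait command ioctl */
	return 0;
}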
162
drivers/accel/amdxdna/amdxdna_ctx.h
Normal file
@ -0,0 +1,162 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AMDXDNA_CTX_H_
#define _AMDXDNA_CTX_H_

#include <linux/bitfield.h>

#include "amdxdna_gem.h"

struct amdxdna_hwctx_priv;

enum ert_cmd_opcode {
	ERT_START_CU	= 0,
	ERT_CMD_CHAIN	= 19,
	ERT_START_NPU	= 20,
};

enum ert_cmd_state {
	ERT_CMD_STATE_INVALID,
	ERT_CMD_STATE_NEW,
	ERT_CMD_STATE_QUEUED,
	ERT_CMD_STATE_RUNNING,
	ERT_CMD_STATE_COMPLETED,
	ERT_CMD_STATE_ERROR,
	ERT_CMD_STATE_ABORT,
	ERT_CMD_STATE_SUBMITTED,
	ERT_CMD_STATE_TIMEOUT,
	ERT_CMD_STATE_NORESPONSE,
};

/*
 * Interpretation of the beginning of data payload for ERT_START_NPU in
 * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
 */
struct amdxdna_cmd_start_npu {
	u64 buffer;		/* instruction buffer address */
	u32 buffer_size;	/* size of buffer in bytes */
	u32 prop_count;		/* properties count */
	u32 prop_args[];	/* properties and regular kernel arguments */
};

/*
 * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
 * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
 */
struct amdxdna_cmd_chain {
	u32 command_count;
	u32 submit_index;
	u32 error_index;
	u32 reserved[3];
	u64 data[] __counted_by(command_count);
};

/* Exec buffer command header format */
#define AMDXDNA_CMD_STATE		GENMASK(3, 0)
#define AMDXDNA_CMD_EXTRA_CU_MASK	GENMASK(11, 10)
#define AMDXDNA_CMD_COUNT		GENMASK(22, 12)
#define AMDXDNA_CMD_OPCODE		GENMASK(27, 23)
struct amdxdna_cmd {
	u32 header;
	u32 data[];
};
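
The header word packs the state, extra-CU-mask count, payload word count and opcode into a single u32. A minimal sketch of composing and decoding it with the bitfield helpers (the values are illustrative):

/* Illustrative only: build a header for ERT_START_CU with 8 payload words. */
static u32 example_build_cmd_header(void)
{
	u32 header;

	header  = FIELD_PREP(AMDXDNA_CMD_STATE, ERT_CMD_STATE_NEW);
	header |= FIELD_PREP(AMDXDNA_CMD_EXTRA_CU_MASK, 0);	/* one CU mask word */
	header |= FIELD_PREP(AMDXDNA_CMD_COUNT, 8);		/* payload words */
	header |= FIELD_PREP(AMDXDNA_CMD_OPCODE, ERT_START_CU);

	/* Decoding uses the matching getter, e.g.: */
	WARN_ON(FIELD_GET(AMDXDNA_CMD_OPCODE, header) != ERT_START_CU);

	return header;
}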

struct amdxdna_hwctx {
	struct amdxdna_client		*client;
	struct amdxdna_hwctx_priv	*priv;
	char				*name;

	u32				id;
	u32				max_opc;
	u32				num_tiles;
	u32				mem_size;
	u32				fw_ctx_id;
	u32				col_list_len;
	u32				*col_list;
	u32				start_col;
	u32				num_col;
#define HWCTX_STAT_INIT  0
#define HWCTX_STAT_READY 1
#define HWCTX_STAT_STOP  2
	u32				status;
	u32				old_status;

	struct amdxdna_qos_info		qos;
	struct amdxdna_hwctx_param_config_cu *cus;
	u32				syncobj_hdl;
};

#define drm_job_to_xdna_job(j) \
	container_of(j, struct amdxdna_sched_job, base)

struct amdxdna_sched_job {
	struct drm_sched_job	base;
	struct kref		refcnt;
	struct amdxdna_hwctx	*hwctx;
	struct mm_struct	*mm;
	/* The fence to notice DRM scheduler that job is done by hardware */
	struct dma_fence	*fence;
	/* user can wait on this fence */
	struct dma_fence	*out_fence;
	bool			job_done;
	u64			seq;
	struct amdxdna_gem_obj	*cmd_bo;
	size_t			bo_cnt;
	struct drm_gem_object	*bos[] __counted_by(bo_cnt);
};

static inline u32
amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;

	return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header);
}

static inline void
amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;

	cmd->header &= ~AMDXDNA_CMD_STATE;
	cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s);
}

static inline enum ert_cmd_state
amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;

	return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header);
}

void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);

static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx)
{
	return GENMASK(hwctx->start_col + hwctx->num_col - 1,
		       hwctx->start_col);
}

void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
void amdxdna_hwctx_suspend(struct amdxdna_client *client);
void amdxdna_hwctx_resume(struct amdxdna_client *client);

int amdxdna_cmd_submit(struct amdxdna_client *client,
		       u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
		       u32 hwctx_hdl, u64 *seq);

int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
		     u64 seq, u32 timeout);

int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);

#endif /* _AMDXDNA_CTX_H_ */
622
drivers/accel/amdxdna/amdxdna_gem.c
Normal file
@ -0,0 +1,622 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_cache.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/gpu_scheduler.h>
#include <linux/iosys-map.h>
#include <linux/vmalloc.h>

#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"

#define XDNA_MAX_CMD_BO_SIZE	SZ_32K

static int
amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap)
{
	struct amdxdna_client *client = abo->client;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_mem *mem = &abo->mem;
	u64 offset;
	u32 align;
	int ret;

	align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
	ret = drm_mm_insert_node_generic(&abo->dev_heap->mm, &abo->mm_node,
					 mem->size, align,
					 0, DRM_MM_INSERT_BEST);
	if (ret) {
		XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
		return ret;
	}

	mem->dev_addr = abo->mm_node.start;
	offset = mem->dev_addr - abo->dev_heap->mem.dev_addr;
	mem->userptr = abo->dev_heap->mem.userptr + offset;
	mem->pages = &abo->dev_heap->base.pages[offset >> PAGE_SHIFT];
	mem->nr_pages = mem->size >> PAGE_SHIFT;

	if (use_vmap) {
		mem->kva = vmap(mem->pages, mem->nr_pages, VM_MAP, PAGE_KERNEL);
		if (!mem->kva) {
			XDNA_ERR(xdna, "Failed to vmap");
			drm_mm_remove_node(&abo->mm_node);
			return -EFAULT;
		}
	}

	return 0;
}

static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
{
	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
	struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva);

	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr);
	if (abo->pinned)
		amdxdna_gem_unpin(abo);

	if (abo->type == AMDXDNA_BO_DEV) {
		mutex_lock(&abo->client->mm_lock);
		drm_mm_remove_node(&abo->mm_node);
		mutex_unlock(&abo->client->mm_lock);

		vunmap(abo->mem.kva);
		drm_gem_object_put(to_gobj(abo->dev_heap));
		drm_gem_object_release(gobj);
		mutex_destroy(&abo->lock);
		kfree(abo);
		return;
	}

	if (abo->type == AMDXDNA_BO_DEV_HEAP)
		drm_mm_takedown(&abo->mm);

	drm_gem_vunmap_unlocked(gobj, &map);
	mutex_destroy(&abo->lock);
	drm_gem_shmem_free(&abo->base);
}

static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
	.free = amdxdna_gem_obj_free,
};

static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj,
						   mem.notifier);
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);

	XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d",
		 abo->mem.userptr, abo->mem.size, abo->type);

	if (!mmu_notifier_range_blockable(range))
		return false;

	xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);

	return true;
}

static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = {
	.invalidate = amdxdna_hmm_invalidate,
};

static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);

	if (!xdna->dev_info->ops->hmm_invalidate)
		return;

	mmu_interval_notifier_remove(&abo->mem.notifier);
	kvfree(abo->mem.pfns);
	abo->mem.pfns = NULL;
}

static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr,
				size_t len)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	u32 nr_pages;
	int ret;

	if (!xdna->dev_info->ops->hmm_invalidate)
		return 0;

	if (abo->mem.pfns)
		return -EEXIST;

	nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
	abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns),
				 GFP_KERNEL);
	if (!abo->mem.pfns)
		return -ENOMEM;

	ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier,
						  current->mm,
						  addr,
						  len,
						  &amdxdna_hmm_ops);
	if (ret) {
		XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret);
		kvfree(abo->mem.pfns);
	}
	abo->mem.userptr = addr;

	return ret;
}

static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj,
				struct vm_area_struct *vma)
{
	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
	unsigned long num_pages;
	int ret;

	ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size);
	if (ret)
		return ret;

	ret = drm_gem_shmem_mmap(&abo->base, vma);
	if (ret)
		goto hmm_unreg;

	num_pages = gobj->size >> PAGE_SHIFT;
	/* Try to insert the pages */
	vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
	ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages);
	if (ret)
		XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret);

	return 0;

hmm_unreg:
	amdxdna_hmm_unregister(abo);
	return ret;
}

static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf)
{
	return drm_gem_shmem_vm_ops.fault(vmf);
}

static void amdxdna_gem_vm_open(struct vm_area_struct *vma)
{
	drm_gem_shmem_vm_ops.open(vma);
}

static void amdxdna_gem_vm_close(struct vm_area_struct *vma)
{
	struct drm_gem_object *gobj = vma->vm_private_data;

	amdxdna_hmm_unregister(to_xdna_obj(gobj));
	drm_gem_shmem_vm_ops.close(vma);
}

static const struct vm_operations_struct amdxdna_gem_vm_ops = {
	.fault = amdxdna_gem_vm_fault,
	.open = amdxdna_gem_vm_open,
	.close = amdxdna_gem_vm_close,
};

static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
	.free = amdxdna_gem_obj_free,
	.print_info = drm_gem_shmem_object_print_info,
	.pin = drm_gem_shmem_object_pin,
	.unpin = drm_gem_shmem_object_unpin,
	.get_sg_table = drm_gem_shmem_object_get_sg_table,
	.vmap = drm_gem_shmem_object_vmap,
	.vunmap = drm_gem_shmem_object_vunmap,
	.mmap = amdxdna_gem_obj_mmap,
	.vm_ops = &amdxdna_gem_vm_ops,
};

static struct amdxdna_gem_obj *
amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
{
	struct amdxdna_gem_obj *abo;

	abo = kzalloc(sizeof(*abo), GFP_KERNEL);
	if (!abo)
		return ERR_PTR(-ENOMEM);

	abo->pinned = false;
	abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
	mutex_init(&abo->lock);

	abo->mem.userptr = AMDXDNA_INVALID_ADDR;
	abo->mem.dev_addr = AMDXDNA_INVALID_ADDR;
	abo->mem.size = size;

	return abo;
}

/* For drm_driver->gem_create_object callback */
struct drm_gem_object *
amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
{
	struct amdxdna_gem_obj *abo;

	abo = amdxdna_gem_create_obj(dev, size);
	if (IS_ERR(abo))
		return ERR_CAST(abo);

	to_gobj(abo)->funcs = &amdxdna_gem_shmem_funcs;

	return to_gobj(abo);
}

static struct amdxdna_gem_obj *
amdxdna_drm_alloc_shmem(struct drm_device *dev,
			struct amdxdna_drm_create_bo *args,
			struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct drm_gem_shmem_object *shmem;
	struct amdxdna_gem_obj *abo;

	shmem = drm_gem_shmem_create(dev, args->size);
	if (IS_ERR(shmem))
		return ERR_CAST(shmem);

	shmem->map_wc = false;

	abo = to_xdna_obj(&shmem->base);
	abo->client = client;
	abo->type = AMDXDNA_BO_SHMEM;

	return abo;
}

static struct amdxdna_gem_obj *
amdxdna_drm_create_dev_heap(struct drm_device *dev,
			    struct amdxdna_drm_create_bo *args,
			    struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct drm_gem_shmem_object *shmem;
	struct amdxdna_gem_obj *abo;
	int ret;

	if (args->size > xdna->dev_info->dev_mem_size) {
		XDNA_DBG(xdna, "Invalid dev heap size 0x%llx, limit 0x%lx",
			 args->size, xdna->dev_info->dev_mem_size);
		return ERR_PTR(-EINVAL);
	}

	mutex_lock(&client->mm_lock);
	if (client->dev_heap) {
		XDNA_DBG(client->xdna, "dev heap is already created");
		ret = -EBUSY;
		goto mm_unlock;
	}

	shmem = drm_gem_shmem_create(dev, args->size);
	if (IS_ERR(shmem)) {
		ret = PTR_ERR(shmem);
		goto mm_unlock;
	}

	shmem->map_wc = false;
	abo = to_xdna_obj(&shmem->base);

	abo->type = AMDXDNA_BO_DEV_HEAP;
	abo->client = client;
	abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
	drm_mm_init(&abo->mm, abo->mem.dev_addr, abo->mem.size);

	client->dev_heap = abo;
	drm_gem_object_get(to_gobj(abo));
	mutex_unlock(&client->mm_lock);

	return abo;

mm_unlock:
	mutex_unlock(&client->mm_lock);
	return ERR_PTR(ret);
}

struct amdxdna_gem_obj *
amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
			 struct amdxdna_drm_create_bo *args,
			 struct drm_file *filp, bool use_vmap)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	size_t aligned_sz = PAGE_ALIGN(args->size);
	struct amdxdna_gem_obj *abo, *heap;
	int ret;

	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		ret = -EINVAL;
		goto mm_unlock;
	}

	if (heap->mem.userptr == AMDXDNA_INVALID_ADDR) {
		XDNA_ERR(xdna, "Invalid dev heap userptr");
		ret = -EINVAL;
		goto mm_unlock;
	}

	if (args->size > heap->mem.size) {
		XDNA_ERR(xdna, "Invalid dev bo size 0x%llx, limit 0x%lx",
			 args->size, heap->mem.size);
		ret = -EINVAL;
		goto mm_unlock;
	}

	abo = amdxdna_gem_create_obj(&xdna->ddev, aligned_sz);
	if (IS_ERR(abo)) {
		ret = PTR_ERR(abo);
		goto mm_unlock;
	}
	to_gobj(abo)->funcs = &amdxdna_gem_dev_obj_funcs;
	abo->type = AMDXDNA_BO_DEV;
	abo->client = client;
	abo->dev_heap = heap;
	ret = amdxdna_gem_insert_node_locked(abo, use_vmap);
	if (ret) {
		XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
		goto mm_unlock;
	}

	drm_gem_object_get(to_gobj(heap));
	drm_gem_private_object_init(&xdna->ddev, to_gobj(abo), aligned_sz);

	mutex_unlock(&client->mm_lock);
	return abo;

mm_unlock:
	mutex_unlock(&client->mm_lock);
	return ERR_PTR(ret);
}
|
||||
|
||||
static struct amdxdna_gem_obj *
|
||||
amdxdna_drm_create_cmd_bo(struct drm_device *dev,
|
||||
struct amdxdna_drm_create_bo *args,
|
||||
struct drm_file *filp)
|
||||
{
|
||||
struct amdxdna_dev *xdna = to_xdna_dev(dev);
|
||||
struct drm_gem_shmem_object *shmem;
|
||||
struct amdxdna_gem_obj *abo;
|
||||
struct iosys_map map;
|
||||
int ret;
|
||||
|
||||
if (args->size > XDNA_MAX_CMD_BO_SIZE) {
|
||||
XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (args->size < sizeof(struct amdxdna_cmd)) {
|
||||
XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
shmem = drm_gem_shmem_create(dev, args->size);
|
||||
if (IS_ERR(shmem))
|
||||
return ERR_CAST(shmem);
|
||||
|
||||
shmem->map_wc = false;
|
||||
abo = to_xdna_obj(&shmem->base);
|
||||
|
||||
abo->type = AMDXDNA_BO_CMD;
|
||||
abo->client = filp->driver_priv;
|
||||
|
||||
ret = drm_gem_vmap_unlocked(to_gobj(abo), &map);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret);
|
||||
goto release_obj;
|
||||
}
|
||||
abo->mem.kva = map.vaddr;
|
||||
|
||||
return abo;
|
||||
|
||||
release_obj:
|
||||
drm_gem_shmem_free(shmem);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
{
|
||||
struct amdxdna_dev *xdna = to_xdna_dev(dev);
|
||||
struct amdxdna_drm_create_bo *args = data;
|
||||
struct amdxdna_gem_obj *abo;
|
||||
int ret;
|
||||
|
||||
if (args->flags || args->vaddr || !args->size)
|
||||
return -EINVAL;
|
||||
|
||||
XDNA_DBG(xdna, "BO arg type %d vaddr 0x%llx size 0x%llx flags 0x%llx",
|
||||
args->type, args->vaddr, args->size, args->flags);
|
||||
switch (args->type) {
|
||||
case AMDXDNA_BO_SHMEM:
|
||||
abo = amdxdna_drm_alloc_shmem(dev, args, filp);
|
||||
break;
|
||||
case AMDXDNA_BO_DEV_HEAP:
|
||||
abo = amdxdna_drm_create_dev_heap(dev, args, filp);
|
||||
break;
|
||||
case AMDXDNA_BO_DEV:
|
||||
abo = amdxdna_drm_alloc_dev_bo(dev, args, filp, false);
|
||||
break;
|
||||
case AMDXDNA_BO_CMD:
|
||||
abo = amdxdna_drm_create_cmd_bo(dev, args, filp);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
if (IS_ERR(abo))
|
||||
return PTR_ERR(abo);
|
||||
|
||||
/* ready to publish object to userspace */
|
||||
ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create handle failed");
|
||||
goto put_obj;
|
||||
}
|
||||
|
||||
XDNA_DBG(xdna, "BO hdl %d type %d userptr 0x%llx xdna_addr 0x%llx size 0x%lx",
|
||||
args->handle, args->type, abo->mem.userptr,
|
||||
abo->mem.dev_addr, abo->mem.size);
|
||||
put_obj:
|
||||
/* Dereference object reference. Handle holds it now. */
|
||||
drm_gem_object_put(to_gobj(abo));
|
||||
return ret;
|
||||
}
|
||||
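The create BO ioctl above dispatches on args->type, and a device BO (AMDXDNA_BO_DEV) can only be carved out of an already-created device heap whose userptr is valid. A minimal, hedged userspace sketch follows; it is not part of this patch. The field names mirror the args-> accesses above, but the exact uapi struct layout and ioctl number in <drm/amdxdna_accel.h> are assumptions here.

/* Hedged userspace sketch, not driver code. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/amdxdna_accel.h>	/* uapi header, assumed installed */

static int create_dev_bo(int fd, uint64_t heap_size, uint64_t bo_size)
{
	struct amdxdna_drm_create_bo args = {0};
	int ret;

	/* A device heap must exist first; AMDXDNA_BO_DEV fails with -EINVAL otherwise. */
	args.type = AMDXDNA_BO_DEV_HEAP;
	args.size = heap_size;	/* must not exceed dev_info->dev_mem_size */
	ret = ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &args);
	if (ret)
		return ret;

	/*
	 * In the real flow the heap is also mmap'ed at this point so that
	 * its userptr is valid; that step is elided here.
	 */
	args.type = AMDXDNA_BO_DEV;
	args.size = bo_size;	/* must not exceed the heap size */
	args.flags = 0;		/* non-zero flags are rejected */
	return ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &args);
}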
int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	int ret;

	switch (abo->type) {
	case AMDXDNA_BO_SHMEM:
	case AMDXDNA_BO_DEV_HEAP:
		ret = drm_gem_shmem_pin(&abo->base);
		break;
	case AMDXDNA_BO_DEV:
		ret = drm_gem_shmem_pin(&abo->dev_heap->base);
		break;
	default:
		ret = -EOPNOTSUPP;
	}

	XDNA_DBG(xdna, "BO type %d ret %d", abo->type, ret);
	return ret;
}

int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
{
	int ret;

	if (abo->type == AMDXDNA_BO_DEV)
		abo = abo->dev_heap;

	mutex_lock(&abo->lock);
	ret = amdxdna_gem_pin_nolock(abo);
	mutex_unlock(&abo->lock);

	return ret;
}

void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
{
	if (abo->type == AMDXDNA_BO_DEV)
		abo = abo->dev_heap;

	mutex_lock(&abo->lock);
	drm_gem_shmem_unpin(&abo->base);
	mutex_unlock(&abo->lock);
}

struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
					    u32 bo_hdl, u8 bo_type)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_gem_obj *abo;
	struct drm_gem_object *gobj;

	gobj = drm_gem_object_lookup(client->filp, bo_hdl);
	if (!gobj) {
		XDNA_DBG(xdna, "Can not find bo %d", bo_hdl);
		return NULL;
	}

	abo = to_xdna_obj(gobj);
	if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type)
		return abo;

	drm_gem_object_put(gobj);
	return NULL;
}

int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_drm_get_bo_info *args = data;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_gem_obj *abo;
	struct drm_gem_object *gobj;
	int ret = 0;

	if (args->ext || args->ext_flags || args->pad)
		return -EINVAL;

	gobj = drm_gem_object_lookup(filp, args->handle);
	if (!gobj) {
		XDNA_DBG(xdna, "Lookup GEM object %d failed", args->handle);
		return -ENOENT;
	}

	abo = to_xdna_obj(gobj);
	args->vaddr = abo->mem.userptr;
	args->xdna_addr = abo->mem.dev_addr;

	if (abo->type != AMDXDNA_BO_DEV)
		args->map_offset = drm_vma_node_offset_addr(&gobj->vma_node);
	else
		args->map_offset = AMDXDNA_INVALID_ADDR;

	XDNA_DBG(xdna, "BO hdl %d map_offset 0x%llx vaddr 0x%llx xdna_addr 0x%llx",
		 args->handle, args->map_offset, args->vaddr, args->xdna_addr);

	drm_gem_object_put(gobj);
	return ret;
}

/*
 * The sync bo ioctl is to make sure the CPU cache is in sync with memory.
 * This is required because the NPU is not a cache-coherent device. CPU cache
 * flushing/invalidation is expensive, so it is best handled outside of the
 * command submission path. This ioctl allows explicit cache
 * flushing/invalidation outside of the critical path.
 */
int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
			      void *data, struct drm_file *filp)
{
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_drm_sync_bo *args = data;
	struct amdxdna_gem_obj *abo;
	struct drm_gem_object *gobj;
	int ret;

	gobj = drm_gem_object_lookup(filp, args->handle);
	if (!gobj) {
		XDNA_ERR(xdna, "Lookup GEM object failed");
		return -ENOENT;
	}
	abo = to_xdna_obj(gobj);

	ret = amdxdna_gem_pin(abo);
	if (ret) {
		XDNA_ERR(xdna, "Pin BO %d failed, ret %d", args->handle, ret);
		goto put_obj;
	}

	if (abo->type == AMDXDNA_BO_DEV)
		drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages);
	else
		drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);

	amdxdna_gem_unpin(abo);

	XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
		 args->handle, args->offset, args->size);

put_obj:
	drm_gem_object_put(gobj);
	return ret;
}
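Since the driver only clflushes on request, a client that writes a BO through the CPU is expected to invoke this ioctl before the NPU reads the buffer. A hedged userspace sketch (not part of this patch); the field names follow the args->handle/offset/size accesses above, while the exact uapi struct and ioctl number from <drm/amdxdna_accel.h> are assumptions.

/* Hedged userspace sketch, not driver code. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/amdxdna_accel.h>	/* uapi header, assumed installed */

static int sync_bo(int fd, uint32_t handle, uint64_t offset, uint64_t size)
{
	struct amdxdna_drm_sync_bo args = {
		.handle = handle,
		.offset = offset,
		.size = size,
	};

	/* The driver pins the BO, clflushes its pages, then unpins it. */
	return ioctl(fd, DRM_IOCTL_AMDXDNA_SYNC_BO, &args);
}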
65
drivers/accel/amdxdna/amdxdna_gem.h
Normal file
@@ -0,0 +1,65 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#ifndef _AMDXDNA_GEM_H_
#define _AMDXDNA_GEM_H_

struct amdxdna_mem {
	u64				userptr;
	void				*kva;
	u64				dev_addr;
	size_t				size;
	struct page			**pages;
	u32				nr_pages;
	struct mmu_interval_notifier	notifier;
	unsigned long			*pfns;
	bool				map_invalid;
};

struct amdxdna_gem_obj {
	struct drm_gem_shmem_object	base;
	struct amdxdna_client		*client;
	u8				type;
	bool				pinned;
	struct mutex			lock; /* Protects: pinned */
	struct amdxdna_mem		mem;

	/* The members below are only initialized when needed */
	struct drm_mm			mm; /* For AMDXDNA_BO_DEV_HEAP */
	struct amdxdna_gem_obj		*dev_heap; /* For AMDXDNA_BO_DEV */
	struct drm_mm_node		mm_node; /* For AMDXDNA_BO_DEV */
	u32				assigned_hwctx;
};

#define to_gobj(obj) (&(obj)->base.base)

static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj)
{
	return container_of(gobj, struct amdxdna_gem_obj, base.base);
}

struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client,
					    u32 bo_hdl, u8 bo_type);
static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
{
	drm_gem_object_put(to_gobj(abo));
}

struct drm_gem_object *
amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size);
struct amdxdna_gem_obj *
amdxdna_drm_alloc_dev_bo(struct drm_device *dev,
			 struct amdxdna_drm_create_bo *args,
			 struct drm_file *filp, bool use_vmap);

int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo);
int amdxdna_gem_pin(struct amdxdna_gem_obj *abo);
void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);

int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);

#endif /* _AMDXDNA_GEM_H_ */
561
drivers/accel/amdxdna/amdxdna_mailbox.c
Normal file
@@ -0,0 +1,561 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#include <drm/drm_device.h>
#include <drm/drm_managed.h>
#include <linux/bitfield.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/xarray.h>

#define CREATE_TRACE_POINTS
#include <trace/events/amdxdna.h>

#include "amdxdna_mailbox.h"

#define MB_ERR(chann, fmt, args...) \
({ \
	typeof(chann) _chann = chann; \
	dev_err((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
		(_chann)->msix_irq, ##args); \
})
#define MB_DBG(chann, fmt, args...) \
({ \
	typeof(chann) _chann = chann; \
	dev_dbg((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
		(_chann)->msix_irq, ##args); \
})
#define MB_WARN_ONCE(chann, fmt, args...) \
({ \
	typeof(chann) _chann = chann; \
	dev_warn_once((_chann)->mb->dev, "xdna_mailbox.%d: "fmt, \
		      (_chann)->msix_irq, ##args); \
})

#define MAGIC_VAL			0x1D000000U
#define MAGIC_VAL_MASK			0xFF000000
#define MAX_MSG_ID_ENTRIES		256
#define MSG_RX_TIMER			200 /* milliseconds */
#define MAILBOX_NAME			"xdna_mailbox"

enum channel_res_type {
	CHAN_RES_X2I,
	CHAN_RES_I2X,
	CHAN_RES_NUM
};

struct mailbox {
	struct device		*dev;
	struct xdna_mailbox_res	res;
};

struct mailbox_channel {
	struct mailbox			*mb;
	struct xdna_mailbox_chann_res	res[CHAN_RES_NUM];
	int				msix_irq;
	u32				iohub_int_addr;
	struct xarray			chan_xa;
	u32				next_msgid;
	u32				x2i_tail;

	/* Received msg related fields */
	struct workqueue_struct		*work_q;
	struct work_struct		rx_work;
	u32				i2x_head;
	bool				bad_state;
};

#define MSG_BODY_SZ		GENMASK(10, 0)
#define MSG_PROTO_VER		GENMASK(23, 16)
struct xdna_msg_header {
	__u32 total_size;
	__u32 sz_ver;
	__u32 id;
	__u32 opcode;
} __packed;

static_assert(sizeof(struct xdna_msg_header) == 16);

struct mailbox_pkg {
	struct xdna_msg_header	header;
	__u32			payload[];
};

/* The protocol version. */
#define MSG_PROTOCOL_VERSION	0x1
/* The tombstone value. */
#define TOMBSTONE		0xDEADFACE

struct mailbox_msg {
	void		*handle;
	int		(*notify_cb)(void *handle, const u32 *data, size_t size);
	size_t		pkg_size; /* package size in bytes */
	struct mailbox_pkg	pkg;
};

static void mailbox_reg_write(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 data)
{
	struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
	void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;

	writel(data, ringbuf_addr);
}

static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg)
{
	struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
	void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;

	return readl(ringbuf_addr);
}

static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val)
{
	struct xdna_mailbox_res *mb_res = &mb_chann->mb->res;
	void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg;
	int ret, value;

	/* Poll until the value is not zero */
	ret = readx_poll_timeout(readl, ringbuf_addr, value,
				 value, 1 /* us */, 100);
	if (ret < 0)
		return ret;

	*val = value;
	return 0;
}

static inline void
mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val)
{
	mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_head_ptr_reg, headptr_val);
	mb_chann->i2x_head = headptr_val;
}

static inline void
mailbox_set_tailptr(struct mailbox_channel *mb_chann, u32 tailptr_val)
{
	mailbox_reg_write(mb_chann, mb_chann->res[CHAN_RES_X2I].mb_tail_ptr_reg, tailptr_val);
	mb_chann->x2i_tail = tailptr_val;
}

static inline u32
mailbox_get_headptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
{
	return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_head_ptr_reg);
}

static inline u32
mailbox_get_tailptr(struct mailbox_channel *mb_chann, enum channel_res_type type)
{
	return mailbox_reg_read(mb_chann, mb_chann->res[type].mb_tail_ptr_reg);
}

static inline u32
mailbox_get_ringbuf_size(struct mailbox_channel *mb_chann, enum channel_res_type type)
{
	return mb_chann->res[type].rb_size;
}

static inline int mailbox_validate_msgid(int msg_id)
{
	return (msg_id & MAGIC_VAL_MASK) == MAGIC_VAL;
}

static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
{
	u32 msg_id;
	int ret;

	ret = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg,
				  XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1),
				  &mb_chann->next_msgid, GFP_NOWAIT);
	if (ret < 0)
		return ret;

	/*
	 * Add MAGIC_VAL to the higher bits.
	 */
	msg_id |= MAGIC_VAL;
	return msg_id;
}

static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id)
{
	msg_id &= ~MAGIC_VAL_MASK;
	xa_erase_irq(&mb_chann->chan_xa, msg_id);
}

static void mailbox_release_msg(struct mailbox_channel *mb_chann,
				struct mailbox_msg *mb_msg)
{
	MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
	       mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
	mb_msg->notify_cb(mb_msg->handle, NULL, 0);
	kfree(mb_msg);
}

static int
mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
{
	void __iomem *write_addr;
	u32 ringbuf_size;
	u32 head, tail;
	u32 start_addr;
	u32 tmp_tail;

	head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I);
	tail = mb_chann->x2i_tail;
	ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I);
	start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr;
	tmp_tail = tail + mb_msg->pkg_size;

	if (tail < head && tmp_tail >= head)
		goto no_space;

	if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) &&
			     mb_msg->pkg_size >= head))
		goto no_space;

	if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) {
		write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
		writel(TOMBSTONE, write_addr);

		/* The tombstone is set; write from the start of the ring buffer */
		tail = 0;
	}

	write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
	memcpy_toio(write_addr, &mb_msg->pkg, mb_msg->pkg_size);
	mailbox_set_tailptr(mb_chann, tail + mb_msg->pkg_size);

	trace_mbox_set_tail(MAILBOX_NAME, mb_chann->msix_irq,
			    mb_msg->pkg.header.opcode,
			    mb_msg->pkg.header.id);

	return 0;

no_space:
	return -ENOSPC;
}
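The space checks in mailbox_send_msg() encode one invariant: a message never wraps around the end of the ring. When the message does not fit in the space left before the end, a TOMBSTONE word is written at the old tail and the message restarts at offset 0, so the producer must also leave room at the front in that case. An illustrative restatement as a standalone predicate (a sketch, not driver code):

/* Illustration only: the free-space decision above as a pure function. */
#include <stdbool.h>
#include <stdint.h>

#define RB_TOMBSTONE_SLOT sizeof(uint32_t)

static bool ring_has_space(uint32_t head, uint32_t tail,
			   uint32_t rb_size, uint32_t pkg_size)
{
	uint32_t tmp_tail = tail + pkg_size;

	if (tail < head)			/* free space is [tail, head) */
		return tmp_tail < head;

	if (tmp_tail <= rb_size - RB_TOMBSTONE_SLOT)
		return true;			/* fits before the end */

	/* Must wrap: tombstone goes at tail, message restarts at 0. */
	return pkg_size < head;
}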
static int
mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *header,
		 void *data)
{
	struct mailbox_msg *mb_msg;
	int msg_id;
	int ret;

	msg_id = header->id;
	if (!mailbox_validate_msgid(msg_id)) {
		MB_ERR(mb_chann, "Bad message ID 0x%x", msg_id);
		return -EINVAL;
	}

	msg_id &= ~MAGIC_VAL_MASK;
	mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id);
	if (!mb_msg) {
		MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id);
		return -EINVAL;
	}

	MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
	       header->opcode, header->total_size, header->id);
	ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
	if (unlikely(ret))
		MB_ERR(mb_chann, "Message callback ret %d", ret);

	kfree(mb_msg);
	return ret;
}

static int mailbox_get_msg(struct mailbox_channel *mb_chann)
{
	struct xdna_msg_header header;
	void __iomem *read_addr;
	u32 msg_size, rest;
	u32 ringbuf_size;
	u32 head, tail;
	u32 start_addr;
	int ret;

	if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail))
		return -EINVAL;
	head = mb_chann->i2x_head;
	ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X);
	start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr;

	if (unlikely(tail > ringbuf_size || !IS_ALIGNED(tail, 4))) {
		MB_WARN_ONCE(mb_chann, "Invalid tail 0x%x", tail);
		return -EINVAL;
	}

	/* ringbuf empty */
	if (head == tail)
		return -ENOENT;

	if (head == ringbuf_size)
		head = 0;

	/* Peek the size of the message, or TOMBSTONE */
	read_addr = mb_chann->mb->res.ringbuf_base + start_addr + head;
	header.total_size = readl(read_addr);
	/* The size is TOMBSTONE; the next read starts from 0 */
	if (header.total_size == TOMBSTONE) {
		if (head < tail) {
			MB_WARN_ONCE(mb_chann, "Tombstone, head 0x%x tail 0x%x",
				     head, tail);
			return -EINVAL;
		}
		mailbox_set_headptr(mb_chann, 0);
		return 0;
	}

	if (unlikely(!header.total_size || !IS_ALIGNED(header.total_size, 4))) {
		MB_WARN_ONCE(mb_chann, "Invalid total size 0x%x", header.total_size);
		return -EINVAL;
	}
	msg_size = sizeof(header) + header.total_size;

	if (msg_size > ringbuf_size - head || msg_size > tail - head) {
		MB_WARN_ONCE(mb_chann, "Invalid message size %d, tail %d, head %d",
			     msg_size, tail, head);
		return -EINVAL;
	}

	rest = sizeof(header) - sizeof(u32);
	read_addr += sizeof(u32);
	memcpy_fromio((u32 *)&header + 1, read_addr, rest);
	read_addr += rest;

	ret = mailbox_get_resp(mb_chann, &header, (u32 *)read_addr);

	mailbox_set_headptr(mb_chann, head + msg_size);
	/* After the update, head can equal ringbuf_size. This is expected. */
	trace_mbox_set_head(MAILBOX_NAME, mb_chann->msix_irq,
			    header.opcode, header.id);

	return ret;
}

static irqreturn_t mailbox_irq_handler(int irq, void *p)
{
	struct mailbox_channel *mb_chann = p;

	trace_mbox_irq_handle(MAILBOX_NAME, irq);
	/* Schedule rx_work to call the callback functions */
	queue_work(mb_chann->work_q, &mb_chann->rx_work);
	/* Clear IOHUB register */
	mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);

	return IRQ_HANDLED;
}

static void mailbox_rx_worker(struct work_struct *rx_work)
{
	struct mailbox_channel *mb_chann;
	int ret;

	mb_chann = container_of(rx_work, struct mailbox_channel, rx_work);

	if (READ_ONCE(mb_chann->bad_state)) {
		MB_ERR(mb_chann, "Channel in bad state, work aborted");
		return;
	}

	while (1) {
		/*
		 * If the return is 0, keep consuming messages until there
		 * are no messages left or an error happens.
		 */
		ret = mailbox_get_msg(mb_chann);
		if (ret == -ENOENT)
			break;

		/* Any other error means the device doesn't look good; disable the irq. */
		if (unlikely(ret)) {
			MB_ERR(mb_chann, "Unexpected ret %d, disable irq", ret);
			WRITE_ONCE(mb_chann->bad_state, true);
			disable_irq(mb_chann->msix_irq);
			break;
		}
	}
}

int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann,
			  const struct xdna_mailbox_msg *msg, u64 tx_timeout)
{
	struct xdna_msg_header *header;
	struct mailbox_msg *mb_msg;
	size_t pkg_size;
	int ret;

	pkg_size = sizeof(*header) + msg->send_size;
	if (pkg_size > mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I)) {
		MB_ERR(mb_chann, "Message size larger than ringbuf size");
		return -EINVAL;
	}

	if (unlikely(!IS_ALIGNED(msg->send_size, 4))) {
		MB_ERR(mb_chann, "Message must be 4 bytes align");
		return -EINVAL;
	}

	/* The first word in the payload can NOT be TOMBSTONE */
	if (unlikely(((u32 *)msg->send_data)[0] == TOMBSTONE)) {
		MB_ERR(mb_chann, "Tomb stone in data");
		return -EINVAL;
	}

	if (READ_ONCE(mb_chann->bad_state)) {
		MB_ERR(mb_chann, "Channel in bad state");
		return -EPIPE;
	}

	mb_msg = kzalloc(sizeof(*mb_msg) + pkg_size, GFP_KERNEL);
	if (!mb_msg)
		return -ENOMEM;

	mb_msg->handle = msg->handle;
	mb_msg->notify_cb = msg->notify_cb;
	mb_msg->pkg_size = pkg_size;

	header = &mb_msg->pkg.header;
	/*
	 * The hardware uses total_size and size to split huge messages.
	 * We do not support that here, thus the two values are the same.
	 */
	header->total_size = msg->send_size;
	header->sz_ver = FIELD_PREP(MSG_BODY_SZ, msg->send_size) |
			 FIELD_PREP(MSG_PROTO_VER, MSG_PROTOCOL_VERSION);
	header->opcode = msg->opcode;
	memcpy(mb_msg->pkg.payload, msg->send_data, msg->send_size);

	ret = mailbox_acquire_msgid(mb_chann, mb_msg);
	if (unlikely(ret < 0)) {
		MB_ERR(mb_chann, "mailbox_acquire_msgid failed");
		goto msg_id_failed;
	}
	header->id = ret;

	MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
	       header->opcode, header->total_size, header->id);

	ret = mailbox_send_msg(mb_chann, mb_msg);
	if (ret) {
		MB_DBG(mb_chann, "Error in mailbox send msg, ret %d", ret);
		goto release_id;
	}

	return 0;

release_id:
	mailbox_release_msgid(mb_chann, header->id);
msg_id_failed:
	kfree(mb_msg);
	return ret;
}

struct mailbox_channel *
xdna_mailbox_create_channel(struct mailbox *mb,
			    const struct xdna_mailbox_chann_res *x2i,
			    const struct xdna_mailbox_chann_res *i2x,
			    u32 iohub_int_addr,
			    int mb_irq)
{
	struct mailbox_channel *mb_chann;
	int ret;

	if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) {
		pr_err("Ring buf size must be power of 2");
		return NULL;
	}

	mb_chann = kzalloc(sizeof(*mb_chann), GFP_KERNEL);
	if (!mb_chann)
		return NULL;

	mb_chann->mb = mb;
	mb_chann->msix_irq = mb_irq;
	mb_chann->iohub_int_addr = iohub_int_addr;
	memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i));
	memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x));

	xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
	mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I);
	mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X);

	INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker);
	mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME);
	if (!mb_chann->work_q) {
		MB_ERR(mb_chann, "Create workqueue failed");
		goto free_and_out;
	}

	/* Everything looks good. Time to enable the irq handler */
	ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann);
	if (ret) {
		MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret);
		goto destroy_wq;
	}

	mb_chann->bad_state = false;

	MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq);
	return mb_chann;

destroy_wq:
	destroy_workqueue(mb_chann->work_q);
free_and_out:
	kfree(mb_chann);
	return NULL;
}

int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
{
	struct mailbox_msg *mb_msg;
	unsigned long msg_id;

	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
	free_irq(mb_chann->msix_irq, mb_chann);
	destroy_workqueue(mb_chann->work_q);
	/* We can clean up and release resources */

	xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
		mailbox_release_msg(mb_chann, mb_msg);

	xa_destroy(&mb_chann->chan_xa);

	MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq);
	kfree(mb_chann);
	return 0;
}

void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
{
	/* Disable the irq and wait. This might sleep. */
	disable_irq(mb_chann->msix_irq);

	/* Cancel RX work and wait for it to finish */
	cancel_work_sync(&mb_chann->rx_work);
	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
}

struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
				     const struct xdna_mailbox_res *res)
{
	struct mailbox *mb;

	mb = drmm_kzalloc(ddev, sizeof(*mb), GFP_KERNEL);
	if (!mb)
		return NULL;
	mb->dev = ddev->dev;

	/* mailbox and ring buf base and size information */
	memcpy(&mb->res, res, sizeof(*res));

	return mb;
}
124
drivers/accel/amdxdna/amdxdna_mailbox.h
Normal file
@@ -0,0 +1,124 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AIE2_MAILBOX_H_
#define _AIE2_MAILBOX_H_

struct mailbox;
struct mailbox_channel;

/*
 * xdna_mailbox_msg - message struct
 *
 * @opcode:	opcode for firmware
 * @handle:	handle used for the notify callback
 * @notify_cb:	callback function to notify the sender when there is a response
 * @send_data:	pointer to the data to send
 * @send_size:	size of the data to send
 *
 * The mailbox splits the send data into multiple firmware messages if
 * the size of the data is too big. This is transparent to the sender.
 * The sender receives a single notification.
 */
struct xdna_mailbox_msg {
	u32	opcode;
	void	*handle;
	int	(*notify_cb)(void *handle, const u32 *data, size_t size);
	u8	*send_data;
	size_t	send_size;
};

/*
 * xdna_mailbox_res - mailbox hardware resource
 *
 * @ringbuf_base:	ring buffer base address
 * @ringbuf_size:	ring buffer size
 * @mbox_base:		mailbox base address
 * @mbox_size:		mailbox size
 */
struct xdna_mailbox_res {
	void __iomem	*ringbuf_base;
	size_t		ringbuf_size;
	void __iomem	*mbox_base;
	size_t		mbox_size;
	const char	*name;
};

/*
 * xdna_mailbox_chann_res - resources
 *
 * @rb_start_addr:	ring buffer start address
 * @rb_size:		ring buffer size
 * @mb_head_ptr_reg:	mailbox head pointer register
 * @mb_tail_ptr_reg:	mailbox tail pointer register
 */
struct xdna_mailbox_chann_res {
	u32 rb_start_addr;
	u32 rb_size;
	u32 mb_head_ptr_reg;
	u32 mb_tail_ptr_reg;
};

/*
 * xdna_mailbox_create() -- create mailbox subsystem and initialize
 *
 * @ddev: device pointer
 * @res: SRAM and mailbox resources
 *
 * Return: On success, return a handle of the mailbox subsystem.
 * Otherwise, return a NULL pointer.
 */
struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
				     const struct xdna_mailbox_res *res);

/*
 * xdna_mailbox_create_channel() -- Create a mailbox channel instance
 *
 * @mailbox: the handle returned from xdna_mailbox_create()
 * @x2i: host to firmware mailbox resources
 * @i2x: firmware to host mailbox resources
 * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt
 * @mb_irq: Linux IRQ number associated with mailbox MSI-X interrupt vector index
 *
 * Return: On success, return a handle of the mailbox channel. Otherwise, return NULL.
 */
struct mailbox_channel *
xdna_mailbox_create_channel(struct mailbox *mailbox,
			    const struct xdna_mailbox_chann_res *x2i,
			    const struct xdna_mailbox_chann_res *i2x,
			    u32 xdna_mailbox_intr_reg,
			    int mb_irq);

/*
 * xdna_mailbox_destroy_channel() -- destroy mailbox channel
 *
 * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
 *
 * Return: 0 on success. Otherwise, return an error code
 */
int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);

/*
 * xdna_mailbox_stop_channel() -- stop mailbox channel
 *
 * @mailbox_chann: the handle returned from xdna_mailbox_create_channel()
 */
void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);

/*
 * xdna_mailbox_send_msg() -- Send a message
 *
 * @mailbox_chann: Mailbox channel handle
 * @msg: message struct for message information
 * @tx_timeout: the timeout value for sending the message in ms.
 *
 * Return: 0 on success; otherwise, return an error code
 */
int xdna_mailbox_send_msg(struct mailbox_channel *mailbox_chann,
			  const struct xdna_mailbox_msg *msg, u64 tx_timeout);

#endif /* _AIE2_MAILBOX_H_ */
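Taken together, the declarations above imply a create/channel/send/stop/destroy lifecycle. A hedged kernel-side sketch follows; it is not part of this patch, and the resource and irq parameters are placeholders that a real caller would derive from the device's BARs and MSI-X setup.

/* Hedged usage sketch of the mailbox API above; placeholder values. */
#include "amdxdna_mailbox.h"

static int example_channel_roundtrip(struct drm_device *ddev,
				     const struct xdna_mailbox_res *res,
				     const struct xdna_mailbox_chann_res *x2i,
				     const struct xdna_mailbox_chann_res *i2x,
				     u32 intr_reg, int irq,
				     struct xdna_mailbox_msg *msg)
{
	struct mailbox_channel *chann;
	struct mailbox *mb;
	int ret;

	mb = xdnam_mailbox_create(ddev, res);	/* drmm-managed, no explicit free */
	if (!mb)
		return -ENOMEM;

	chann = xdna_mailbox_create_channel(mb, x2i, i2x, intr_reg, irq);
	if (!chann)
		return -EINVAL;

	ret = xdna_mailbox_send_msg(chann, msg, 2000 /* ms */);

	/* Quiesce before teardown: disable the irq, flush the rx worker. */
	xdna_mailbox_stop_channel(chann);
	xdna_mailbox_destroy_channel(chann);
	return ret;
}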
61
drivers/accel/amdxdna/amdxdna_mailbox_helper.c
Normal file
@@ -0,0 +1,61 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_print.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/gpu_scheduler.h>
#include <linux/completion.h>

#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_mailbox_helper.h"
#include "amdxdna_pci_drv.h"

int xdna_msg_cb(void *handle, const u32 *data, size_t size)
{
	struct xdna_notify *cb_arg = handle;
	int ret;

	if (unlikely(!data))
		goto out;

	if (unlikely(cb_arg->size != size)) {
		cb_arg->error = -EINVAL;
		goto out;
	}

	print_hex_dump_debug("resp data: ", DUMP_PREFIX_OFFSET,
			     16, 4, data, cb_arg->size, true);
	memcpy(cb_arg->data, data, cb_arg->size);
out:
	ret = cb_arg->error;
	complete(&cb_arg->comp);
	return ret;
}

int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
		       struct xdna_mailbox_msg *msg)
{
	struct xdna_notify *hdl = msg->handle;
	int ret;

	ret = xdna_mailbox_send_msg(chann, msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(xdna, "Send message failed, ret %d", ret);
		return ret;
	}

	ret = wait_for_completion_timeout(&hdl->comp,
					  msecs_to_jiffies(RX_TIMEOUT));
	if (!ret) {
		XDNA_ERR(xdna, "Wait for completion timeout");
		return -ETIME;
	}

	return hdl->error;
}
42
drivers/accel/amdxdna/amdxdna_mailbox_helper.h
Normal file
@@ -0,0 +1,42 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AMDXDNA_MAILBOX_HELPER_H
#define _AMDXDNA_MAILBOX_HELPER_H

#define TX_TIMEOUT 2000 /* milliseconds */
#define RX_TIMEOUT 5000 /* milliseconds */

struct amdxdna_dev;

struct xdna_notify {
	struct completion	comp;
	u32			*data;
	size_t			size;
	int			error;
};

#define DECLARE_XDNA_MSG_COMMON(name, op, status)		\
	struct name##_req	req = { 0 };			\
	struct name##_resp	resp = { status };		\
	struct xdna_notify	hdl = {				\
		.error = 0,					\
		.data = (u32 *)&resp,				\
		.size = sizeof(resp),				\
		.comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
	};							\
	struct xdna_mailbox_msg msg = {				\
		.send_data = (u8 *)&req,			\
		.send_size = sizeof(req),			\
		.handle = &hdl,					\
		.opcode = op,					\
		.notify_cb = xdna_msg_cb,			\
	}

int xdna_msg_cb(void *handle, const u32 *data, size_t size);
int xdna_send_msg_wait(struct amdxdna_dev *xdna, struct mailbox_channel *chann,
		       struct xdna_mailbox_msg *msg);

#endif /* _AMDXDNA_MAILBOX_HELPER_H */
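The macro above declares an on-stack req/resp pair plus the notifier and message structs, so that combined with xdna_send_msg_wait() a caller gets a synchronous firmware call in a few lines. A hedged sketch of that pattern follows; the suspend_req/suspend_resp types, the context_handle field, and the MSG_OP_SUSPEND opcode are all hypothetical names, not taken from this patch.

/* Hedged sketch; the req/resp types and opcode below are hypothetical. */
#include "amdxdna_mailbox.h"
#include "amdxdna_mailbox_helper.h"

static int example_suspend_query(struct amdxdna_dev *xdna,
				 struct mailbox_channel *chann, u32 ctx_handle)
{
	DECLARE_XDNA_MSG_COMMON(suspend, MSG_OP_SUSPEND, 0);

	req.context_handle = ctx_handle;	/* hypothetical request field */

	/* Sends req, then blocks until xdna_msg_cb() completes hdl.comp. */
	return xdna_send_msg_wait(xdna, chann, &msg);
}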
429
drivers/accel/amdxdna/amdxdna_pci_drv.c
Normal file
@@ -0,0 +1,429 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/gpu_scheduler.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>

#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"

#define AMDXDNA_AUTOSUSPEND_DELAY	5000 /* milliseconds */

/*
 * Bind the driver based on the (vendor_id, device_id) pair, and later use the
 * (device_id, rev_id) pair as a key to select the device. Devices with the
 * same device_id have a very similar interface to the host driver.
 */
static const struct pci_device_id pci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
	{0}
};

MODULE_DEVICE_TABLE(pci, pci_ids);

static const struct amdxdna_device_id amdxdna_ids[] = {
	{ 0x1502, 0x0,  &dev_npu1_info },
	{ 0x17f0, 0x0,  &dev_npu2_info },
	{ 0x17f0, 0x10, &dev_npu4_info },
	{ 0x17f0, 0x11, &dev_npu5_info },
	{ 0x17f0, 0x20, &dev_npu6_info },
	{0}
};

static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
{
	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
	struct amdxdna_client *client;
	int ret;

	ret = pm_runtime_resume_and_get(ddev->dev);
	if (ret) {
		XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
		return ret;
	}

	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client) {
		ret = -ENOMEM;
		goto put_rpm;
	}

	client->pid = pid_nr(rcu_access_pointer(filp->pid));
	client->xdna = xdna;

	client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
	if (IS_ERR(client->sva)) {
		ret = PTR_ERR(client->sva);
		XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
		goto failed;
	}
	client->pasid = iommu_sva_get_pasid(client->sva);
	if (client->pasid == IOMMU_PASID_INVALID) {
		XDNA_ERR(xdna, "SVA get pasid failed");
		ret = -ENODEV;
		goto unbind_sva;
	}
	mutex_init(&client->hwctx_lock);
	init_srcu_struct(&client->hwctx_srcu);
	xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
	mutex_init(&client->mm_lock);

	mutex_lock(&xdna->dev_lock);
	list_add_tail(&client->node, &xdna->client_list);
	mutex_unlock(&xdna->dev_lock);

	filp->driver_priv = client;
	client->filp = filp;

	XDNA_DBG(xdna, "pid %d opened", client->pid);
	return 0;

unbind_sva:
	iommu_sva_unbind_device(client->sva);
failed:
	kfree(client);
put_rpm:
	pm_runtime_mark_last_busy(ddev->dev);
	pm_runtime_put_autosuspend(ddev->dev);

	return ret;
}

static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_dev *xdna = to_xdna_dev(ddev);

	XDNA_DBG(xdna, "closing pid %d", client->pid);

	xa_destroy(&client->hwctx_xa);
	cleanup_srcu_struct(&client->hwctx_srcu);
	mutex_destroy(&client->hwctx_lock);
	mutex_destroy(&client->mm_lock);
	if (client->dev_heap)
		drm_gem_object_put(to_gobj(client->dev_heap));

	iommu_sva_unbind_device(client->sva);

	XDNA_DBG(xdna, "pid %d closed", client->pid);
	kfree(client);
	pm_runtime_mark_last_busy(ddev->dev);
	pm_runtime_put_autosuspend(ddev->dev);
}

static int amdxdna_flush(struct file *f, fl_owner_t id)
{
	struct drm_file *filp = f->private_data;
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_dev *xdna = client->xdna;
	int idx;

	XDNA_DBG(xdna, "PID %d flushing...", client->pid);
	if (!drm_dev_enter(&xdna->ddev, &idx))
		return 0;

	mutex_lock(&xdna->dev_lock);
	list_del_init(&client->node);
	mutex_unlock(&xdna->dev_lock);
	amdxdna_hwctx_remove_all(client);

	drm_dev_exit(idx);
	return 0;
}

static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_drm_get_info *args = data;
	int ret;

	if (!xdna->dev_info->ops->get_aie_info)
		return -EOPNOTSUPP;

	XDNA_DBG(xdna, "Request parameter %u", args->param);
	mutex_lock(&xdna->dev_lock);
	ret = xdna->dev_info->ops->get_aie_info(client, args);
	mutex_unlock(&xdna->dev_lock);
	return ret;
}

static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_drm_set_state *args = data;
	int ret;

	if (!xdna->dev_info->ops->set_aie_state)
		return -EOPNOTSUPP;

	XDNA_DBG(xdna, "Request parameter %u", args->param);
	mutex_lock(&xdna->dev_lock);
	ret = xdna->dev_info->ops->set_aie_state(client, args);
	mutex_unlock(&xdna->dev_lock);

	return ret;
}

static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
	/* Context */
	DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
	DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0),
	DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0),
	/* BO */
	DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0),
	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0),
	DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
	/* Execution */
	DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
	/* AIE hardware */
	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
	DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
};

static const struct file_operations amdxdna_fops = {
	.owner = THIS_MODULE,
	.open = accel_open,
	.release = drm_release,
	.flush = amdxdna_flush,
	.unlocked_ioctl = drm_ioctl,
	.compat_ioctl = drm_compat_ioctl,
	.poll = drm_poll,
	.read = drm_read,
	.llseek = noop_llseek,
	.mmap = drm_gem_mmap,
	.fop_flags = FOP_UNSIGNED_OFFSET,
};

const struct drm_driver amdxdna_drm_drv = {
	.driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL |
			   DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
	.fops = &amdxdna_fops,
	.name = "amdxdna_accel_driver",
	.desc = "AMD XDNA DRM implementation",
	.open = amdxdna_drm_open,
	.postclose = amdxdna_drm_close,
	.ioctls = amdxdna_drm_ioctls,
	.num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),

	.gem_create_object = amdxdna_gem_create_object_cb,
};

static const struct amdxdna_dev_info *
amdxdna_get_dev_info(struct pci_dev *pdev)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(amdxdna_ids); i++) {
		if (pdev->device == amdxdna_ids[i].device &&
		    pdev->revision == amdxdna_ids[i].revision)
			return amdxdna_ids[i].dev_info;
	}
	return NULL;
}

static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct device *dev = &pdev->dev;
	struct amdxdna_dev *xdna;
	int ret;

	xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev);
	if (IS_ERR(xdna))
		return PTR_ERR(xdna);

	xdna->dev_info = amdxdna_get_dev_info(pdev);
	if (!xdna->dev_info)
		return -ENODEV;

	drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
	init_rwsem(&xdna->notifier_lock);
	INIT_LIST_HEAD(&xdna->client_list);
	pci_set_drvdata(pdev, xdna);

	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&xdna->notifier_lock);
		fs_reclaim_release(GFP_KERNEL);
	}

	mutex_lock(&xdna->dev_lock);
	ret = xdna->dev_info->ops->init(xdna);
	mutex_unlock(&xdna->dev_lock);
	if (ret) {
		XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
		return ret;
	}

	ret = amdxdna_sysfs_init(xdna);
	if (ret) {
		XDNA_ERR(xdna, "Create amdxdna attrs failed: %d", ret);
		goto failed_dev_fini;
	}

	pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
	pm_runtime_use_autosuspend(dev);
	pm_runtime_allow(dev);

	ret = drm_dev_register(&xdna->ddev, 0);
	if (ret) {
		XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
		pm_runtime_forbid(dev);
		goto failed_sysfs_fini;
	}

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
	return 0;

failed_sysfs_fini:
	amdxdna_sysfs_fini(xdna);
failed_dev_fini:
	mutex_lock(&xdna->dev_lock);
	xdna->dev_info->ops->fini(xdna);
	mutex_unlock(&xdna->dev_lock);
	return ret;
}

static void amdxdna_remove(struct pci_dev *pdev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
	struct device *dev = &pdev->dev;
	struct amdxdna_client *client;

	pm_runtime_get_noresume(dev);
	pm_runtime_forbid(dev);

	drm_dev_unplug(&xdna->ddev);
	amdxdna_sysfs_fini(xdna);

	mutex_lock(&xdna->dev_lock);
	client = list_first_entry_or_null(&xdna->client_list,
					  struct amdxdna_client, node);
	while (client) {
		list_del_init(&client->node);
		mutex_unlock(&xdna->dev_lock);

		amdxdna_hwctx_remove_all(client);

		mutex_lock(&xdna->dev_lock);
		client = list_first_entry_or_null(&xdna->client_list,
						  struct amdxdna_client, node);
	}

	xdna->dev_info->ops->fini(xdna);
	mutex_unlock(&xdna->dev_lock);
}

static int amdxdna_dev_suspend_nolock(struct amdxdna_dev *xdna)
{
	if (xdna->dev_info->ops->suspend)
		xdna->dev_info->ops->suspend(xdna);

	return 0;
}

static int amdxdna_dev_resume_nolock(struct amdxdna_dev *xdna)
{
	if (xdna->dev_info->ops->resume)
		return xdna->dev_info->ops->resume(xdna);

	return 0;
}

static int amdxdna_pmops_suspend(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	struct amdxdna_client *client;

	mutex_lock(&xdna->dev_lock);
	list_for_each_entry(client, &xdna->client_list, node)
		amdxdna_hwctx_suspend(client);

	amdxdna_dev_suspend_nolock(xdna);
	mutex_unlock(&xdna->dev_lock);

	return 0;
}

static int amdxdna_pmops_resume(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	struct amdxdna_client *client;
	int ret;

	XDNA_INFO(xdna, "firmware resuming...");
	mutex_lock(&xdna->dev_lock);
	ret = amdxdna_dev_resume_nolock(xdna);
	if (ret) {
		XDNA_ERR(xdna, "resume NPU firmware failed");
		mutex_unlock(&xdna->dev_lock);
		return ret;
	}

	XDNA_INFO(xdna, "hardware context resuming...");
	list_for_each_entry(client, &xdna->client_list, node)
		amdxdna_hwctx_resume(client);
	mutex_unlock(&xdna->dev_lock);

	return 0;
}

static int amdxdna_rpmops_suspend(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	int ret;

	mutex_lock(&xdna->dev_lock);
	ret = amdxdna_dev_suspend_nolock(xdna);
	mutex_unlock(&xdna->dev_lock);

	XDNA_DBG(xdna, "Runtime suspend done ret: %d", ret);
	return ret;
}

static int amdxdna_rpmops_resume(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	int ret;

	mutex_lock(&xdna->dev_lock);
	ret = amdxdna_dev_resume_nolock(xdna);
	mutex_unlock(&xdna->dev_lock);

	XDNA_DBG(xdna, "Runtime resume done ret: %d", ret);
	return ret;
}

static const struct dev_pm_ops amdxdna_pm_ops = {
	SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
	RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL)
};

static struct pci_driver amdxdna_pci_driver = {
	.name = KBUILD_MODNAME,
	.id_table = pci_ids,
	.probe = amdxdna_probe,
	.remove = amdxdna_remove,
	.driver.pm = &amdxdna_pm_ops,
};

module_pci_driver(amdxdna_pci_driver);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
MODULE_DESCRIPTION("amdxdna driver");
147
drivers/accel/amdxdna/amdxdna_pci_drv.h
Normal file
@@ -0,0 +1,147 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#ifndef _AMDXDNA_PCI_DRV_H_
#define _AMDXDNA_PCI_DRV_H_

#include <linux/xarray.h>

#define XDNA_INFO(xdna, fmt, args...)	drm_info(&(xdna)->ddev, fmt, ##args)
#define XDNA_WARN(xdna, fmt, args...)	drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
#define XDNA_ERR(xdna, fmt, args...)	drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
#define XDNA_DBG(xdna, fmt, args...)	drm_dbg(&(xdna)->ddev, fmt, ##args)
#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args)

#define XDNA_MBZ_DBG(xdna, ptr, sz)					\
	({								\
		int __i;						\
		int __ret = 0;						\
		u8 *__ptr = (u8 *)(ptr);				\
		for (__i = 0; __i < (sz); __i++) {			\
			if (__ptr[__i]) {				\
				XDNA_DBG(xdna, "MBZ check failed");	\
				__ret = -EINVAL;			\
				break;					\
			}						\
		}							\
		__ret;							\
	})

#define to_xdna_dev(drm_dev) \
	((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))

extern const struct drm_driver amdxdna_drm_drv;

struct amdxdna_client;
struct amdxdna_dev;
struct amdxdna_drm_get_info;
struct amdxdna_drm_set_state;
struct amdxdna_gem_obj;
struct amdxdna_hwctx;
struct amdxdna_sched_job;

/*
 * struct amdxdna_dev_ops - Device hardware operation callbacks
 */
struct amdxdna_dev_ops {
	int (*init)(struct amdxdna_dev *xdna);
	void (*fini)(struct amdxdna_dev *xdna);
	int (*resume)(struct amdxdna_dev *xdna);
	void (*suspend)(struct amdxdna_dev *xdna);
	int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
	void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
	int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
	void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
	void (*hwctx_suspend)(struct amdxdna_hwctx *hwctx);
	void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
	int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
	int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
	int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
};

/*
 * struct amdxdna_dev_info - Device hardware information
 * Records static device information, like reg, mbox, PSP, SMU bar index
 */
struct amdxdna_dev_info {
	int				reg_bar;
	int				mbox_bar;
	int				sram_bar;
	int				psp_bar;
	int				smu_bar;
	int				device_type;
	int				first_col;
	u32				dev_mem_buf_shift;
	u64				dev_mem_base;
	size_t				dev_mem_size;
	char				*vbnv;
	const struct amdxdna_dev_priv	*dev_priv;
	const struct amdxdna_dev_ops	*ops;
};

struct amdxdna_fw_ver {
	u32 major;
	u32 minor;
	u32 sub;
	u32 build;
};

struct amdxdna_dev {
	struct drm_device		ddev;
	struct amdxdna_dev_hdl		*dev_handle;
	const struct amdxdna_dev_info	*dev_info;
	void				*xrs_hdl;

	struct mutex			dev_lock; /* per device lock */
	struct list_head		client_list;
	struct amdxdna_fw_ver		fw_ver;
	struct rw_semaphore		notifier_lock; /* for mmu notifier */
};

/*
 * struct amdxdna_device_id - PCI device info
 */
struct amdxdna_device_id {
	unsigned short device;
	u8 revision;
	const struct amdxdna_dev_info *dev_info;
};

/*
 * struct amdxdna_client - amdxdna client
 * A per-fd data structure for managing contexts and other per-process state.
 */
struct amdxdna_client {
	struct list_head		node;
	pid_t				pid;
	struct mutex			hwctx_lock; /* protect hwctx */
	/* do NOT wait on this srcu while hwctx_lock is held */
	struct srcu_struct		hwctx_srcu;
	struct xarray			hwctx_xa;
	u32				next_hwctxid;
	struct amdxdna_dev		*xdna;
	struct drm_file			*filp;

	struct mutex			mm_lock; /* protect memory related */
	struct amdxdna_gem_obj		*dev_heap;

	struct iommu_sva		*sva;
	int				pasid;
};

#define amdxdna_for_each_hwctx(client, hwctx_id, entry)		\
	xa_for_each(&(client)->hwctx_xa, hwctx_id, entry)

/* Add device info below */
extern const struct amdxdna_dev_info dev_npu1_info;
extern const struct amdxdna_dev_info dev_npu2_info;
extern const struct amdxdna_dev_info dev_npu4_info;
extern const struct amdxdna_dev_info dev_npu5_info;
extern const struct amdxdna_dev_info dev_npu6_info;

int amdxdna_sysfs_init(struct amdxdna_dev *xdna);
void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);

#endif /* _AMDXDNA_PCI_DRV_H_ */
67
drivers/accel/amdxdna/amdxdna_sysfs.c
Normal file
@@ -0,0 +1,67 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/types.h>

#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"

static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct amdxdna_dev *xdna = dev_get_drvdata(dev);

	return sprintf(buf, "%s\n", xdna->dev_info->vbnv);
}
static DEVICE_ATTR_RO(vbnv);

static ssize_t device_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct amdxdna_dev *xdna = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", xdna->dev_info->device_type);
}
static DEVICE_ATTR_RO(device_type);

static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct amdxdna_dev *xdna = dev_get_drvdata(dev);

	return sprintf(buf, "%d.%d.%d.%d\n", xdna->fw_ver.major,
		       xdna->fw_ver.minor, xdna->fw_ver.sub,
		       xdna->fw_ver.build);
}
static DEVICE_ATTR_RO(fw_version);

static struct attribute *amdxdna_attrs[] = {
	&dev_attr_device_type.attr,
	&dev_attr_vbnv.attr,
	&dev_attr_fw_version.attr,
	NULL,
};

static struct attribute_group amdxdna_attr_group = {
	.attrs = amdxdna_attrs,
};

int amdxdna_sysfs_init(struct amdxdna_dev *xdna)
{
	int ret;

	ret = sysfs_create_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
	if (ret)
		XDNA_ERR(xdna, "Create attr group failed");

	return ret;
}

void amdxdna_sysfs_fini(struct amdxdna_dev *xdna)
{
	sysfs_remove_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
}
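For orientation, a hedged userspace sketch of reading one of these attributes. The attributes land in the NPU's PCI device directory in sysfs; the PCI address used below is purely illustrative and will differ per system:

/* Illustrative only: substitute the real PCI address of the NPU. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64] = {0};
	int fd = open("/sys/bus/pci/devices/0000:c5:00.1/fw_version", O_RDONLY);

	if (fd < 0)
		return 1;
	if (read(fd, buf, sizeof(buf) - 1) > 0)
		printf("NPU firmware: %s", buf);
	close(fd);
	return 0;
}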
114
drivers/accel/amdxdna/npu1_regs.c
Normal file
114
drivers/accel/amdxdna/npu1_regs.c
Normal file
@ -0,0 +1,114 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>

#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

/* Address definition from NPU1 docs */
#define MPNPU_PUB_SEC_INTR		0x3010090
#define MPNPU_PUB_PWRMGMT_INTR		0x3010094
#define MPNPU_PUB_SCRATCH2		0x30100A0
#define MPNPU_PUB_SCRATCH3		0x30100A4
#define MPNPU_PUB_SCRATCH4		0x30100A8
#define MPNPU_PUB_SCRATCH5		0x30100AC
#define MPNPU_PUB_SCRATCH6		0x30100B0
#define MPNPU_PUB_SCRATCH7		0x30100B4
#define MPNPU_PUB_SCRATCH9		0x30100BC

#define MPNPU_SRAM_X2I_MAILBOX_0	0x30A0000
#define MPNPU_SRAM_X2I_MAILBOX_1	0x30A2000
#define MPNPU_SRAM_I2X_MAILBOX_15	0x30BF000

#define MPNPU_APERTURE0_BASE		0x3000000
#define MPNPU_APERTURE1_BASE		0x3080000
#define MPNPU_APERTURE2_BASE		0x30C0000

/* PCIe BAR Index for NPU1 */
#define NPU1_REG_BAR_INDEX	0
#define NPU1_MBOX_BAR_INDEX	4
#define NPU1_PSP_BAR_INDEX	0
#define NPU1_SMU_BAR_INDEX	0
#define NPU1_SRAM_BAR_INDEX	2
/* Associated BARs and Apertures */
#define NPU1_REG_BAR_BASE	MPNPU_APERTURE0_BASE
#define NPU1_MBOX_BAR_BASE	MPNPU_APERTURE2_BASE
#define NPU1_PSP_BAR_BASE	MPNPU_APERTURE0_BASE
#define NPU1_SMU_BAR_BASE	MPNPU_APERTURE0_BASE
#define NPU1_SRAM_BAR_BASE	MPNPU_APERTURE1_BASE

const struct rt_config npu1_default_rt_cfg[] = {
	{ 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
	{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
	{ 0 },
};

const struct dpm_clk_freq npu1_dpm_clk_table[] = {
	{400, 800},
	{600, 1024},
	{600, 1024},
	{600, 1024},
	{600, 1024},
	{720, 1309},
	{720, 1309},
	{847, 1600},
	{ 0 }
};

static const struct amdxdna_dev_priv npu1_dev_priv = {
	.fw_path        = "amdnpu/1502_00/npu.sbin",
	.protocol_major = 0x5,
	.protocol_minor = 0x7,
	.rt_config	= npu1_default_rt_cfg,
	.dpm_clk_tbl	= npu1_dpm_clk_table,
	.col_align	= COL_ALIGN_NONE,
	.mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
	.mbox_size      = 0, /* Use BAR size */
	.sram_dev_addr  = NPU1_SRAM_BAR_BASE,
	.sram_offs      = {
		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15),
	},
	.psp_regs_off   = {
		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU1_PSP, MPNPU_PUB_SCRATCH2),
		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH3),
		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH4),
		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH9),
		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU1_PSP, MPNPU_PUB_SEC_INTR),
		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH3),
	},
	.smu_regs_off   = {
		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH5),
		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU1_SMU, MPNPU_PUB_PWRMGMT_INTR),
		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
	},
	.hw_ops		= {
		.set_dpm = npu1_set_dpm,
	},
};

const struct amdxdna_dev_info dev_npu1_info = {
	.reg_bar           = NPU1_REG_BAR_INDEX,
	.mbox_bar          = NPU1_MBOX_BAR_INDEX,
	.sram_bar          = NPU1_SRAM_BAR_INDEX,
	.psp_bar           = NPU1_PSP_BAR_INDEX,
	.smu_bar           = NPU1_SMU_BAR_INDEX,
	.first_col         = 1,
	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
	.dev_mem_base      = AIE2_DEVM_BASE,
	.dev_mem_size      = AIE2_DEVM_SIZE,
	.vbnv              = "RyzenAI-npu1",
	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
	.dev_priv          = &npu1_dev_priv,
	.ops               = &aie2_ops,
};
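A worked reading of one DEFINE_BAR_OFFSET() entry above, on the assumption that the macro (defined in aie2_pci.h, which is not shown in this excerpt) records each register's offset relative to its BAR base:

/*
 * PSP_CMD_REG maps to MPNPU_PUB_SCRATCH2 through the PSP BAR:
 *
 *   offset = MPNPU_PUB_SCRATCH2 - NPU1_PSP_BAR_BASE
 *          = 0x30100A0 - 0x3000000
 *          = 0x100A0
 *
 * so the command register would be reached at byte 0x100A0 into the
 * mapping of PCIe BAR 0 (NPU1_PSP_BAR_INDEX). This is an inference
 * from the table layout, not a statement of the macro's actual body.
 */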
113
drivers/accel/amdxdna/npu2_regs.c
Normal file
113
drivers/accel/amdxdna/npu2_regs.c
Normal file
@ -0,0 +1,113 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>

#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
#define MPNPU_PUB_SEC_INTR		0x3010060
#define MPNPU_PUB_PWRMGMT_INTR		0x3010064
#define MPNPU_PUB_SCRATCH0		0x301006C
#define MPNPU_PUB_SCRATCH1		0x3010070
#define MPNPU_PUB_SCRATCH2		0x3010074
#define MPNPU_PUB_SCRATCH3		0x3010078
#define MPNPU_PUB_SCRATCH4		0x301007C
#define MPNPU_PUB_SCRATCH5		0x3010080
#define MPNPU_PUB_SCRATCH6		0x3010084
#define MPNPU_PUB_SCRATCH7		0x3010088
#define MPNPU_PUB_SCRATCH8		0x301008C
#define MPNPU_PUB_SCRATCH9		0x3010090
#define MPNPU_PUB_SCRATCH10		0x3010094
#define MPNPU_PUB_SCRATCH11		0x3010098
#define MPNPU_PUB_SCRATCH12		0x301009C
#define MPNPU_PUB_SCRATCH13		0x30100A0
#define MPNPU_PUB_SCRATCH14		0x30100A4
#define MPNPU_PUB_SCRATCH15		0x30100A8
#define MP0_C2PMSG_73			0x3810A24
#define MP0_C2PMSG_123			0x3810AEC

#define MP1_C2PMSG_0			0x3B10900
#define MP1_C2PMSG_60			0x3B109F0
#define MP1_C2PMSG_61			0x3B109F4

#define MPNPU_SRAM_X2I_MAILBOX_0	0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15	0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31	0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31	0x363F000

#define MMNPU_APERTURE0_BASE		0x3000000
#define MMNPU_APERTURE1_BASE		0x3600000
#define MMNPU_APERTURE3_BASE		0x3810000
#define MMNPU_APERTURE4_BASE		0x3B10000

/* PCIe BAR Index for NPU2 */
#define NPU2_REG_BAR_INDEX	0
#define NPU2_MBOX_BAR_INDEX	0
#define NPU2_PSP_BAR_INDEX	4
#define NPU2_SMU_BAR_INDEX	5
#define NPU2_SRAM_BAR_INDEX	2
/* Associated BARs and Apertures */
#define NPU2_REG_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU2_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU2_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
#define NPU2_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
#define NPU2_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE

static const struct amdxdna_dev_priv npu2_dev_priv = {
	.fw_path        = "amdnpu/17f0_00/npu.sbin",
	.protocol_major = 0x6,
	.protocol_minor = 0x6,
	.rt_config	= npu4_default_rt_cfg,
	.dpm_clk_tbl	= npu4_dpm_clk_table,
	.col_align	= COL_ALIGN_NATURE,
	.mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
	.mbox_size      = 0, /* Use BAR size */
	.sram_dev_addr  = NPU2_SRAM_BAR_BASE,
	.sram_offs      = {
		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
	},
	.psp_regs_off   = {
		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU2_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU2_REG, MPNPU_PUB_SCRATCH3),
		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU2_REG, MPNPU_PUB_SCRATCH4),
		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU2_REG, MPNPU_PUB_SCRATCH9),
		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU2_PSP, MP0_C2PMSG_73),
		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU2_REG, MPNPU_PUB_SCRATCH3),
	},
	.smu_regs_off   = {
		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU2_SMU, MP1_C2PMSG_0),
		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU2_SMU, MP1_C2PMSG_60),
		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE),
		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
	},
	.hw_ops		= {
		.set_dpm = npu4_set_dpm,
	},
};

const struct amdxdna_dev_info dev_npu2_info = {
	.reg_bar           = NPU2_REG_BAR_INDEX,
	.mbox_bar          = NPU2_MBOX_BAR_INDEX,
	.sram_bar          = NPU2_SRAM_BAR_INDEX,
	.psp_bar           = NPU2_PSP_BAR_INDEX,
	.smu_bar           = NPU2_SMU_BAR_INDEX,
	.first_col         = 0,
	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
	.dev_mem_base      = AIE2_DEVM_BASE,
	.dev_mem_size      = AIE2_DEVM_SIZE,
	.vbnv              = "RyzenAI-npu2",
	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
	.dev_priv          = &npu2_dev_priv,
	.ops               = &aie2_ops, /* NPU2 can share NPU1's callback */
};
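Note that NPU2 reuses npu4_default_rt_cfg and npu4_dpm_clk_table rather than defining its own tables. A hedged sketch of how such a zero-terminated rt_config list could be walked at init time; the field names (type, value) and the send helper are assumptions, since struct rt_config and its real consumer live in aie2_pci.h/aie2_pci.c outside this excerpt:

/* Illustrative only; not the driver's actual implementation. */
int example_send_runtime_cfg(void *hdl, u32 type, u32 value); /* assumed helper */

static int example_apply_rt_config(void *hdl, const struct rt_config *cfg)
{
	int ret;

	for (; cfg->type; cfg++) {	/* the { 0 } entry terminates the list */
		ret = example_send_runtime_cfg(hdl, cfg->type, cfg->value);
		if (ret)
			return ret;
	}
	return 0;
}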
134
drivers/accel/amdxdna/npu4_regs.c
Normal file
134
drivers/accel/amdxdna/npu4_regs.c
Normal file
@ -0,0 +1,134 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>

#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
#define MPNPU_PUB_SEC_INTR		0x3010060
#define MPNPU_PUB_PWRMGMT_INTR		0x3010064
#define MPNPU_PUB_SCRATCH0		0x301006C
#define MPNPU_PUB_SCRATCH1		0x3010070
#define MPNPU_PUB_SCRATCH2		0x3010074
#define MPNPU_PUB_SCRATCH3		0x3010078
#define MPNPU_PUB_SCRATCH4		0x301007C
#define MPNPU_PUB_SCRATCH5		0x3010080
#define MPNPU_PUB_SCRATCH6		0x3010084
#define MPNPU_PUB_SCRATCH7		0x3010088
#define MPNPU_PUB_SCRATCH8		0x301008C
#define MPNPU_PUB_SCRATCH9		0x3010090
#define MPNPU_PUB_SCRATCH10		0x3010094
#define MPNPU_PUB_SCRATCH11		0x3010098
#define MPNPU_PUB_SCRATCH12		0x301009C
#define MPNPU_PUB_SCRATCH13		0x30100A0
#define MPNPU_PUB_SCRATCH14		0x30100A4
#define MPNPU_PUB_SCRATCH15		0x30100A8
#define MP0_C2PMSG_73			0x3810A24
#define MP0_C2PMSG_123			0x3810AEC

#define MP1_C2PMSG_0			0x3B10900
#define MP1_C2PMSG_60			0x3B109F0
#define MP1_C2PMSG_61			0x3B109F4

#define MPNPU_SRAM_X2I_MAILBOX_0	0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15	0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31	0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31	0x363F000

#define MMNPU_APERTURE0_BASE		0x3000000
#define MMNPU_APERTURE1_BASE		0x3600000
#define MMNPU_APERTURE3_BASE		0x3810000
#define MMNPU_APERTURE4_BASE		0x3B10000

/* PCIe BAR Index for NPU4 */
#define NPU4_REG_BAR_INDEX	0
#define NPU4_MBOX_BAR_INDEX	0
#define NPU4_PSP_BAR_INDEX	4
#define NPU4_SMU_BAR_INDEX	5
#define NPU4_SRAM_BAR_INDEX	2
/* Associated BARs and Apertures */
#define NPU4_REG_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU4_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU4_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
#define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
#define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE

const struct rt_config npu4_default_rt_cfg[] = {
	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
	{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
	{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
	{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
	{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
	{ 0 },
};

const struct dpm_clk_freq npu4_dpm_clk_table[] = {
	{396, 792},
	{600, 1056},
	{792, 1152},
	{975, 1267},
	{975, 1267},
	{1056, 1408},
	{1152, 1584},
	{1267, 1800},
	{ 0 }
};

static const struct amdxdna_dev_priv npu4_dev_priv = {
	.fw_path        = "amdnpu/17f0_10/npu.sbin",
	.protocol_major = 0x6,
	.protocol_minor = 12,
	.rt_config	= npu4_default_rt_cfg,
	.dpm_clk_tbl	= npu4_dpm_clk_table,
	.col_align	= COL_ALIGN_NATURE,
	.mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
	.mbox_size      = 0, /* Use BAR size */
	.sram_dev_addr  = NPU4_SRAM_BAR_BASE,
	.sram_offs      = {
		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
	},
	.psp_regs_off   = {
		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU4_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU4_REG, MPNPU_PUB_SCRATCH4),
		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU4_REG, MPNPU_PUB_SCRATCH9),
		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU4_PSP, MP0_C2PMSG_73),
		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
	},
	.smu_regs_off   = {
		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU4_SMU, MP1_C2PMSG_0),
		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU4_SMU, MP1_C2PMSG_60),
		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
	},
	.hw_ops		= {
		.set_dpm = npu4_set_dpm,
	},
};

const struct amdxdna_dev_info dev_npu4_info = {
	.reg_bar           = NPU4_REG_BAR_INDEX,
	.mbox_bar          = NPU4_MBOX_BAR_INDEX,
	.sram_bar          = NPU4_SRAM_BAR_INDEX,
	.psp_bar           = NPU4_PSP_BAR_INDEX,
	.smu_bar           = NPU4_SMU_BAR_INDEX,
	.first_col         = 0,
	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
	.dev_mem_base      = AIE2_DEVM_BASE,
	.dev_mem_size      = AIE2_DEVM_SIZE,
	.vbnv              = "RyzenAI-npu4",
	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
	.dev_priv          = &npu4_dev_priv,
	.ops               = &aie2_ops, /* NPU4 can share NPU1's callback */
};
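The eight-entry DPM table above enumerates discrete clock levels; the repeated rows collapse several firmware levels onto the same frequencies. A sketch of how a set_dpm implementation might clamp a request to the table, assuming the two columns are the NPU core clock and the h-clock in MHz (the field name npuclk is an assumption; the real npu4_set_dpm lives in the aie2 code, not in this file):

/* Illustrative only: pick the highest level whose npuclk fits the request. */
static u32 example_pick_dpm_level(const struct dpm_clk_freq *tbl, u32 want_mhz)
{
	u32 level, best = 0;

	for (level = 0; tbl[level].npuclk; level++)	/* { 0 } terminates */
		if (tbl[level].npuclk <= want_mhz)
			best = level;
	return best;
}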
113
drivers/accel/amdxdna/npu5_regs.c
Normal file
113
drivers/accel/amdxdna/npu5_regs.c
Normal file
@ -0,0 +1,113 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>

#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
#define MPNPU_PUB_SEC_INTR		0x3010060
#define MPNPU_PUB_PWRMGMT_INTR		0x3010064
#define MPNPU_PUB_SCRATCH0		0x301006C
#define MPNPU_PUB_SCRATCH1		0x3010070
#define MPNPU_PUB_SCRATCH2		0x3010074
#define MPNPU_PUB_SCRATCH3		0x3010078
#define MPNPU_PUB_SCRATCH4		0x301007C
#define MPNPU_PUB_SCRATCH5		0x3010080
#define MPNPU_PUB_SCRATCH6		0x3010084
#define MPNPU_PUB_SCRATCH7		0x3010088
#define MPNPU_PUB_SCRATCH8		0x301008C
#define MPNPU_PUB_SCRATCH9		0x3010090
#define MPNPU_PUB_SCRATCH10		0x3010094
#define MPNPU_PUB_SCRATCH11		0x3010098
#define MPNPU_PUB_SCRATCH12		0x301009C
#define MPNPU_PUB_SCRATCH13		0x30100A0
#define MPNPU_PUB_SCRATCH14		0x30100A4
#define MPNPU_PUB_SCRATCH15		0x30100A8
#define MP0_C2PMSG_73			0x3810A24
#define MP0_C2PMSG_123			0x3810AEC

#define MP1_C2PMSG_0			0x3B10900
#define MP1_C2PMSG_60			0x3B109F0
#define MP1_C2PMSG_61			0x3B109F4

#define MPNPU_SRAM_X2I_MAILBOX_0	0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15	0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31	0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31	0x363F000

#define MMNPU_APERTURE0_BASE		0x3000000
#define MMNPU_APERTURE1_BASE		0x3600000
#define MMNPU_APERTURE3_BASE		0x3810000
#define MMNPU_APERTURE4_BASE		0x3B10000

/* PCIe BAR Index for NPU5 */
#define NPU5_REG_BAR_INDEX	0
#define NPU5_MBOX_BAR_INDEX	0
#define NPU5_PSP_BAR_INDEX	4
#define NPU5_SMU_BAR_INDEX	5
#define NPU5_SRAM_BAR_INDEX	2
/* Associated BARs and Apertures */
#define NPU5_REG_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU5_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU5_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
#define NPU5_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
#define NPU5_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE

static const struct amdxdna_dev_priv npu5_dev_priv = {
	.fw_path        = "amdnpu/17f0_11/npu.sbin",
	.protocol_major = 0x6,
	.protocol_minor = 12,
	.rt_config	= npu4_default_rt_cfg,
	.dpm_clk_tbl	= npu4_dpm_clk_table,
	.col_align	= COL_ALIGN_NATURE,
	.mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
	.mbox_size      = 0, /* Use BAR size */
	.sram_dev_addr  = NPU5_SRAM_BAR_BASE,
	.sram_offs      = {
		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
	},
	.psp_regs_off   = {
		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU5_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU5_REG, MPNPU_PUB_SCRATCH3),
		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU5_REG, MPNPU_PUB_SCRATCH4),
		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU5_REG, MPNPU_PUB_SCRATCH9),
		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU5_PSP, MP0_C2PMSG_73),
		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU5_REG, MPNPU_PUB_SCRATCH3),
	},
	.smu_regs_off   = {
		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU5_SMU, MP1_C2PMSG_0),
		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU5_SMU, MP1_C2PMSG_60),
		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU5_SMU, MMNPU_APERTURE4_BASE),
		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
	},
	.hw_ops		= {
		.set_dpm = npu4_set_dpm,
	},
};

const struct amdxdna_dev_info dev_npu5_info = {
	.reg_bar           = NPU5_REG_BAR_INDEX,
	.mbox_bar          = NPU5_MBOX_BAR_INDEX,
	.sram_bar          = NPU5_SRAM_BAR_INDEX,
	.psp_bar           = NPU5_PSP_BAR_INDEX,
	.smu_bar           = NPU5_SMU_BAR_INDEX,
	.first_col         = 0,
	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
	.dev_mem_base      = AIE2_DEVM_BASE,
	.dev_mem_size      = AIE2_DEVM_SIZE,
	.vbnv              = "RyzenAI-npu5",
	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
	.dev_priv          = &npu5_dev_priv,
	.ops               = &aie2_ops,
};
114
drivers/accel/amdxdna/npu6_regs.c
Normal file
114
drivers/accel/amdxdna/npu6_regs.c
Normal file
@ -0,0 +1,114 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>

#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"

/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
#define MPNPU_PUB_SEC_INTR		0x3010060
#define MPNPU_PUB_PWRMGMT_INTR		0x3010064
#define MPNPU_PUB_SCRATCH0		0x301006C
#define MPNPU_PUB_SCRATCH1		0x3010070
#define MPNPU_PUB_SCRATCH2		0x3010074
#define MPNPU_PUB_SCRATCH3		0x3010078
#define MPNPU_PUB_SCRATCH4		0x301007C
#define MPNPU_PUB_SCRATCH5		0x3010080
#define MPNPU_PUB_SCRATCH6		0x3010084
#define MPNPU_PUB_SCRATCH7		0x3010088
#define MPNPU_PUB_SCRATCH8		0x301008C
#define MPNPU_PUB_SCRATCH9		0x3010090
#define MPNPU_PUB_SCRATCH10		0x3010094
#define MPNPU_PUB_SCRATCH11		0x3010098
#define MPNPU_PUB_SCRATCH12		0x301009C
#define MPNPU_PUB_SCRATCH13		0x30100A0
#define MPNPU_PUB_SCRATCH14		0x30100A4
#define MPNPU_PUB_SCRATCH15		0x30100A8
#define MP0_C2PMSG_73			0x3810A24
#define MP0_C2PMSG_123			0x3810AEC

#define MP1_C2PMSG_0			0x3B10900
#define MP1_C2PMSG_60			0x3B109F0
#define MP1_C2PMSG_61			0x3B109F4

#define MPNPU_SRAM_X2I_MAILBOX_0	0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15	0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31	0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31	0x363F000

#define MMNPU_APERTURE0_BASE		0x3000000
#define MMNPU_APERTURE1_BASE		0x3600000
#define MMNPU_APERTURE3_BASE		0x3810000
#define MMNPU_APERTURE4_BASE		0x3B10000

/* PCIe BAR Index for NPU6 */
#define NPU6_REG_BAR_INDEX	0
#define NPU6_MBOX_BAR_INDEX	0
#define NPU6_PSP_BAR_INDEX	4
#define NPU6_SMU_BAR_INDEX	5
#define NPU6_SRAM_BAR_INDEX	2
/* Associated BARs and Apertures */
#define NPU6_REG_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU6_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU6_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
#define NPU6_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
#define NPU6_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE

static const struct amdxdna_dev_priv npu6_dev_priv = {
	.fw_path        = "amdnpu/17f0_10/npu.sbin",
	.protocol_major = 0x6,
	.protocol_minor = 12,
	.rt_config	= npu4_default_rt_cfg,
	.dpm_clk_tbl	= npu4_dpm_clk_table,
	.col_align	= COL_ALIGN_NATURE,
	.mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
	.mbox_size      = 0, /* Use BAR size */
	.sram_dev_addr  = NPU6_SRAM_BAR_BASE,
	.sram_offs      = {
		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
	},
	.psp_regs_off   = {
		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU6_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU6_REG, MPNPU_PUB_SCRATCH3),
		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU6_REG, MPNPU_PUB_SCRATCH4),
		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU6_REG, MPNPU_PUB_SCRATCH9),
		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU6_PSP, MP0_C2PMSG_73),
		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU6_REG, MPNPU_PUB_SCRATCH3),
	},
	.smu_regs_off   = {
		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU6_SMU, MP1_C2PMSG_0),
		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU6_SMU, MP1_C2PMSG_60),
		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE),
		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
	},
	.hw_ops		= {
		.set_dpm = npu4_set_dpm,
	},
};

const struct amdxdna_dev_info dev_npu6_info = {
	.reg_bar           = NPU6_REG_BAR_INDEX,
	.mbox_bar          = NPU6_MBOX_BAR_INDEX,
	.sram_bar          = NPU6_SRAM_BAR_INDEX,
	.psp_bar           = NPU6_PSP_BAR_INDEX,
	.smu_bar           = NPU6_SMU_BAR_INDEX,
	.first_col         = 0,
	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
	.dev_mem_base      = AIE2_DEVM_BASE,
	.dev_mem_size      = AIE2_DEVM_SIZE,
	.vbnv              = "RyzenAI-npu6",
	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
	.dev_priv          = &npu6_dev_priv,
	.ops               = &aie2_ops,
};
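One detail shared by every dev_info above: dev_mem_buf_shift = 15. As a quick check of the "32 KiB aligned" comment:

/*
 * 1 << 15 = 32768 bytes = 32 KiB, so device-memory heap buffers are
 * aligned (and presumably sized) in 32 KiB granules, e.g. with
 * ALIGN(size, 1 << dev_info->dev_mem_buf_shift). The ALIGN() usage is
 * an illustration, not a quote from the driver.
 */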
@ -101,7 +101,6 @@ static const struct drm_driver hl_driver = {
|
||||
.major = LINUX_VERSION_MAJOR,
|
||||
.minor = LINUX_VERSION_PATCHLEVEL,
|
||||
.patchlevel = LINUX_VERSION_SUBLEVEL,
|
||||
.date = "20190505",
|
||||
|
||||
.fops = &hl_fops,
|
||||
.open = hl_device_open,
|
||||
|
@ -458,15 +458,7 @@ static const struct drm_driver driver = {
|
||||
.name = DRIVER_NAME,
|
||||
.desc = DRIVER_DESC,
|
||||
|
||||
#ifdef DRIVER_DATE
|
||||
.date = DRIVER_DATE,
|
||||
.major = DRIVER_MAJOR,
|
||||
.minor = DRIVER_MINOR,
|
||||
.patchlevel = DRIVER_PATCHLEVEL,
|
||||
#else
|
||||
.date = UTS_RELEASE,
|
||||
.major = 1,
|
||||
#endif
|
||||
};
|
||||
|
||||
static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
|
||||
|
@ -78,8 +78,8 @@ static int ivpu_resume(struct ivpu_device *vdev)
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
pci_restore_state(to_pci_dev(vdev->drm.dev));
|
||||
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
|
||||
pci_restore_state(to_pci_dev(vdev->drm.dev));
|
||||
|
||||
ret = ivpu_hw_power_up(vdev);
|
||||
if (ret) {
|
||||
|
@ -208,7 +208,6 @@ static const struct drm_driver qaic_accel_driver = {
|
||||
|
||||
.name = QAIC_NAME,
|
||||
.desc = QAIC_DESC,
|
||||
.date = "20190618",
|
||||
|
||||
.fops = &qaic_accel_fops,
|
||||
.open = qaic_open,
|
||||
|
@ -772,8 +772,7 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
|
||||
|
||||
cancel_work_sync(&context->fw_work);
|
||||
cancel_work_sync(&context->dump_work);
|
||||
if (context->mem_dump)
|
||||
vfree(context->mem_dump);
|
||||
vfree(context->mem_dump);
|
||||
sahara_release_image(context);
|
||||
mhi_unprepare_from_transfer(mhi_dev);
|
||||
}
|
||||
|
@ -103,10 +103,15 @@ config DRM_KMS_HELPER
|
||||
help
|
||||
CRTC helpers for KMS drivers.
|
||||
|
||||
config DRM_DRAW
|
||||
bool
|
||||
depends on DRM
|
||||
|
||||
config DRM_PANIC
|
||||
bool "Display a user-friendly message when a kernel panic occurs"
|
||||
depends on DRM
|
||||
select FONT_SUPPORT
|
||||
select DRM_DRAW
|
||||
help
|
||||
Enable a drm panic handler, which will display a user-friendly message
|
||||
when a kernel panic occurs. It's useful when using a user-space
|
||||
@ -218,77 +223,7 @@ config DRM_CLIENT
|
||||
option. Drivers that support the default clients should
|
||||
select DRM_CLIENT_SELECTION instead.
|
||||
|
||||
config DRM_CLIENT_LIB
|
||||
tristate
|
||||
depends on DRM
|
||||
select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
|
||||
select FB_CORE if DRM_FBDEV_EMULATION
|
||||
help
|
||||
This option enables the DRM client library and selects all
|
||||
modules and components according to the enabled clients.
|
||||
|
||||
config DRM_CLIENT_SELECTION
|
||||
tristate
|
||||
depends on DRM
|
||||
select DRM_CLIENT_LIB if DRM_FBDEV_EMULATION
|
||||
help
|
||||
Drivers that support in-kernel DRM clients have to select this
|
||||
option.
|
||||
|
||||
config DRM_CLIENT_SETUP
|
||||
bool
|
||||
depends on DRM_CLIENT_SELECTION
|
||||
help
|
||||
Enables the DRM client selection. DRM drivers that support the
|
||||
default clients should select DRM_CLIENT_SELECTION instead.
|
||||
|
||||
menu "Supported DRM clients"
|
||||
depends on DRM_CLIENT_SELECTION
|
||||
|
||||
config DRM_FBDEV_EMULATION
|
||||
bool "Enable legacy fbdev support for your modesetting driver"
|
||||
depends on DRM_CLIENT_SELECTION
|
||||
select DRM_CLIENT
|
||||
select DRM_CLIENT_SETUP
|
||||
select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE
|
||||
default FB
|
||||
help
|
||||
Choose this option if you have a need for the legacy fbdev
|
||||
support. Note that this support also provides the linux console
|
||||
support on top of your modesetting driver.
|
||||
|
||||
If in doubt, say "Y".
|
||||
|
||||
config DRM_FBDEV_OVERALLOC
|
||||
int "Overallocation of the fbdev buffer"
|
||||
depends on DRM_FBDEV_EMULATION
|
||||
default 100
|
||||
help
|
||||
Defines the fbdev buffer overallocation in percent. Default
|
||||
is 100. Typical values for double buffering will be 200,
|
||||
triple buffering 300.
|
||||
|
||||
config DRM_FBDEV_LEAK_PHYS_SMEM
|
||||
bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)"
|
||||
depends on DRM_FBDEV_EMULATION && EXPERT
|
||||
default n
|
||||
help
|
||||
In order to keep user-space compatibility, we want in certain
|
||||
use-cases to keep leaking the fbdev physical address to the
|
||||
user-space program handling the fbdev buffer.
|
||||
This affects, not only, Amlogic, Allwinner or Rockchip devices
|
||||
with ARM Mali GPUs using an userspace Blob.
|
||||
This option is not supported by upstream developers and should be
|
||||
removed as soon as possible and be considered as a broken and
|
||||
legacy behaviour from a modern fbdev device driver.
|
||||
|
||||
Please send any bug reports when using this to your proprietary
|
||||
software vendor that requires this.
|
||||
|
||||
If in doubt, say "N" or spread the word to your closed source
|
||||
library vendor.
|
||||
|
||||
endmenu
|
||||
source "drivers/gpu/drm/clients/Kconfig"
|
||||
|
||||
config DRM_LOAD_EDID_FIRMWARE
|
||||
bool "Allow to specify an EDID data set instead of probing for it"
|
||||
@ -530,6 +465,10 @@ config DRM_HYPERV
|
||||
config DRM_EXPORT_FOR_TESTS
|
||||
bool
|
||||
|
||||
# Separate option as not all DRM drivers use it
|
||||
config DRM_PANEL_BACKLIGHT_QUIRKS
|
||||
tristate
|
||||
|
||||
config DRM_LIB_RANDOM
|
||||
bool
|
||||
default n
|
||||
|
@ -91,10 +91,12 @@ drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \
|
||||
drm_privacy_screen_x86.o
|
||||
drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o
|
||||
drm-$(CONFIG_DRM_PANIC) += drm_panic.o
|
||||
drm-$(CONFIG_DRM_DRAW) += drm_draw.o
|
||||
drm-$(CONFIG_DRM_PANIC_SCREEN_QR_CODE) += drm_panic_qr.o
|
||||
obj-$(CONFIG_DRM) += drm.o
|
||||
|
||||
obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
|
||||
obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o
|
||||
|
||||
#
|
||||
# Memory-management helpers
|
||||
@ -148,14 +150,6 @@ drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
|
||||
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
|
||||
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
|
||||
|
||||
#
|
||||
# DRM clients
|
||||
#
|
||||
|
||||
drm_client_lib-y := drm_client_setup.o
|
||||
drm_client_lib-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_client.o
|
||||
obj-$(CONFIG_DRM_CLIENT_LIB) += drm_client_lib.o
|
||||
|
||||
#
|
||||
# Drivers and the rest
|
||||
#
|
||||
@ -165,6 +159,7 @@ obj-y += tests/
|
||||
obj-$(CONFIG_DRM_MIPI_DBI) += drm_mipi_dbi.o
|
||||
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
|
||||
obj-y += arm/
|
||||
obj-y += clients/
|
||||
obj-y += display/
|
||||
obj-$(CONFIG_DRM_TTM) += ttm/
|
||||
obj-$(CONFIG_DRM_SCHED) += scheduler/
|
||||
|
@ -26,6 +26,7 @@ config DRM_AMDGPU
|
||||
select DRM_BUDDY
|
||||
select DRM_SUBALLOC_HELPER
|
||||
select DRM_EXEC
|
||||
select DRM_PANEL_BACKLIGHT_QUIRKS
|
||||
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
|
||||
# ACPI_VIDEO's dependencies must also be selected.
|
||||
select INPUT if ACPI
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright 2017 Advanced Micro Devices, Inc.
|
||||
# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
@ -105,7 +105,7 @@ amdgpu-y += \
|
||||
|
||||
# add UMC block
|
||||
amdgpu-y += \
|
||||
umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o
|
||||
umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o
|
||||
|
||||
# add IH block
|
||||
amdgpu-y += \
|
||||
@ -200,6 +200,7 @@ amdgpu-y += \
|
||||
vcn_v4_0_3.o \
|
||||
vcn_v4_0_5.o \
|
||||
vcn_v5_0_0.o \
|
||||
vcn_v5_0_1.o \
|
||||
amdgpu_jpeg.o \
|
||||
jpeg_v1_0.o \
|
||||
jpeg_v2_0.o \
|
||||
@ -208,7 +209,8 @@ amdgpu-y += \
|
||||
jpeg_v4_0.o \
|
||||
jpeg_v4_0_3.o \
|
||||
jpeg_v4_0_5.o \
|
||||
jpeg_v5_0_0.o
|
||||
jpeg_v5_0_0.o \
|
||||
jpeg_v5_0_1.o
|
||||
|
||||
# add VPE block
|
||||
amdgpu-y += \
|
||||
|
@ -334,6 +334,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
|
||||
AMDGPU_INIT_LEVEL_RESET_RECOVERY);
|
||||
dev_info(tmp_adev->dev,
|
||||
"GPU reset succeeded, trying to resume\n");
|
||||
/*TBD: Ideally should clear only GFX, SDMA blocks*/
|
||||
amdgpu_ras_clear_err_state(tmp_adev);
|
||||
r = aldebaran_mode2_restore_ip(tmp_adev);
|
||||
if (r)
|
||||
goto end;
|
||||
|
@ -880,6 +880,7 @@ struct amdgpu_device {
|
||||
bool need_swiotlb;
|
||||
bool accel_working;
|
||||
struct notifier_block acpi_nb;
|
||||
struct notifier_block pm_nb;
|
||||
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
|
||||
struct debugfs_blob_wrapper debugfs_vbios_blob;
|
||||
struct debugfs_blob_wrapper debugfs_discovery_blob;
|
||||
@ -1174,7 +1175,6 @@ struct amdgpu_device {
|
||||
|
||||
struct work_struct reset_work;
|
||||
|
||||
bool job_hang;
|
||||
bool dc_enabled;
|
||||
/* Mask of active clusters */
|
||||
uint32_t aid_mask;
|
||||
|
@ -71,6 +71,11 @@ struct ras_query_context;
|
||||
#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
|
||||
#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
|
||||
|
||||
#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
|
||||
#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
|
||||
#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
|
||||
#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
|
||||
|
||||
enum aca_reg_idx {
|
||||
ACA_REG_IDX_CTL = 0,
|
||||
ACA_REG_IDX_STATUS = 1,
|
||||
|
@ -140,7 +140,7 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
|
||||
* 2. power off the acp tiles
|
||||
* 3. check and enter ulv state
|
||||
*/
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -157,7 +157,7 @@ static int acp_poweron(struct generic_pm_domain *genpd)
|
||||
* 2. turn on acp clock
|
||||
* 3. power on acp tiles
|
||||
*/
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -236,7 +236,7 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
|
||||
ip_block->version->major, ip_block->version->minor);
|
||||
/* -ENODEV means board uses AZ rather than ACP */
|
||||
if (r == -ENODEV) {
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
|
||||
return 0;
|
||||
} else if (r) {
|
||||
return r;
|
||||
@ -508,7 +508,7 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
/* return early if no ACP */
|
||||
if (!adev->acp.acp_genpd) {
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -565,7 +565,7 @@ static int acp_suspend(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
/* power up on suspend */
|
||||
if (!adev->acp.acp_cell)
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -575,7 +575,7 @@ static int acp_resume(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
/* power down again on resume */
|
||||
if (!adev->acp.acp_cell)
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -584,19 +584,19 @@ static bool acp_is_idle(void *handle)
|
||||
return true;
|
||||
}
|
||||
|
||||
static int acp_set_clockgating_state(void *handle,
|
||||
static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
|
||||
enum amd_clockgating_state state)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int acp_set_powergating_state(void *handle,
|
||||
static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
|
||||
enum amd_powergating_state state)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
bool enable = (state == AMD_PG_STATE_GATE);
|
||||
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -368,7 +368,7 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
|
||||
{
|
||||
struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
|
||||
|
||||
amdgpu_bo_reserve(*bo, true);
|
||||
(void)amdgpu_bo_reserve(*bo, true);
|
||||
amdgpu_bo_kunmap(*bo);
|
||||
amdgpu_bo_unpin(*bo);
|
||||
amdgpu_bo_unreserve(*bo);
|
||||
@ -724,7 +724,9 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
|
||||
/* Disable GFXOFF and PG. Temporary workaround
|
||||
* to fix some compute applications issue on GFX9.
|
||||
*/
|
||||
adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state);
|
||||
struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
|
||||
if (gfx_block != NULL)
|
||||
gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
|
||||
}
|
||||
amdgpu_dpm_switch_power_profile(adev,
|
||||
PP_SMC_POWER_PROFILE_COMPUTE,
|
||||
@ -834,7 +836,7 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
|
||||
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
|
||||
return -EINVAL;
|
||||
|
||||
if (!kiq_ring->sched.ready || adev->job_hang)
|
||||
if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
|
||||
return 0;
|
||||
|
||||
ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
|
||||
|
@ -433,6 +433,9 @@ void kgd2kfd_unlock_kfd(void);
|
||||
int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
|
||||
int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
|
||||
bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
|
||||
bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
|
||||
bool retry_fault);
|
||||
|
||||
#else
|
||||
static inline int kgd2kfd_init(void)
|
||||
{
|
||||
@ -518,5 +521,12 @@ static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
|
||||
bool retry_fault)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
@ -1131,6 +1131,9 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
|
||||
uint32_t low, high;
|
||||
uint64_t queue_addr = 0;
|
||||
|
||||
if (!amdgpu_gpu_recovery)
|
||||
return 0;
|
||||
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
|
||||
|
||||
@ -1179,6 +1182,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
|
||||
uint32_t low, high, pipe_reset_data = 0;
|
||||
uint64_t queue_addr = 0;
|
||||
|
||||
if (!amdgpu_gpu_recovery)
|
||||
return 0;
|
||||
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
|
||||
|
||||
|
@ -730,7 +730,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
|
||||
return;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
|
||||
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
(void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
|
||||
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
|
||||
sg_free_table(ttm->sg);
|
||||
@ -779,7 +779,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
|
||||
}
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
|
||||
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
(void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
|
||||
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
|
||||
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
|
||||
@ -989,7 +989,7 @@ unwind:
|
||||
if (!attachment[i])
|
||||
continue;
|
||||
if (attachment[i]->bo_va) {
|
||||
amdgpu_bo_reserve(bo[i], true);
|
||||
(void)amdgpu_bo_reserve(bo[i], true);
|
||||
if (--attachment[i]->bo_va->ref_count == 0)
|
||||
amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
|
||||
amdgpu_bo_unreserve(bo[i]);
|
||||
@ -1259,11 +1259,11 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
|
||||
(void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
|
||||
|
||||
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
|
||||
(void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
|
||||
|
||||
amdgpu_sync_fence(sync, bo_va->last_pt_update);
|
||||
(void)amdgpu_sync_fence(sync, bo_va->last_pt_update);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2352,7 +2352,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
|
||||
{
|
||||
struct amdgpu_bo *bo = mem->bo;
|
||||
|
||||
amdgpu_bo_reserve(bo, true);
|
||||
(void)amdgpu_bo_reserve(bo, true);
|
||||
amdgpu_bo_kunmap(bo);
|
||||
amdgpu_bo_unpin(bo);
|
||||
amdgpu_bo_unreserve(bo);
|
||||
|
@ -47,35 +47,37 @@
|
||||
/* Check if current bios is an ATOM BIOS.
|
||||
* Return true if it is ATOM BIOS. Otherwise, return false.
|
||||
*/
|
||||
static bool check_atom_bios(uint8_t *bios, size_t size)
|
||||
static bool check_atom_bios(struct amdgpu_device *adev, size_t size)
|
||||
{
|
||||
uint16_t tmp, bios_header_start;
|
||||
uint8_t *bios = adev->bios;
|
||||
|
||||
if (!bios || size < 0x49) {
|
||||
DRM_INFO("vbios mem is null or mem size is wrong\n");
|
||||
dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!AMD_IS_VALID_VBIOS(bios)) {
|
||||
DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]);
|
||||
dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0],
|
||||
bios[1]);
|
||||
return false;
|
||||
}
|
||||
|
||||
bios_header_start = bios[0x48] | (bios[0x49] << 8);
|
||||
if (!bios_header_start) {
|
||||
DRM_INFO("Can't locate bios header\n");
|
||||
dev_dbg(adev->dev, "Can't locate VBIOS header\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
tmp = bios_header_start + 4;
|
||||
if (size < tmp) {
|
||||
DRM_INFO("BIOS header is broken\n");
|
||||
dev_dbg(adev->dev, "VBIOS header is broken\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!memcmp(bios + tmp, "ATOM", 4) ||
|
||||
!memcmp(bios + tmp, "MOTA", 4)) {
|
||||
DRM_DEBUG("ATOMBIOS detected\n");
|
||||
dev_dbg(adev->dev, "ATOMBIOS detected\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -118,7 +120,7 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
|
||||
memcpy_fromio(adev->bios, bios, size);
|
||||
iounmap(bios);
|
||||
|
||||
if (!check_atom_bios(adev->bios, size)) {
|
||||
if (!check_atom_bios(adev, size)) {
|
||||
kfree(adev->bios);
|
||||
return false;
|
||||
}
|
||||
@ -146,7 +148,7 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
|
||||
memcpy_fromio(adev->bios, bios, size);
|
||||
pci_unmap_rom(adev->pdev, bios);
|
||||
|
||||
if (!check_atom_bios(adev->bios, size)) {
|
||||
if (!check_atom_bios(adev, size)) {
|
||||
kfree(adev->bios);
|
||||
return false;
|
||||
}
|
||||
@ -186,7 +188,7 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
|
||||
/* read complete BIOS */
|
||||
amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
|
||||
|
||||
if (!check_atom_bios(adev->bios, len)) {
|
||||
if (!check_atom_bios(adev, len)) {
|
||||
kfree(adev->bios);
|
||||
return false;
|
||||
}
|
||||
@ -216,7 +218,7 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
|
||||
memcpy_fromio(adev->bios, bios, romlen);
|
||||
iounmap(bios);
|
||||
|
||||
if (!check_atom_bios(adev->bios, romlen))
|
||||
if (!check_atom_bios(adev, romlen))
|
||||
goto free_bios;
|
||||
|
||||
adev->bios_size = romlen;
|
||||
@ -324,7 +326,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!check_atom_bios(adev->bios, size)) {
|
||||
if (!check_atom_bios(adev, size)) {
|
||||
kfree(adev->bios);
|
||||
return false;
|
||||
}
|
||||
@ -389,7 +391,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
|
||||
vhdr->ImageLength,
|
||||
GFP_KERNEL);
|
||||
|
||||
if (!check_atom_bios(adev->bios, vhdr->ImageLength)) {
|
||||
if (!check_atom_bios(adev, vhdr->ImageLength)) {
|
||||
kfree(adev->bios);
|
||||
return false;
|
||||
}
|
||||
|
@ -414,7 +414,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name);
|
||||
err = amdgpu_ucode_request(adev, &adev->pm.fw,
|
||||
AMDGPU_UCODE_REQUIRED,
|
||||
"%s", fw_name);
|
||||
if (err) {
|
||||
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
|
||||
amdgpu_ucode_release(&adev->pm.fw);
|
||||
|
@ -1105,7 +1105,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
|
||||
* We can't use gang submit on with reserved VMIDs when the VM changes
|
||||
* can't be invalidated by more than one engine at the same time.
|
||||
*/
|
||||
if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) {
|
||||
if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
|
||||
for (i = 0; i < p->gang_size; ++i) {
|
||||
struct drm_sched_entity *entity = p->entities[i];
|
||||
struct drm_gpu_scheduler *sched = entity->rq->sched;
|
||||
@ -1189,7 +1189,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
|
||||
if (!bo)
|
||||
continue;
|
||||
|
||||
amdgpu_vm_bo_invalidate(adev, bo, false);
|
||||
amdgpu_vm_bo_invalidate(bo, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2095,6 +2095,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
|
||||
if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
|
||||
amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
|
||||
|
||||
amdgpu_debugfs_vcn_sched_mask_init(adev);
|
||||
amdgpu_debugfs_jpeg_sched_mask_init(adev);
|
||||
amdgpu_debugfs_gfx_sched_mask_init(adev);
|
||||
amdgpu_debugfs_compute_sched_mask_init(adev);
|
||||
|
@ -199,14 +199,16 @@ void amdgpu_set_init_level(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
|
||||
static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
|
||||
void *data);
|
||||
|
||||
/**
|
||||
* DOC: pcie_replay_count
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the total number
|
||||
* of PCIe replays (NAKs)
|
||||
* of PCIe replays (NAKs).
|
||||
* The file pcie_replay_count is used for this and returns the total
|
||||
* number of replays as a sum of the NAKs generated and NAKs received
|
||||
* number of replays as a sum of the NAKs generated and NAKs received.
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
|
||||
@ -432,8 +434,8 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
|
||||
* @dev: drm_device pointer
|
||||
*
|
||||
* Return:
|
||||
* 1 if the device supporte BACO;
|
||||
* 3 if the device support MACO (only works if BACO is supported)
|
||||
* 1 if the device supports BACO;
|
||||
* 3 if the device supports MACO (only works if BACO is supported)
|
||||
* otherwise return 0.
|
||||
*/
|
||||
int amdgpu_device_supports_baco(struct drm_device *dev)
|
||||
@ -580,7 +582,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_aper_access - access vram by vram aperature
|
||||
* amdgpu_device_aper_access - access vram by vram aperture
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @pos: offset of the buffer in vram
|
||||
@ -671,7 +673,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
|
||||
* here is that the GPU reset is not running on another thread in parallel.
|
||||
*
|
||||
* For this we trylock the read side of the reset semaphore, if that succeeds
|
||||
* we know that the reset is not running in paralell.
|
||||
* we know that the reset is not running in parallel.
|
||||
*
|
||||
* If the trylock fails we assert that we are either already holding the read
|
||||
* side of the lock or are the reset thread itself and hold the write side of
|
||||
@ -1402,6 +1404,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
|
||||
|
||||
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
|
||||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
|
||||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
|
||||
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
|
||||
amdgpu_psp_wait_for_bootloader(adev);
|
||||
ret = amdgpu_atomfirmware_asic_init(adev, true);
|
||||
@ -1736,7 +1739,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
|
||||
uint32_t fw_ver;
|
||||
|
||||
err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
|
||||
/* force vPost if error occured */
|
||||
/* force vPost if error occurred */
|
||||
if (err)
|
||||
return true;
|
||||
|
||||
@ -2168,7 +2171,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
|
||||
if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
|
||||
continue;
|
||||
r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
|
||||
(void *)adev, state);
|
||||
&adev->ip_blocks[i], state);
|
||||
if (r)
|
||||
DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
@ -2202,7 +2205,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
|
||||
if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
|
||||
continue;
|
||||
r = adev->ip_blocks[i].version->funcs->set_powergating_state(
|
||||
(void *)adev, state);
|
||||
&adev->ip_blocks[i], state);
|
||||
if (r)
|
||||
DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
@ -2381,7 +2384,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
|
||||
* the module parameter virtual_display. This feature provides a virtual
|
||||
* display hardware on headless boards or in virtualized environments.
|
||||
* This function parses and validates the configuration string specified by
|
||||
* the user and configues the virtual display configuration (number of
|
||||
* the user and configures the virtual display configuration (number of
|
||||
* virtual connectors, crtcs, etc.) specified.
|
||||
*/
|
||||
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
|
||||
@ -2444,7 +2447,7 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Parses the asic configuration parameters specified in the gpu info
|
||||
* firmware and makes them availale to the driver for use in configuring
|
||||
* firmware and makes them available to the driver for use in configuring
|
||||
* the asic.
|
||||
* Returns 0 on success, -EINVAL on failure.
|
||||
*/
|
||||
@ -2485,6 +2488,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
|
||||
AMDGPU_UCODE_OPTIONAL,
|
||||
"amdgpu/%s_gpu_info.bin", chip_name);
|
||||
if (err) {
|
||||
dev_err(adev->dev,
|
||||
@ -2504,7 +2508,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
|
||||
/*
|
||||
* Should be droped when DAL no longer needs it.
|
||||
* Should be dropped when DAL no longer needs it.
|
||||
*/
|
||||
if (adev->asic_type == CHIP_NAVI12)
|
||||
goto parse_soc_bounding_box;
|
||||
@ -3064,7 +3068,7 @@ init_failed:
|
||||
*
|
||||
* Writes a reset magic value to the gart pointer in VRAM. The driver calls
|
||||
* this function before a GPU reset. If the value is retained after a
|
||||
* GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
|
||||
* GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
|
||||
*/
|
||||
 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
 {
@@ -3140,7 +3144,7 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
         adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
         adev->ip_blocks[i].version->funcs->set_clockgating_state) {
             /* enable clockgating to save power */
-            r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+            r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
                                          state);
             if (r) {
                 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
@@ -3177,7 +3181,7 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
         adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
         adev->ip_blocks[i].version->funcs->set_powergating_state) {
             /* enable powergating to save power */
-            r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+            r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
                                          state);
             if (r) {
                 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
@@ -3379,7 +3383,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
 
     amdgpu_amdkfd_suspend(adev, false);
 
-    /* Workaroud for ASICs need to disable SMC first */
+    /* Workaround for ASICs need to disable SMC first */
     amdgpu_device_smu_fini_early(adev);
 
     for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
@@ -3481,7 +3485,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
     WARN_ON_ONCE(adev->gfx.gfx_off_state);
     WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
 
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
         adev->gfx.gfx_off_state = true;
 }
 
@@ -4309,7 +4313,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
     /*
      * Reset domain needs to be present early, before XGMI hive discovered
-     * (if any) and intitialized to use reset sem and in_gpu reset flag
+     * (if any) and initialized to use reset sem and in_gpu reset flag
      * early on during init and before calling to RREG32.
      */
     adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
@@ -4599,6 +4603,11 @@ fence_driver_init:
 
     amdgpu_device_check_iommu_direct_map(adev);
 
+    adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
+    r = register_pm_notifier(&adev->pm_nb);
+    if (r)
+        goto failed;
+
     return 0;
 
 release_ras_con:
@@ -4663,6 +4672,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
     drain_workqueue(adev->mman.bdev.wq);
     adev->shutdown = true;
 
+    unregister_pm_notifier(&adev->pm_nb);
+
     /* make sure IB test finished before entering exclusive mode
      * to avoid preemption on IB test
      */
@@ -4781,8 +4792,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
 {
     int ret;
 
-    /* No need to evict vram on APUs for suspend to ram or s2idle */
-    if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
+    /* No need to evict vram on APUs unless going to S4 */
+    if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
         return 0;
 
     ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
@@ -4794,6 +4805,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
 /*
  * Suspend & resume.
  */
+/**
+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
+ * @nb: notifier block
+ * @mode: suspend mode
+ * @data: data
+ *
+ * This function is called when the system is about to suspend or hibernate.
+ * It is used to evict resources from the device before the system goes to
+ * sleep while there is still access to swap.
+ */
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+                     void *data)
+{
+    struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
+    int r;
+
+    switch (mode) {
+    case PM_HIBERNATION_PREPARE:
+        adev->in_s4 = true;
+        fallthrough;
+    case PM_SUSPEND_PREPARE:
+        r = amdgpu_device_evict_resources(adev);
+        /*
+         * This is considered non-fatal at this time because
+         * amdgpu_device_prepare() will also fatally evict resources.
+         * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781
+         */
+        if (r)
+            drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r);
+        break;
+    }
+
+    return NOTIFY_DONE;
+}
+
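The hunks above move hibernate bookkeeping into a PM notifier so resources can be evicted while swap is still reachable. A minimal sketch of the same register_pm_notifier() pattern, using a hypothetical my_dev driver struct rather than the amdgpu types:

    #include <linux/kernel.h>
    #include <linux/notifier.h>
    #include <linux/suspend.h>

    struct my_dev {
        struct notifier_block pm_nb;    /* hypothetical driver state */
        bool in_s4;
    };

    static int my_dev_pm_notifier(struct notifier_block *nb, unsigned long mode,
                                  void *data)
    {
        struct my_dev *mdev = container_of(nb, struct my_dev, pm_nb);

        switch (mode) {
        case PM_HIBERNATION_PREPARE:
            mdev->in_s4 = true;     /* remember we are headed to S4 */
            fallthrough;
        case PM_SUSPEND_PREPARE:
            /* evict or flush device state while swap is still available;
             * failures here are typically treated as non-fatal */
            break;
        }
        return NOTIFY_DONE;
    }

    static int my_dev_init_pm(struct my_dev *mdev)
    {
        mdev->pm_nb.notifier_call = my_dev_pm_notifier;
        /* pair with unregister_pm_notifier() in the teardown path */
        return register_pm_notifier(&mdev->pm_nb);
    }

The notifier fires before the freeze/suspend callbacks, which is why the eviction can still touch swap-backed memory at this point.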
 /**
  * amdgpu_device_prepare - prepare for device suspend
  *
@@ -4833,7 +4879,7 @@ int amdgpu_device_prepare(struct drm_device *dev)
     return 0;
 
 unprepare:
-    adev->in_s0ix = adev->in_s3 = false;
+    adev->in_s0ix = adev->in_s3 = adev->in_s4 = false;
 
     return r;
 }
@@ -5184,7 +5230,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
     if (r)
         return r;
 
-    amdgpu_ras_set_fed(adev, false);
+    amdgpu_ras_clear_err_state(adev);
     amdgpu_irq_gpu_reset_resume_helper(adev);
 
     /* some sw clean up VF needs to do before recover */
@@ -5241,16 +5287,18 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_device_has_job_running - check if there is any job in mirror list
+ * amdgpu_device_has_job_running - check if there is any unfinished job
  *
  * @adev: amdgpu_device pointer
  *
- * check if there is any job in mirror list
+ * check if there is any job running on the device when guest driver receives
+ * FLR notification from host driver. If there are still jobs running, then
+ * the guest driver will not respond the FLR reset. Instead, let the job hit
+ * the timeout and guest driver then issue the reset request.
  */
 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
 {
     int i;
-    struct drm_sched_job *job;
 
     for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
         struct amdgpu_ring *ring = adev->rings[i];
@@ -5258,11 +5306,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
         if (!amdgpu_ring_sched_ready(ring))
             continue;
 
-        spin_lock(&ring->sched.job_list_lock);
-        job = list_first_entry_or_null(&ring->sched.pending_list,
-                                       struct drm_sched_job, list);
-        spin_unlock(&ring->sched.job_list_lock);
-        if (job)
+        if (amdgpu_fence_count_emitted(ring))
             return true;
     }
     return false;
@@ -5487,7 +5531,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
         amdgpu_set_init_level(tmp_adev, init_level);
         if (full_reset) {
             /* post card */
-            amdgpu_ras_set_fed(tmp_adev, false);
+            amdgpu_ras_clear_err_state(tmp_adev);
             r = amdgpu_device_asic_init(tmp_adev);
             if (r) {
                 dev_warn(tmp_adev->dev, "asic atom init failed!");
@@ -5820,6 +5864,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
     bool audio_suspended = false;
     int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
 
+    /*
+     * If it reaches here because of hang/timeout and a RAS error is
+     * detected at the same time, let RAS recovery take care of it.
+     */
+    if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
+        !amdgpu_sriov_vf(adev) &&
+        reset_context->src != AMDGPU_RESET_SRC_RAS) {
+        dev_dbg(adev->dev,
+            "Gpu recovery from source: %d yielding to RAS error recovery handling",
+            reset_context->src);
+        return 0;
+    }
     /*
      * Special case: RAS triggered and full reset isn't supported
      */
@@ -5903,7 +5959,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
             amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
 
             /*
-             * Mark these ASICs to be reseted as untracked first
+             * Mark these ASICs to be reset as untracked first
              * And add them back after reset completed
              */
             amdgpu_unregister_gpu_instance(tmp_adev);
@@ -6106,7 +6162,7 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  *
- * Fetchs and stores in the driver the PCIE capabilities (gen speed
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
  * and lanes) of the slot the device is in. Handles APUs and
  * virtualized environments where PCIE config space may not be available.
  */
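The amdgpu_device_has_job_running() rewrite above drops the peek at the scheduler's pending list (and its job_list_lock) in favor of a per-ring count of emitted-but-unsignaled fences. A rough standalone sketch of that idea; struct my_ring and its fields are hypothetical stand-ins, not the amdgpu types:

    #include <stdbool.h>

    struct my_ring {
        bool sched_ready;
        unsigned int fences_emitted;    /* emitted but not yet signaled */
    };

    static bool device_has_job_running(const struct my_ring *rings, int ring_count)
    {
        int i;

        for (i = 0; i < ring_count; i++) {
            if (!rings[i].sched_ready)
                continue;
            /* in-flight work == fences the hardware has not signaled yet */
            if (rings[i].fences_emitted)
                return true;
        }
        return false;
    }

Counting outstanding fences answers "is the hardware still busy" without taking a scheduler lock from a reset path.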
@@ -1,5 +1,5 @@
 /*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -104,7 +104,9 @@
 #include "smuio_v13_0_6.h"
 #include "smuio_v14_0_2.h"
 #include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
 #include "jpeg_v5_0_0.h"
+#include "jpeg_v5_0_1.h"
 
 #include "amdgpu_vpe.h"
 #if defined(CONFIG_DRM_AMD_ISP)
@@ -1340,7 +1342,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
      */
     if (adev->vcn.num_vcn_inst <
         AMDGPU_MAX_VCN_INSTANCES) {
-        adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
+        adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config =
             ip->revision & 0xc0;
         adev->vcn.num_vcn_inst++;
         adev->vcn.inst_mask |=
@@ -1705,7 +1707,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
      * so this won't overflow.
      */
     for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
-        adev->vcn.vcn_codec_disable_mask[v] =
+        adev->vcn.inst[v].vcn_codec_disable_mask =
             le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
     }
     break;
@@ -1836,6 +1838,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
     case IP_VERSION(9, 4, 2):
     case IP_VERSION(9, 4, 3):
     case IP_VERSION(9, 4, 4):
+    case IP_VERSION(9, 5, 0):
         amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
         break;
     case IP_VERSION(10, 1, 10):
@@ -1890,6 +1893,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
     case IP_VERSION(9, 4, 2):
     case IP_VERSION(9, 4, 3):
     case IP_VERSION(9, 4, 4):
+    case IP_VERSION(9, 5, 0):
         amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
         break;
     case IP_VERSION(10, 1, 10):
@@ -2013,6 +2017,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
     case IP_VERSION(13, 0, 8):
     case IP_VERSION(13, 0, 10):
     case IP_VERSION(13, 0, 11):
+    case IP_VERSION(13, 0, 12):
     case IP_VERSION(13, 0, 14):
     case IP_VERSION(14, 0, 0):
     case IP_VERSION(14, 0, 1):
@@ -2184,6 +2189,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
         break;
     case IP_VERSION(9, 4, 3):
     case IP_VERSION(9, 4, 4):
+    case IP_VERSION(9, 5, 0):
         amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
         break;
     case IP_VERSION(10, 1, 10):
@@ -2238,6 +2244,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
         break;
     case IP_VERSION(4, 4, 2):
     case IP_VERSION(4, 4, 5):
+    case IP_VERSION(4, 4, 4):
         amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
         break;
     case IP_VERSION(5, 0, 0):
@@ -2361,6 +2368,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
         amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
         amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
         break;
+    case IP_VERSION(5, 0, 1):
+        amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
+        amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
+        break;
     default:
         dev_err(adev->dev,
             "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
@@ -2405,6 +2416,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
     switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
     case IP_VERSION(9, 4, 3):
     case IP_VERSION(9, 4, 4):
+    case IP_VERSION(9, 5, 0):
         aqua_vanjaram_init_soc_config(adev);
         break;
     default:
@@ -2652,6 +2664,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
     case IP_VERSION(9, 4, 2):
     case IP_VERSION(9, 4, 3):
     case IP_VERSION(9, 4, 4):
+    case IP_VERSION(9, 5, 0):
         adev->family = AMDGPU_FAMILY_AI;
         break;
     case IP_VERSION(9, 1, 0):
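Two hunks above fold per-instance VCN fields (vcn_config, vcn_codec_disable_mask) out of parallel arrays on the vcn struct and into the per-instance inst[] array. A generic before/after sketch of that refactor, with hypothetical names standing in for the amdgpu structures:

    #define MAX_INST 4

    /* before: parallel arrays keyed by the same instance index */
    struct vcn_old {
        unsigned char vcn_config[MAX_INST];
        unsigned int  vcn_codec_disable_mask[MAX_INST];
    };

    /* after: one struct per instance, so all per-instance state
     * lives together and new fields need only one edit point */
    struct vcn_inst {
        unsigned char vcn_config;
        unsigned int  vcn_codec_disable_mask;
    };

    struct vcn_new {
        struct vcn_inst inst[MAX_INST];
    };

The array-of-structs form keeps each instance's state contiguous and avoids index-mismatch bugs between the old parallel arrays.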
@@ -33,6 +33,7 @@
 #include "soc15_common.h"
 #include "gc/gc_11_0_0_offset.h"
 #include "gc/gc_11_0_0_sh_mask.h"
+#include "bif/bif_4_1_d.h"
 #include <asm/div64.h>
 
 #include <linux/pci.h>
@@ -1788,3 +1789,82 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
     return 0;
 }
+
+/* panic_bo is set in amdgpu_dm_plane_get_scanout_buffer() and only used in amdgpu_dm_set_pixel()
+ * they are called from the panic handler, and protected by the drm_panic spinlock.
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM,
+ * This is a simplified version of amdgpu_device_mm_access()
+ */
+static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb,
+                     unsigned int x,
+                     unsigned int y,
+                     u32 color)
+{
+    struct amdgpu_res_cursor cursor;
+    unsigned long offset;
+    struct amdgpu_bo *abo = panic_abo;
+    struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+    uint32_t tmp;
+
+    offset = x * 4 + y * sb->pitch[0];
+    amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+    tmp = cursor.start >> 31;
+    WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+    if (tmp != 0xffffffff)
+        WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+    WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+                      struct drm_scanout_buffer *sb)
+{
+    struct amdgpu_bo *abo;
+    struct drm_framebuffer *fb = plane->state->fb;
+
+    if (!fb)
+        return -EINVAL;
+
+    DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+    abo = gem_to_amdgpu_bo(fb->obj[0]);
+    if (!abo)
+        return -EINVAL;
+
+    sb->width = fb->width;
+    sb->height = fb->height;
+    /* Use the generic linear format, because tiling will be disabled in panic_flush() */
+    sb->format = drm_format_info(fb->format->format);
+    if (!sb->format)
+        return -EINVAL;
+
+    sb->pitch[0] = fb->pitches[0];
+
+    if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+        if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+            drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+            return -EINVAL;
+        }
+        /* Only handle 32bits format, to simplify mmio access */
+        if (fb->format->cpp[0] != 4) {
+            drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+            return -EINVAL;
+        }
+        sb->set_pixel = amdgpu_display_set_pixel;
+        panic_abo = abo;
+        return 0;
+    }
+    if (!abo->kmap.virtual &&
+        ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+        drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+        return -ENOMEM;
+    }
+    if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+        iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+    else
+        iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual);
+
+    return 0;
+}
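amdgpu_display_set_pixel() above uses the MM_INDEX/MM_INDEX_HI/MM_DATA indirect aperture: the VRAM byte offset is latched into the index registers (bit 31 of MM_INDEX selects the aperture) and the pixel value is then written through the data register. A hedged, runnable sketch of that access pattern; the register names and wreg32() tracer below are stand-ins, not the driver's WREG32_NO_KIQ() macros:

    #include <stdint.h>
    #include <stdio.h>

    enum reg { MM_INDEX, MM_INDEX_HI, MM_DATA };

    /* stand-in for an MMIO write; here it only traces the access */
    static void wreg32(enum reg r, uint32_t val)
    {
        printf("reg %d <- 0x%08x\n", r, val);
    }

    static void vram_write32(uint64_t vram_offset, uint32_t value)
    {
        uint32_t hi = (uint32_t)(vram_offset >> 31);

        /* bit 31 of MM_INDEX enables the indirect aperture; the low
         * bits carry the VRAM byte offset */
        wreg32(MM_INDEX, (uint32_t)vram_offset | 0x80000000u);
        if (hi != 0xffffffffu)
            wreg32(MM_INDEX_HI, hi);
        wreg32(MM_DATA, value); /* lands at the latched VRAM address */
    }

    int main(void)
    {
        vram_write32(0x12345678, 0x00ff00ffu); /* one 32-bit pixel */
        return 0;
    }

Register-by-register access is slow, but the panic handler only needs a lock-free path that works when the framebuffer has no CPU mapping.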
@@ -23,6 +23,8 @@
 #ifndef __AMDGPU_DISPLAY_H__
 #define __AMDGPU_DISPLAY_H__
 
+#include <drm/drm_panic.h>
+
 #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
 #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
 #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
@@ -49,4 +51,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier);
 int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
 int amdgpu_display_resume_helper(struct amdgpu_device *adev);
 
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+                      struct drm_scanout_buffer *sb);
+
 #endif
@@ -36,6 +36,7 @@
 #include "amdgpu_gem.h"
 #include "amdgpu_dma_buf.h"
 #include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
 #include <drm/amdgpu_drm.h>
 #include <drm/ttm/ttm_tt.h>
 #include <linux/dma-buf.h>
@@ -60,6 +61,8 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
     if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
         attach->peer2peer = false;
 
+    amdgpu_vm_bo_update_shared(bo);
+
     return 0;
 }
 
@@ -345,7 +348,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
     /* FIXME: This should be after the "if", but needs a fix to make sure
      * DMABuf imports are initialized in the right VM list.
      */
-    amdgpu_vm_bo_invalidate(adev, bo, false);
+    amdgpu_vm_bo_invalidate(bo, false);
     if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
         return;
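The attach hunk above keeps gating peer-to-peer attachments on pci_p2pdma_distance(), which reports whether the PCI fabric can route p2p traffic between two devices. A minimal sketch of that gating; my_attach() and its pdev parameter are hypothetical, only the two kernel APIs it calls are real:

    #include <linux/dma-buf.h>
    #include <linux/pci-p2pdma.h>

    /* sketch: disable peer2peer when the exporter (pdev) and the
     * importer (attach->dev) have no usable p2p path between them */
    static int my_attach(struct pci_dev *pdev, struct dma_buf_attachment *attach)
    {
        if (pci_p2pdma_distance(pdev, attach->dev, false) < 0)
            attach->peer2peer = false; /* fall back to system memory */
        return 0;
    }

A negative distance means DMA between the two endpoints cannot be routed, so the importer must take the bounce-through-system-memory path instead.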
@@ -23,7 +23,7 @@
  */
 
 #include <drm/amdgpu_drm.h>
-#include <drm/drm_client_setup.h>
+#include <drm/clients/drm_client_setup.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_fbdev_ttm.h>
 #include <drm/drm_gem.h>
@@ -2552,7 +2552,6 @@ static int amdgpu_pmops_freeze(struct device *dev)
     struct amdgpu_device *adev = drm_to_adev(drm_dev);
     int r;
 
-    adev->in_s4 = true;
     r = amdgpu_device_suspend(drm_dev, true);
     adev->in_s4 = false;
     if (r)
@@ -2916,7 +2915,6 @@ static const struct drm_driver amdgpu_kms_driver = {
 
     .name = DRIVER_NAME,
     .desc = DRIVER_DESC,
-    .date = DRIVER_DATE,
     .major = KMS_DRIVER_MAJOR,
     .minor = KMS_DRIVER_MINOR,
     .patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -2940,7 +2938,6 @@ const struct drm_driver amdgpu_partition_driver = {
 
     .name = DRIVER_NAME,
     .desc = DRIVER_DESC,
-    .date = DRIVER_DATE,
     .major = KMS_DRIVER_MAJOR,
     .minor = KMS_DRIVER_MINOR,
     .patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -40,7 +40,6 @@
 
 #define DRIVER_NAME        "amdgpu"
 #define DRIVER_DESC        "AMD GPU"
-#define DRIVER_DATE        "20150101"
 
 extern const struct drm_driver amdgpu_partition_driver;
@@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
     struct amdgpu_fpriv *fpriv = file->driver_priv;
     struct amdgpu_vm *vm = &fpriv->vm;
 
-    struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { };
+    struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
     ktime_t usage[AMDGPU_HW_IP_NUM];
     const char *pl_name[] = {
         [TTM_PL_VRAM] = "vram",
@@ -72,15 +72,8 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
         [AMDGPU_PL_DOORBELL] = "doorbell",
     };
     unsigned int hw_ip, i;
-    int ret;
-
-    ret = amdgpu_bo_reserve(vm->root.bo, false);
-    if (ret)
-        return;
-
-    amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats));
-    amdgpu_bo_unreserve(vm->root.bo);
 
+    amdgpu_vm_get_memory(vm, stats);
     amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
 
     /*
@@ -114,9 +107,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
     drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
            stats[TTM_PL_VRAM].evicted/1024UL);
     drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
-           stats[TTM_PL_VRAM].requested/1024UL);
+           (stats[TTM_PL_VRAM].drm.shared +
+            stats[TTM_PL_VRAM].drm.private) / 1024UL);
     drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
-           stats[TTM_PL_TT].requested/1024UL);
+           (stats[TTM_PL_TT].drm.shared +
+            stats[TTM_PL_TT].drm.private) / 1024UL);
 
     for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
         if (!usage[hw_ip])
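The fdinfo hunks above stop maintaining a driver-private "requested" counter and instead derive the per-placement total from the shared and private sizes already tracked for the DRM fdinfo stats. A small sketch of that derivation; struct my_stats is a hypothetical stand-in for the amdgpu per-placement stats, and only drm_printf() is a real kernel API:

    #include <drm/drm_print.h>

    struct my_stats {
        struct {
            unsigned long long shared;  /* bytes visible to >1 file */
            unsigned long long private; /* bytes owned by this file */
        } drm;
    };

    /* print one "amd-requested-<placement>" line as shared + private */
    static void print_requested(struct drm_printer *p, const char *name,
                    const struct my_stats *s)
    {
        drm_printf(p, "amd-requested-%s:\t%llu KiB\n", name,
               (s->drm.shared + s->drm.private) / 1024ULL);
    }

Reusing the core-tracked numbers removes a second bookkeeping path that could drift from what the DRM core reports.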
@@ -384,7 +384,7 @@ int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
 
 void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
 {
-    if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info)
+    if (!adev->fru_info)
         return;
 
     sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
@@ -32,7 +32,7 @@ struct amdgpu_fru_info {
     char product_name[AMDGPU_PRODUCT_NAME_LEN];
     char serial[20];
     char manufacturer_name[32];
-    char fru_id[32];
+    char fru_id[50];
 };
 
 int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
@@ -42,6 +42,7 @@
 #include "amdgpu_dma_buf.h"
 #include "amdgpu_hmm.h"
 #include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
 
 static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
 {
@@ -87,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 {
     struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
 
-    if (aobj) {
-        amdgpu_hmm_unregister(aobj);
-        ttm_bo_put(&aobj->tbo);
-    }
+    amdgpu_hmm_unregister(aobj);
+    ttm_bo_put(&aobj->tbo);
 }
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
@@ -179,6 +178,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
     if (r)
         return r;
 
+    amdgpu_vm_bo_update_shared(abo);
     bo_va = amdgpu_vm_bo_find(vm, abo);
     if (!bo_va)
         bo_va = amdgpu_vm_bo_add(adev, vm, abo);
@@ -252,6 +252,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
         goto out_unlock;
 
     amdgpu_vm_bo_del(adev, bo_va);
+    amdgpu_vm_bo_update_shared(bo);
     if (!amdgpu_vm_ready(vm))
         goto out_unlock;
 
@@ -839,7 +840,6 @@ error:
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
             struct drm_file *filp)
 {
-    struct amdgpu_device *adev = drm_to_adev(dev);
     struct drm_amdgpu_gem_op *args = data;
     struct drm_gem_object *gobj;
     struct amdgpu_vm_bo_base *base;
@@ -899,7 +899,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
         robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
         if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
-            amdgpu_vm_bo_invalidate(adev, robj, true);
+            amdgpu_vm_bo_invalidate(robj, true);
 
         amdgpu_bo_unreserve(robj);
         break;
@@ -515,7 +515,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
     if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
         return -EINVAL;
 
-    if (!kiq_ring->sched.ready || adev->job_hang || amdgpu_in_reset(adev))
+    if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
         return 0;
 
     spin_lock(&kiq->ring_lock);
@@ -567,7 +567,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
     if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
         return -EINVAL;
 
-    if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
+    if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
         return 0;
 
     if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
@@ -806,7 +806,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
             /* If going to s2idle, no need to wait */
             if (adev->in_s0ix) {
                 if (!amdgpu_dpm_set_powergating_by_smu(adev,
-                        AMD_IP_BLOCK_TYPE_GFX, true))
+                        AMD_IP_BLOCK_TYPE_GFX, true, 0))
                     adev->gfx.gfx_off_state = true;
             } else {
                 schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
@@ -818,7 +818,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
             cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
 
             if (adev->gfx.gfx_off_state &&
-                !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+                !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
                 adev->gfx.gfx_off_state = false;
 
                 if (adev->gfx.funcs->init_spm_golden) {
@@ -1484,6 +1484,24 @@ static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
     return 0;
 }
 
+/**
+ * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * Provides the sysfs interface to manually run a cleaner shader, which is
+ * used to clear the GPU state between different tasks. Writing a value to the
+ * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
+ * The value written corresponds to the partition index on multi-partition
+ * devices. On single-partition devices, the value should be '0'.
+ *
+ * The cleaner shader clears the Local Data Store (LDS) and General Purpose
+ * Registers (GPRs) to ensure data isolation between GPU workloads.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
 static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
                          struct device_attribute *attr,
                          const char *buf,
@@ -1532,6 +1550,19 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
     return count;
 }
 
+/**
+ * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer to store the output data
+ *
+ * Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
+ * feature for each GPU partition. Reading from the 'enforce_isolation'
+ * sysfs file returns the isolation settings for all partitions, where '0'
+ * indicates disabled and '1' indicates enabled.
+ *
+ * Return: The number of bytes read from the sysfs file.
+ */
 static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
                         struct device_attribute *attr,
                         char *buf)
@@ -1555,6 +1586,20 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
     return size;
 }
 
+/**
+ * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * This function allows control over the 'enforce_isolation' feature, which
+ * serializes access to the graphics engine. Writing '1' or '0' to the
+ * 'enforce_isolation' sysfs file enables or disables process isolation for
+ * each partition. The input should specify the setting for all partitions.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
 static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
                         struct device_attribute *attr,
                         const char *buf, size_t count)
@@ -1940,6 +1985,17 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
     mutex_unlock(&adev->enforce_isolation_mutex);
 }
 
+/**
+ * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the GPU partition
+ *
+ * When kernel submissions come in, the jobs are given a time slice and once
+ * that time slice is up, if there are KFD user queues active, kernel
+ * submissions are blocked until KFD has had its time slice. Once the KFD time
+ * slice is up, KFD user queues are preempted and kernel submissions are
+ * unblocked and allowed to run again.
+ */
 static void
 amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
                       u32 idx)
@@ -1985,6 +2041,15 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
     msleep(GFX_SLICE_PERIOD_MS);
 }
 
+/**
+ * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring begin_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
 void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
 {
     struct amdgpu_device *adev = ring->adev;
@@ -2012,6 +2077,15 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
     mutex_unlock(&adev->enforce_isolation_mutex);
 }
 
+/**
+ * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring end_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
 void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
 {
     struct amdgpu_device *adev = ring->adev;
@@ -2050,7 +2124,7 @@ static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
     if (!adev)
         return -ENODEV;
 
-    mask = (1 << adev->gfx.num_gfx_rings) - 1;
+    mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
     if ((val & mask) == 0)
         return -EINVAL;
 
@@ -2078,7 +2152,7 @@ static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
     for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
         ring = &adev->gfx.gfx_ring[i];
         if (ring->sched.ready)
-            mask |= 1 << i;
+            mask |= 1ULL << i;
     }
 
     *val = mask;
@@ -2120,7 +2194,7 @@ static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
     if (!adev)
         return -ENODEV;
 
-    mask = (1 << adev->gfx.num_compute_rings) - 1;
+    mask = (1ULL << adev->gfx.num_compute_rings) - 1;
     if ((val & mask) == 0)
         return -EINVAL;
 
@@ -2149,7 +2223,7 @@ static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
     for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
         ring = &adev->gfx.compute_ring[i];
         if (ring->sched.ready)
-            mask |= 1 << i;
+            mask |= 1ULL << i;
     }
 
     *val = mask;
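The four sched-mask hunks above widen `1 << n` to `1ULL << n`. With a plain int literal the shift is performed in 32 bits, so masks for rings at bit 31 and beyond overflow (n >= 32 is outright undefined behavior) before the result is ever widened to u64. A small standalone C demo of why the literal's type matters:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned int n = 36;    /* imagine a mask that needs 36 bits */

        /* (1 << n) would be computed as a 32-bit int: undefined
         * behavior for n >= 32, and sign-overflow already at n == 31.
         * Promoting the literal makes the whole shift 64-bit. */
        uint64_t mask = (1ULL << n) - 1;

        printf("1ULL << %u gives mask 0x%llx\n", n,
               (unsigned long long)mask);
        printf("sizeof(1) = %zu, sizeof(1ULL) = %zu\n",
               sizeof(1), sizeof(1ULL));
        return 0;
    }

Since the debugfs mask is a u64 and devices can expose more than 32 rings across partitions, the 64-bit literal is the correct form in all four call sites.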
@@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 /**
  * amdgpu_ib_free - free an IB (Indirect Buffer)
  *
- * @adev: amdgpu_device pointer
  * @ib: IB object to free
  * @f: the fence SA bo need wait on for the ib alloation
  *
  * Free an IB (all asics).
  */
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-            struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
 {
-    amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+    amdgpu_sa_bo_free(&ib->sa_bo, f);
 }
 
 /**
@@ -298,3 +298,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
     dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
     return dw1 | ((u64)(dw2 & 0xffff) << 32);
 }
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+    return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+           ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
@@ -110,4 +110,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
                 struct amdgpu_iv_entry *entry);
 uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
                     signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
 #endif