drm for 6.7-rc1

kernel:
 - add initial vmemdup-user-array
 
 core:
 - fix platform remove() to return void
 - drm_file owner updated to reflect owner
 - move size calcs to drm buddy allocator
 - let GPUVM build as a module
 - allow variable number of run-queues in scheduler
 
 edid:
 - handle bad h/v sync_end in EDIDs
 
 panfrost:
 - add Boris as maintainer
 
 fbdev:
 - use fb_ops helpers more
 - only allow logo use from fbcon
 - rename fb_pgproto to pgprot_framebuffer
 - add HPD state to drm_connector_oob_hotplug_event
 - convert to fbdev i/o mem helpers
 
 i915:
 - Enable meteorlake by default
 - Early Xe2 LPD/Lunarlake display enablement
 - Rework subplatforms into IP version checks
 - GuC based TLB invalidation for Meteorlake
 - Display rework for future Xe driver integration
 - LNL FBC features
 - LNL display feature capability reads
 - update recommended fw versions for DG2+
 - drop fastboot module parameter
 - added deviceid for Arrowlake-S
 - drop preproduction workarounds
 - don't disable preemption for resets
 - cleanup inlines in headers
 - PXP firmware loading fix
 - Fix sg list lengths
 - DSC PPS state readout/verification
 - Add more RPL P/U PCI IDs
 - Add new DG2-G12 stepping
 - DP enhanced framing support to state checker
 - Improve shared link bandwidth management
 - stop using GEM macros in display code
 - refactor related code into display code
 - locally enable W=1 warnings
 - remove PSR watchdog timers on LNL
 
 amdgpu:
 - RAS/FRU EEPROM updatse
 - IP discovery updatses
 - GC 11.5 support
 - DCN 3.5 support
 - VPE 6.1 support
 - NBIO 7.11 support
 - DML2 support
 - lots of IP updates
 - use flexible arrays for bo list handling
 - W=1 fixes
 - Enable seamless boot in more cases
 - Enable context type property for HDMI
 - Rework GPUVM TLB flushing
 - VCN IB start/size alignment fixes
 
 amdkfd:
 - GC 10/11 fixes
 - GC 11.5 support
 - use partial migration in GPU faults
 
 radeon:
 - W=1 Fixes
 - fix some possible buffer overflow/NULL derefs
 nouveau:
 - update uapi for NO_PREFETCH
 - scheduler/fence fixes
 - rework suspend/resume for GSP-RM
 - rework display in preparation for GSP-RM
 
 habanalabs:
 - uapi: expose tsc clock
 - uapi: block access to eventfd through control device
 - uapi: force dma-buf export to PAGE_SIZE alignments
 - complete move to accel subsystem
 - move firmware interface include files
 - perform hard reset on PCIe AXI drain event
 - optimise user interrupt handling
 
 msm:
 - DP: use existing helpers for DPCD
 - DPU: interrupts reworked
 - gpu: a7xx (a730/a740) support
 - decouple msm_drv from kms for headless devices
 
 mediatek:
 - MT8188 dsi/dp/edp support
 - DDP GAMMA - 12 bit LUT support
 - connector dynamic selection capability
 
 rockchip:
 - rv1126 mipi-dsi/vop support
 - add planar formats
 
 ast:
 - rename constants
 
 panels:
 - Mitsubishi AA084XE01
 - JDI LPM102A188A
 - LTK050H3148W-CTA6
 
 ivpu:
 - power management fixes
 
 qaic:
 - add detach slice bo api
 
 komeda:
 - add NV12 writeback
 
 tegra:
 - support NVSYNC/NHSYNC
 - host1x suspend fixes
 
 ili9882t:
 - separate into own driver
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmVAgzYACgkQDHTzWXnE
 hr7ZEQ//UXne3tyGOsU3X8r+lstLFDMa90a3hvTg6hX+Q0MjHd/clwkKFkLpkipL
 n7gIZlaHl11dRs0FzrIZA5EVAAgjMLKmIl10NBDFec6ZFA3VERcggx8y61uifI15
 VviMR1VbLHYZaCdyrQOK0A4wcktWnKXyoXp7cwy9crdc2GOBMUZkdIqtvD7jHxQx
 UMIFnzi1CyKUX/Fjt/JceYcNk9y2ZGkzakYO3sHcUdv4DPu9qX4kNzpjF691AZBP
 UeKWvCswTRVg2M0kuo/RYIBzqaTmOlk6dHLWBognIeZPyuyhCcaGC2d64c6tShwQ
 dtHdi+IgyQ8s2qb350ymKTQUP7xA/DfZBwH7LvrZALBxeQGYQN1CnsgDMOS2wcUc
 XrRFiS7PxEOtMMBctcPBnnoV5ttnsLLlPpzM9puh9sUFMn6CgLzcAMqXdqxzMajH
 +dz2aD1N0vMqq4varozOg9SC2QamgUiPN/TQfrulhCTCfQaXczy5x1OYiIz65+Sl
 mKoe2WASuP9Ve8do4N/wEwH5SZY2ItipBdUTRxttY9NTanmV0X5DjZBXH5b9XGci
 Zl5Ar613f9zwm5T5BVA5k6s3ZbGY6QcP5pDNTCPaSgitfFXIdReBZ2CaYzK3MPg/
 Wit/TXrud9yT6VPpI1igboMyasf5QubV1MY1K83kOCWr9u8R2CM=
 =l79u
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "Highlights:
   - AMD adds some more upcoming HW platforms
   - Intel made Meteorlake stable and started adding Lunarlake
   - nouveau has a bunch of display rework in preparation for the NVIDIA
     GSP firmware support
   - msm adds a7xx support
   - habanalabs has finished migration to accel subsystem

  Detail summary:

  kernel:
   - add initial vmemdup-user-array

  core:
   - fix platform remove() to return void
   - drm_file owner updated to reflect owner
   - move size calcs to drm buddy allocator
   - let GPUVM build as a module
   - allow variable number of run-queues in scheduler

  edid:
   - handle bad h/v sync_end in EDIDs

  panfrost:
   - add Boris as maintainer

  fbdev:
   - use fb_ops helpers more
   - only allow logo use from fbcon
   - rename fb_pgproto to pgprot_framebuffer
   - add HPD state to drm_connector_oob_hotplug_event
   - convert to fbdev i/o mem helpers

  i915:
   - Enable meteorlake by default
   - Early Xe2 LPD/Lunarlake display enablement
   - Rework subplatforms into IP version checks
   - GuC based TLB invalidation for Meteorlake
   - Display rework for future Xe driver integration
   - LNL FBC features
   - LNL display feature capability reads
   - update recommended fw versions for DG2+
   - drop fastboot module parameter
   - added deviceid for Arrowlake-S
   - drop preproduction workarounds
   - don't disable preemption for resets
   - cleanup inlines in headers
   - PXP firmware loading fix
   - Fix sg list lengths
   - DSC PPS state readout/verification
   - Add more RPL P/U PCI IDs
   - Add new DG2-G12 stepping
   - DP enhanced framing support to state checker
   - Improve shared link bandwidth management
   - stop using GEM macros in display code
   - refactor related code into display code
   - locally enable W=1 warnings
   - remove PSR watchdog timers on LNL

  amdgpu:
   - RAS/FRU EEPROM updates
   - IP discovery updates
   - GC 11.5 support
   - DCN 3.5 support
   - VPE 6.1 support
   - NBIO 7.11 support
   - DML2 support
   - lots of IP updates
   - use flexible arrays for bo list handling
   - W=1 fixes
   - Enable seamless boot in more cases
   - Enable context type property for HDMI
   - Rework GPUVM TLB flushing
   - VCN IB start/size alignment fixes

  amdkfd:
   - GC 10/11 fixes
   - GC 11.5 support
   - use partial migration in GPU faults

  radeon:
   - W=1 Fixes
   - fix some possible buffer overflow/NULL derefs

  nouveau:
   - update uapi for NO_PREFETCH
   - scheduler/fence fixes
   - rework suspend/resume for GSP-RM
   - rework display in preparation for GSP-RM

  habanalabs:
   - uapi: expose tsc clock
   - uapi: block access to eventfd through control device
   - uapi: force dma-buf export to PAGE_SIZE alignments
   - complete move to accel subsystem
   - move firmware interface include files
   - perform hard reset on PCIe AXI drain event
   - optimise user interrupt handling

  msm:
   - DP: use existing helpers for DPCD
   - DPU: interrupts reworked
   - gpu: a7xx (a730/a740) support
   - decouple msm_drv from kms for headless devices

  mediatek:
   - MT8188 dsi/dp/edp support
   - DDP GAMMA - 12 bit LUT support
   - connector dynamic selection capability

  rockchip:
   - rv1126 mipi-dsi/vop support
   - add planar formats

  ast:
   - rename constants

  panels:
   - Mitsubishi AA084XE01
   - JDI LPM102A188A
   - LTK050H3148W-CTA6

  ivpu:
   - power management fixes

  qaic:
   - add detach slice bo api

  komeda:
   - add NV12 writeback

  tegra:
   - support NVSYNC/NHSYNC
   - host1x suspend fixes

  ili9882t:
   - separate into own driver"

* tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm: (1803 commits)
  drm/amdgpu: Remove unused variables from amdgpu_show_fdinfo
  drm/amdgpu: Remove duplicate fdinfo fields
  drm/amd/amdgpu: avoid to disable gfxhub interrupt when driver is unloaded
  drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems
  drm/amdgpu: Retrieve CE count from ce_count_lo_chip in EccInfo table
  drm/amdgpu: Identify data parity error corrected in replay mode
  drm/amdgpu: Fix typo in IP discovery parsing
  drm/amd/display: fix S/G display enablement
  drm/amdxcp: fix amdxcp unloads incompletely
  drm/amd/amdgpu: fix the GPU power print error in pm info
  drm/amdgpu: Use pcie domain of xcc acpi objects
  drm/amd: check num of link levels when update pcie param
  drm/amdgpu: Add a read to GFX v9.4.3 ring test
  drm/amd/pm: call smu_cmn_get_smc_version in is_mode1_reset_supported.
  drm/amdgpu: get RAS poison status from DF v4_6_2
  drm/amdgpu: Use discovery table's subrevision
  drm/amd/display: 3.2.256
  drm/amd/display: add interface to query SubVP status
  drm/amd/display: Read before writing Backlight Mode Set Register
  drm/amd/display: Disable SYMCLK32_SE RCO on DCN314
  ...
This commit is contained in:
Linus Torvalds 2023-11-01 06:28:35 -10:00
commit 7d461b291e
1476 changed files with 295700 additions and 20118 deletions

View File

@ -1,4 +1,4 @@
What: /sys/kernel/debug/habanalabs/hl<n>/addr
What: /sys/kernel/debug/accel/<n>/addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -8,34 +8,34 @@ Description: Sets the device address to be used for read or write through
only when the IOMMU is disabled.
The acceptable value is a string that starts with "0x"
What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
What: /sys/kernel/debug/accel/<n>/clk_gate
Date: May 2020
KernelVersion: 5.8
Contact: ogabbay@kernel.org
Description: This setting is now deprecated as clock gating is handled solely by the f/w
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
What: /sys/kernel/debug/accel/<n>/command_buffers
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently allocated
command buffers
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission
What: /sys/kernel/debug/accel/<n>/command_submission
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently active
command submissions
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission_jobs
What: /sys/kernel/debug/accel/<n>/command_submission_jobs
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with detailed information about each JOB (CB) of
each active command submission
What: /sys/kernel/debug/habanalabs/hl<n>/data32
What: /sys/kernel/debug/accel/<n>/data32
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -50,7 +50,7 @@ Description: Allows the root user to read or write directly through the
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/data64
What: /sys/kernel/debug/accel/<n>/data64
Date: Jan 2020
KernelVersion: 5.6
Contact: ogabbay@kernel.org
@ -65,7 +65,7 @@ Description: Allows the root user to read or write 64 bit data directly
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/data_dma
What: /sys/kernel/debug/accel/<n>/data_dma
Date: Apr 2021
KernelVersion: 5.13
Contact: ogabbay@kernel.org
@ -79,11 +79,11 @@ Description: Allows the root user to read from the device's internal
a very long time.
This interface doesn't support concurrency in the same device.
In GAUDI and GOYA, this action can cause undefined behavior
in case the it is done while the device is executing user
in case it is done while the device is executing user
workloads.
Only supported on GAUDI at this stage.
What: /sys/kernel/debug/habanalabs/hl<n>/device
What: /sys/kernel/debug/accel/<n>/device
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -91,14 +91,14 @@ Description: Enables the root user to set the device to specific state.
Valid values are "disable", "enable", "suspend", "resume".
User can read this property to see the valid values
What: /sys/kernel/debug/habanalabs/hl<n>/device_release_watchdog_timeout
What: /sys/kernel/debug/accel/<n>/device_release_watchdog_timeout
Date: Oct 2022
KernelVersion: 6.2
Contact: ttayar@habana.ai
Description: The watchdog timeout value in seconds for a device release upon
certain error cases, after which the device is reset.
What: /sys/kernel/debug/habanalabs/hl<n>/dma_size
What: /sys/kernel/debug/accel/<n>/dma_size
Date: Apr 2021
KernelVersion: 5.13
Contact: ogabbay@kernel.org
@ -108,7 +108,7 @@ Description: Specify the size of the DMA transaction when using DMA to read
When the write is finished, the user can read the "data_dma"
blob
What: /sys/kernel/debug/habanalabs/hl<n>/dump_razwi_events
What: /sys/kernel/debug/accel/<n>/dump_razwi_events
Date: Aug 2022
KernelVersion: 5.20
Contact: fkassabri@habana.ai
@ -117,7 +117,7 @@ Description: Dumps all razwi events to dmesg if exist.
the routine will clear the status register.
Usage: cat dump_razwi_events
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
What: /sys/kernel/debug/accel/<n>/dump_security_violations
Date: Jan 2021
KernelVersion: 5.12
Contact: ogabbay@kernel.org
@ -125,14 +125,14 @@ Description: Dumps all security violations to dmesg. This will also ack
all security violations, meaning those violations will not be
dumped next time user calls this API
What: /sys/kernel/debug/habanalabs/hl<n>/engines
What: /sys/kernel/debug/accel/<n>/engines
Date: Jul 2019
KernelVersion: 5.3
Contact: ogabbay@kernel.org
Description: Displays the status registers values of the device engines and
their derived idle status
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
What: /sys/kernel/debug/accel/<n>/i2c_addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -140,7 +140,7 @@ Description: Sets I2C device address for I2C transaction that is generated
by the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
What: /sys/kernel/debug/accel/<n>/i2c_bus
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -148,7 +148,7 @@ Description: Sets I2C bus address for I2C transaction that is generated by
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_data
What: /sys/kernel/debug/accel/<n>/i2c_data
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -157,7 +157,7 @@ Description: Triggers an I2C transaction that is generated by the device's
reading from the file generates a read transaction, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_len
What: /sys/kernel/debug/accel/<n>/i2c_len
Date: Dec 2021
KernelVersion: 5.17
Contact: obitton@habana.ai
@ -165,7 +165,7 @@ Description: Sets I2C length in bytes for I2C transaction that is generated b
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
What: /sys/kernel/debug/accel/<n>/i2c_reg
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -173,35 +173,35 @@ Description: Sets I2C register id for I2C transaction that is generated by
the device's CPU, Not available when device is loaded with secured
firmware
What: /sys/kernel/debug/habanalabs/hl<n>/led0
What: /sys/kernel/debug/accel/<n>/led0
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the first S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/led1
What: /sys/kernel/debug/accel/<n>/led1
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the second S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/led2
What: /sys/kernel/debug/accel/<n>/led2
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the state of the third S/W led on the device, Not available
when device is loaded with secured firmware
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub
What: /sys/kernel/debug/accel/<n>/memory_scrub
Date: May 2022
KernelVersion: 5.19
Contact: dhirschfeld@habana.ai
Description: Allows the root user to scrub the dram memory. The scrubbing
value can be set using the debugfs file memory_scrub_val.
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub_val
What: /sys/kernel/debug/accel/<n>/memory_scrub_val
Date: May 2022
KernelVersion: 5.19
Contact: dhirschfeld@habana.ai
@ -209,7 +209,7 @@ Description: The value to which the dram will be set to when the user
scrubs the dram using 'memory_scrub' debugfs file and
the scrubbing value when using module param 'memory_scrub'
What: /sys/kernel/debug/habanalabs/hl<n>/mmu
What: /sys/kernel/debug/accel/<n>/mmu
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -217,19 +217,19 @@ Description: Displays the hop values and physical address for a given ASID
and virtual address. The user should write the ASID and VA into
the file and then read the file to get the result.
e.g. to display info about VA 0x1000 for ASID 1 you need to do:
echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu
echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
What: /sys/kernel/debug/habanalabs/hl<n>/mmu_error
What: /sys/kernel/debug/accel/<n>/mmu_error
Date: Mar 2021
KernelVersion: 5.12
Contact: fkassabri@habana.ai
Description: Check and display page fault or access violation mmu errors for
all MMUs specified in mmu_cap_mask.
e.g. to display error info for MMU hw cap bit 9, you need to do:
echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
cat /sys/kernel/debug/habanalabs/hl0/mmu_error
echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
cat /sys/kernel/debug/accel/0/mmu_error
What: /sys/kernel/debug/habanalabs/hl<n>/monitor_dump
What: /sys/kernel/debug/accel/<n>/monitor_dump
Date: Mar 2022
KernelVersion: 5.19
Contact: osharabi@habana.ai
@ -243,7 +243,7 @@ Description: Allows the root user to dump monitors status from the device's
This interface doesn't support concurrency in the same device.
Only supported on GAUDI.
What: /sys/kernel/debug/habanalabs/hl<n>/monitor_dump_trig
What: /sys/kernel/debug/accel/<n>/monitor_dump_trig
Date: Mar 2022
KernelVersion: 5.19
Contact: osharabi@habana.ai
@ -253,14 +253,14 @@ Description: Triggers dump of monitor data. The value to trigger the operatio
When the write is finished, the user can read the "monitor_dump"
blob
What: /sys/kernel/debug/habanalabs/hl<n>/set_power_state
What: /sys/kernel/debug/accel/<n>/set_power_state
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
for D3Hot
What: /sys/kernel/debug/habanalabs/hl<n>/skip_reset_on_timeout
What: /sys/kernel/debug/accel/<n>/skip_reset_on_timeout
Date: Jun 2021
KernelVersion: 5.13
Contact: ynudelman@habana.ai
@ -268,7 +268,7 @@ Description: Sets the skip reset on timeout option for the device. Value of
"0" means device will be reset in case some CS has timed out,
otherwise it will not be reset.
What: /sys/kernel/debug/habanalabs/hl<n>/state_dump
What: /sys/kernel/debug/accel/<n>/state_dump
Date: Oct 2021
KernelVersion: 5.15
Contact: ynudelman@habana.ai
@ -279,7 +279,7 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
What: /sys/kernel/debug/accel/<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
Contact: ogabbay@kernel.org
@ -287,21 +287,21 @@ Description: Sets the stop-on_error option for the device engines. Value of
"0" is for disable, otherwise enable.
Relevant only for GOYA and GAUDI.
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
What: /sys/kernel/debug/accel/<n>/timeout_locked
Date: Sep 2021
KernelVersion: 5.16
Contact: obitton@habana.ai
Description: Sets the command submission timeout value in seconds.
What: /sys/kernel/debug/habanalabs/hl<n>/userptr
What: /sys/kernel/debug/accel/<n>/userptr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays a list with information about the currently user
Description: Displays a list with information about the current user
pointers (user virtual addresses) that are pinned and mapped
to DMA addresses
What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
What: /sys/kernel/debug/accel/<n>/userptr_lookup
Date: Oct 2021
KernelVersion: 5.15
Contact: ogabbay@kernel.org
@ -309,7 +309,7 @@ Description: Allows to search for specific user pointers (user virtual
addresses) that are pinned and mapped to DMA addresses, and see
their resolution to the specific dma address.
What: /sys/kernel/debug/habanalabs/hl<n>/vm
What: /sys/kernel/debug/accel/<n>/vm
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org

View File

@ -1,4 +1,4 @@
What: /sys/class/habanalabs/hl<n>/armcp_kernel_ver
What: /sys/class/accel/accel<n>/device/armcp_kernel_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -6,7 +6,7 @@ Description: Version of the Linux kernel running on the device's CPU.
Will be DEPRECATED in Linux kernel version 5.10, and be
replaced with cpucp_kernel_ver
What: /sys/class/habanalabs/hl<n>/armcp_ver
What: /sys/class/accel/accel<n>/device/armcp_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -14,7 +14,7 @@ Description: Version of the application running on the device's CPU
Will be DEPRECATED in Linux kernel version 5.10, and be
replaced with cpucp_ver
What: /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
What: /sys/class/accel/accel<n>/device/clk_max_freq_mhz
Date: Jun 2019
KernelVersion: 5.7
Contact: ogabbay@kernel.org
@ -24,58 +24,58 @@ Description: Allows the user to set the maximum clock frequency, in MHz.
frequency value of the device clock. This property is valid
only for the Gaudi ASIC family
What: /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
What: /sys/class/accel/accel<n>/device/clk_cur_freq_mhz
Date: Jun 2019
KernelVersion: 5.7
Contact: ogabbay@kernel.org
Description: Displays the current frequency, in MHz, of the device clock.
This property is valid only for the Gaudi ASIC family
What: /sys/class/habanalabs/hl<n>/cpld_ver
What: /sys/class/accel/accel<n>/device/cpld_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the Device's CPLD F/W
What: /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
What: /sys/class/accel/accel<n>/device/cpucp_kernel_ver
Date: Oct 2020
KernelVersion: 5.10
Contact: ogabbay@kernel.org
Description: Version of the Linux kernel running on the device's CPU
What: /sys/class/habanalabs/hl<n>/cpucp_ver
What: /sys/class/accel/accel<n>/device/cpucp_ver
Date: Oct 2020
KernelVersion: 5.10
Contact: ogabbay@kernel.org
Description: Version of the application running on the device's CPU
What: /sys/class/habanalabs/hl<n>/device_type
What: /sys/class/accel/accel<n>/device/device_type
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the code name of the device according to its type.
The supported values are: "GOYA"
What: /sys/class/habanalabs/hl<n>/eeprom
What: /sys/class/accel/accel<n>/device/eeprom
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: A binary file attribute that contains the contents of the
on-board EEPROM
What: /sys/class/habanalabs/hl<n>/fuse_ver
What: /sys/class/accel/accel<n>/device/fuse_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the device's version from the eFuse
What: /sys/class/habanalabs/hl<n>/fw_os_ver
What: /sys/class/accel/accel<n>/device/fw_os_ver
Date: Dec 2021
KernelVersion: 5.18
Contact: ogabbay@kernel.org
Description: Version of the firmware OS running on the device's CPU
What: /sys/class/habanalabs/hl<n>/hard_reset
What: /sys/class/accel/accel<n>/device/hard_reset
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -83,14 +83,14 @@ Description: Interface to trigger a hard-reset operation for the device.
Hard-reset will reset ALL internal components of the device
except for the PCI interface and the internal PLLs
What: /sys/class/habanalabs/hl<n>/hard_reset_cnt
What: /sys/class/accel/accel<n>/device/hard_reset_cnt
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays how many times the device has undergone a hard-reset
operation since the driver was loaded
What: /sys/class/habanalabs/hl<n>/high_pll
What: /sys/class/accel/accel<n>/device/high_pll
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -98,7 +98,7 @@ Description: Allows the user to set the maximum clock frequency for MME, TPC
and IC when the power management profile is set to "automatic".
This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/ic_clk
What: /sys/class/accel/accel<n>/device/ic_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -110,27 +110,27 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
frequency value of the IC. This property is valid only for the
Goya ASIC family
What: /sys/class/habanalabs/hl<n>/ic_clk_curr
What: /sys/class/accel/accel<n>/device/ic_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the current clock frequency, in Hz, of the Interconnect
fabric. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/infineon_ver
What: /sys/class/accel/accel<n>/device/infineon_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
What: /sys/class/habanalabs/hl<n>/max_power
What: /sys/class/accel/accel<n>/device/max_power
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Allows the user to set the maximum power consumption of the
device in milliwatts.
What: /sys/class/habanalabs/hl<n>/mme_clk
What: /sys/class/accel/accel<n>/device/mme_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -142,21 +142,21 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
frequency value of the MME. This property is valid only for the
Goya ASIC family
What: /sys/class/habanalabs/hl<n>/mme_clk_curr
What: /sys/class/accel/accel<n>/device/mme_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the current clock frequency, in Hz, of the MME compute
engine. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/pci_addr
What: /sys/class/accel/accel<n>/device/pci_addr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the PCI address of the device. This is needed so the
user would be able to open a device based on its PCI address
What: /sys/class/habanalabs/hl<n>/pm_mng_profile
What: /sys/class/accel/accel<n>/device/pm_mng_profile
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -170,19 +170,19 @@ Description: Power management profile. Values are "auto", "manual". In "auto"
ic_clk, mme_clk and tpc_clk. This property is valid only for
the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/preboot_btl_ver
What: /sys/class/accel/accel<n>/device/preboot_btl_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the device's preboot F/W code
What: /sys/class/habanalabs/hl<n>/security_enabled
What: /sys/class/accel/accel<n>/device/security_enabled
Date: Oct 2022
KernelVersion: 6.1
Contact: obitton@habana.ai
Description: Displays the device's security status
What: /sys/class/habanalabs/hl<n>/soft_reset
What: /sys/class/accel/accel<n>/device/soft_reset
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -190,14 +190,14 @@ Description: Interface to trigger a soft-reset operation for the device.
Soft-reset will reset only the compute and DMA engines of the
device
What: /sys/class/habanalabs/hl<n>/soft_reset_cnt
What: /sys/class/accel/accel<n>/device/soft_reset_cnt
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays how many times the device has undergone a soft-reset
operation since the driver was loaded
What: /sys/class/habanalabs/hl<n>/status
What: /sys/class/accel/accel<n>/device/status
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -215,13 +215,13 @@ Description: Status of the card:
a compute-reset which is executed after a device release
(relevant for Gaudi2 only).
What: /sys/class/habanalabs/hl<n>/thermal_ver
What: /sys/class/accel/accel<n>/device/thermal_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the Device's thermal daemon
What: /sys/class/habanalabs/hl<n>/tpc_clk
What: /sys/class/accel/accel<n>/device/tpc_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
@ -233,20 +233,20 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
frequency value of the TPC. This property is valid only for
Goya ASIC family
What: /sys/class/habanalabs/hl<n>/tpc_clk_curr
What: /sys/class/accel/accel<n>/device/tpc_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Displays the current clock frequency, in Hz, of the TPC compute
engines. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/uboot_ver
What: /sys/class/accel/accel<n>/device/uboot_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the u-boot running on the device's CPU
What: /sys/class/habanalabs/hl<n>/vrm_ver
What: /sys/class/accel/accel<n>/device/vrm_ver
Date: Jan 2022
KernelVersion: 5.17
Contact: ogabbay@kernel.org

View File

@ -123,6 +123,16 @@ DRM_IOCTL_QAIC_PART_DEV
AIC100 device and can be used for limiting a process to some subset of
resources.
DRM_IOCTL_QAIC_DETACH_SLICE_BO
This IOCTL allows userspace to remove the slicing information from a BO that
was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This
is the inverse of DRM_IOCTL_QAIC_ATTACH_SLICE_BO. The BO must be idle for
DRM_IOCTL_QAIC_DETACH_SLICE_BO to be called. After a successful detach slice
operation the BO may have new slicing information attached with a new call
to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. After detach slice, the BO cannot be
executed until after a new attach slice operation. Combining attach slice
and detach slice calls allows userspace to use a BO with multiple workloads.
Userspace Client Isolation
==========================

View File

@ -17,6 +17,7 @@ properties:
- analogix,anx7808
- analogix,anx7812
- analogix,anx7814
- analogix,anx7816
- analogix,anx7818
reg:

View File

@ -0,0 +1,115 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/bridge/fsl,imx93-mipi-dsi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Freescale i.MX93 specific extensions to Synopsys Designware MIPI DSI
maintainers:
- Liu Ying <victor.liu@nxp.com>
description: |
There is a Synopsys Designware MIPI DSI Host Controller and a Synopsys
Designware MIPI DPHY embedded in Freescale i.MX93 SoC. Some configurations
and extensions to them are controlled by i.MX93 media blk-ctrl.
allOf:
- $ref: snps,dw-mipi-dsi.yaml#
properties:
compatible:
const: fsl,imx93-mipi-dsi
clocks:
items:
- description: apb clock
- description: pixel clock
- description: PHY configuration clock
- description: PHY reference clock
clock-names:
items:
- const: pclk
- const: pix
- const: phy_cfg
- const: phy_ref
interrupts:
maxItems: 1
fsl,media-blk-ctrl:
$ref: /schemas/types.yaml#/definitions/phandle
description:
i.MX93 media blk-ctrl, as a syscon, controls pixel component bit map
configurations from LCDIF display controller to the MIPI DSI host
controller and MIPI DPHY PLL related configurations through PLL SoC
interface.
power-domains:
maxItems: 1
required:
- compatible
- interrupts
- fsl,media-blk-ctrl
- power-domains
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/imx93-clock.h>
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/fsl,imx93-power.h>
dsi@4ae10000 {
compatible = "fsl,imx93-mipi-dsi";
reg = <0x4ae10000 0x10000>;
interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX93_CLK_MIPI_DSI_GATE>,
<&clk IMX93_CLK_MEDIA_DISP_PIX>,
<&clk IMX93_CLK_MIPI_PHY_CFG>,
<&clk IMX93_CLK_24M>;
clock-names = "pclk", "pix", "phy_cfg", "phy_ref";
fsl,media-blk-ctrl = <&media_blk_ctrl>;
power-domains = <&media_blk_ctrl IMX93_MEDIABLK_PD_MIPI_DSI>;
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "raydium,rm67191";
reg = <0>;
reset-gpios = <&adp5585gpio 6 GPIO_ACTIVE_LOW>;
dsi-lanes = <4>;
video-mode = <2>;
port {
panel_in: endpoint {
remote-endpoint = <&dsi_out>;
};
};
};
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dsi_to_lcdif: endpoint {
remote-endpoint = <&lcdif_to_dsi>;
};
};
port@1 {
reg = <1>;
dsi_out: endpoint {
remote-endpoint = <&panel_in>;
};
};
};
};

View File

@ -0,0 +1,84 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/lvds-data-mapping.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: LVDS Data Mapping
maintainers:
- Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
- Thierry Reding <thierry.reding@gmail.com>
description: |
LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
incompatible data link layers have been used over time to transmit image data
to LVDS devices. This binding supports devices compatible with the following
specifications.
[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
Semiconductor
[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
Electronics Standards Association (VESA)
Devices compatible with those specifications have been marketed under the
FPD-Link and FlatLink brands.
properties:
data-mapping:
enum:
- jeida-18
- jeida-24
- vesa-24
description: |
The color signals mapping order.
LVDS data mappings are defined as follows.
- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and
[VESA] specifications. Data are transferred as follows on 3 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI]
specifications. Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__><
DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__><
DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification.
Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
Control signals are mapped as follows.
CTL0: HSync
CTL1: VSync
CTL2: Data Enable
CTL3: 0
additionalProperties: true
...

View File

@ -6,83 +6,24 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: LVDS Display Common Properties
allOf:
- $ref: lvds-data-mapping.yaml#
maintainers:
- Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
- Thierry Reding <thierry.reding@gmail.com>
description: |+
LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
incompatible data link layers have been used over time to transmit image data
to LVDS devices. This bindings supports devices compatible with the following
specifications.
[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
Semiconductor
[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
Electronics Standards Association (VESA)
Device compatible with those specifications have been marketed under the
FPD-Link and FlatLink brands.
description:
This binding extends the data mapping defined in lvds-data-mapping.yaml.
It supports reversing the bit order on the formats defined there in order
to accommodate even more specialized data formats, since a variety of
data formats and layouts is used to drive LVDS displays.
properties:
data-mapping:
enum:
- jeida-18
- jeida-24
- vesa-24
description: |
The color signals mapping order.
LVDS data mappings are defined as follows.
- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and
[VESA] specifications. Data are transferred as follows on 3 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI]
specifications. Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__><
DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__><
DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification.
Data are transferred as follows on 4 LVDS lanes.
Slot 0 1 2 3 4 5 6
________________ _________________
Clock \_______________________/
______ ______ ______ ______ ______ ______ ______
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
Control signals are mapped as follows.
CTL0: HSync
CTL1: VSync
CTL2: Data Enable
CTL3: 0
data-mirror:
type: boolean
description:
If set, reverse the bit order described in the data mappings below on all
If set, reverse the bit order described in the data mappings on all
data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6.
additionalProperties: true

View File

@ -21,6 +21,8 @@ description: |
properties:
compatible:
enum:
- mediatek,mt8188-dp-tx
- mediatek,mt8188-edp-tx
- mediatek,mt8195-dp-tx
- mediatek,mt8195-edp-tx

View File

@ -30,6 +30,7 @@ properties:
- mediatek,mt8173-dsi
- mediatek,mt8183-dsi
- mediatek,mt8186-dsi
- mediatek,mt8188-dsi
- items:
- enum:
- mediatek,mt6795-dsi

View File

@ -114,6 +114,7 @@ properties:
port@1:
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
description: Output endpoint of the controller
properties:
endpoint:

View File

@ -21,7 +21,7 @@ properties:
compatible:
oneOf:
- items:
- pattern: '^qcom,adreno-gmu-6[0-9][0-9]\.[0-9]$'
- pattern: '^qcom,adreno-gmu-[67][0-9][0-9]\.[0-9]$'
- const: qcom,adreno-gmu
- const: qcom,adreno-gmu-wrapper
@ -64,6 +64,10 @@ properties:
iommus:
maxItems: 1
qcom,qmp:
$ref: /schemas/types.yaml#/definitions/phandle
description: Reference to the AOSS side-channel message RAM
operating-points-v2: true
opp-table:
@ -213,6 +217,47 @@ allOf:
- const: axi
- const: memnoc
- if:
properties:
compatible:
contains:
enum:
- qcom,adreno-gmu-730.1
- qcom,adreno-gmu-740.1
then:
properties:
reg:
items:
- description: Core GMU registers
- description: Resource controller registers
- description: GMU PDC registers
reg-names:
items:
- const: gmu
- const: rscc
- const: gmu_pdc
clocks:
items:
- description: GPU AHB clock
- description: GMU clock
- description: GPU CX clock
- description: GPU AXI clock
- description: GPU MEMNOC clock
- description: GMU HUB clock
- description: GPUSS DEMET clock
clock-names:
items:
- const: ahb
- const: gmu
- const: cxo
- const: axi
- const: memnoc
- const: hub
- const: demet
required:
- qcom,qmp
- if:
properties:
compatible:

View File

@ -23,7 +23,7 @@ properties:
The driver is parsing the compat string for Adreno to
figure out the gpu-id and patch level.
items:
- pattern: '^qcom,adreno-[3-6][0-9][0-9]\.[0-9]$'
- pattern: '^qcom,adreno-[3-7][0-9][0-9]\.[0-9]$'
- const: qcom,adreno
- description: |
The driver is parsing the compat string for Imageon to
@ -203,7 +203,7 @@ allOf:
properties:
compatible:
contains:
pattern: '^qcom,adreno-6[0-9][0-9]\.[0-9]$'
pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]$'
then: # Starting with A6xx, the clocks are usually defined in the GMU node
properties:

View File

@ -38,12 +38,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,msm8998-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -52,6 +56,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm-8998

View File

@ -44,18 +44,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,qcm2290-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-ctrl-6g-qcm2290
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-14nm-2290

View File

@ -44,18 +44,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7180-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7180-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -64,6 +70,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm

View File

@ -44,18 +44,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7280-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7280-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -64,12 +70,16 @@ patternProperties:
"^edp@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc7280-edp
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
enum:

View File

@ -34,12 +34,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc8280xp-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
enum:

View File

@ -42,18 +42,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sdm845-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sdm845-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -62,6 +68,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm

View File

@ -32,12 +32,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6115-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
oneOf:
@ -50,6 +54,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-14nm-2290

View File

@ -43,12 +43,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6125-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -57,6 +61,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6125-dsi-phy-14nm

View File

@ -43,12 +43,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6350-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -57,6 +61,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-10nm

View File

@ -43,12 +43,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6375-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -57,6 +61,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm6375-dsi-phy-7nm

View File

@ -47,12 +47,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8150-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -61,6 +65,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-7nm

View File

@ -46,12 +46,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8250-dpu
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -60,6 +64,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-7nm

View File

@ -48,18 +48,24 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8350-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8350-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -68,6 +74,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8350-dsi-phy-5nm

View File

@ -38,12 +38,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8450-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -52,6 +56,8 @@ patternProperties:
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -60,6 +66,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8450-dsi-phy-5nm

View File

@ -38,12 +38,16 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8550-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -52,6 +56,8 @@ patternProperties:
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -60,6 +66,8 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8550-dsi-phy-4nm

View File

@ -0,0 +1,94 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/jdi,lpm102a188a.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: JDI LPM102A188A 2560x1800 10.2" DSI Panel
maintainers:
- Diogo Ivo <diogo.ivo@tecnico.ulisboa.pt>
description: |
This panel requires a dual-channel DSI host to operate. It supports two modes:
- left-right: each channel drives the left or right half of the screen
- even-odd: each channel drives the even or odd lines of the screen
Each of the DSI channels controls a separate DSI peripheral. The peripheral
driven by the first link (DSI-LINK1) is considered the primary peripheral
and controls the device. The 'link2' property contains a phandle to the
peripheral driven by the second link (DSI-LINK2).
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
const: jdi,lpm102a188a
reg: true
enable-gpios: true
reset-gpios: true
power-supply: true
backlight: true
ddi-supply:
description: The regulator that provides IOVCC (1.8V).
link2:
$ref: /schemas/types.yaml#/definitions/phandle
description: |
phandle to the DSI peripheral on the secondary link. Note that the
presence of this property marks the containing node as DSI-LINK1.
required:
- compatible
- reg
if:
required:
- link2
then:
required:
- power-supply
- ddi-supply
- enable-gpios
- reset-gpios
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/gpio/tegra-gpio.h>
dsia: dsi@54300000 {
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x54300000 0x0 0x00040000>;
link2: panel@0 {
compatible = "jdi,lpm102a188a";
reg = <0>;
};
};
dsib: dsi@54400000{
#address-cells = <1>;
#size-cells = <0>;
reg = <0x0 0x54400000 0x0 0x00040000>;
nvidia,ganged-mode = <&dsia>;
link1: panel@0 {
compatible = "jdi,lpm102a188a";
reg = <0>;
power-supply = <&pplcd_vdd>;
ddi-supply = <&pp1800_lcdio>;
enable-gpios = <&gpio TEGRA_GPIO(V, 1) GPIO_ACTIVE_HIGH>;
reset-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
link2 = <&link2>;
backlight = <&backlight>;
};
};
...

View File

@ -17,6 +17,7 @@ properties:
enum:
- leadtek,ltk050h3146w
- leadtek,ltk050h3146w-a2
- leadtek,ltk050h3148w
reg: true
backlight: true
reset-gpios: true

View File

@ -7,9 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: NewVision NV3051D based LCD panel
description: |
The NewVision NV3051D is a driver chip used to drive DSI panels. For now,
this driver only supports the 640x480 panels found in the Anbernic RG353
based devices.
The NewVision NV3051D is a driver chip used to drive DSI panels.
maintainers:
- Chris Morgan <macromorgan@hotmail.com>
@ -21,6 +19,7 @@ properties:
compatible:
items:
- enum:
- anbernic,rg351v-panel
- anbernic,rg353p-panel
- anbernic,rg353v-panel
- const: newvision,nv3051d

View File

@ -21,9 +21,9 @@ description: |
allOf:
- $ref: panel-common.yaml#
- $ref: ../lvds-data-mapping.yaml#
properties:
compatible:
enum:
# compatible must be listed in alphabetical order, ordered by compatible.
@ -230,6 +230,8 @@ properties:
- logictechno,lttd800480070-l6wh-rt
# Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel
- mitsubishi,aa070mc01-ca1
# Mitsubishi AA084XE01 8.4" XGA TFT LCD panel
- mitsubishi,aa084xe01
# Multi-Inno Technology Co.,Ltd MI0700S4T-6 7" 800x480 TFT Resistive Touch Module
- multi-inno,mi0700s4t-6
# Multi-Inno Technology Co.,Ltd MI0800FT-9 8" 800x600 TFT Resistive Touch Module
@ -347,6 +349,17 @@ properties:
power-supply: true
no-hpd: true
hpd-gpios: true
data-mapping: true
if:
not:
properties:
compatible:
contains:
const: innolux,g101ice-l01
then:
properties:
data-mapping: false
additionalProperties: false
@ -366,3 +379,16 @@ examples:
};
};
};
- |
panel_lvds: panel-lvds {
compatible = "innolux,g101ice-l01";
power-supply = <&vcc_lcd_reg>;
data-mapping = "jeida-24";
port {
panel_in_lvds: endpoint {
remote-endpoint = <&ltdc_out_lvds>;
};
};
};

View File

@ -0,0 +1,73 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/raydium,rm692e5.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Raydium RM692E5 based DSI display panels
maintainers:
- Konrad Dybcio <konradybcio@kernel.org>
description:
The Raydium RM692E5 is a generic DSI Panel IC used to control
AMOLED panels.
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
items:
- const: fairphone,fp5-rm692e5-boe
- const: raydium,rm692e5
dvdd-supply:
description: Digital voltage rail
vci-supply:
description: Analog voltage rail
vddio-supply:
description: I/O voltage rail
reg: true
port: true
required:
- compatible
- reg
- reset-gpios
- dvdd-supply
- vci-supply
- vddio-supply
- port
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "fairphone,fp5-rm692e5-boe", "raydium,rm692e5";
reg = <0>;
reset-gpios = <&tlmm 44 GPIO_ACTIVE_LOW>;
dvdd-supply = <&vreg_oled_vci>;
vci-supply = <&vreg_l12c>;
vddio-supply = <&vreg_oled_dvdd>;
port {
panel_in_0: endpoint {
remote-endpoint = <&dsi0_out>;
};
};
};
};
...

View File

@ -22,6 +22,8 @@ properties:
enum:
# Anbernic RG353V-V2 5.0" 640x480 TFT LCD panel
- anbernic,rg353v-panel-v2
# Powkiddy RGB30 3.0" 720x720 TFT LCD panel
- powkiddy,rgb30-panel
# Rocktech JH057N00900 5.5" 720x1440 TFT LCD panel
- rocktech,jh057n00900
# Xingbangda XBD599 5.99" 720x1440 TFT LCD panel

View File

@ -18,6 +18,7 @@ properties:
- rockchip,rk3288-mipi-dsi
- rockchip,rk3399-mipi-dsi
- rockchip,rk3568-mipi-dsi
- rockchip,rv1126-mipi-dsi
- const: snps,dw-mipi-dsi
interrupts:
@ -77,6 +78,7 @@ allOf:
enum:
- rockchip,px30-mipi-dsi
- rockchip,rk3568-mipi-dsi
- rockchip,rv1126-mipi-dsi
then:
properties:

View File

@ -31,6 +31,7 @@ properties:
- rockchip,rk3368-vop
- rockchip,rk3399-vop-big
- rockchip,rk3399-vop-lit
- rockchip,rv1126-vop
reg:
minItems: 1

View File

@ -0,0 +1,42 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/solomon,ssd-common.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Common properties for Solomon OLED Display Controllers
maintainers:
- Javier Martinez Canillas <javierm@redhat.com>
properties:
reg:
maxItems: 1
reset-gpios:
maxItems: 1
# Only required for SPI
dc-gpios:
description:
GPIO connected to the controller's D/C# (Data/Command) pin,
that is needed for 4-wire SPI to tell the controller if the
data sent is for a command register or the display data RAM
maxItems: 1
solomon,height:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Height in pixel of the screen driven by the controller.
The default value is controller-dependent.
solomon,width:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Width in pixel of the screen driven by the controller.
The default value is controller-dependent.
allOf:
- $ref: /schemas/spi/spi-peripheral-props.yaml#
additionalProperties: true

View File

@ -27,38 +27,12 @@ properties:
- solomon,ssd1307
- solomon,ssd1309
reg:
maxItems: 1
pwms:
maxItems: 1
reset-gpios:
maxItems: 1
# Only required for SPI
dc-gpios:
description:
GPIO connected to the controller's D/C# (Data/Command) pin,
that is needed for 4-wire SPI to tell the controller if the
data sent is for a command register or the display data RAM
maxItems: 1
vbat-supply:
description: The supply for VBAT
solomon,height:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Height in pixel of the screen driven by the controller.
The default value is controller-dependent.
solomon,width:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Width in pixel of the screen driven by the controller.
The default value is controller-dependent.
solomon,page-offset:
$ref: /schemas/types.yaml#/definitions/uint32
default: 1
@ -148,7 +122,7 @@ required:
- reg
allOf:
- $ref: /schemas/spi/spi-peripheral-props.yaml#
- $ref: solomon,ssd-common.yaml#
- if:
properties:

View File

@ -0,0 +1,89 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/solomon,ssd132x.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Solomon SSD132x OLED Display Controllers
maintainers:
- Javier Martinez Canillas <javierm@redhat.com>
properties:
compatible:
enum:
- solomon,ssd1322
- solomon,ssd1325
- solomon,ssd1327
required:
- compatible
- reg
allOf:
- $ref: solomon,ssd-common.yaml#
- if:
properties:
compatible:
contains:
const: solomon,ssd1322
then:
properties:
solomon,width:
default: 480
solomon,height:
default: 128
- if:
properties:
compatible:
contains:
const: solomon,ssd1325
then:
properties:
solomon,width:
default: 128
solomon,height:
default: 80
- if:
properties:
compatible:
contains:
const: solomon,ssd1327
then:
properties:
solomon,width:
default: 128
solomon,height:
default: 128
unevaluatedProperties: false
examples:
- |
i2c {
#address-cells = <1>;
#size-cells = <0>;
oled@3c {
compatible = "solomon,ssd1327";
reg = <0x3c>;
reset-gpios = <&gpio2 7>;
};
};
- |
spi {
#address-cells = <1>;
#size-cells = <0>;
oled@0 {
compatible = "solomon,ssd1327";
reg = <0x0>;
reset-gpios = <&gpio2 7>;
dc-gpios = <&gpio2 8>;
spi-max-frequency = <10000000>;
};
};

View File

@ -1085,6 +1085,8 @@ patternProperties:
description: Powertip Tech. Corp.
"^powervr,.*":
description: PowerVR (deprecated, use img)
"^powkiddy,.*":
description: Powkiddy
"^primux,.*":
description: Primux Trading, S.L.
"^probox2,.*":

View File

@ -5,14 +5,30 @@ The dma-buf subsystem provides the framework for sharing buffers for
hardware (DMA) access across multiple device drivers and subsystems, and
for synchronizing asynchronous hardware access.
This is used, for example, by drm "prime" multi-GPU support, but is of
course not limited to GPU use cases.
As an example, it is used extensively by the DRM subsystem to exchange
buffers between processes, contexts, library APIs within the same
process, and also to exchange buffers with other subsystems such as
V4L2.
This document describes the way in which kernel subsystems can use and
interact with the three main primitives offered by dma-buf:
- dma-buf, representing a sg_table and exposed to userspace as a file
descriptor to allow passing between processes, subsystems, devices,
etc;
- dma-fence, providing a mechanism to signal when an asynchronous
hardware operation has completed; and
- dma-resv, which manages a set of dma-fences for a particular dma-buf
allowing implicit (kernel-ordered) synchronization of work to
preserve the illusion of coherent access
Userspace API principles and use
--------------------------------
For more details on how to design your subsystem's API for dma-buf use, please
see Documentation/userspace-api/dma-buf-alloc-exchange.rst.
The three main components of this are: (1) dma-buf, representing a
sg_table and exposed to userspace as a file descriptor to allow passing
between devices, (2) fence, which provides a mechanism to signal when
one device has finished access, and (3) reservation, which manages the
shared or exclusive fence(s) associated with the buffer.
Shared DMA Buffers
------------------

View File

@ -26,12 +26,30 @@ serial_number
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: serial_number
fru_id
-------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: fru_id
manufacturer
-------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: manufacturer
unique_id
---------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: unique_id
board_info
----------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
:doc: board_info
Accelerated Processing Units (APU) Info
---------------------------------------

View File

@ -64,6 +64,36 @@ gpu_metrics
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: gpu_metrics
fan_curve
---------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_curve
acoustic_limit_rpm_threshold
----------------------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: acoustic_limit_rpm_threshold
acoustic_target_rpm_threshold
-----------------------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: acoustic_target_rpm_threshold
fan_target_temperature
----------------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_target_temperature
fan_minimum_pwm
---------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: fan_minimum_pwm
GFXOFF
======

View File

@ -67,6 +67,19 @@ Lists the tests that for a given driver on a specific hardware revision are
known to behave unreliably. These tests won't cause a job to fail regardless of
the result. They will still be run.
Each new flake entry must be associated with a link to the email reporting the
bug to the author of the affected driver, the board name or Device Tree name of
the board, the first kernel version affected, and an approximation of the
failure rate.
They should be provided under the following format::
# Bug Report: $LORE_OR_PATCHWORK_URL
# Board Name: broken-board.dtb
# Version: 6.6-rc1
# Failure Rate: 100
flaky-test
drivers/gpu/drm/ci/${DRIVER_NAME}-${HW_REVISION}-skips.txt
-----------------------------------------------------------
@ -86,10 +99,13 @@ https://gitlab.freedesktop.org/janedoe/linux/-/settings/ci_cd), change the
CI/CD configuration file from .gitlab-ci.yml to
drivers/gpu/drm/ci/gitlab-ci.yml.
3. Next time you push to this repository, you will see a CI pipeline being
3. Request to be added to the drm/ci-ok group so that your user has the
necessary privileges to run the CI on https://gitlab.freedesktop.org/drm/ci-ok
4. Next time you push to this repository, you will see a CI pipeline being
created (eg. https://gitlab.freedesktop.org/janedoe/linux/-/pipelines)
4. The various jobs will be run and when the pipeline is finished, all jobs
5. The various jobs will be run and when the pipeline is finished, all jobs
should be green unless a regression has been found.

View File

@ -18,6 +18,7 @@ GPU Driver Documentation
xen-front
afbc
komeda-kms
panfrost
.. only:: subproject and html

View File

@ -360,6 +360,8 @@ Format Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_fourcc.c
:export:
.. _kms_dumb_buffer_objects:
Dumb Buffer Objects
===================

View File

@ -466,40 +466,40 @@ DRM MM Range Allocator Function References
.. kernel-doc:: drivers/gpu/drm/drm_mm.c
:export:
DRM GPU VA Manager
==================
DRM GPUVM
=========
Overview
--------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Overview
Split and Merge
---------------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Split and Merge
Locking
-------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Locking
Examples
--------
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Examples
DRM GPU VA Manager Function References
--------------------------------------
DRM GPUVM Function References
-----------------------------
.. kernel-doc:: include/drm/drm_gpuva_mgr.h
.. kernel-doc:: include/drm/drm_gpuvm.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:export:
DRM Buddy Allocator

View File

@ -285,6 +285,83 @@ for GPU1 and GPU2 from different vendors, and a third handler for
mmapped regular files. Threads cause additional pain with signal
handling as well.
Device reset
============
The GPU stack is really complex and is prone to errors, from hardware bugs,
faulty applications and everything in between the many layers. Some errors
require resetting the device in order to make the device usable again. This
section describes the expectations for DRM and usermode drivers when a
device resets and how to propagate the reset status.
Device resets can not be disabled without tainting the kernel, which can lead to
hanging the entire kernel through shrinkers/mmu_notifiers. Userspace role in
device resets is to propagate the message to the application and apply any
special policy for blocking guilty applications, if any. Corollary is that
debugging a hung GPU context requires hardware support to be able to preempt such
a GPU context while it's stopped.
Kernel Mode Driver
------------------
The KMD is responsible for checking if the device needs a reset, and to perform
it as needed. Usually a hang is detected when a job gets stuck executing. KMD
should keep track of resets, because userspace can query any time about the
reset status for a specific context. This is needed to propagate to the rest of
the stack that a reset has happened. Currently, this is implemented by each
driver separately, with no common DRM interface. Ideally this should be properly
integrated at DRM scheduler to provide a common ground for all drivers. After a
reset, KMD should reject new command submissions for affected contexts.
User Mode Driver
----------------
After command submission, UMD should check if the submission was accepted or
rejected. After a reset, KMD should reject submissions, and UMD can issue an
ioctl to the KMD to check the reset status, and this can be checked more often
if the UMD requires it. After detecting a reset, UMD will then proceed to report
it to the application using the appropriate API error code, as explained in the
section below about robustness.
Robustness
----------
The only way to try to keep a graphical API context working after a reset is if
it complies with the robustness aspects of the graphical API that it is using.
Graphical APIs provide ways for applications to deal with device resets. However,
there is no guarantee that the app will use such features correctly, and a
userspace that doesn't support robust interfaces (like a non-robust
OpenGL context or API without any robustness support like libva) leaves the
robustness handling entirely to the userspace driver. There is no strong
community consensus on what the userspace driver should do in that case,
since all reasonable approaches have some clear downsides.
OpenGL
~~~~~~
Apps using OpenGL should use the available robust interfaces, like the
extension ``GL_ARB_robustness`` (or ``GL_EXT_robustness`` for OpenGL ES). This
interface tells if a reset has happened, and if so, all the context state is
considered lost and the app proceeds by creating new ones. There's no consensus
on what to do if robustness is not in use.
Vulkan
~~~~~~
Apps using Vulkan should check for ``VK_ERROR_DEVICE_LOST`` for submissions.
This error code means, among other things, that a device reset has happened and
it needs to recreate the contexts to keep going.
Reporting causes of resets
--------------------------
Apart from propagating the reset through the stack so apps can recover, it's
really useful for driver developers to learn more about what caused the reset in
the first place. DRM devices should make use of devcoredump to store relevant
information about the reset, so this information can be added to user bug
reports.
.. _drm_driver_ioctl:
IOCTL Support on Device Nodes
@ -450,12 +527,12 @@ VBlank event handling
The DRM core exposes two vertical blank related ioctls:
DRM_IOCTL_WAIT_VBLANK
:c:macro:`DRM_IOCTL_WAIT_VBLANK`
This takes a struct drm_wait_vblank structure as its argument, and
it is used to block or request a signal when a specified vblank
event occurs.
DRM_IOCTL_MODESET_CTL
:c:macro:`DRM_IOCTL_MODESET_CTL`
This was only used for user-mode-setting drivers around modesetting
changes to allow the kernel to update the vblank interrupt after
mode setting, since on many devices the vertical blank counter is
@ -478,11 +555,18 @@ The index is used in cases where a densely packed identifier for a CRTC is
needed, for instance a bitmask of CRTC's. The member possible_crtcs of struct
drm_mode_get_plane is an example.
DRM_IOCTL_MODE_GETRESOURCES populates a structure with an array of CRTC ID's,
and the CRTC index is its position in this array.
:c:macro:`DRM_IOCTL_MODE_GETRESOURCES` populates a structure with an array of
CRTC ID's, and the CRTC index is its position in this array.
.. kernel-doc:: include/uapi/drm/drm.h
:internal:
.. kernel-doc:: include/uapi/drm/drm_mode.h
:internal:
dma-buf interoperability
========================
Please see Documentation/userspace-api/dma-buf-alloc-exchange.rst for
information on how dma-buf is integrated and exposed within DRM.

View File

@ -169,3 +169,4 @@ Driver specific implementations
-------------------------------
:ref:`i915-usage-stats`
:ref:`panfrost-usage-stats`

View File

@ -0,0 +1,309 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
====================
Asynchronous VM_BIND
====================
Nomenclature:
=============
* ``VRAM``: On-device memory. Sometimes referred to as device local memory.
* ``gpu_vm``: A virtual GPU address space. Typically per process, but
can be shared by multiple processes.
* ``VM_BIND``: An operation or a list of operations to modify a gpu_vm using
an IOCTL. The operations include mapping and unmapping system- or
VRAM memory.
* ``syncobj``: A container that abstracts synchronization objects. The
synchronization objects can be either generic, like dma-fences or
driver specific. A syncobj typically indicates the type of the
underlying synchronization object.
* ``in-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation waits
for these before starting.
* ``out-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation
signals these when the bind operation is complete.
* ``dma-fence``: A cross-driver synchronization object. A basic
understanding of dma-fences is required to digest this
document. Please refer to the ``DMA Fences`` section of the
:doc:`dma-buf doc </driver-api/dma-buf>`.
* ``memory fence``: A synchronization object, different from a dma-fence.
A memory fence uses the value of a specified memory location to determine
signaled status. A memory fence can be awaited and signaled by both
the GPU and CPU. Memory fences are sometimes referred to as
user-fences, userspace-fences or gpu futexes and do not necessarily obey
the dma-fence rule of signaling within a "reasonable amount of time".
The kernel should thus avoid waiting for memory fences with locks held.
* ``long-running workload``: A workload that may take more than the
current stipulated dma-fence maximum signal delay to complete and
which therefore needs to set the gpu_vm or the GPU execution context in
a certain mode that disallows completion dma-fences.
* ``exec function``: An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU command's activity with all affected
dma_resvs. For completeness, although not covered by this document,
it's worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.
* ``bind context``: A context identifier used for the VM_BIND
operation. VM_BIND operations that use the same bind context can be
assumed, where it matters, to complete in order of submission. No such
assumptions can be made for VM_BIND operations using separate bind contexts.
* ``UMD``: User-mode driver.
* ``KMD``: Kernel-mode driver.
Synchronous / Asynchronous VM_BIND operation
============================================
Synchronous VM_BIND
___________________
With Synchronous VM_BIND, the VM_BIND operations all complete before the
IOCTL returns. A synchronous VM_BIND takes neither in-fences nor
out-fences. Synchronous VM_BIND may block and wait for GPU operations;
for example swap-in or clearing, or even previous binds.
Asynchronous VM_BIND
____________________
Asynchronous VM_BIND accepts both in-syncobjs and out-syncobjs. While the
IOCTL may return immediately, the VM_BIND operations wait for the in-syncobjs
before modifying the GPU page-tables, and signal the out-syncobjs when
the modification is done in the sense that the next exec function that
awaits the out-syncobjs will see the change. Errors are reported
synchronously.
In low-memory situations the implementation may block, performing the
VM_BIND synchronously, because there might not be enough memory
immediately available for preparing the asynchronous operation.
If the VM_BIND IOCTL takes a list or an array of operations as an argument,
the in-syncobjs need to signal before the first operation starts to
execute, and the out-syncobjs signal after the last operation
completes. Operations in the operation list can be assumed, where it
matters, to complete in order.
Since asynchronous VM_BIND operations may use dma-fences embedded in
out-syncobjs and internally in KMD to signal bind completion, any
memory fences given as VM_BIND in-fences need to be awaited
synchronously before the VM_BIND ioctl returns, since dma-fences,
required to signal in a reasonable amount of time, can never be made
to depend on memory fences that don't have such a restriction.
The purpose of an Asynchronous VM_BIND operation is for user-mode
drivers to be able to pipeline interleaved gpu_vm modifications and
exec functions. For long-running workloads, such pipelining of a bind
operation is not allowed and any in-fences need to be awaited
synchronously. The reason for this is twofold. First, any memory
fences gated by a long-running workload and used as in-syncobjs for the
VM_BIND operation will need to be awaited synchronously anyway (see
above). Second, any dma-fences used as in-syncobjs for VM_BIND
operations for long-running workloads will not allow for pipelining
anyway since long-running workloads don't allow for dma-fences as
out-syncobjs, so while theoretically possible the use of them is
questionable and should be rejected until there is a valuable use-case.
Note that this is not a limitation imposed by dma-fence rules, but
rather a limitation imposed to keep KMD implementation simple. It does
not affect using dma-fences as dependencies for the long-running
workload itself, which is allowed by dma-fence rules, but rather for
the VM_BIND operation only.
An asynchronous VM_BIND operation may take substantial time to
complete and signal the out_fence. In particular if the operation is
deeply pipelined behind other VM_BIND operations and workloads
submitted using exec functions. In that case, UMD might want to avoid a
subsequent VM_BIND operation to be queued behind the first one if
there are no explicit dependencies. In order to circumvent such a queue-up, a
VM_BIND implementation may allow for VM_BIND contexts to be
created. For each context, VM_BIND operations will be guaranteed to
complete in the order they were submitted, but that is not the case
for VM_BIND operations executing on separate VM_BIND contexts. Instead
KMD will attempt to execute such VM_BIND operations in parallel but
leaving no guarantee that they will actually be executed in
parallel. There may be internal implicit dependencies that only KMD knows
about, for example page-table structure changes. A way to attempt
to avoid such internal dependencies is to have different VM_BIND
contexts use separate regions of a VM.
Also for VM_BINDS for long-running gpu_vms the user-mode driver should typically
select memory fences as out-fences since that gives greater flexibility for
the kernel mode driver to inject other operations into the bind /
unbind operations, for example inserting breakpoints into batch
buffers. The workload execution can then easily be pipelined behind
the bind completion using the memory out-fence as the signal condition
for a GPU semaphore embedded by UMD in the workload.
There is no difference in the operations supported or in
multi-operation support between asynchronous VM_BIND and synchronous VM_BIND.
Multi-operation VM_BIND IOCTL error handling and interrupts
===========================================================
The VM_BIND operations of the IOCTL may error for various reasons, for
example due to lack of resources to complete and due to interrupted
waits.
In these situations UMD should preferably restart the IOCTL after
taking suitable action.
If UMD has over-committed a memory resource, an -ENOSPC error will be
returned, and UMD may then unbind resources that are not used at the
moment and rerun the IOCTL. On -EINTR, UMD should simply rerun the
IOCTL and on -ENOMEM user-space may either attempt to free known
system memory resources or fail. In case of UMD deciding to fail a
bind operation, due to an error return, no additional action is needed
to clean up the failed operation, and the VM is left in the same state
as it was before the failing IOCTL.
Unbind operations are guaranteed not to return any errors due to
resource constraints, but may return errors due to, for example,
invalid arguments or the gpu_vm being banned.
In the case an unexpected error happens during the asynchronous bind
process, the gpu_vm will be banned, and attempts to use it after banning
will return -ENOENT.
Example: The Xe VM_BIND uAPI
============================
Starting with the VM_BIND operation struct, the IOCTL call can take
zero, one or many such operations. A zero number means only the
synchronization part of the IOCTL is carried out: an asynchronous
VM_BIND updates the syncobjects, whereas a sync VM_BIND waits for the
implicit dependencies to be fulfilled.
.. code-block:: c
struct drm_xe_vm_bind_op {
/**
* @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
*/
__u32 obj;
/** @pad: MBZ */
__u32 pad;
union {
/**
* @obj_offset: Offset into the object for MAP.
*/
__u64 obj_offset;
/** @userptr: user virtual address for MAP_USERPTR */
__u64 userptr;
};
/**
* @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
*/
__u64 range;
/** @addr: Address to operate on, MBZ for UNMAP_ALL */
__u64 addr;
/**
* @tile_mask: Mask for which tiles to create binds for, 0 == All tiles,
* only applies to creating new VMAs
*/
__u64 tile_mask;
/* Map (parts of) an object into the GPU virtual address range. */
#define XE_VM_BIND_OP_MAP 0x0
/* Unmap a GPU virtual address range */
#define XE_VM_BIND_OP_UNMAP 0x1
/*
* Map a CPU virtual address range into a GPU virtual
* address range.
*/
#define XE_VM_BIND_OP_MAP_USERPTR 0x2
/* Unmap a gem object from the VM. */
#define XE_VM_BIND_OP_UNMAP_ALL 0x3
/*
* Make the backing memory of an address range resident if
* possible. Note that this doesn't pin backing memory.
*/
#define XE_VM_BIND_OP_PREFETCH 0x4
/* Make the GPU map readonly. */
#define XE_VM_BIND_FLAG_READONLY (0x1 << 16)
/*
* Valid on a faulting VM only, do the MAP operation immediately rather
* than deferring the MAP to the page fault handler.
*/
#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 17)
/*
* When the NULL flag is set, the page tables are setup with a special
* bit which indicates writes are dropped and all reads return zero. In
* the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP
* operations, the BO handle MBZ, and the BO offset MBZ. This flag is
* intended to implement VK sparse bindings.
*/
#define XE_VM_BIND_FLAG_NULL (0x1 << 18)
/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
__u32 op;
/** @region: Memory region to prefetch VMA to, instance not a mask */
__u32 region;
/** @reserved: Reserved */
__u64 reserved[2];
};
The VM_BIND IOCTL argument itself looks as follows. Note that for
synchronous VM_BIND, the num_syncs and syncs fields must be zero. Here
the ``exec_queue_id`` field is the VM_BIND context discussed previously
that is used to facilitate out-of-order VM_BINDs.
.. code-block:: c
struct drm_xe_vm_bind {
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
/** @vm_id: The ID of the VM to bind to */
__u32 vm_id;
/**
* @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
* and exec queue must have same vm_id. If zero, the default VM bind engine
* is used.
*/
__u32 exec_queue_id;
/** @num_binds: number of binds in this IOCTL */
__u32 num_binds;
/* If set, perform an async VM_BIND, if clear a sync VM_BIND */
#define XE_VM_BIND_IOCTL_FLAG_ASYNC (0x1 << 0)
/** @flags: Flags controlling all operations in this ioctl. */
__u32 flags;
union {
/** @bind: used if num_binds == 1 */
struct drm_xe_vm_bind_op bind;
/**
* @vector_of_binds: userptr to array of struct
* drm_xe_vm_bind_op if num_binds > 1
*/
__u64 vector_of_binds;
};
/** @num_syncs: amount of syncs to wait for or to signal on completion. */
__u32 num_syncs;
/** @pad2: MBZ */
__u32 pad2;
/** @syncs: pointer to struct drm_xe_sync array */
__u64 syncs;
/** @reserved: Reserved */
__u64 reserved[2];
};

View File

@ -267,19 +267,22 @@ i915 driver.
Intel GPU Basics
----------------
An Intel GPU has multiple engines. There are several engine types.
An Intel GPU has multiple engines. There are several engine types:
- RCS engine is for rendering 3D and performing compute, this is named
`I915_EXEC_RENDER` in user space.
- BCS is a blitting (copy) engine, this is named `I915_EXEC_BLT` in user
space.
- VCS is a video encode and decode engine, this is named `I915_EXEC_BSD`
in user space
- VECS is video enhancement engine, this is named `I915_EXEC_VEBOX` in user
space.
- The enumeration `I915_EXEC_DEFAULT` does not refer to specific engine;
instead it is to be used by user space to specify a default rendering
engine (for 3D) that may or may not be the same as RCS.
- Render Command Streamer (RCS). An engine for rendering 3D and
performing compute.
- Blitting Command Streamer (BCS). An engine for performing blitting and/or
copying operations.
- Video Command Streamer (VCS). An engine used for video encoding and decoding. Also
sometimes called 'BSD' in hardware documentation.
- Video Enhancement Command Streamer (VECS). An engine for video enhancement.
Also sometimes called 'VEBOX' in hardware documentation.
- Compute Command Streamer (CCS). An engine that has access to the media and
GPGPU pipelines, but not the 3D pipeline.
- Graphics Security Controller (GSCCS). A dedicated engine for internal
communication with GSC controller on security related tasks like
High-bandwidth Digital Content Protection (HDCP), Protected Xe Path (PXP),
and HuC firmware authentication.
The Intel GPU family is a family of integrated GPU's using Unified
Memory Access. For having the GPU "do work", user space will feed the

View File

@ -0,0 +1,9 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
===========================================================
Misc DRM driver uAPI- and feature implementation guidelines
===========================================================
.. toctree::
drm-vm-bind-async

View File

@ -18,6 +18,7 @@ GPU Driver Developer's Guide
vga-switcheroo
vgaarbiter
automated_testing
implementation_guidelines
todo
rfc/index

View File

@ -0,0 +1,40 @@
.. SPDX-License-Identifier: GPL-2.0+
=========================
drm/Panfrost Mali Driver
=========================
.. _panfrost-usage-stats:
Panfrost DRM client usage stats implementation
==============================================
The drm/Panfrost driver implements the DRM client usage stats specification as
documented in :ref:`drm-client-usage-stats`.
Example of the output showing the implemented key value pairs and entirety of
the currently possible format options:
::
pos: 0
flags: 02400002
mnt_id: 27
ino: 531
drm-driver: panfrost
drm-client-id: 14
drm-engine-fragment: 1846584880 ns
drm-cycles-fragment: 1424359409
drm-maxfreq-fragment: 799999987 Hz
drm-curfreq-fragment: 799999987 Hz
drm-engine-vertex-tiler: 71932239 ns
drm-cycles-vertex-tiler: 52617357
drm-maxfreq-vertex-tiler: 799999987 Hz
drm-curfreq-vertex-tiler: 799999987 Hz
drm-total-memory: 290 MiB
drm-shared-memory: 0 MiB
drm-active-memory: 226 MiB
drm-resident-memory: 36496 KiB
drm-purgeable-memory: 128 KiB
Possible `drm-engine-` key names are: `fragment`, and `vertex-tiler`.
`drm-curfreq-` values convey the current operating frequency for that engine.

View File

@ -67,14 +67,8 @@ platforms.
When the time comes for Xe, the protection will be lifted on Xe and kept in i915.
Xe driver will be protected with both STAGING Kconfig and force_probe. Changes in
the uAPI are expected while the driver is behind these protections. STAGING will
be removed when the driver uAPI gets to a mature state where we can guarantee the
no regression rule. Then force_probe will be lifted only for future platforms
that will be productized with Xe driver, but not with i915.
Xe Pre-Merge Goals
====================
Xe Pre-Merge Goals - Work-in-Progress
=======================================
Drm_scheduler
-------------
@ -94,41 +88,6 @@ depend on any other patch touching drm_scheduler itself that was not yet merged
through drm-misc. This, by itself, already includes the reach of an agreement for
uniform 1 to 1 relationship implementation / usage across drivers.
GPU VA
------
Two main goals of Xe are meeting together here:
1) Have an uAPI that aligns with modern UMD needs.
2) Early upstream engagement.
RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
track of GPU virtual address mappings. This is still not merged upstream, but
this aligns very well with our goals and with our VM_BIND. The engagement with
upstream and the port of Xe towards GPUVA is already ongoing.
As a key measurable result, Xe needs to be aligned with the GPU VA and working in
our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
related patch should be independent and present on dri-devel or acked by
maintainers to go along with the first Xe pull request towards drm-next.
DRM_VM_BIND
-----------
Nouveau, and Xe are all implementing VM_BIND and new Exec uAPIs in order to
fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
development of a common new drm_infrastructure. However, the Xe team needs to
engage with the community to explore the options of a common API.
As a key measurable result, the DRM_VM_BIND needs to be documented in this file
below, or this entire block deleted if the consensus is for independent drivers
vm_bind ioctls.
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to enforce the overall locking scheme for all major
structs and list (so vm and vma). So, a consensus is needed, and possibly some
common helpers. If helpers are needed, they should be also documented in this
document.
ASYNC VM_BIND
-------------
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
@ -138,8 +97,8 @@ memory fences. Ideally with helper support so people don't get it wrong in all
possible ways.
As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
various flavors, error handling and a sample API should be documented here or in
a separate document pointed to by this document.
various flavors, error handling and sample API suggestions are documented in
:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
Userptr integration and vm_bind
-------------------------------
@ -212,6 +171,14 @@ This item ties into the GPUVA, VM_BIND, and even long-running compute support.
As a key measurable result, we need to have a community consensus documented in
this document and the Xe driver prepared for the changes, if necessary.
Xe uAPI high level overview
=============================
...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
Xe Pre-Merge Goals - Completed
================================
Dev_coredump
------------
@ -229,7 +196,37 @@ infrastructure with overall possible improvements, like multiple file support
for better organization of the dumps, snapshot support, dmesg extra print,
and whatever may make sense and help the overall infrastructure.
Xe uAPI high level overview
=============================
DRM_VM_BIND
-----------
Nouveau and Xe are both implementing VM_BIND and new Exec uAPIs in order to
fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
development of a common new drm_infrastructure. However, the Xe team needs to
engage with the community to explore the options of a common API.
...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
As a key measurable result, the DRM_VM_BIND needs to be documented in this file
below, or this entire block deleted if the consensus is for independent drivers
vm_bind ioctls.
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to enforce the overall locking scheme for all major
structs and list (so vm and vma). So, a consensus is needed, and possibly some
common helpers. If helpers are needed, they should be also documented in this
document.
GPU VA
------
Two main goals of Xe are meeting together here:
1) Have an uAPI that aligns with modern UMD needs.
2) Early upstream engagement.
RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
track of GPU virtual address mappings. This is still not merged upstream, but
this aligns very well with our goals and with our VM_BIND. The engagement with
upstream and the port of Xe towards GPUVA is already ongoing.
As a key measurable result, Xe needs to be aligned with the GPU VA and working in
our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
related patch should be independent and present on dri-devel or acked by
maintainers to go along with the first Xe pull request towards drm-next.

View File

@ -0,0 +1,389 @@
.. SPDX-License-Identifier: GPL-2.0
.. Copyright 2021-2023 Collabora Ltd.
========================
Exchanging pixel buffers
========================
As originally designed, the Linux graphics subsystem had extremely limited
support for sharing pixel-buffer allocations between processes, devices, and
subsystems. Modern systems require extensive integration between all three
classes; this document details how applications and kernel subsystems should
approach this sharing for two-dimensional image data.
It is written with reference to the DRM subsystem for GPU and display devices,
V4L2 for media devices, and also to Vulkan, EGL and Wayland, for userspace
support, however any other subsystems should also follow this design and advice.
Glossary of terms
=================
.. glossary::
image:
Conceptually a two-dimensional array of pixels. The pixels may be stored
in one or more memory buffers. Has width and height in pixels, pixel
format and modifier (implicit or explicit).
row:
A span along a single y-axis value, e.g. from co-ordinates (0,100) to
(200,100).
scanline:
Synonym for row.
column:
A span along a single x-axis value, e.g. from co-ordinates (100,0) to
(100,100).
memory buffer:
A piece of memory for storing (parts of) pixel data. Has stride and size
in bytes and at least one handle in some API. May contain one or more
planes.
plane:
A two-dimensional array of some or all of an image's color and alpha
channel values.
pixel:
A picture element. Has a single color value which is defined by one or
more color channel values, e.g. R, G and B, or Y, Cb and Cr. May also
have an alpha value as an additional channel.
pixel data:
Bytes or bits that represent some or all of the color/alpha channel values
of a pixel or an image. The data for one pixel may be spread over several
planes or memory buffers depending on format and modifier.
color value:
A tuple of numbers, representing a color. Each element in the tuple is a
color channel value.
color channel:
One of the dimensions in a color model. For example, RGB model has
channels R, G, and B. Alpha channel is sometimes counted as a color
channel as well.
pixel format:
A description of how pixel data represents the pixel's color and alpha
values.
modifier:
A description of how pixel data is laid out in memory buffers.
alpha:
A value that denotes the color coverage in a pixel. Sometimes used for
translucency instead.
stride:
A value that denotes the relationship between pixel-location co-ordinates
and byte-offset values. Typically used as the byte offset between two
pixels at the start of vertically-consecutive tiling blocks. For linear
layouts, the byte offset between two vertically-adjacent pixels. For
non-linear formats the stride must be computed in a consistent way, which
is usually done as if the layout were linear.
pitch:
Synonym for stride.
Formats and modifiers
=====================
Each buffer must have an underlying format. This format describes the color
values provided for each pixel. Although each subsystem has its own format
descriptions (e.g. V4L2 and fbdev), the ``DRM_FORMAT_*`` tokens should be reused
wherever possible, as they are the standard descriptions used for interchange.
These tokens are described in the ``drm_fourcc.h`` file, which is a part of
DRM's uAPI.
Each ``DRM_FORMAT_*`` token describes the translation between a pixel
co-ordinate in an image, and the color values for that pixel contained within
its memory buffers. The number and type of color channels are described:
whether they are RGB or YUV, integer or floating-point, the size of each channel
and their locations within the pixel memory, and the relationship between color
planes.
For example, ``DRM_FORMAT_ARGB8888`` describes a format in which each pixel has
a single 32-bit value in memory. Alpha, red, green, and blue color channels are
available at 8-bit precision per channel, ordered respectively from most to
least significant bits in little-endian storage. ``DRM_FORMAT_*`` is not
affected by either CPU or device endianness; the byte pattern in memory is
always as described in the format definition, which is usually little-endian.
As a more complex example, ``DRM_FORMAT_NV12`` describes a format in which luma
and chroma YUV samples are stored in separate planes, where the chroma plane is
stored at half the resolution in both dimensions (i.e. one U/V chroma
sample is stored for each 2x2 pixel grouping).
Format modifiers describe a translation mechanism between these per-pixel memory
samples, and the actual memory storage for the buffer. The most straightforward
modifier is ``DRM_FORMAT_MOD_LINEAR``, describing a scheme in which each plane
is laid out row-sequentially, from the top-left to the bottom-right corner.
This is considered the baseline interchange format, and most convenient for CPU
access.
Modern hardware employs much more sophisticated access mechanisms, typically
making use of tiled access and possibly also compression. For example, the
``DRM_FORMAT_MOD_VIVANTE_TILED`` modifier describes memory storage where pixels
are stored in 4x4 blocks arranged in row-major ordering, i.e. the first tile in
a plane stores pixels (0,0) to (3,3) inclusive, and the second tile in a plane
stores pixels (4,0) to (7,3) inclusive.
Some modifiers may modify the number of planes required for an image; for
example, the ``I915_FORMAT_MOD_Y_TILED_CCS`` modifier adds a second plane to RGB
formats in which it stores data about the status of every tile, notably
including whether the tile is fully populated with pixel data, or can be
expanded from a single solid color.
These extended layouts are highly vendor-specific, and even specific to
particular generations or configurations of devices per-vendor. For this reason,
support of modifiers must be explicitly enumerated and negotiated by all users
in order to ensure a compatible and optimal pipeline, as discussed below.
Dimensions and size
===================
Each pixel buffer must be accompanied by logical pixel dimensions. This refers
to the number of unique samples which can be extracted from, or stored to, the
underlying memory storage. For example, even though a 1920x1080
``DRM_FORMAT_NV12`` buffer has a luma plane containing 1920x1080 samples for the Y
component, and 960x540 samples for the U and V components, the overall buffer is
still described as having dimensions of 1920x1080.
The in-memory storage of a buffer is not guaranteed to begin immediately at the
base address of the underlying memory, nor is it guaranteed that the memory
storage is tightly clipped to either dimension.
Each plane must therefore be described with an ``offset`` in bytes, which will be
added to the base address of the memory storage before performing any per-pixel
calculations. This may be used to combine multiple planes into a single memory
buffer; for example, ``DRM_FORMAT_NV12`` may be stored in a single memory buffer
where the luma plane's storage begins immediately at the start of the buffer
with an offset of 0, and the chroma plane's storage follows within the same buffer
beginning from the byte offset for that plane.
Each plane must also have a ``stride`` in bytes, expressing the offset in memory
between two contiguous rows. For example, a ``DRM_FORMAT_MOD_LINEAR`` buffer
with dimensions of 1000x1000 may have been allocated as if it were 1024x1000, in
order to allow for aligned access patterns. In this case, the buffer will still
be described with a width of 1000, however the stride will be ``1024 * bpp``,
indicating that there are 24 pixels at the positive extreme of the x axis whose
values are not significant.
Buffers may also be padded further in the y dimension, simply by allocating a
larger area than would ordinarily be required. For example, many media decoders
are not able to natively output buffers of height 1080, but instead require an
effective height of 1088 pixels. In this case, the buffer continues to be
described as having a height of 1080, with the memory allocation for each buffer
being increased to account for the extra padding.
Enumeration
===========
Every user of pixel buffers must be able to enumerate a set of supported formats
and modifiers, described together. Within KMS, this is achieved with the
``IN_FORMATS`` property on each DRM plane, listing the supported DRM formats, and
the modifiers supported for each format. In userspace, this is supported through
the `EGL_EXT_image_dma_buf_import_modifiers`_ extension entrypoints for EGL, the
`VK_EXT_image_drm_format_modifier`_ extension for Vulkan, and the
`zwp_linux_dmabuf_v1`_ extension for Wayland.
Each of these interfaces allows users to query a set of supported
format+modifier combinations.
Negotiation
===========
It is the responsibility of userspace to negotiate an acceptable format+modifier
combination for its usage. This is performed through a simple intersection of
lists. For example, if a user wants to use Vulkan to render an image to be
displayed on a KMS plane, it must:
- query KMS for the ``IN_FORMATS`` property for the given plane
- query Vulkan for the supported formats for its physical device, making sure
to pass the ``VkImageUsageFlagBits`` and ``VkImageCreateFlagBits``
corresponding to the intended rendering use
- intersect these formats to determine the most appropriate one
- for this format, intersect the lists of supported modifiers for both KMS and
Vulkan, to obtain a final list of acceptable modifiers for that format
This intersection must be performed for all usages. For example, if the user
also wishes to encode the image to a video stream, it must query the media API
it intends to use for encoding for the set of modifiers it supports, and
additionally intersect against this list.
If the intersection of all lists is an empty list, it is not possible to share
buffers in this way, and an alternate strategy must be considered (e.g. using
CPU access routines to copy data between the different uses, with the
corresponding performance cost).
The resulting modifier list is unsorted; the order is not significant.
Allocation
==========
Once userspace has determined an appropriate format, and corresponding list of
acceptable modifiers, it must allocate the buffer. As there is no universal
buffer-allocation interface available at either kernel or userspace level, the
client makes an arbitrary choice of allocation interface such as Vulkan, GBM, or
a media API.
Each allocation request must take, at a minimum: the pixel format, a list of
acceptable modifiers, and the buffer's width and height. Each API may extend
this set of properties in different ways, such as allowing allocation in more
than two dimensions, intended usage patterns, etc.
The component which allocates the buffer will make an arbitrary choice of what
it considers the 'best' modifier within the acceptable list for the requested
allocation, any padding required, and further properties of the underlying
memory buffers such as whether they are stored in system or device-specific
memory, whether or not they are physically contiguous, and their cache mode.
These properties of the memory buffer are not visible to userspace, however the
``dma-heaps`` API is an effort to address this.
After allocation, the client must query the allocator to determine the actual
modifier selected for the buffer, as well as the per-plane offset and stride.
Allocators are not permitted to vary the format in use, to select a modifier not
provided within the acceptable list, nor to vary the pixel dimensions other than
the padding expressed through offset, stride, and size.
Communicating additional constraints, such as alignment of stride or offset,
placement within a particular memory area, etc, is out of scope of dma-buf,
and is not solved by format and modifier tokens.
Import
======
To use a buffer within a different context, device, or subsystem, the user
passes these parameters (format, modifier, width, height, and per-plane offset
and stride) to an importing API.
Each memory buffer is referred to by a buffer handle, which may be unique or
duplicated within an image. For example, a ``DRM_FORMAT_NV12`` buffer may have
the luma and chroma buffers combined into a single memory buffer by use of the
per-plane offset parameters, or they may be completely separate allocations in
memory. For this reason, each import and allocation API must provide a separate
handle for each plane.
Each kernel subsystem has its own types and interfaces for buffer management.
DRM uses GEM buffer objects (BOs), V4L2 has its own references, etc. These types
are not portable between contexts, processes, devices, or subsystems.
To address this, ``dma-buf`` handles are used as the universal interchange for
buffers. Subsystem-specific operations are used to export native buffer handles
to a ``dma-buf`` file descriptor, and to import those file descriptors into a
native buffer handle. dma-buf file descriptors can be transferred between
contexts, processes, devices, and subsystems.
For example, a Wayland media player may use V4L2 to decode a video frame into a
``DRM_FORMAT_NV12`` buffer. This will result in two memory planes (luma and
chroma) being dequeued by the user from V4L2. These planes are then exported to
one dma-buf file descriptor per plane; these descriptors are then sent along
with the metadata (format, modifier, width, height, per-plane offset and stride)
to the Wayland server. The Wayland server will then import these file
descriptors as an EGLImage for use through EGL/OpenGL (ES), a VkImage for use
through Vulkan, or a KMS framebuffer object; each of these import operations
will take the same metadata and convert the dma-buf file descriptors into their
native buffer handles.
Having a non-empty intersection of supported modifiers does not guarantee that
import will succeed into all consumers; they may have constraints beyond those
implied by modifiers which must be satisfied.
Implicit modifiers
==================
The concept of modifiers post-dates all of the subsystems mentioned above. As
such, it has been retrofitted into all of these APIs, and in order to ensure
backwards compatibility, support is needed for drivers and userspace which do
not (yet) support modifiers.
As an example, GBM is used to allocate buffers to be shared between EGL for
rendering and KMS for display. It has two entrypoints for allocating buffers:
``gbm_bo_create`` which only takes the format, width, height, and a usage token,
and ``gbm_bo_create_with_modifiers`` which extends this with a list of modifiers.
In the latter case, the allocation is as discussed above, being provided with a
list of acceptable modifiers that the implementation can choose from (or fail if
it is not possible to allocate within those constraints). In the former case
where modifiers are not provided, the GBM implementation must make its own
choice as to what is likely to be the 'best' layout. Such a choice is entirely
implementation-specific: some will internally use tiled layouts which are not
CPU-accessible if the implementation decides that is a good idea through
whatever heuristic. It is the implementation's responsibility to ensure that
this choice is appropriate.
To support this case where the layout is not known because there is no awareness
of modifiers, a special ``DRM_FORMAT_MOD_INVALID`` token has been defined. This
pseudo-modifier declares that the layout is not known, and that the driver
should use its own logic to determine what the underlying layout may be.
.. note::
``DRM_FORMAT_MOD_INVALID`` is a non-zero value. The modifier value zero is
``DRM_FORMAT_MOD_LINEAR``, which is an explicit guarantee that the image
has the linear layout. Care and attention should be taken to ensure that
zero as a default value is not mixed up with either no modifier or the linear
modifier. Also note that in some APIs the invalid modifier value is specified
with an out-of-band flag, like in ``DRM_IOCTL_MODE_ADDFB2``.
There are four cases where this token may be used:
- during enumeration, an interface may return ``DRM_FORMAT_MOD_INVALID``, either
as the sole member of a modifier list to declare that explicit modifiers are
not supported, or as part of a larger list to declare that implicit modifiers
may be used
- during allocation, a user may supply ``DRM_FORMAT_MOD_INVALID``, either as the
sole member of a modifier list (equivalent to not supplying a modifier list
at all) to declare that explicit modifiers are not supported and must not be
used, or as part of a larger list to declare that an allocation using implicit
modifiers is acceptable
- in a post-allocation query, an implementation may return
``DRM_FORMAT_MOD_INVALID`` as the modifier of the allocated buffer to declare
that the underlying layout is implementation-defined and that an explicit
modifier description is not available; per the above rules, this may only be
returned when the user has included ``DRM_FORMAT_MOD_INVALID`` as part of the
list of acceptable modifiers, or not provided a list
- when importing a buffer, the user may supply ``DRM_FORMAT_MOD_INVALID`` as the
buffer modifier (or not supply a modifier) to indicate that the modifier is
unknown for whatever reason; this is only acceptable when the buffer has
not been allocated with an explicit modifier
It follows from this that for any single buffer, the complete chain of operations
formed by the producer and all the consumers must be either fully implicit or fully
explicit. For example, if a user wishes to allocate a buffer for use between
GPU, display, and media, but the media API does not support modifiers, then the
user **must not** allocate the buffer with explicit modifiers and attempt to
import the buffer into the media API with no modifier, but either perform the
allocation using implicit modifiers, or allocate the buffer for media use
separately and copy between the two buffers.
As one exception to the above, allocations may be 'upgraded' from implicit
to explicit modifiers. For example, if the buffer is allocated with
``gbm_bo_create`` (taking no modifiers), the user may then query the modifier with
``gbm_bo_get_modifier`` and then use this modifier as an explicit modifier token
if a valid modifier is returned.
When allocating buffers for exchange between different users and modifiers are
not available, implementations are strongly encouraged to use
``DRM_FORMAT_MOD_LINEAR`` for their allocation, as this is the universal baseline
for exchange. However, it is not guaranteed that this will result in the correct
interpretation of buffer content, as implicit modifier operation may still be
subject to driver-specific heuristics.
Any new users - userspace programs and protocols, kernel subsystems, etc -
wishing to exchange buffers must offer interoperability through dma-buf file
descriptors for memory planes, DRM format tokens to describe the format, DRM
format modifiers to describe the layout in memory, at least width and height for
dimensions, and at least offset and stride for each memory plane.
.. _zwp_linux_dmabuf_v1: https://gitlab.freedesktop.org/wayland/wayland-protocols/-/blob/main/unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml
.. _VK_EXT_image_drm_format_modifier: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_image_drm_format_modifier.html
.. _EGL_EXT_image_dma_buf_import_modifiers: https://registry.khronos.org/EGL/extensions/EXT/EGL_EXT_image_dma_buf_import_modifiers.txt

View File

@ -22,6 +22,7 @@ place where this information is gathered.
unshare
spec_ctrl
accelerators/ocxl
dma-buf-alloc-exchange
ebpf/index
ELF
ioctl/index

View File

@ -1636,13 +1636,13 @@ F: drivers/gpu/drm/arm/display/include/
F: drivers/gpu/drm/arm/display/komeda/
ARM MALI PANFROST DRM DRIVER
M: Boris Brezillon <boris.brezillon@collabora.com>
M: Rob Herring <robh@kernel.org>
M: Tomeu Vizoso <tomeu.vizoso@collabora.com>
R: Steven Price <steven.price@arm.com>
R: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
L: dri-devel@lists.freedesktop.org
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/gpu/panfrost.rst
F: drivers/gpu/drm/panfrost/
F: include/uapi/drm/panfrost_drm.h
@ -6175,6 +6175,7 @@ L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/driver-api/dma-buf.rst
F: Documentation/userspace-api/dma-buf-alloc-exchange.rst
F: drivers/dma-buf/
F: include/linux/*fence.h
F: include/linux/dma-buf.h
@ -6667,6 +6668,7 @@ S: Maintained
B: https://gitlab.freedesktop.org/drm/msm/-/issues
T: git https://gitlab.freedesktop.org/drm/msm.git
F: Documentation/devicetree/bindings/display/msm/
F: drivers/gpu/drm/ci/xfails/msm*
F: drivers/gpu/drm/msm/
F: include/uapi/drm/msm_drm.h
@ -6818,7 +6820,8 @@ DRM DRIVER FOR SOLOMON SSD130X OLED DISPLAYS
M: Javier Martinez Canillas <javierm@redhat.com>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml
F: Documentation/devicetree/bindings/display/solomon,ssd-common.yaml
F: Documentation/devicetree/bindings/display/solomon,ssd13*.yaml
F: drivers/gpu/drm/solomon/ssd130x*
DRM DRIVER FOR ST-ERICSSON MCDE
@ -6913,12 +6916,26 @@ M: Thomas Zimmermann <tzimmermann@suse.de>
S: Maintained
W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/
F: Documentation/devicetree/bindings/gpu/
F: Documentation/gpu/
F: drivers/gpu/drm/*
F: drivers/gpu/drm/
F: drivers/gpu/vga/
F: include/drm/drm*
F: include/drm/drm
F: include/linux/vga*
F: include/uapi/drm/drm*
F: include/uapi/drm/
X: drivers/gpu/drm/amd/
X: drivers/gpu/drm/armada/
X: drivers/gpu/drm/etnaviv/
X: drivers/gpu/drm/exynos/
X: drivers/gpu/drm/i915/
X: drivers/gpu/drm/kmb/
X: drivers/gpu/drm/mediatek/
X: drivers/gpu/drm/msm/
X: drivers/gpu/drm/nouveau/
X: drivers/gpu/drm/radeon/
X: drivers/gpu/drm/renesas/
X: drivers/gpu/drm/tegra/
DRM DRIVERS FOR ALLWINNER A10
M: Maxime Ripard <mripard@kernel.org>
@ -6939,6 +6956,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
F: Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
F: Documentation/gpu/meson.rst
F: drivers/gpu/drm/ci/xfails/meson*
F: drivers/gpu/drm/meson/
DRM DRIVERS FOR ATMEL HLCDC
@ -6962,7 +6980,9 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/bridge/
F: drivers/gpu/drm/bridge/
F: drivers/gpu/drm/drm_bridge.c
F: drivers/gpu/drm/drm_bridge_connector.c
F: include/drm/drm_bridge.h
F: include/drm/drm_bridge_connector.h
DRM DRIVERS FOR EXYNOS
M: Inki Dae <inki.dae@samsung.com>
@ -6986,10 +7006,12 @@ F: Documentation/devicetree/bindings/display/fsl,dcu.txt
F: Documentation/devicetree/bindings/display/fsl,tcon.txt
F: drivers/gpu/drm/fsl-dcu/
DRM DRIVERS FOR FREESCALE IMX
DRM DRIVERS FOR FREESCALE IMX 5/6
M: Philipp Zabel <p.zabel@pengutronix.de>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
T: git git://git.pengutronix.de/git/pza/linux
F: Documentation/devicetree/bindings/display/imx/
F: drivers/gpu/drm/imx/ipuv3/
F: drivers/gpu/ipu-v3/
@ -7008,7 +7030,7 @@ DRM DRIVERS FOR GMA500 (Poulsbo, Moorestown and derivative chipsets)
M: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://github.com/patjak/drm-gma500
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/gma500/
DRM DRIVERS FOR HISILICON
@ -7047,6 +7069,7 @@ L: dri-devel@lists.freedesktop.org
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/display/mediatek/
F: drivers/gpu/drm/ci/xfails/mediatek*
F: drivers/gpu/drm/mediatek/
F: drivers/phy/mediatek/phy-mtk-dp.c
F: drivers/phy/mediatek/phy-mtk-hdmi*
@ -7087,6 +7110,7 @@ L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/rockchip/
F: drivers/gpu/drm/ci/xfails/rockchip*
F: drivers/gpu/drm/rockchip/
DRM DRIVERS FOR STI
@ -7183,7 +7207,7 @@ F: Documentation/devicetree/bindings/display/xlnx/
F: drivers/gpu/drm/xlnx/
DRM GPU SCHEDULER
M: Luben Tuikov <luben.tuikov@amd.com>
M: Luben Tuikov <ltuikov89@gmail.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@ -7192,6 +7216,7 @@ F: include/drm/gpu_scheduler.h
DRM PANEL DRIVERS
M: Neil Armstrong <neil.armstrong@linaro.org>
R: Jessica Zhang <quic_jesszhan@quicinc.com>
R: Sam Ravnborg <sam@ravnborg.org>
L: dri-devel@lists.freedesktop.org
S: Maintained
@ -9129,6 +9154,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
F: Documentation/ABI/testing/debugfs-driver-habanalabs
F: Documentation/ABI/testing/sysfs-driver-habanalabs
F: drivers/accel/habanalabs/
F: include/linux/habanalabs/
F: include/trace/events/habanalabs.h
F: include/uapi/drm/habanalabs_accel.h
@ -10535,6 +10561,7 @@ C: irc://irc.oftc.net/intel-gfx
T: git git://anongit.freedesktop.org/drm-intel
F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
F: Documentation/gpu/i915.rst
F: drivers/gpu/drm/ci/xfails/i915*
F: drivers/gpu/drm/i915/
F: include/drm/i915*
F: include/uapi/drm/i915_drm.h
@ -13577,7 +13604,7 @@ F: drivers/usb/mtu3/
MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
M: Peter Senna Tschudin <peter.senna@gmail.com>
M: Martin Donnelly <martin.donnelly@ge.com>
M: Ian Ray <ian.ray@ge.com>
M: Martyn Welch <martyn.welch@collabora.co.uk>
S: Maintained
F: Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt
@ -15414,6 +15441,7 @@ M: Laurentiu Palcu <laurentiu.palcu@oss.nxp.com>
R: Lucas Stach <l.stach@pengutronix.de>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml
F: drivers/gpu/drm/imx/dcss/
@ -17927,6 +17955,7 @@ C: irc://irc.oftc.net/radeon
T: git https://gitlab.freedesktop.org/agd5f/linux.git
F: Documentation/gpu/amdgpu/
F: drivers/gpu/drm/amd/
F: drivers/gpu/drm/ci/xfails/amd*
F: drivers/gpu/drm/radeon/
F: include/uapi/drm/amdgpu_drm.h
F: include/uapi/drm/radeon_drm.h
@ -22907,6 +22936,7 @@ L: dri-devel@lists.freedesktop.org
L: virtualization@lists.linux-foundation.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/ci/xfails/virtio*
F: drivers/gpu/drm/virtio/
F: include/uapi/linux/virtio_gpu.h

View File

@ -835,6 +835,7 @@ CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m
CONFIG_DRM_PANEL_LVDS=m
CONFIG_DRM_PANEL_SIMPLE=m
CONFIG_DRM_PANEL_EDP=m
CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_MANTIX_MLAF057WE51=m
CONFIG_DRM_PANEL_RAYDIUM_RM67191=m
CONFIG_DRM_PANEL_SITRONIX_ST7703=m

View File

@ -8,17 +8,16 @@
#include <asm/page.h>
struct file;
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (efi_range_is_wc(vm_start, vm_end - vm_start))
return pgprot_writecombine(prot);
else
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
return pgprot_noncached(prot);
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
{

View File

@ -5,26 +5,27 @@
#include <asm/page.h>
#include <asm/setup.h>
struct file;
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
#ifdef CONFIG_MMU
#ifdef CONFIG_SUN3
pgprot_val(vma->vm_page_prot) |= SUN3_PAGE_NOCACHE;
pgprot_val(prot) |= SUN3_PAGE_NOCACHE;
#else
if (CPU_IS_020_OR_030)
pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE030;
pgprot_val(prot) |= _PAGE_NOCACHE030;
if (CPU_IS_040_OR_060) {
pgprot_val(vma->vm_page_prot) &= _CACHEMASK040;
pgprot_val(prot) &= _CACHEMASK040;
/* Use no-cache mode, serialized */
pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE_S;
pgprot_val(prot) |= _PAGE_NOCACHE_S;
}
#endif /* CONFIG_SUN3 */
#endif /* CONFIG_MMU */
return prot;
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
#include <asm-generic/fb.h>

View File

@ -3,14 +3,13 @@
#include <asm/page.h>
struct file;
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
return pgprot_noncached(prot);
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
/*
* MIPS doesn't define __raw_ I/O macros, so the helpers

View File

@ -2,18 +2,20 @@
#ifndef _ASM_FB_H_
#define _ASM_FB_H_
#include <linux/fs.h>
#include <asm/page.h>
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
vma->vm_page_prot = phys_mem_access_prot(file, off >> PAGE_SHIFT,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
/*
* PowerPC's implementation of phys_mem_access_prot() does
* not use the file argument. Set it to NULL in preparation
* of later updates to the interface.
*/
return phys_mem_access_prot(NULL, PHYS_PFN(offset), vm_end - vm_start, prot);
}
#define fb_pgprotect fb_pgprotect
#define pgprot_framebuffer pgprot_framebuffer
#include <asm-generic/fb.h>

View File

@ -4,15 +4,18 @@
#include <linux/io.h>
#include <asm/page.h>
struct fb_info;
struct file;
struct vm_area_struct;
#ifdef CONFIG_SPARC32
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
{ }
#define fb_pgprotect fb_pgprotect
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
return prot;
}
#define pgprot_framebuffer pgprot_framebuffer
#endif
int fb_is_primary_device(struct fb_info *info);

View File

@ -2,12 +2,14 @@
#ifndef _ASM_X86_FB_H
#define _ASM_X86_FB_H
struct fb_info;
struct file;
struct vm_area_struct;
#include <asm/page.h>
void fb_pgprotect(struct file *file, struct vm_area_struct *vma, unsigned long off);
#define fb_pgprotect fb_pgprotect
struct fb_info;
pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset);
#define pgprot_framebuffer pgprot_framebuffer
int fb_is_primary_device(struct fb_info *info);
#define fb_is_primary_device fb_is_primary_device

View File

@ -13,16 +13,17 @@
#include <linux/vgaarb.h>
#include <asm/fb.h>
void fb_pgprotect(struct file *file, struct vm_area_struct *vma, unsigned long off)
pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long vm_start, unsigned long vm_end,
unsigned long offset)
{
unsigned long prot;
prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK;
pgprot_val(prot) &= ~_PAGE_CACHE_MASK;
if (boot_cpu_data.x86 > 3)
pgprot_val(vma->vm_page_prot) =
prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
pgprot_val(prot) |= cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
return prot;
}
EXPORT_SYMBOL(fb_pgprotect);
EXPORT_SYMBOL(pgprot_framebuffer);
int fb_is_primary_device(struct fb_info *info)
{

View File

@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock);
static struct idr accel_minors_idr;
static struct dentry *accel_debugfs_root;
static struct class *accel_class;
static struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
}
/*
 * Statically defined device class for accel devices. The class is named
 * "accel" and uses accel_devnode() to build device node names.
 */
static const struct class accel_class = {
.name = "accel",
.devnode = accel_devnode,
};
static int accel_sysfs_init(void)
{
accel_class = class_create("accel");
if (IS_ERR(accel_class))
return PTR_ERR(accel_class);
accel_class->devnode = accel_devnode;
return 0;
return class_register(&accel_class);
}
static void accel_sysfs_destroy(void)
{
if (IS_ERR_OR_NULL(accel_class))
return;
class_destroy(accel_class);
accel_class = NULL;
class_unregister(&accel_class);
}
static int accel_name_info(struct seq_file *m, void *data)
@ -79,29 +74,30 @@ static const struct drm_info_list accel_debugfs_list[] = {
#define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
/**
* accel_debugfs_init() - Initialize debugfs for accel minor
* @minor: Pointer to the drm_minor instance.
* @minor_id: The minor's id
* accel_debugfs_init() - Initialize debugfs for device
* @dev: Pointer to the device instance.
*
* This function initializes the drm minor's debugfs members and creates
* a root directory for the minor in debugfs. It also creates common files
* for accelerators and calls the driver's debugfs init callback.
* This function creates a root directory for the device in debugfs.
*/
void accel_debugfs_init(struct drm_minor *minor, int minor_id)
void accel_debugfs_init(struct drm_device *dev)
{
struct drm_device *dev = minor->dev;
char name[64];
drm_debugfs_dev_init(dev, accel_debugfs_root);
}
INIT_LIST_HEAD(&minor->debugfs_list);
mutex_init(&minor->debugfs_lock);
sprintf(name, "%d", minor_id);
minor->debugfs_root = debugfs_create_dir(name, accel_debugfs_root);
/**
* accel_debugfs_register() - Register debugfs for device
* @dev: Pointer to the device instance.
*
* Creates common files for accelerators.
*/
void accel_debugfs_register(struct drm_device *dev)
{
struct drm_minor *minor = dev->accel;
minor->debugfs_root = dev->debugfs_root;
drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
if (dev->driver->debugfs_init)
dev->driver->debugfs_init(minor);
dev->debugfs_root, minor);
}
/**
@ -116,7 +112,7 @@ void accel_debugfs_init(struct drm_minor *minor, int minor_id)
void accel_set_device_instance_params(struct device *kdev, int index)
{
kdev->devt = MKDEV(ACCEL_MAJOR, index);
kdev->class = accel_class;
kdev->class = &accel_class;
kdev->type = &accel_sysfs_device_minor;
}

View File

@ -361,10 +361,11 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
return rc;
}
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
union hl_cb_args *args = data;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
union hl_cb_args *args = data;
u64 handle = 0, device_va = 0;
enum hl_device_status status;
u32 usage_cnt = 0;

View File

@ -31,6 +31,24 @@ enum hl_cs_wait_status {
CS_WAIT_STATUS_GONE
};
/*
 * Data used while handling wait/timestamp nodes.
 * The purpose of this struct is to store the needed data for both operations
 * in one variable instead of passing a large number of arguments to functions.
 *
 * interrupt:       user interrupt whose wait list / timestamp list the node
 *                  is placed on.
 * buf:             timestamp buffer, looked up from mmg using ts_handle
 *                  (filled in by the timestamp registration flow).
 * mmg:             memory manager used to resolve both cq_handle and ts_handle.
 * cq_cb:           CQ counters CB, looked up from mmg using cq_handle
 *                  (filled in by the wait/registration flow).
 * ts_handle:       handle of the timestamp buffer.
 * ts_offset:       index of the requested record inside the timestamp buffer.
 * cq_handle:       handle of the CQ counters CB.
 * cq_offset:       index, in u64 units, of the counter inside the CQ CB.
 * target_value:    counter value that the CQ counter is compared against.
 * intr_timeout_us: wait timeout in microseconds; a value of 0 means the
 *                  wait flow only polls the counter and does not block.
 */
struct wait_interrupt_data {
struct hl_user_interrupt *interrupt;
struct hl_mmap_mem_buf *buf;
struct hl_mem_mgr *mmg;
struct hl_cb *cq_cb;
u64 ts_handle;
u64 ts_offset;
u64 cq_handle;
u64 cq_offset;
u64 target_value;
u64 intr_timeout_us;
};
static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
enum hl_cs_wait_status *status, s64 *timestamp);
@ -1079,19 +1097,22 @@ static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
struct hl_user_pending_interrupt *pend, *temp;
unsigned long flags;
spin_lock(&interrupt->wait_list_lock);
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
if (pend->ts_reg_info.buf) {
list_del(&pend->wait_list_node);
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
hl_cb_put(pend->ts_reg_info.cq_cb);
} else {
pend->fence.error = -EIO;
complete_all(&pend->fence.completion);
}
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
pend->fence.error = -EIO;
complete_all(&pend->fence.completion);
}
spin_unlock(&interrupt->wait_list_lock);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
list_del(&pend->list_node);
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
hl_cb_put(pend->ts_reg_info.cq_cb);
}
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
}
void hl_release_pending_user_interrupts(struct hl_device *hdev)
@ -1730,16 +1751,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
/* Need to wait for restore completion before execution phase */
if (num_chunks) {
enum hl_cs_wait_status status;
wait_again:
ret = _hl_cs_wait_ioctl(hdev, ctx,
jiffies_to_usecs(hdev->timeout_jiffies),
*cs_seq, &status, NULL);
if (ret) {
if (ret == -ERESTARTSYS) {
usleep_range(100, 200);
goto wait_again;
}
dev_err(hdev->dev,
"Restore CS for context %d failed to complete %d\n",
ctx->asid, ret);
@ -2539,8 +2555,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
return 0;
}
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
union hl_cs_args *args = data;
enum hl_cs_type cs_type = 0;
u64 cs_seq = ULONG_MAX;
@ -3197,166 +3214,241 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
struct hl_cb *cq_cb,
u64 ts_offset, u64 cq_offset, u64 target_value,
spinlock_t *wait_list_lock,
struct hl_user_pending_interrupt **pend)
static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
{
struct hl_ts_buff *ts_buff = buf->private;
struct hl_user_pending_interrupt *requested_offset_record =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
ts_offset;
struct hl_user_pending_interrupt *cb_last =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
record->ts_reg_info.cq_cb = cq_cb;
record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
record->cq_target_value = target_value;
}
static int validate_and_get_ts_record(struct device *dev,
struct hl_ts_buff *ts_buff, u64 ts_offset,
struct hl_user_pending_interrupt **req_event_record)
{
struct hl_user_pending_interrupt *ts_cb_last;
*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
ts_offset;
ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
unsigned long iter_counter = 0;
u64 current_cq_counter;
ktime_t timestamp;
/* Validate ts_offset not exceeding last max */
if (requested_offset_record >= cb_last) {
dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
(u64)(uintptr_t)cb_last);
if (*req_event_record >= ts_cb_last) {
dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
ts_offset, (u64)(uintptr_t)ts_cb_last);
return -EINVAL;
}
timestamp = ktime_get();
start_over:
spin_lock(wait_list_lock);
/* Unregister only if we didn't reach the target value
* since in this case there will be no handling in irq context
* and then it's safe to delete the node out of the interrupt list
* then re-use it on other interrupt
*/
if (requested_offset_record->ts_reg_info.in_use) {
current_cq_counter = *requested_offset_record->cq_kernel_addr;
if (current_cq_counter < requested_offset_record->cq_target_value) {
list_del(&requested_offset_record->wait_list_node);
spin_unlock(wait_list_lock);
hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
dev_dbg(buf->mmg->dev,
"ts node removed from interrupt list now can re-use\n");
} else {
dev_dbg(buf->mmg->dev,
"ts node in middle of irq handling\n");
/* irq thread handling in the middle give it time to finish */
spin_unlock(wait_list_lock);
usleep_range(100, 1000);
if (++iter_counter == MAX_TS_ITER_NUM) {
dev_err(buf->mmg->dev,
"Timestamp offset processing reached timeout of %lld ms\n",
ktime_ms_delta(ktime_get(), timestamp));
return -EAGAIN;
}
goto start_over;
}
} else {
/* Fill up the new registration node info */
requested_offset_record->ts_reg_info.buf = buf;
requested_offset_record->ts_reg_info.cq_cb = cq_cb;
requested_offset_record->ts_reg_info.timestamp_kernel_addr =
(u64 *) ts_buff->user_buff_address + ts_offset;
requested_offset_record->cq_kernel_addr =
(u64 *) cq_cb->kernel_address + cq_offset;
requested_offset_record->cq_target_value = target_value;
spin_unlock(wait_list_lock);
}
*pend = requested_offset_record;
dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
requested_offset_record);
return 0;
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
u64 target_value, struct hl_user_interrupt *interrupt,
bool register_ts_record, u64 ts_handle, u64 ts_offset,
/*
 * unregister_timestamp_node() - detach a timestamp record from its interrupt.
 * @hdev: habanalabs device (not used by this helper).
 * @record: timestamp record to unregister.
 * @need_lock: when true, take the ts_list_lock of the interrupt the record is
 *             currently registered on for the duration of the list update;
 *             when false, no lock is taken here.
 *
 * If the record is marked in use it is cleared and removed from the list, and
 * the buffer and CB references stored in the record are dropped after the
 * lock (if any) has been released. A record that is not in use is left alone.
 */
static void unregister_timestamp_node(struct hl_device *hdev,
			struct hl_user_pending_interrupt *record, bool need_lock)
{
	struct hl_user_interrupt *owner = record->ts_reg_info.interrupt;
	unsigned long irq_flags;
	bool was_registered = false;

	if (need_lock)
		spin_lock_irqsave(&owner->ts_list_lock, irq_flags);

	if (record->ts_reg_info.in_use) {
		was_registered = true;
		record->ts_reg_info.in_use = false;
		list_del(&record->list_node);
	}

	if (need_lock)
		spin_unlock_irqrestore(&owner->ts_list_lock, irq_flags);

	/* Nothing was registered, so there are no references to drop */
	if (!was_registered)
		return;

	/* Release the references held by the registered record */
	hl_mmap_mem_buf_put(record->ts_reg_info.buf);
	hl_cb_put(record->ts_reg_info.cq_cb);
}
/*
 * ts_get_and_handle_kernel_record() - locate the timestamp record at
 * data->ts_offset and prepare it for a new registration.
 * @hdev: habanalabs device, passed on to unregister_timestamp_node().
 * @ctx: context pointer (not used by this helper).
 * @data: wait/timestamp request; buf, interrupt, cq_cb, ts_offset, cq_offset
 *        and target_value are read here.
 * @flags: irq flags that go with data->interrupt->ts_list_lock; used when
 *         that lock is dropped and re-taken below.
 * @pend: on success, set to the prepared record.
 *
 * The unlock/lock sequence below operates on data->interrupt->ts_list_lock
 * with *flags, so the function expects to be entered with that lock held.
 * The record is only filled in here; it is not inserted into any list by
 * this function.
 *
 * Return: 0 on success, or the error from validate_and_get_ts_record().
 */
static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
struct wait_interrupt_data *data, unsigned long *flags,
struct hl_user_pending_interrupt **pend)
{
struct hl_user_pending_interrupt *req_offset_record;
struct hl_ts_buff *ts_buff = data->buf->private;
bool need_lock = false;
int rc;
rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
&req_offset_record);
if (rc)
return rc;
/* If the node is already registered, unregister it first and then re-use it */
if (req_offset_record->ts_reg_info.in_use) {
dev_dbg(data->buf->mmg->dev,
"Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
req_offset_record,
req_offset_record->ts_reg_info.interrupt->interrupt_id,
req_offset_record->ts_reg_info.timestamp_kernel_addr,
data->interrupt->interrupt_id);
/*
 * The interrupt the node is currently registered on can differ from the
 * new one. To avoid holding two list locks during the unregister, release
 * the new interrupt's ts list lock here, let unregister_timestamp_node()
 * take the old interrupt's lock itself (need_lock), and re-acquire the new
 * interrupt's lock once the unregister is done.
 */
if (data->interrupt->interrupt_id !=
req_offset_record->ts_reg_info.interrupt->interrupt_id) {
need_lock = true;
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
}
unregister_timestamp_node(hdev, req_offset_record, need_lock);
if (need_lock)
spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
}
/* Fill in the new registration info (the list insertion is not done here) */
req_offset_record->ts_reg_info.in_use = true;
req_offset_record->ts_reg_info.buf = data->buf;
req_offset_record->ts_reg_info.timestamp_kernel_addr =
(u64 *) ts_buff->user_buff_address + data->ts_offset;
req_offset_record->ts_reg_info.interrupt = data->interrupt;
set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
data->target_value);
*pend = req_offset_record;
return rc;
}
static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct wait_interrupt_data *data,
u32 *status, u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
unsigned long timeout;
long completion_rc;
unsigned long flags;
int rc = 0;
timeout = hl_usecs64_to_jiffies(timeout_us);
hl_ctx_get(ctx);
cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
if (!cq_cb) {
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
if (!data->cq_cb) {
rc = -EINVAL;
goto put_ctx;
}
/* Validate the cq offset */
if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >=
((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) {
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
rc = -EINVAL;
goto put_cq_cb;
}
if (register_ts_record) {
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
interrupt->interrupt_id, ts_offset, cq_counters_offset);
buf = hl_mmap_mem_buf_get(mmg, ts_handle);
if (!buf) {
rc = -EINVAL;
goto put_cq_cb;
}
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
data->interrupt->interrupt_id, data->ts_handle,
data->ts_offset, data->cq_offset);
/* get ts buffer record */
rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
cq_counters_offset, target_value,
&interrupt->wait_list_lock, &pend);
if (rc)
goto put_ts_buff;
} else {
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
rc = -ENOMEM;
goto put_cq_cb;
}
hl_fence_init(&pend->fence, ULONG_MAX);
pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
pend->cq_target_value = target_value;
data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
if (!data->buf) {
rc = -EINVAL;
goto put_cq_cb;
}
spin_lock(&interrupt->wait_list_lock);
spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
/* get ts buffer record */
rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
if (rc) {
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
goto put_ts_buff;
}
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
* before we add the timestamp node to the ts list.
*/
if (*pend->cq_kernel_addr >= target_value) {
if (register_ts_record)
pend->ts_reg_info.in_use = 0;
spin_unlock(&interrupt->wait_list_lock);
if (*pend->cq_kernel_addr >= data->target_value) {
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
pend, data->ts_offset, data->interrupt->interrupt_id);
pend->ts_reg_info.in_use = 0;
*status = HL_WAIT_CS_STATUS_COMPLETED;
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
goto put_ts_buff;
}
list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
hl_ctx_put(ctx);
return rc;
put_ts_buff:
hl_mmap_mem_buf_put(data->buf);
put_cq_cb:
hl_cb_put(data->cq_cb);
put_ctx:
hl_ctx_put(ctx);
return rc;
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct wait_interrupt_data *data,
u32 *status, u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
unsigned long timeout, flags;
long completion_rc;
int rc = 0;
timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
hl_ctx_get(ctx);
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
if (!data->cq_cb) {
rc = -EINVAL;
goto put_ctx;
}
/* Validate the cq offset */
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
rc = -EINVAL;
goto put_cq_cb;
}
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
rc = -ENOMEM;
goto put_cq_cb;
}
hl_fence_init(&pend->fence, ULONG_MAX);
pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
pend->cq_target_value = data->target_value;
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
/* We check for completion value as interrupt could have been received
* before we add the wait node to the wait list.
*/
if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
if (*pend->cq_kernel_addr >= data->target_value)
*status = HL_WAIT_CS_STATUS_COMPLETED;
else
*status = HL_WAIT_CS_STATUS_BUSY;
if (register_ts_record) {
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
goto put_ts_buff;
} else {
pend->fence.timestamp = ktime_get();
goto set_timestamp;
}
} else if (!timeout_us) {
spin_unlock(&interrupt->wait_list_lock);
*status = HL_WAIT_CS_STATUS_BUSY;
pend->fence.timestamp = ktime_get();
goto set_timestamp;
}
@ -3366,55 +3458,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* Note that we cannot have sorted list by target value,
* in order to shorten the list pass loop, since
* same list could have nodes for different cq counter handle.
* Note:
* Mark ts buff offset as in use here in the spinlock protection area
* to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
* before adding the node to the list. this scenario might happen when
* multiple threads are racing on same offset and one thread could
* set the ts buff in ts_buff_get_kernel_ts_record then the other thread
* takes over and get to ts_buff_get_kernel_ts_record and then we will try
* to re-use the same ts buff offset, and will try to delete a non existing
* node from the list.
*/
if (register_ts_record)
pend->ts_reg_info.in_use = 1;
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock(&interrupt->wait_list_lock);
if (register_ts_record) {
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
goto ts_registration_exit;
}
list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
/* Wait for interrupt handler to signal completion */
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
timeout);
if (completion_rc > 0) {
*status = HL_WAIT_CS_STATUS_COMPLETED;
if (pend->fence.error == -EIO) {
dev_err_ratelimited(hdev->dev,
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
pend->fence.error);
rc = -EIO;
*status = HL_WAIT_CS_STATUS_ABORTED;
} else {
*status = HL_WAIT_CS_STATUS_COMPLETED;
}
} else {
if (completion_rc == -ERESTARTSYS) {
dev_err_ratelimited(hdev->dev,
"user process got signal while waiting for interrupt ID %d\n",
interrupt->interrupt_id);
data->interrupt->interrupt_id);
rc = -EINTR;
*status = HL_WAIT_CS_STATUS_ABORTED;
} else {
if (pend->fence.error == -EIO) {
dev_err_ratelimited(hdev->dev,
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
pend->fence.error);
rc = -EIO;
*status = HL_WAIT_CS_STATUS_ABORTED;
} else {
/* The wait has timed-out. We don't know anything beyond that
* because the workload wasn't submitted through the driver.
* Therefore, from driver's perspective, the workload is still
* executing.
*/
rc = 0;
*status = HL_WAIT_CS_STATUS_BUSY;
}
/* The wait has timed-out. We don't know anything beyond that
* because the workload was not submitted through the driver.
* Therefore, from driver's perspective, the workload is still
* executing.
*/
rc = 0;
*status = HL_WAIT_CS_STATUS_BUSY;
}
}
@ -3424,23 +3499,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* for ts record, the node will be deleted in the irq handler after
* we reach the target value.
*/
spin_lock(&interrupt->wait_list_lock);
list_del(&pend->wait_list_node);
spin_unlock(&interrupt->wait_list_lock);
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
list_del(&pend->list_node);
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
set_timestamp:
*timestamp = ktime_to_ns(pend->fence.timestamp);
kfree(pend);
hl_cb_put(cq_cb);
ts_registration_exit:
hl_cb_put(data->cq_cb);
hl_ctx_put(ctx);
return rc;
put_ts_buff:
hl_mmap_mem_buf_put(buf);
put_cq_cb:
hl_cb_put(cq_cb);
hl_cb_put(data->cq_cb);
put_ctx:
hl_ctx_put(ctx);
@ -3454,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
unsigned long timeout;
unsigned long timeout, flags;
u64 completion_value;
long completion_rc;
int rc = 0;
@ -3474,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
*/
spin_lock(&interrupt->wait_list_lock);
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock(&interrupt->wait_list_lock);
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_add_tail(&pend->list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
@ -3507,14 +3579,14 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
* If comparison fails, keep waiting until timeout expires
*/
if (completion_rc > 0) {
spin_lock(&interrupt->wait_list_lock);
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
/* reinit_completion must be called before we check for user
* completion value, otherwise, if interrupt is received after
* the comparison and before the next wait_for_completion,
* we will reach timeout and fail
*/
reinit_completion(&pend->fence.completion);
spin_unlock(&interrupt->wait_list_lock);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
@ -3551,9 +3623,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
}
remove_pending_user_interrupt:
spin_lock(&interrupt->wait_list_lock);
list_del(&pend->wait_list_node);
spin_unlock(&interrupt->wait_list_lock);
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_del(&pend->list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*timestamp = ktime_to_ns(pend->fence.timestamp);
@ -3611,19 +3683,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return -EINVAL;
}
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
args->in.interrupt_timeout_us, args->in.cq_counters_handle,
args->in.cq_counters_offset,
args->in.target, interrupt,
!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
args->in.timestamp_handle, args->in.timestamp_offset,
&status, &timestamp);
else
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
struct wait_interrupt_data wait_intr_data = {0};
wait_intr_data.interrupt = interrupt;
wait_intr_data.mmg = &hpriv->mem_mgr;
wait_intr_data.cq_handle = args->in.cq_counters_handle;
wait_intr_data.cq_offset = args->in.cq_counters_offset;
wait_intr_data.ts_handle = args->in.timestamp_handle;
wait_intr_data.ts_offset = args->in.timestamp_offset;
wait_intr_data.target_value = args->in.target;
wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
/*
* Allow only one registration at a time. This is needed in order to prevent
* issues while handling the flow of re-use of the same offset.
* Since the registration flow is protected only by the interrupt lock,
* the re-use flow might request to move a ts node to another interrupt list,
* and in such a case we're not protected.
*/
mutex_lock(&hpriv->ctx->ts_reg_lock);
rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
&status, &timestamp);
mutex_unlock(&hpriv->ctx->ts_reg_lock);
} else
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
&status, &timestamp);
} else {
rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
args->in.target, interrupt, &status,
&timestamp);
}
if (rc)
return rc;
@ -3638,8 +3733,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
union hl_wait_cs_args *args = data;
u32 flags = args->in.flags;

View File

@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
kfree(ctx->cs_pending);
if (ctx->asid != HL_KERNEL_ASID_ID) {
dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid);
dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid);
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
mutex_destroy(&ctx->ts_reg_lock);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx);
@ -198,6 +199,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
char task_comm[TASK_COMM_LEN];
int rc = 0, i;
ctx->hdev = hdev;
@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
hl_encaps_sig_mgr_init(&ctx->sig_mgr);
dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
get_task_comm(task_comm, current), ctx->asid);
}
return 0;

View File

@ -18,8 +18,6 @@
#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
#define I2C_MAX_TRANSACTION_LEN 8
static struct dentry *hl_debug_root;
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
u8 i2c_reg, u8 i2c_len, u64 *val)
{
@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root);
dev_entry->root = hdev->drm.accel->debugfs_root;
add_files_to_device(hdev, dev_entry, dev_entry->root);
if (!hdev->asic_prop.fw_security_enabled)
add_secured_nodes(dev_entry, dev_entry->root);
}
void hl_debugfs_remove_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
debugfs_remove_recursive(entry->root);
}
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
up_write(&dev_entry->state_dump_sem);
}
void __init hl_debugfs_init(void)
{
hl_debug_root = debugfs_create_dir("habanalabs", NULL);
}
void hl_debugfs_fini(void)
{
debugfs_remove_recursive(hl_debug_root);
}

View File

@ -14,11 +14,14 @@
#include <linux/hwmon.h>
#include <linux/vmalloc.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <trace/events/habanalabs.h>
#define HL_RESET_DELAY_USEC 10000 /* 10ms */
#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5
#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 30
enum dma_alloc_type {
DMA_ALLOC_COHERENT,
@ -185,7 +188,36 @@ void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
/*
 * hl_dma_map_sgtable_caller() - DMA-map a scatter-gather table and trace it.
 * @hdev: pointer to habanalabs device structure.
 * @sgt: scatter-gather table to map.
 * @dir: DMA data direction.
 * @caller: string forwarded as-is to the trace event.
 *
 * The mapping itself is done by the ASIC-specific dma_map_sgtable callback.
 * When the habanalabs_dma_map_page trace event is enabled, one event is
 * emitted per DMA segment, with the DMA address reported relative to
 * device_dma_offset_for_host_access.
 *
 * Return: 0 on success, otherwise the error returned by the ASIC callback.
 */
int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
				enum dma_data_direction dir, const char *caller)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct scatterlist *sg;
	int rc, i;

	rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir);
	if (rc)
		return rc;

	/* Skip the per-segment walk entirely when nobody listens to the event */
	if (!trace_habanalabs_dma_map_page_enabled())
		return 0;

	/*
	 * sg_dma_len() already selects dma_length or length according to
	 * CONFIG_NEED_SG_DMA_LENGTH, so no #ifdef is needed in the argument list.
	 */
	for_each_sgtable_dma_sg(sgt, sg, i)
		trace_habanalabs_dma_map_page(hdev->dev,
				page_to_phys(sg_page(sg)),
				sg_dma_address(sg) - prop->device_dma_offset_for_host_access,
				sg_dma_len(sg), dir, caller);

	return 0;
}
int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
@ -203,7 +235,30 @@ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_da
return 0;
}
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
/*
 * hl_dma_unmap_sgtable_caller() - DMA-unmap a scatter-gather table and trace it.
 * @hdev: pointer to habanalabs device structure.
 * @sgt: scatter-gather table to unmap.
 * @dir: DMA data direction.
 * @caller: string forwarded as-is to the trace event.
 *
 * The unmapping itself is done by the ASIC-specific dma_unmap_sgtable
 * callback. When the habanalabs_dma_unmap_page trace event is enabled, one
 * event is emitted per DMA segment after the callback has been invoked, with
 * the DMA address reported relative to device_dma_offset_for_host_access.
 */
void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
					enum dma_data_direction dir, const char *caller)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct scatterlist *sg;
	int i;

	hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir);

	/* Skip the per-segment walk entirely when nobody listens to the event */
	if (!trace_habanalabs_dma_unmap_page_enabled())
		return;

	/*
	 * sg_dma_len() already selects dma_length or length according to
	 * CONFIG_NEED_SG_DMA_LENGTH, so no #ifdef is needed in the argument list.
	 */
	for_each_sgtable_dma_sg(sgt, sg, i)
		trace_habanalabs_dma_unmap_page(hdev->dev,
				page_to_phys(sg_page(sg)),
				sg_dma_address(sg) - prop->device_dma_offset_for_host_access,
				sg_dma_len(sg), dir, caller);
}
void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
@ -315,7 +370,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
if (hdev->reset_info.in_reset) {
if (hdev->device_fini_pending) {
status = HL_DEVICE_STATUS_MALFUNCTION;
} else if (hdev->reset_info.in_reset) {
if (hdev->reset_info.in_compute_reset)
status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
else
@ -343,9 +400,9 @@ bool hl_device_operational(struct hl_device *hdev,
*status = current_status;
switch (current_status) {
case HL_DEVICE_STATUS_MALFUNCTION:
case HL_DEVICE_STATUS_IN_RESET:
case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
case HL_DEVICE_STATUS_MALFUNCTION:
case HL_DEVICE_STATUS_NEEDS_RESET:
return false;
case HL_DEVICE_STATUS_OPERATIONAL:
@ -406,8 +463,6 @@ static void hpriv_release(struct kref *ref)
hdev->asic_funcs->send_device_activity(hdev, false);
put_pid(hpriv->taskpid);
hl_debugfs_remove_file(hpriv);
mutex_destroy(&hpriv->ctx_lock);
@ -424,7 +479,7 @@ static void hpriv_release(struct kref *ref)
/* Check the device idle status and reset if not idle.
* Skip it if already in reset, or if device is going to be reset in any case.
*/
if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm)
device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
if (!device_is_idle) {
@ -446,14 +501,18 @@ static void hpriv_release(struct kref *ref)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_list_lock);
put_pid(hpriv->taskpid);
if (reset_device) {
hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
} else {
/* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
int rc = hdev->asic_funcs->scrub_device_mem(hdev);
if (rc)
if (rc) {
dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
}
}
/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
@ -516,24 +575,20 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message
}
/*
* hl_device_release - release function for habanalabs device
*
* @inode: pointer to inode structure
* @filp: pointer to file structure
* hl_device_release() - release function for habanalabs device.
* @ddev: pointer to DRM device structure.
* @file: pointer to DRM file private data structure.
*
* Called when process closes an habanalabs device
*/
static int hl_device_release(struct inode *inode, struct file *filp)
void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = filp->private_data;
struct hl_device *hdev = hpriv->hdev;
filp->private_data = NULL;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = to_hl_device(ddev);
if (!hdev) {
pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
put_pid(hpriv->taskpid);
return 0;
}
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
@ -551,8 +606,6 @@ static int hl_device_release(struct inode *inode, struct file *filp)
}
hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;
return 0;
}
static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
@ -571,11 +624,6 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
/* release the eventfd */
if (hpriv->notifier_event.eventfd)
eventfd_ctx_put(hpriv->notifier_event.eventfd);
mutex_destroy(&hpriv->notifier_event.lock);
put_pid(hpriv->taskpid);
kfree(hpriv);
@ -583,18 +631,8 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
return 0;
}
/*
* hl_mmap - mmap function for habanalabs device
*
* @*filp: pointer to file structure
* @*vma: pointer to vm_area_struct of the process
*
* Called when process does an mmap on habanalabs device. Call the relevant mmap
* function at the end of the common code.
*/
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
struct hl_fpriv *hpriv = filp->private_data;
struct hl_device *hdev = hpriv->hdev;
unsigned long vm_pgoff;
@ -617,14 +655,22 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
return -EINVAL;
}
static const struct file_operations hl_ops = {
.owner = THIS_MODULE,
.open = hl_device_open,
.release = hl_device_release,
.mmap = hl_mmap,
.unlocked_ioctl = hl_ioctl,
.compat_ioctl = hl_ioctl
};
/*
 * hl_mmap - mmap entry point for the habanalabs device
 *
 * @filp: pointer to file structure; its private_data is read as a struct drm_file
 * @vma: pointer to vm_area_struct of the process
 *
 * Called when a process does an mmap on the habanalabs device. Fetches the
 * hl_fpriv kept in the DRM file's driver_priv and passes it, together with
 * @vma, to __hl_mmap(), whose return value is returned unchanged.
 */
int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct drm_file *drm_priv = filp->private_data;

	return __hl_mmap(drm_priv->driver_priv, vma);
}
static const struct file_operations hl_ctrl_ops = {
.owner = THIS_MODULE,
@ -645,14 +691,14 @@ static void device_release_func(struct device *dev)
* @hdev: pointer to habanalabs device structure
* @class: pointer to the class object of the device
* @minor: minor number of the specific device
* @fpos: file operations to install for this device
* @fops: file operations to install for this device
* @name: name of the device as it will appear in the filesystem
* @cdev: pointer to the char device object that will be initialized
* @dev: pointer to the device object that will be initialized
*
* Initialize a cdev and a Linux device for habanalabs's device.
*/
static int device_init_cdev(struct hl_device *hdev, struct class *class,
static int device_init_cdev(struct hl_device *hdev, const struct class *class,
int minor, const struct file_operations *fops,
char *name, struct cdev *cdev,
struct device **dev)
@ -676,23 +722,26 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class,
static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
{
const struct class *accel_class = hdev->drm.accel->kdev->class;
char name[32];
int rc;
rc = cdev_device_add(&hdev->cdev, hdev->dev);
if (rc) {
dev_err(hdev->dev,
"failed to add a char device to the system\n");
hdev->cdev_idx = hdev->drm.accel->index;
/* Initialize cdev and device structures for the control device */
snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx);
rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name,
&hdev->cdev_ctrl, &hdev->dev_ctrl);
if (rc)
return rc;
}
rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
if (rc) {
dev_err(hdev->dev,
"failed to add a control char device to the system\n");
goto delete_cdev_device;
dev_err(hdev->dev_ctrl,
"failed to add an accel control char device to the system\n");
goto free_ctrl_device;
}
/* hl_sysfs_init() must be done after adding the device to the system */
rc = hl_sysfs_init(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize sysfs\n");
@ -707,23 +756,19 @@ static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
delete_ctrl_cdev_device:
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
cdev_device_del(&hdev->cdev, hdev->dev);
free_ctrl_device:
put_device(hdev->dev_ctrl);
return rc;
}
static void cdev_sysfs_debugfs_remove(struct hl_device *hdev)
{
if (!hdev->cdev_sysfs_debugfs_created)
goto put_devices;
return;
hl_debugfs_remove_device(hdev);
hl_sysfs_fini(hdev);
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
cdev_device_del(&hdev->cdev, hdev->dev);
put_devices:
put_device(hdev->dev);
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
put_device(hdev->dev_ctrl);
}
@ -996,6 +1041,20 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
}
/*
 * hl_device_eq_heartbeat - check the EQ heartbeat indication
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Does nothing when cpucp_info.eq_health_check_supported is not set.
 * Otherwise, if eq_heartbeat_received is set it is cleared, and if it is not
 * set a conditional hard reset is requested through hl_device_cond_reset().
 */
static void hl_device_eq_heartbeat(struct hl_device *hdev)
{
	const u64 reset_events = HL_NOTIFIER_EVENT_DEVICE_RESET |
					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;

	if (!hdev->asic_prop.cpucp_info.eq_health_check_supported)
		return;

	if (!hdev->eq_heartbeat_received) {
		hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, reset_events);
		return;
	}

	/* Consume the indication so the next call sees it only if it is set again */
	hdev->eq_heartbeat_received = false;
}
static void hl_device_heartbeat(struct work_struct *work)
{
struct hl_device *hdev = container_of(work, struct hl_device,
@ -1003,9 +1062,16 @@ static void hl_device_heartbeat(struct work_struct *work)
struct hl_info_fw_err_info info = {0};
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
if (!hl_device_operational(hdev, NULL))
/* Start heartbeat checks only after driver has enabled events from FW */
if (!hl_device_operational(hdev, NULL) || !hdev->init_done)
goto reschedule;
/*
* For EQ health check need to check if driver received the heartbeat eq event
* in order to validate the eq is working.
*/
hl_device_eq_heartbeat(hdev);
if (!hdev->asic_funcs->send_heartbeat(hdev))
goto reschedule;
@ -1062,7 +1128,15 @@ static int device_late_init(struct hl_device *hdev)
hdev->high_pll = hdev->asic_prop.high_pll;
if (hdev->heartbeat) {
/*
* Before scheduling the heartbeat, the driver will check if an eq event has been received.
* For the first schedule we need to set the indication to true; for the next ones
* this indication will be true only if an eq event was sent by FW.
*/
hdev->eq_heartbeat_received = true;
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
schedule_delayed_work(&hdev->work_heartbeat,
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}
@ -1302,18 +1376,18 @@ int hl_device_resume(struct hl_device *hdev)
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
{
struct task_struct *task = NULL;
struct list_head *fd_list;
struct hl_fpriv *hpriv;
struct mutex *fd_lock;
struct list_head *hpriv_list;
struct hl_fpriv *hpriv;
struct mutex *hpriv_lock;
u32 pending_cnt;
fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
/* Giving time for user to close FD, and for processes that are inside
* hl_device_open to finish
*/
if (!list_empty(fd_list))
if (!list_empty(hpriv_list))
ssleep(1);
if (timeout) {
@ -1329,12 +1403,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
}
}
mutex_lock(fd_lock);
mutex_lock(hpriv_lock);
/* This section must be protected because we are dereferencing
* pointers that are freed if the process exits
*/
list_for_each_entry(hpriv, fd_list, dev_node) {
list_for_each_entry(hpriv, hpriv_list, dev_node) {
task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
if (task) {
dev_info(hdev->dev, "Killing user process pid=%d\n",
@ -1344,17 +1418,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
put_task_struct(task);
} else {
/*
* If we got here, it means that process was killed from outside the driver
* right after it started looping on fd_list and before get_pid_task, thus
* we don't need to kill it.
*/
dev_dbg(hdev->dev,
"Can't get task struct for user process, assuming process was killed from outside the driver\n");
"Can't get task struct for user process %d, process was killed from outside the driver\n",
pid_nr(hpriv->taskpid));
}
}
mutex_unlock(fd_lock);
mutex_unlock(hpriv_lock);
/*
* We killed the open users, but that doesn't mean they are closed.
@ -1366,7 +1436,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
*/
wait_for_processes:
while ((!list_empty(fd_list)) && (pending_cnt)) {
while ((!list_empty(hpriv_list)) && (pending_cnt)) {
dev_dbg(hdev->dev,
"Waiting for all unmap operations to finish before hard reset\n");
@ -1376,7 +1446,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
}
/* All processes exited successfully */
if (list_empty(fd_list))
if (list_empty(hpriv_list))
return 0;
/* Give up waiting for processes to exit */
@ -1390,17 +1460,17 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
struct list_head *fd_list;
struct list_head *hpriv_list;
struct hl_fpriv *hpriv;
struct mutex *fd_lock;
struct mutex *hpriv_lock;
fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
mutex_lock(fd_lock);
list_for_each_entry(hpriv, fd_list, dev_node)
mutex_lock(hpriv_lock);
list_for_each_entry(hpriv, hpriv_list, dev_node)
hpriv->hdev = NULL;
mutex_unlock(fd_lock);
mutex_unlock(hpriv_lock);
}
static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
@ -1916,7 +1986,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
}
ctx = hl_get_compute_ctx(hdev);
if (!ctx || !ctx->hpriv->notifier_event.eventfd)
if (!ctx)
goto device_reset;
/*
* There is no point in postponing the reset if user is not registered for events.
* However if no eventfd_ctx exists but the device release watchdog is already scheduled, it
* just implies that user has unregistered as part of handling a previous event. In this
* case an immediate reset is not required.
*/
if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active)
goto device_reset;
/* Schedule the device release watchdog work unless reset is already in progress or if the
@ -1928,8 +2007,10 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
goto device_reset;
}
if (hdev->reset_info.watchdog_active)
if (hdev->reset_info.watchdog_active) {
hdev->device_release_watchdog_work.flags |= flags;
goto out;
}
hdev->device_release_watchdog_work.flags = flags;
dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
@ -1990,59 +2071,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
hl_notifier_event_send(&hpriv->notifier_event, event_mask);
mutex_unlock(&hdev->fpriv_list_lock);
/* control device */
mutex_lock(&hdev->fpriv_ctrl_list_lock);
list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
hl_notifier_event_send(&hpriv->notifier_event, event_mask);
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
/*
 * create_cdev - initialize the cdev/device pairs of the compute and control nodes
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Sets hdev->cdev_idx to hdev->id / 2, then calls device_init_cdev() once for
 * the "hl%d" node and once for the "hl_controlD%d" node. The temporary name
 * buffer is freed right after each device_init_cdev() call.
 *
 * Returns 0 on success, -ENOMEM if a name could not be allocated, or the error
 * returned by device_init_cdev(). If the failure happens after the first node
 * was initialized, put_device() is called on hdev->dev before returning.
 */
static int create_cdev(struct hl_device *hdev)
{
	char *node_name;
	int rc;

	hdev->cdev_idx = hdev->id / 2;

	/* Initialize cdev and device structures */
	node_name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
	if (!node_name)
		return -ENOMEM;

	rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, node_name,
				&hdev->cdev, &hdev->dev);
	kfree(node_name);
	if (rc)
		return rc;

	/* Initialize cdev and device structures for control device */
	node_name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
	if (!node_name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
				node_name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
	kfree(node_name);
	if (rc)
		goto free_dev;

	return 0;

free_dev:
	put_device(hdev->dev);
	return rc;
}
/*
@ -2057,16 +2085,14 @@ static int create_cdev(struct hl_device *hdev)
int hl_device_init(struct hl_device *hdev)
{
int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
struct hl_ts_free_jobs *free_jobs_data;
bool expose_interfaces_on_err = false;
rc = create_cdev(hdev);
if (rc)
goto out_disabled;
void *p;
/* Initialize ASIC function pointers and perform early init */
rc = device_early_init(hdev);
if (rc)
goto free_dev;
goto out_disabled;
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
hdev->asic_prop.user_interrupt_count;
@ -2078,15 +2104,43 @@ int hl_device_init(struct hl_device *hdev)
rc = -ENOMEM;
goto early_fini;
}
/* Timestamp records supported only if CQ supported in device */
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
for (i = 0 ; i < user_interrupt_cnt ; i++) {
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
sizeof(struct timestamp_reg_free_node));
if (!p) {
rc = -ENOMEM;
goto free_usr_intr_mem;
}
free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data;
free_jobs_data->free_nodes_pool = p;
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
free_jobs_data->next_avail_free_node_idx = 0;
}
}
}
free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data;
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
sizeof(struct timestamp_reg_free_node));
if (!p) {
rc = -ENOMEM;
goto free_usr_intr_mem;
}
free_jobs_data->free_nodes_pool = p;
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
free_jobs_data->next_avail_free_node_idx = 0;
/*
* Start calling ASIC initialization. First S/W then H/W and finally
* late init
*/
rc = hdev->asic_funcs->sw_init(hdev);
if (rc)
goto free_usr_intr_mem;
goto free_common_usr_intr_mem;
/* initialize completion structure for multi CS wait */
@ -2253,6 +2307,14 @@ int hl_device_init(struct hl_device *hdev)
* From here there is no need to expose them in case of an error.
*/
expose_interfaces_on_err = false;
rc = drm_dev_register(&hdev->drm, 0);
if (rc) {
dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc);
rc = 0;
goto out_disabled;
}
rc = cdev_sysfs_debugfs_add(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n");
@ -2284,8 +2346,6 @@ int hl_device_init(struct hl_device *hdev)
"Successfully added device %s to habanalabs driver\n",
dev_name(&(hdev)->pdev->dev));
hdev->init_done = true;
/* After initialization is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events and if
* those events are fatal, we won't know about it and the device will
@ -2293,6 +2353,8 @@ int hl_device_init(struct hl_device *hdev)
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
hdev->init_done = true;
return 0;
cb_pool_fini:
@ -2317,19 +2379,27 @@ int hl_device_init(struct hl_device *hdev)
hl_hw_queues_destroy(hdev);
sw_fini:
hdev->asic_funcs->sw_fini(hdev);
free_common_usr_intr_mem:
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
free_usr_intr_mem:
kfree(hdev->user_interrupt);
if (user_interrupt_cnt) {
for (i = 0 ; i < user_interrupt_cnt ; i++) {
if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool)
break;
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
}
kfree(hdev->user_interrupt);
}
early_fini:
device_early_fini(hdev);
free_dev:
put_device(hdev->dev_ctrl);
put_device(hdev->dev);
out_disabled:
hdev->disabled = true;
if (expose_interfaces_on_err)
if (expose_interfaces_on_err) {
drm_dev_register(&hdev->drm, 0);
cdev_sysfs_debugfs_add(hdev);
dev_err(&hdev->pdev->dev,
"Failed to initialize hl%d. Device %s is NOT usable !\n",
}
pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n",
hdev->cdev_idx, dev_name(&hdev->pdev->dev));
return rc;
@ -2344,12 +2414,13 @@ int hl_device_init(struct hl_device *hdev)
*/
void hl_device_fini(struct hl_device *hdev)
{
u32 user_interrupt_cnt;
bool device_in_reset;
ktime_t timeout;
u64 reset_sec;
int i, rc;
dev_info(hdev->dev, "Removing device\n");
dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev));
hdev->device_fini_pending = 1;
flush_delayed_work(&hdev->device_reset_work.reset_work);
@ -2425,14 +2496,14 @@ void hl_device_fini(struct hl_device *hdev)
hdev->process_kill_trial_cnt = 0;
rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false);
if (rc) {
dev_crit(hdev->dev, "Failed to kill all open processes\n");
dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc);
device_disable_open_processes(hdev, false);
}
hdev->process_kill_trial_cnt = 0;
rc = device_kill_open_processes(hdev, 0, true);
if (rc) {
dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc);
device_disable_open_processes(hdev, true);
}
@ -2464,7 +2535,20 @@ void hl_device_fini(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
kfree(hdev->user_interrupt);
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
hdev->asic_prop.user_interrupt_count;
if (user_interrupt_cnt) {
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
for (i = 0 ; i < user_interrupt_cnt ; i++)
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
}
kfree(hdev->user_interrupt);
}
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
hl_hw_queues_destroy(hdev);
@ -2475,6 +2559,7 @@ void hl_device_fini(struct hl_device *hdev)
/* Hide devices and sysfs/debugfs files from user */
cdev_sysfs_debugfs_remove(hdev);
drm_dev_unregister(&hdev->drm);
hl_debugfs_device_fini(hdev);
@ -2690,6 +2775,20 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info)
*info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
}
/*
 * hl_capture_engine_err - record information about an engine error
 *
 * @hdev: pointer to habanalabs device structure
 * @engine_id: value stored in the captured event's engine_id field
 * @error_count: value stored in the captured event's error_count field
 *
 * Only the call that changes event_detected from 0 to 1 records anything;
 * every other call returns without touching the captured data.
 */
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count)
{
	struct engine_err_info *err = &hdev->captured_err_info.engine_err;

	/* Capture only the first engine error */
	if (atomic_cmpxchg(&err->event_detected, 0, 1) == 0) {
		err->event.timestamp = ktime_to_ns(ktime_get());
		err->event.engine_id = engine_id;
		err->event.error_count = error_count;

		/* Written last, after all the event fields above */
		err->event_info_available = true;
	}
}
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
{
vfree(captured_err_info->page_fault_info.user_mappings);

View File

@ -6,7 +6,7 @@
*/
#include "habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include <linux/firmware.h>
#include <linux/crc32.h>
@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
/* Ignore this bit, don't prevent driver loading */
dev_dbg(hdev->dev, "device unusable status is set\n");
@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
dev_err(hdev->dev,
"Device boot progress - Stuck in preboot after security initialization\n");
break;
case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
dev_err(hdev->dev,
"Device boot progress - Stuck in preparation for shutdown\n");
break;
default:
dev_err(hdev->dev,
"Device boot progress - Invalid or unexpected status code %d\n", status);
@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
int hl_fw_wait_preboot_ready(struct hl_device *hdev)
{
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
u32 status;
int rc;
u32 status = 0, timeout;
int rc, tries = 1;
bool preboot_still_runs;
/* Need to check two possible scenarios:
*
@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
* All other status values - for older firmwares where the uboot was
* loaded from the FLASH
*/
timeout = pre_fw_load->wait_for_preboot_timeout;
retry:
rc = hl_poll_timeout(
hdev,
pre_fw_load->cpu_boot_status_reg,
@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
hdev->fw_poll_interval_usec,
pre_fw_load->wait_for_preboot_timeout);
timeout);
/*
* if F/W reports "security-ready" it means preboot might take longer.
* If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again
* with that timeout
*/
preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
status == CPU_BOOT_STATUS_IN_PREBOOT ||
status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
status == CPU_BOOT_STATUS_DRAM_RDY);
if (rc && tries && preboot_still_runs) {
tries--;
if (pre_fw_load->wait_for_preboot_extended_timeout) {
timeout = pre_fw_load->wait_for_preboot_extended_timeout;
goto retry;
}
}
if (rc) {
detect_cpu_boot_status(hdev, status);
@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info;
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
if (rc)
goto protocol_err;
@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->decoder_binning, hdev->rotator_binning);
}
if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
}
return 0;
}

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2022 HabanaLabs, Ltd.
* Copyright 2016-2023 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@ -8,7 +8,7 @@
#ifndef HABANALABSP_H_
#define HABANALABSP_H_
#include "../include/common/cpucp_if.h"
#include <linux/habanalabs/cpucp_if.h>
#include "../include/common/qman_if.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include <uapi/drm/habanalabs_accel.h>
@ -29,6 +29,9 @@
#include <linux/coresight.h>
#include <linux/dma-buf.h>
#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include "security.h"
#define HL_NAME "habanalabs"
@ -82,8 +85,6 @@ struct hl_fpriv;
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
#define HL_SIM_MAX_TIMEOUT_US 100000000 /* 100s */
#define HL_INVALID_QUEUE UINT_MAX
#define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF
@ -103,6 +104,8 @@ struct hl_fpriv;
/* MMU */
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
#define TIMESTAMP_FREE_NODES_NUM 512
/**
* enum hl_mmu_page_table_location - mmu page table location
* @MMU_DR_PGT: page-table is located on device DRAM.
@ -154,6 +157,11 @@ enum hl_mmu_page_table_location {
#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
#define hl_dma_map_sgtable(hdev, sgt, dir) \
hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__)
#define hl_dma_unmap_sgtable(hdev, sgt, dir) \
hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__)
/*
* Reset Flags
*
@ -545,8 +553,7 @@ struct hl_hints_range {
* allocated with huge pages.
* @hints_dram_reserved_va_range: dram hint addresses reserved range.
* @hints_host_reserved_va_range: host hint addresses reserved range.
* @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
* range.
* @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address: SRAM physical start address for user access.
@ -585,7 +592,7 @@ struct hl_hints_range {
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
* @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
* @dram_page_size: page size for MMU DRAM allocation.
* @dram_page_size: The DRAM physical page size.
* @cfg_size: configuration space size on SRAM.
* @sram_size: total size of SRAM.
* @max_asid: maximum number of open contexts (ASIDs).
@ -641,6 +648,7 @@ struct hl_hints_range {
* @glbl_err_cause_num: global err cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@ -686,9 +694,10 @@ struct hl_hints_range {
* @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
* @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device
* @dma_mask: the dma mask to be set for this device.
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
* @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@ -772,6 +781,7 @@ struct asic_fixed_properties {
u32 num_of_special_blocks;
u32 glbl_err_cause_num;
u32 hbw_flush_reg;
u32 reserved_fw_mem_size;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@ -808,6 +818,7 @@ struct asic_fixed_properties {
u8 dma_mask;
u8 supports_advanced_cpucp_rc;
u8 supports_engine_modes;
u8 support_dynamic_resereved_fw_size;
};
/**
@ -1097,20 +1108,42 @@ enum hl_user_interrupt_type {
HL_USR_INTERRUPT_UNEXPECTED
};
/**
* struct hl_ts_free_jobs - holds the timestamp free nodes data of a user interrupt
* @free_nodes_pool: pool of nodes to be used for free timestamp jobs
* @free_nodes_length: number of nodes in free_nodes_pool
* @next_avail_free_node_idx: index of the next free node in the pool
*
* The free nodes pool must be protected by the user interrupt lock
* to avoid a race between different interrupts which are using the same
* timestamp buffer with different offsets.
*/
struct hl_ts_free_jobs {
struct timestamp_reg_free_node *free_nodes_pool;
u32 free_nodes_length;
u32 next_avail_free_node_idx;
};
/**
* struct hl_user_interrupt - holds user interrupt information
* @hdev: pointer to the device structure
* @ts_free_jobs_data: timestamp free jobs related data (a struct hl_ts_free_jobs)
* @type: user interrupt type
* @wait_list_head: head of the list of user threads pending on this interrupt
* @ts_list_head: head of the list of timestamp records
* @wait_list_lock: protects wait_list_head
* @ts_list_lock: protects ts_list_head
* @timestamp: last timestamp taken upon interrupt
* @interrupt_id: msix interrupt id
*/
struct hl_user_interrupt {
struct hl_device *hdev;
struct hl_ts_free_jobs ts_free_jobs_data;
enum hl_user_interrupt_type type;
struct list_head wait_list_head;
struct list_head ts_list_head;
spinlock_t wait_list_lock;
spinlock_t ts_list_lock;
ktime_t timestamp;
u32 interrupt_id;
};
@ -1120,11 +1153,15 @@ struct hl_user_interrupt {
* @free_objects_node: node in the list free_obj_jobs
* @cq_cb: pointer to cq command buffer to be freed
* @buf: pointer to timestamp buffer to be freed
* @in_use: indicates whether the node is still in use in the workqueue thread.
* @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler
*/
struct timestamp_reg_free_node {
struct list_head free_objects_node;
struct hl_cb *cq_cb;
struct hl_mmap_mem_buf *buf;
atomic_t in_use;
u8 dynamic_alloc;
};
/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
@ -1133,17 +1170,21 @@ struct timestamp_reg_free_node {
* @free_obj: workqueue object to free timestamp registration node objects
* @hdev: pointer to the device structure
* @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
* @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the
* interrupt handler.
*/
struct timestamp_reg_work_obj {
struct work_struct free_obj;
struct hl_device *hdev;
struct list_head *free_obj_head;
struct list_head *dynamic_alloc_free_obj_head;
};
/* struct timestamp_reg_info - holds the timestamp registration related data.
* @buf: pointer to the timestamp buffer which include both user/kernel buffers.
* relevant only when doing timestamps records registration.
* @cq_cb: pointer to CQ counter CB.
* @interrupt: the interrupt whose wait list this node is hanging on.
* @timestamp_kernel_addr: timestamp handle address, where to set timestamp
* relevant only when doing timestamps records
* registration.
@ -1153,17 +1194,18 @@ struct timestamp_reg_work_obj {
* allocating records dynamically.
*/
struct timestamp_reg_info {
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
u64 *timestamp_kernel_addr;
u8 in_use;
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
struct hl_user_interrupt *interrupt;
u64 *timestamp_kernel_addr;
bool in_use;
};
/**
* struct hl_user_pending_interrupt - holds a context to a user thread
* pending on an interrupt
* @ts_reg_info: holds the timestamps registration nodes info
* @wait_list_node: node in the list of user threads pending on an interrupt
* @list_node: node in the list of user threads pending on an interrupt or timestamp
* @fence: hl fence object for interrupt completion
* @cq_target_value: CQ target value
* @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
@ -1171,7 +1213,7 @@ struct timestamp_reg_info {
*/
struct hl_user_pending_interrupt {
struct timestamp_reg_info ts_reg_info;
struct list_head wait_list_node;
struct list_head list_node;
struct hl_fence fence;
u64 cq_target_value;
u64 *cq_kernel_addr;
@ -1370,6 +1412,8 @@ struct dynamic_fw_load_mgr {
* @boot_err0_reg: boot_err0 register address
* @boot_err1_reg: boot_err1 register address
* @wait_for_preboot_timeout: timeout to poll for preboot ready
* @wait_for_preboot_extended_timeout: timeout to poll for preboot ready in cases where we know
* preboot needs a longer time.
*/
struct pre_fw_load_props {
u32 cpu_boot_status_reg;
@ -1378,6 +1422,7 @@ struct pre_fw_load_props {
u32 boot_err0_reg;
u32 boot_err1_reg;
u32 wait_for_preboot_timeout;
u32 wait_for_preboot_extended_timeout;
};
/**
@ -1477,11 +1522,9 @@ struct engines_data {
* @asic_dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
* @asic_dma_unmap_single: unmap a single DMA buffer
* @asic_dma_map_single: map a single buffer to a DMA
* @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
* @dma_unmap_sgtable: DMA unmap scatter-gather table.
* @dma_map_sgtable: DMA map scatter-gather table.
* @cs_parser: parse Command Submission.
* @asic_dma_map_sgtable: DMA map scatter-gather table.
* @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
* @update_eq_ci: update event queue CI.
* @context_switch: called upon ASID context switch.
@ -1602,18 +1645,11 @@ struct hl_asic_funcs {
size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr);
void (*asic_dma_unmap_single)(struct hl_device *hdev,
dma_addr_t dma_addr, int len,
void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev,
void *addr, int len,
enum dma_data_direction dir);
void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
struct sg_table *sgt,
int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
void (*add_end_of_cb_packets)(struct hl_device *hdev,
void *kernel_address, u32 len,
u32 original_len,
@ -1771,16 +1807,19 @@ struct hl_cs_counters_atomic {
* @phys_pg_pack: pointer to physical page pack if the dma-buf was exported
* where virtual memory is supported.
* @memhash_hnode: pointer to the memhash node. this object holds the export count.
* @device_address: physical address of the device's memory. Relevant only
* if phys_pg_pack is NULL (dma-buf was exported from address).
* The total size can be taken from the dmabuf object.
* @offset: the offset into the buffer from which the memory is exported.
* Relevant only if virtual memory is supported and phys_pg_pack is being used.
* @device_phys_addr: physical address of the device's memory. Relevant only
* if phys_pg_pack is NULL (dma-buf was exported from address).
* The total size can be taken from the dmabuf object.
*/
struct hl_dmabuf_priv {
struct dma_buf *dmabuf;
struct hl_ctx *ctx;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_vm_hash_node *memhash_hnode;
uint64_t device_address;
u64 offset;
u64 device_phys_addr;
};
#define HL_CS_OUTCOME_HISTORY_LEN 256
@ -1835,6 +1874,7 @@ struct hl_cs_outcome_store {
* @va_range: holds available virtual addresses for host and dram mappings.
* @mem_hash_lock: protects the mem_hash.
* @hw_block_list_lock: protects the HW block memory list.
* @ts_reg_lock: timestamp registration ioctls lock.
* @debugfs_list: node in debugfs list of contexts.
* @hw_block_mem_list: list of HW block virtual mapped addresses.
* @cs_counters: context command submission counters.
@ -1871,6 +1911,7 @@ struct hl_ctx {
struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
struct mutex mem_hash_lock;
struct mutex hw_block_list_lock;
struct mutex ts_reg_lock;
struct list_head debugfs_list;
struct list_head hw_block_mem_list;
struct hl_cs_counters_atomic cs_counters;
@ -1917,17 +1958,17 @@ struct hl_ctx_mgr {
* @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
*/
struct hl_userptr {
enum vm_type vm_type; /* must be first */
struct list_head job_node;
struct page **pages;
unsigned int npages;
struct sg_table *sgt;
enum dma_data_direction dir;
struct list_head debugfs_list;
pid_t pid;
u64 addr;
u64 size;
u8 dma_mapped;
enum vm_type vm_type; /* must be first */
struct list_head job_node;
struct page **pages;
unsigned int npages;
struct sg_table *sgt;
enum dma_data_direction dir;
struct list_head debugfs_list;
pid_t pid;
u64 addr;
u64 size;
u8 dma_mapped;
};
/**
@ -2148,7 +2189,6 @@ struct hl_vm_hw_block_list_node {
* @pages: the physical page array.
* @npages: num physical pages in the pack.
* @total_size: total size of all the pages in this list.
* @exported_size: buffer exported size.
* @node: used to attach to deletion list that is used when all the allocations are cleared
* at the teardown of the context.
* @mapping_cnt: number of shared mappings.
@ -2165,7 +2205,6 @@ struct hl_vm_phys_pg_pack {
u64 *pages;
u64 npages;
u64 total_size;
u64 exported_size;
struct list_head node;
atomic_t mapping_cnt;
u32 asid;
@ -2250,7 +2289,7 @@ struct hl_notifier_event {
/**
* struct hl_fpriv - process information stored in FD private data.
* @hdev: habanalabs device structure.
* @filp: pointer to the given file structure.
* @file_priv: pointer to the DRM file private data structure.
* @taskpid: current process ID.
* @ctx: current executing context. TODO: remove for multiple ctx per process
* @ctx_mgr: context manager to handle multiple context for this FD.
@ -2265,7 +2304,7 @@ struct hl_notifier_event {
*/
struct hl_fpriv {
struct hl_device *hdev;
struct file *filp;
struct drm_file *file_priv;
struct pid *taskpid;
struct hl_ctx *ctx;
struct hl_ctx_mgr ctx_mgr;
@ -2706,6 +2745,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
usr_intr.type = intr_type; \
INIT_LIST_HEAD(&usr_intr.wait_list_head); \
spin_lock_init(&usr_intr.wait_list_lock); \
INIT_LIST_HEAD(&usr_intr.ts_list_head); \
spin_lock_init(&usr_intr.ts_list_lock); \
})
struct hwmon_chip_info;
@ -3054,6 +3095,20 @@ struct fw_err_info {
bool event_info_available;
};
/**
* struct engine_err_info - engine error information.
* @event: holds information on the event.
* @event_detected: if set to 1, an engine event was discovered for the
* first time after the driver has finished booting-up.
* @event_info_available: indicates that engine event info is now available.
*/
struct engine_err_info {
struct hl_info_engine_err_event event;
atomic_t event_detected;
bool event_info_available;
};
/**
* struct hl_error_info - holds information collected during an error.
* @cs_timeout: CS timeout error information.
@ -3062,6 +3117,7 @@ struct fw_err_info {
* @page_fault_info: page fault information.
* @hw_err: (fatal) hardware error information.
* @fw_err: firmware error information.
* @engine_err: engine error information.
*/
struct hl_error_info {
struct cs_timeout_info cs_timeout;
@ -3070,6 +3126,7 @@ struct hl_error_info {
struct page_fault_info page_fault_info;
struct hw_err_info hw_err;
struct fw_err_info fw_err;
struct engine_err_info engine_err;
};
/**
@ -3117,8 +3174,7 @@ struct hl_reset_info {
* (required only for PCI address match mode)
* @pcie_bar: array of available PCIe bars virtual addresses.
* @rmmio: configuration area address on SRAM.
* @hclass: pointer to the habanalabs class.
* @cdev: related char device.
* @drm: related DRM device.
* @cdev_ctrl: char device for control operations only (INFO IOCTL)
* @dev: related kernel basic device structure.
* @dev_ctrl: related kernel device structure for the control device
@ -3245,8 +3301,7 @@ struct hl_reset_info {
* @rotator_binning: contains mask of rotators engines that is received from the f/w
* which indicates which rotator engines are binned-out(Gaudi3 and above).
* @id: device minor.
* @id_control: minor of the control device.
* @cdev_idx: char device index. Used for setting its name.
* @cdev_idx: char device index.
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
* addresses.
* @is_in_dram_scrub: true if dram scrub operation is on going.
@ -3289,6 +3344,7 @@ struct hl_reset_info {
* device.
* @supports_ctx_switch: true if a ctx switch is required upon first submission.
* @support_preboot_binning: true if we support read binning info from preboot.
* @eq_heartbeat_received: indication that an eq heartbeat event has been received from FW.
* @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing.
* @fw_components: Controls which f/w components to load to the device. There are multiple f/w
* stages and sometimes we want to stop at a certain stage. Used only for testing.
@ -3308,8 +3364,7 @@ struct hl_device {
u64 pcie_bar_phys[HL_PCI_NUM_BARS];
void __iomem *pcie_bar[HL_PCI_NUM_BARS];
void __iomem *rmmio;
struct class *hclass;
struct cdev cdev;
struct drm_device drm;
struct cdev cdev_ctrl;
struct device *dev;
struct device *dev_ctrl;
@ -3418,7 +3473,6 @@ struct hl_device {
u32 device_release_watchdog_timeout_sec;
u32 rotator_binning;
u16 id;
u16 id_control;
u16 cdev_idx;
u16 cpu_pci_msb_addr;
u8 is_in_dram_scrub;
@ -3451,6 +3505,7 @@ struct hl_device {
u8 reset_upon_device_release;
u8 supports_ctx_switch;
u8 support_preboot_binning;
u8 eq_heartbeat_received;
/* Parameters for bring-up to be upstreamed */
u64 nic_ports_mask;
@ -3582,6 +3637,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size,
return false;
}
/**
* to_hl_device() - get the habanalabs device that embeds a DRM device.
* @ddev: pointer to the DRM device, which must be the 'drm' member of a
* struct hl_device.
*
* Return: pointer to the enclosing struct hl_device.
*/
static inline struct hl_device *to_hl_device(struct drm_device *ddev)
{
return container_of(ddev, struct hl_device, drm);
}
/**
* hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
* @address: The start address of the area we want to validate.
@ -3611,8 +3671,13 @@ void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t
dma_addr_t *dma_handle, const char *caller);
void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
const char *caller);
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir, const char *caller);
void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir, const char *caller);
int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar);
@ -3620,7 +3685,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type);
int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
u64 addr, u64 *val, enum debugfs_access_type acc_type);
int hl_device_open(struct inode *inode, struct file *filp);
int hl_mmap(struct file *filp, struct vm_area_struct *vma);
int hl_device_open(struct drm_device *drm, struct drm_file *file_priv);
void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv);
int hl_device_open_ctrl(struct inode *inode, struct file *filp);
bool hl_device_operational(struct hl_device *hdev,
enum hl_device_status *status);
@ -3652,8 +3722,9 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg);
irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);
int hl_asid_init(struct hl_device *hdev);
@ -3944,16 +4015,14 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_
u64 *event_mask);
void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask);
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
#ifdef CONFIG_DEBUG_FS
void hl_debugfs_init(void);
void hl_debugfs_fini(void);
int hl_debugfs_device_init(struct hl_device *hdev);
void hl_debugfs_device_fini(struct hl_device *hdev);
void hl_debugfs_add_device(struct hl_device *hdev);
void hl_debugfs_remove_device(struct hl_device *hdev);
void hl_debugfs_add_file(struct hl_fpriv *hpriv);
void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
void hl_debugfs_add_cb(struct hl_cb *cb);
@ -3972,14 +4041,6 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
#else
static inline void __init hl_debugfs_init(void)
{
}
static inline void hl_debugfs_fini(void)
{
}
static inline int hl_debugfs_device_init(struct hl_device *hdev)
{
return 0;
@ -3993,10 +4054,6 @@ static inline void hl_debugfs_add_device(struct hl_device *hdev)
{
}
static inline void hl_debugfs_remove_device(struct hl_device *hdev)
{
}
static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
}
@ -4108,11 +4165,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset,
const u32 pb_blocks[], u32 blocks_array_size);
/* IOCTLs */
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
#endif /* HABANALABSP_H_ */

View File

@ -14,6 +14,11 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/version.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <drm/drm_ioctl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/habanalabs.h>
@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC);
MODULE_LICENSE("GPL v2");
static int hl_major;
static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);
@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = {
};
MODULE_DEVICE_TABLE(pci, ids);
/*
* Driver ioctl table handed to the DRM core through hl_driver.ioctls.
* Each entry binds one HL_* command to its handler; every entry is
* registered with a flags value of 0.
*/
static const struct drm_ioctl_desc hl_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0),
};
/*
* File operations referenced by hl_driver.fops.
* open, release and the ioctl entry points are the generic accel/DRM
* helpers; only mmap is implemented by this driver (hl_mmap).
*/
static const struct file_operations hl_fops = {
.owner = THIS_MODULE,
.open = accel_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
.compat_ioctl = drm_compat_ioctl,
.llseek = noop_llseek,
.mmap = hl_mmap
};
/*
* DRM driver descriptor, passed to devm_drm_dev_alloc() in create_hdev().
* The device is declared as a compute accelerator (DRIVER_COMPUTE_ACCEL).
*/
static const struct drm_driver hl_driver = {
.driver_features = DRIVER_COMPUTE_ACCEL,
.name = HL_NAME,
.desc = HL_DRIVER_DESC,
/* Driver version fields are filled from the kernel version macros */
.major = LINUX_VERSION_MAJOR,
.minor = LINUX_VERSION_PATCHLEVEL,
.patchlevel = LINUX_VERSION_SUBLEVEL,
.date = "20190505",
.fops = &hl_fops,
/* Per-open-file setup and teardown hooks implemented by this driver */
.open = hl_device_open,
.postclose = hl_device_release,
.ioctls = hl_drm_ioctls,
.num_ioctls = ARRAY_SIZE(hl_drm_ioctls)
};
/*
* get_asic_type - translate device id to asic type
*
@ -123,43 +163,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
}
/*
* hl_device_open - open function for habanalabs device
*
* @inode: pointer to inode structure
* @filp: pointer to file structure
* hl_device_open() - open function for habanalabs device.
* @ddev: pointer to DRM device structure.
* @file_priv: pointer to DRM file private data structure.
*
* Called when process opens an habanalabs device.
*/
int hl_device_open(struct inode *inode, struct file *filp)
int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
{
struct hl_device *hdev = to_hl_device(ddev);
enum hl_device_status status;
struct hl_device *hdev;
struct hl_fpriv *hpriv;
int rc;
mutex_lock(&hl_devs_idr_lock);
hdev = idr_find(&hl_devs_idr, iminor(inode));
mutex_unlock(&hl_devs_idr_lock);
if (!hdev) {
pr_err("Couldn't find device %d:%d\n",
imajor(inode), iminor(inode));
return -ENXIO;
}
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv)
return -ENOMEM;
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock);
mutex_init(&hpriv->restore_phase_mutex);
mutex_init(&hpriv->ctx_lock);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
@ -225,6 +250,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hdev->last_successful_open_jif = jiffies;
hdev->last_successful_open_ktime = ktime_get();
file_priv->driver_priv = hpriv;
hpriv->file_priv = file_priv;
return 0;
out_err:
@ -232,7 +260,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->ctx_lock);
mutex_destroy(&hpriv->restore_phase_mutex);
mutex_destroy(&hpriv->notifier_event.lock);
@ -268,9 +295,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
*/
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock);
nonseekable_open(inode, filp);
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
@ -317,7 +342,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
hdev->major = hl_major;
hdev->hclass = hl_class;
hdev->memory_scrub = memory_scrub;
hdev->reset_on_lockup = reset_on_lockup;
hdev->boot_error_status_mask = boot_error_status_mask;
@ -383,6 +407,31 @@ static int fixup_device_params(struct hl_device *hdev)
return 0;
}
/**
 * allocate_device_id() - reserve an internal ID for a new device.
 * @hdev: pointer to habanalabs device structure.
 *
 * Allocates a slot for @hdev in hl_devs_idr, in the range [0, HL_MAX_MINORS),
 * while holding hl_devs_idr_lock, and records it in hdev->id.
 *
 * Return: 0 on success, -EBUSY if the IDR allocation failed for any reason.
 */
static int allocate_device_id(struct hl_device *hdev)
{
	int dev_id;

	mutex_lock(&hl_devs_idr_lock);
	dev_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
	mutex_unlock(&hl_devs_idr_lock);

	if (dev_id >= 0) {
		hdev->id = dev_id;

		/*
		 * Start with the internal device ID as the char device index.
		 * It is updated later, after the DRM device registration, to
		 * hold the minor ID.
		 */
		hdev->cdev_idx = hdev->id;

		return 0;
	}

	/* Only a full ID space is worth a log message */
	if (dev_id == -ENOSPC)
		pr_err("too many devices in the system\n");

	return -EBUSY;
}
/**
* create_hdev - create habanalabs device instance
*
@ -395,27 +444,29 @@ static int fixup_device_params(struct hl_device *hdev)
*/
static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
{
int main_id, ctrl_id = 0, rc = 0;
struct hl_device *hdev;
int rc;
*dev = NULL;
hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
if (!hdev)
return -ENOMEM;
hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
if (IS_ERR(hdev))
return PTR_ERR(hdev);
hdev->dev = hdev->drm.dev;
/* Will be NULL in case of simulator device */
hdev->pdev = pdev;
/* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
"in reset after device release", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
"in reset after device release", HL_STR_MAX);
/* First, we must find out which ASIC are we handling. This is needed
@ -425,7 +476,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
if (hdev->asic_type == ASIC_INVALID) {
dev_err(&pdev->dev, "Unsupported ASIC\n");
rc = -ENODEV;
goto free_hdev;
goto out_err;
}
copy_kernel_module_params_to_device(hdev);
@ -434,42 +485,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
fixup_device_params(hdev);
mutex_lock(&hl_devs_idr_lock);
/* Always save 2 numbers, 1 for main device and 1 for control.
* They must be consecutive
*/
main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
if (main_id >= 0)
ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
main_id + 2, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if ((main_id < 0) || (ctrl_id < 0)) {
if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
pr_err("too many devices in the system\n");
if (main_id >= 0) {
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, main_id);
mutex_unlock(&hl_devs_idr_lock);
}
rc = -EBUSY;
goto free_hdev;
}
hdev->id = main_id;
hdev->id_control = ctrl_id;
rc = allocate_device_id(hdev);
if (rc)
goto out_err;
*dev = hdev;
return 0;
free_hdev:
kfree(hdev);
out_err:
return rc;
}
@ -484,10 +508,8 @@ static void destroy_hdev(struct hl_device *hdev)
/* Remove device from the device list */
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, hdev->id);
idr_remove(&hl_devs_idr, hdev->id_control);
mutex_unlock(&hl_devs_idr_lock);
kfree(hdev);
}
static int hl_pmops_suspend(struct device *dev)
@ -691,28 +713,16 @@ static int __init hl_init(void)
hl_major = MAJOR(dev);
hl_class = class_create(HL_NAME);
if (IS_ERR(hl_class)) {
pr_err("failed to allocate class\n");
rc = PTR_ERR(hl_class);
goto remove_major;
}
hl_debugfs_init();
rc = pci_register_driver(&hl_pci_driver);
if (rc) {
pr_err("failed to register pci device\n");
goto remove_debugfs;
goto remove_major;
}
pr_debug("driver loaded\n");
return 0;
remove_debugfs:
hl_debugfs_fini();
class_destroy(hl_class);
remove_major:
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
return rc;
@ -725,14 +735,6 @@ static void __exit hl_exit(void)
{
pci_unregister_driver(&hl_pci_driver);
/*
* Removing debugfs must be after all devices or simulator devices
* have been removed because otherwise we get a bug in the
* debugfs module for referencing NULL objects
*/
hl_debugfs_fini();
class_destroy(hl_class);
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
idr_destroy(&hl_devs_idr);

View File

@ -17,6 +17,8 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/msr.h>
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
time_sync.host_time = ktime_get_raw_ns();
time_sync.tsc_time = rdtsc();
return copy_to_user(out, &time_sync,
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
return rc ? -EFAULT : 0;
}
static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
struct hl_device *hdev = hpriv->hdev;
u32 user_buf_size = args->return_size;
struct engine_err_info *info;
int rc;
if (!user_buf)
return -EINVAL;
info = &hdev->captured_err_info.engine_err;
if (!info->event_info_available)
return 0;
if (user_buf_size < sizeof(struct hl_info_engine_err_event))
return -ENOMEM;
rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event));
return rc ? -EFAULT : 0;
}
static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
{
void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_FW_ERR_EVENT:
return fw_err_info(hpriv, args);
case HL_INFO_USER_ENGINE_ERR_EVENT:
return engine_err_info(hpriv, args);
case HL_INFO_DRAM_USAGE:
return dram_usage_info(hpriv, args);
default:
@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
return rc;
}
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
}
static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
{
struct hl_info_args *args = data;
switch (args->op) {
case HL_INFO_GET_EVENTS:
case HL_INFO_UNREGISTER_EVENTFD:
case HL_INFO_REGISTER_EVENTFD:
return -EOPNOTSUPP;
default:
break;
}
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
}
static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_debug_args *args = data;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
struct hl_debug_args *args = data;
enum hl_device_status status;
int rc = 0;
@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
}
#define HL_IOCTL_DEF(ioctl, _func) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
[_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func}
static const struct hl_ioctl_desc hl_ioctls_control[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control)
};
static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev)
static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev)
{
struct hl_fpriv *hpriv = filep->private_data;
unsigned int nr = _IOC_NR(cmd);
char stack_kdata[128] = {0};
char *kdata = NULL;
@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
retcode = -EFAULT;
out_err:
if (retcode)
dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
task_pid_nr(current), cmd, nr);
if (retcode) {
char task_comm[TASK_COMM_LEN];
dev_dbg_ratelimited(dev,
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
}
if (kdata != stack_kdata)
kfree(kdata);
@ -1204,29 +1240,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
return retcode;
}
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
struct hl_device *hdev = hpriv->hdev;
const struct hl_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
if (!hdev) {
pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
return -ENODEV;
}
if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
ioctl = &hl_ioctls[nr];
} else {
dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
}
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
return -ENODEV;
}
if (nr == _IOC_NR(HL_IOCTL_INFO)) {
ioctl = &hl_ioctls_control[nr];
if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
} else {
dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
char task_comm[TASK_COMM_LEN];
dev_dbg_ratelimited(hdev->dev_ctrl,
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl);
}

View File

@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work)
{
struct timestamp_reg_work_obj *job =
container_of(work, struct timestamp_reg_work_obj, free_obj);
struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
struct list_head *free_list_head = job->free_obj_head;
struct hl_device *hdev = job->hdev;
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work)
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
kfree(free_obj);
atomic_set(&free_obj->in_use, 0);
}
kfree(free_list_head);
if (dynamic_alloc_free_list_head) {
list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
free_objects_node) {
dev_dbg(hdev->dev,
"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
free_obj->buf,
free_obj->cq_cb);
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
list_del(&free_obj->free_objects_node);
kfree(free_obj);
}
kfree(dynamic_alloc_free_list_head);
}
kfree(job);
}
@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work)
* list to a dedicated workqueue to do the actual put.
*/
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list, ktime_t now)
struct list_head **free_list,
struct list_head **dynamic_alloc_list,
struct hl_user_interrupt *intr)
{
struct hl_ts_free_jobs *ts_free_jobs_data;
struct timestamp_reg_free_node *free_node;
u32 free_node_index;
u64 timestamp;
ts_free_jobs_data = &intr->ts_free_jobs_data;
free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
if (!(*free_list)) {
/* Alloc/Init the timestamp registration free objects list */
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
INIT_LIST_HEAD(*free_list);
}
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
dev_dbg(hdev->dev,
"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
pend->ts_reg_info.buf,
pend,
intr->interrupt_id);
timestamp = ktime_to_ns(now);
if (!(*dynamic_alloc_list)) {
*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*dynamic_alloc_list))
return -ENOMEM;
INIT_LIST_HEAD(*dynamic_alloc_list);
}
free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
free_node->dynamic_alloc = 1;
}
timestamp = ktime_to_ns(intr->timestamp);
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
pend->ts_reg_info.timestamp_kernel_addr,
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
list_del(&pend->wait_list_node);
/* Mark kernel CB node as free */
pend->ts_reg_info.in_use = 0;
list_del(&pend->list_node);
/* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list.
*/
free_node->buf = pend->ts_reg_info.buf;
free_node->cq_cb = pend->ts_reg_info.cq_cb;
list_add(&free_node->free_objects_node, *free_list);
if (free_node->dynamic_alloc) {
list_add(&free_node->free_objects_node, *dynamic_alloc_list);
} else {
ts_free_jobs_data->next_avail_free_node_idx =
(++free_node_index) % ts_free_jobs_data->free_nodes_length;
list_add(&free_node->free_objects_node, *free_list);
}
/* Mark TS record as free */
pend->ts_reg_info.in_use = false;
return 0;
}
static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr)
static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
struct hl_user_pending_interrupt *pend, *temp_pend;
struct list_head *ts_reg_free_list_head = NULL;
struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false;
unsigned long flags;
int rc;
/* For registration nodes:
@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
* or in irq handler context at all (since release functions are long and
* might sleep), so we will need to handle that part in workqueue context.
* To avoid handling kmalloc failure which compels us rolling back actions
* and move nodes hanged on the free list back to the interrupt wait list
* and move nodes hanged on the free list back to the interrupt ts list
* we always alloc the job of the WQ at the beginning.
*/
job = kmalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
return;
spin_lock(&intr->wait_list_lock);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
spin_lock_irqsave(&intr->ts_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
if (pend->ts_reg_info.buf) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head, intr->timestamp);
if (rc)
reg_node_handle_fail = true;
}
} else {
/* Handle wait target value node */
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head,
&dynamic_alloc_list_head, intr);
if (rc)
reg_node_handle_fail = true;
}
}
}
spin_unlock(&intr->wait_list_lock);
spin_unlock_irqrestore(&intr->ts_list_lock, flags);
if (ts_reg_free_list_head) {
INIT_WORK(&job->free_obj, hl_ts_free_objects);
job->free_obj_head = ts_reg_free_list_head;
job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
job->hdev = hdev;
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
} else {
@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
}
}
/*
 * handle_user_interrupt_wait_list - complete the fences of satisfied waiters
 *
 * @hdev: pointer to the habanalabs device structure (not used by this function)
 * @intr: user interrupt whose wait list is scanned
 *
 * Walks intr->wait_list_head with intr->wait_list_lock held (irqsave variant).
 * A pending entry is completed when it has no CQ counter address at all, or
 * when the value at its CQ counter address has reached cq_target_value.
 * Completed entries get the interrupt timestamp stored in their fence before
 * complete_all() is called on the fence completion. Entries are not removed
 * from the list here.
 */
static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
	struct hl_user_pending_interrupt *waiter, *next_waiter;
	unsigned long irq_flags;

	spin_lock_irqsave(&intr->wait_list_lock, irq_flags);

	list_for_each_entry_safe(waiter, next_waiter, &intr->wait_list_head, list_node) {
		/* A waiter that tracks a CQ counter stays pending until the target is reached */
		if (waiter->cq_kernel_addr && *(waiter->cq_kernel_addr) < waiter->cq_target_value)
			continue;

		/* Handle wait target value node */
		waiter->fence.timestamp = intr->timestamp;
		complete_all(&waiter->fence.completion);
	}

	spin_unlock_irqrestore(&intr->wait_list_lock, irq_flags);
}
static void handle_tpc_interrupt(struct hl_device *hdev)
{
u64 event_mask;
@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev)
}
/**
* hl_irq_handler_user_interrupt - irq handler for user interrupts
* hl_irq_user_interrupt_handler - irq handler for user interrupts.
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*
*/
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
{
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
/* First handle user waiters threads */
handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_wait_list(hdev, user_int);
return IRQ_WAKE_THREAD;
/* Second handle user timestamp registrations */
handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_ts_list(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt_wait_list(hdev, user_int);
break;
default:
break;
}
return IRQ_HANDLED;
}
/**
@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt);
/* Handle user cq interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_TPC:
handle_tpc_interrupt(hdev);
break;
@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
return IRQ_HANDLED;
}
/**
 * hl_irq_eq_error_interrupt_thread_handler - threaded handler for the EQ error interrupt
 *
 * @irq: irq number
 * @arg: pointer to the habanalabs device structure
 *
 * Logs the error and calls hl_device_cond_reset() asking for a hard reset,
 * with a notifier event mask of DEVICE_RESET | DEVICE_UNAVAILABLE.
 * Always returns IRQ_HANDLED.
 */
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	u64 event_mask;

	dev_err(hdev->dev, "EQ error interrupt received\n");

	/* Events passed along with the reset request */
	event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET;
	event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;

	hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);

	return IRQ_HANDLED;
}
/**
* hl_irq_handler_eq - irq handler for event queue
*

View File

@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
*p_userptr = userptr;
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto dma_map_err;
@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
* physical pages
*
* This function does the following:
* - Pin the physical pages related to the given virtual block.
* - Create a physical page pack from the physical pages related to the given
* virtual block.
*/
@ -1532,24 +1531,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
}
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
u64 page_size, u64 exported_size,
u64 page_size, u64 exported_size, u64 offset,
struct device *dev, enum dma_data_direction dir)
{
u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
struct asic_fixed_properties *prop;
int rc, i, j, nents, cur_page;
u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
left_size_in_dma_seg, device_address, bar_address, start_page;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct scatterlist *sg;
unsigned int nents, i;
struct sg_table *sgt;
bool next_sg_entry;
int rc;
prop = &hdev->asic_prop;
dma_max_seg_size = dma_get_max_seg_size(dev);
/* We would like to align the max segment size to PAGE_SIZE, so the
* SGL will contain aligned addresses that can be easily mapped to
* an MMU
*/
dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
/* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE) {
dev_err_ratelimited(hdev->dev,
"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
@ -1561,121 +1556,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
if (!sgt)
return ERR_PTR(-ENOMEM);
/* remove export size restrictions in case not explicitly defined */
cur_size_to_export = exported_size ? exported_size : (npages * page_size);
/* Use the offset to move to the actual first page that is exported */
for (start_page = 0 ; start_page < npages ; ++start_page) {
if (offset < page_size)
break;
/* If the size of each page is larger than the dma max segment size,
* then we can't combine pages and the number of entries in the SGL
* will just be the
* <number of pages> * <chunks of max segment size in each page>
*/
if (page_size > dma_max_seg_size) {
/* we should limit number of pages according to the exported size */
cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
} else {
cur_npages = npages;
/* The offset value was validated so there can't be an underflow */
offset -= page_size;
}
/* Get number of non-contiguous chunks */
for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
if (pages[i - 1] + page_size != pages[i] ||
chunk_size + page_size > dma_max_seg_size) {
nents++;
chunk_size = page_size;
continue;
}
/* Calculate the required number of entries for the SG table */
curr_page = start_page;
nents = 1;
left_size_to_export = exported_size;
left_size_in_page = page_size - offset;
left_size_in_dma_seg = dma_max_seg_size;
next_sg_entry = false;
chunk_size += page_size;
while (true) {
size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
left_size_to_export -= size;
left_size_in_page -= size;
left_size_in_dma_seg -= size;
if (!left_size_to_export)
break;
if (!left_size_in_page) {
/* left_size_to_export is not zero so there must be another page */
if (pages[curr_page] + page_size != pages[curr_page + 1])
next_sg_entry = true;
++curr_page;
left_size_in_page = page_size;
}
if (!left_size_in_dma_seg) {
next_sg_entry = true;
left_size_in_dma_seg = dma_max_seg_size;
}
if (next_sg_entry) {
++nents;
next_sg_entry = false;
}
}
rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
if (rc)
goto error_free;
goto err_free_sgt;
cur_page = 0;
/* Prepare the SG table entries */
curr_page = start_page;
device_address = pages[curr_page] + offset;
left_size_to_export = exported_size;
left_size_in_page = page_size - offset;
left_size_in_dma_seg = dma_max_seg_size;
next_sg_entry = false;
if (page_size > dma_max_seg_size) {
u64 size_left, cur_device_address = 0;
for_each_sgtable_dma_sg(sgt, sg, i) {
bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
chunk_size = 0;
size_left = page_size;
for ( ; curr_page < npages ; ++curr_page) {
size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
chunk_size += size;
left_size_to_export -= size;
left_size_in_page -= size;
left_size_in_dma_seg -= size;
/* Need to split each page into the number of chunks of
* dma_max_seg_size
*/
for_each_sgtable_dma_sg(sgt, sg, i) {
if (size_left == page_size)
cur_device_address =
pages[cur_page] - prop->dram_base_address;
else
cur_device_address += dma_max_seg_size;
if (!left_size_to_export)
break;
/* make sure not to export over exported size */
chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
if (!left_size_in_page) {
/* left_size_to_export is not zero so there must be another page */
if (pages[curr_page] + page_size != pages[curr_page + 1]) {
device_address = pages[curr_page + 1];
next_sg_entry = true;
}
bar_address = hdev->dram_pci_bar_start + cur_device_address;
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
cur_size_to_export -= chunk_size;
if (size_left > dma_max_seg_size) {
size_left -= dma_max_seg_size;
} else {
cur_page++;
size_left = page_size;
}
}
} else {
/* Merge pages and put them into the scatterlist */
for_each_sgtable_dma_sg(sgt, sg, i) {
chunk_size = page_size;
for (j = cur_page + 1 ; j < cur_npages ; j++) {
if (pages[j - 1] + page_size != pages[j] ||
chunk_size + page_size > dma_max_seg_size)
break;
chunk_size += page_size;
left_size_in_page = page_size;
}
bar_address = hdev->dram_pci_bar_start +
(pages[cur_page] - prop->dram_base_address);
if (!left_size_in_dma_seg) {
/*
* Skip setting a new device address if already moving to a page
* which is not contiguous with the current page.
*/
if (!next_sg_entry) {
device_address += chunk_size;
next_sg_entry = true;
}
/* make sure not to export over exported size */
chunk_size = min(chunk_size, cur_size_to_export);
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
left_size_in_dma_seg = dma_max_seg_size;
}
cur_size_to_export -= chunk_size;
cur_page = j;
if (next_sg_entry) {
next_sg_entry = false;
break;
}
}
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto err_unmap;
}
/* Because we are not going to include a CPU list we want to have some
* chance that other users will detect this by setting the orig_nents
* to 0 and using only nents (length of DMA list) when going over the
* sgl
/* There should be nothing left to export exactly after looping over all SG elements */
if (left_size_to_export) {
dev_err(hdev->dev,
"left size to export %#llx after initializing %u SG elements\n",
left_size_to_export, sgt->nents);
rc = -ENOMEM;
goto err_unmap;
}
/*
* Because we are not going to include a CPU list, we want to have some chance that other
* users will detect this when going over SG table, by setting the orig_nents to 0 and using
* only nents (length of DMA list).
*/
sgt->orig_nents = 0;
dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n",
nents, dev_name(dev));
for_each_sgtable_dma_sg(sgt, sg, i)
dev_dbg(hdev->dev,
"SG entry %d: address %#llx, length %#x\n",
i, sg_dma_address(sg), sg_dma_len(sg));
return sgt;
error_unmap:
err_unmap:
for_each_sgtable_dma_sg(sgt, sg, i) {
if (!sg_dma_len(sg))
continue;
dma_unmap_resource(dev, sg_dma_address(sg),
sg_dma_len(sg), dir,
dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
DMA_ATTR_SKIP_CPU_SYNC);
}
sg_free_table(sgt);
error_free:
err_free_sgt:
kfree(sgt);
return ERR_PTR(rc);
}
@ -1700,6 +1723,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
enum dma_data_direction dir)
{
u64 *pages, npages, page_size, exported_size, offset;
struct dma_buf *dma_buf = attachment->dmabuf;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_dmabuf_priv *hl_dmabuf;
@ -1708,30 +1732,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
hl_dmabuf = dma_buf->priv;
hdev = hl_dmabuf->ctx->hdev;
phys_pg_pack = hl_dmabuf->phys_pg_pack;
if (!attachment->peer2peer) {
dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
return ERR_PTR(-EPERM);
}
if (phys_pg_pack)
sgt = alloc_sgt_from_device_pages(hdev,
phys_pg_pack->pages,
phys_pg_pack->npages,
phys_pg_pack->page_size,
phys_pg_pack->exported_size,
attachment->dev,
dir);
else
sgt = alloc_sgt_from_device_pages(hdev,
&hl_dmabuf->device_address,
1,
hl_dmabuf->dmabuf->size,
0,
attachment->dev,
dir);
exported_size = hl_dmabuf->dmabuf->size;
offset = hl_dmabuf->offset;
phys_pg_pack = hl_dmabuf->phys_pg_pack;
if (phys_pg_pack) {
pages = phys_pg_pack->pages;
npages = phys_pg_pack->npages;
page_size = phys_pg_pack->page_size;
} else {
pages = &hl_dmabuf->device_phys_addr;
npages = 1;
page_size = hl_dmabuf->dmabuf->size;
}
sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset,
attachment->dev, dir);
if (IS_ERR(sgt))
dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
@ -1818,7 +1840,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
hl_ctx_put(ctx);
/* Paired with get_file() in export_dmabuf() */
fput(ctx->hpriv->filp);
fput(ctx->hpriv->file_priv->filp);
kfree(hl_dmabuf);
}
@ -1864,7 +1886,7 @@ static int export_dmabuf(struct hl_ctx *ctx,
* released first and only then the compute device.
* Paired with fput() in hl_release_dmabuf().
*/
get_file(ctx->hpriv->filp);
get_file(ctx->hpriv->file_priv->filp);
*dmabuf_fd = fd;
@ -1876,22 +1898,29 @@ static int export_dmabuf(struct hl_ctx *ctx,
return rc;
}
static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
{
if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
if (!PAGE_ALIGNED(addr)) {
dev_dbg(hdev->dev,
"exported device memory address 0x%llx should be aligned to 0x%lx\n",
device_addr, PAGE_SIZE);
"exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
addr, PAGE_SIZE);
return -EINVAL;
}
if (size < PAGE_SIZE) {
if (!size || !PAGE_ALIGNED(size)) {
dev_dbg(hdev->dev,
"exported device memory size %llu should be equal to or greater than %lu\n",
"exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n",
size, PAGE_SIZE);
return -EINVAL;
}
if (!PAGE_ALIGNED(offset)) {
dev_dbg(hdev->dev,
"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
offset, PAGE_SIZE);
return -EINVAL;
}
return 0;
}
@ -1901,13 +1930,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr
u64 bar_address;
int rc;
rc = validate_export_params_common(hdev, device_addr, size);
rc = validate_export_params_common(hdev, device_addr, size, 0);
if (rc)
return rc;
if (device_addr < prop->dram_user_base_address ||
(device_addr + size) > prop->dram_end_address ||
(device_addr + size) < device_addr) {
(device_addr + size) > prop->dram_end_address ||
(device_addr + size) < device_addr) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
device_addr, size);
@ -1934,29 +1963,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
u64 bar_address;
int i, rc;
rc = validate_export_params_common(hdev, device_addr, size);
rc = validate_export_params_common(hdev, device_addr, size, offset);
if (rc)
return rc;
if ((offset + size) > phys_pg_pack->total_size) {
dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
offset, size, phys_pg_pack->total_size);
offset, size, phys_pg_pack->total_size);
return -EINVAL;
}
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
bar_address = hdev->dram_pci_bar_start +
(phys_pg_pack->pages[i] - prop->dram_base_address);
(phys_pg_pack->pages[i] - prop->dram_base_address);
if ((bar_address + phys_pg_pack->page_size) >
(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
(bar_address + phys_pg_pack->page_size) < bar_address) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
phys_pg_pack->pages[i],
phys_pg_pack->page_size);
phys_pg_pack->pages[i], phys_pg_pack->page_size);
return -EINVAL;
}
}
@ -2012,7 +2038,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
struct asic_fixed_properties *prop;
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
u64 export_addr;
int rc;
hdev = ctx->hdev;
@ -2024,8 +2049,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
return -EINVAL;
}
export_addr = addr + offset;
hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
if (!hl_dmabuf)
return -ENOMEM;
@ -2041,20 +2064,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
rc = PTR_ERR(phys_pg_pack);
goto dec_memhash_export_cnt;
}
rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack);
if (rc)
goto dec_memhash_export_cnt;
phys_pg_pack->exported_size = size;
hl_dmabuf->phys_pg_pack = phys_pg_pack;
hl_dmabuf->memhash_hnode = hnode;
hl_dmabuf->offset = offset;
} else {
rc = validate_export_params_no_mmu(hdev, export_addr, size);
rc = validate_export_params_no_mmu(hdev, addr, size);
if (rc)
goto err_free_dmabuf_wrapper;
}
hl_dmabuf->device_address = export_addr;
hl_dmabuf->device_phys_addr = addr;
}
rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
if (rc)
@ -2171,8 +2194,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in
return 0;
}
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
enum hl_device_status status;
union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev;
@ -2420,7 +2444,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped)
hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages);

View File

@ -63,6 +63,10 @@
#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
u64 addr, size, val = hdev->memory_scrub_val;
ktime_t timeout;
int rc = 0;
@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
return rc;
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.asic_dma_pool_free = gaudi_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = gaudi_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
.update_eq_ci = gaudi_update_eq_ci,
.context_switch = gaudi_context_switch,

View File

@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi/gaudi_packets.h"
#include "../include/gaudi/gaudi.h"
#include "../include/gaudi/gaudi_async_events.h"

View File

@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev,
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x20);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + 0x304);
val |= 0x1000;
WREG32(base_reg + 0x304, val);
@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);

View File

@ -66,7 +66,6 @@
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
"sbte_prtn_intr_4",
};
/*
 * Printable names of the MME SBTE error causes. The table holds
 * GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE entries, named "i0" through "i4".
 * NOTE(review): presumably indexed by the cause bit number - confirm against
 * the code that consumes this table.
 */
static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
"i0",
"i1",
"i2",
"i3",
"i4",
};
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
"WBC ERR RESP_0",
"WBC ERR RESP_1",
@ -993,6 +984,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
"TLP is blocked by RR"
};
/*
 * Lookup table from a GAUDI2 queue ID to the engine ID it is associated with.
 *
 * Every entry is a range designator covering an engine's *_0 through *_3
 * queue IDs, so all queue IDs in that range resolve to the same engine ID.
 * The table covers the PDMA, EDMA, MME, TPC, NIC and ROT queues. For the NIC
 * queues, queue NIC_<2k> maps to engine NIC<k>_0 and NIC_<2k+1> to NIC<k>_1.
 *
 * The array has no explicit size; it extends up to the highest designated
 * index (ROT_1_3 in the listed order - TODO confirm it is the largest queue
 * ID). Any index inside the array that is not named by a designator is
 * zero-initialized.
 */
static const int gaudi2_queue_id_to_engine_id[] = {
[GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
[GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
[GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
GAUDI2_DCORE0_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
GAUDI2_DCORE1_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
GAUDI2_DCORE2_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
GAUDI2_DCORE3_ENGINE_ID_MME,
[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
GAUDI2_DCORE0_ENGINE_ID_TPC_6,
[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
GAUDI2_DCORE1_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
GAUDI2_DCORE2_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_0,
[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_1,
[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_2,
[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_3,
[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_4,
[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
GAUDI2_DCORE3_ENGINE_ID_TPC_5,
[GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
[GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
[GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
[GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
[GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
[GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
[GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
[GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
[GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
[GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
[GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
[GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
[GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
[GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
[GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
[GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
[GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
[GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
[GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
[GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
[GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
[GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
[GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
[GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
[GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
};
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
@ -2001,7 +2097,8 @@ enum razwi_event_sources {
RAZWI_PDMA,
RAZWI_NIC,
RAZWI_DEC,
RAZWI_ROT
RAZWI_ROT,
RAZWI_ARC_FARM
};
struct hbm_mc_error_causes {
@ -2431,7 +2528,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
prop->mme_master_slave_mode = 1;
@ -2884,7 +2981,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
}
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
/* Overwrite binning masks with the actual binning values from F/W */
hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
@ -4077,6 +4175,8 @@ static const char *gaudi2_irq_name(u16 irq_number)
return "gaudi2 unexpected error";
case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
return "gaudi2 user completion";
case GAUDI2_IRQ_NUM_EQ_ERROR:
return "gaudi2 eq error";
default:
return "invalid";
}
@ -4127,9 +4227,7 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev)
rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
gaudi2_irq_name(i), (void *) dec);
} else {
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(i),
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
(void *) &hdev->user_interrupt[dec->core_id]);
}
@ -4187,17 +4285,17 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
&hdev->tpc_interrupt);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_dec_irq;
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
&hdev->unexpected_error_interrupt);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
@ -4209,16 +4307,23 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
i++, j++, user_irq_init_cnt++) {
irq = pci_irq_vector(hdev->pdev, i);
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
gaudi2_irq_name(i), &hdev->user_interrupt[j]);
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
&hdev->user_interrupt[j]);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_user_irq;
}
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
hdev);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_user_irq;
}
gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
return 0;
@ -4278,6 +4383,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
}
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
}
static void gaudi2_disable_msix(struct hl_device *hdev)
@ -4314,6 +4420,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
free_irq(irq, cq);
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
free_irq(irq, hdev);
pci_free_irq_vectors(hdev->pdev);
gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
@ -4716,6 +4825,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
pre_fw_load->wait_for_preboot_extended_timeout =
GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
}
static void gaudi2_init_firmware_loader(struct hl_device *hdev)
@ -6157,17 +6268,14 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_
static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
u32 poll_timeout_us)
{
struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
int rc = 0;
int rc;
if (!driver_performs_reset) {
if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
/* set SP to indicate reset request sent to FW */
if (dyn_regs->cpu_rst_status)
WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
else
WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
/* wait for f/w response */
@ -6623,24 +6731,6 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
/*
 * gaudi2_dma_map_single() - DMA-map a single buffer on the device's PCI device.
 * @hdev: habanalabs device structure; the mapping is done on &hdev->pdev->dev.
 * @addr: address of the buffer to map.
 * @len: length of the buffer.
 * @dir: DMA data direction of the mapping.
 *
 * Return: the DMA address returned by dma_map_single(), or 0 when
 * dma_mapping_error() reports that the mapping failed.
 */
static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
					enum dma_data_direction dir)
{
	dma_addr_t handle = dma_map_single(&hdev->pdev->dev, addr, len, dir);

	/* A failed mapping is reported to the caller as address 0 */
	return unlikely(dma_mapping_error(&hdev->pdev->dev, handle)) ? 0 : handle;
}
/*
 * gaudi2_dma_unmap_single() - undo a single-buffer DMA mapping.
 * @hdev: habanalabs device structure; the unmap is done on &hdev->pdev->dev.
 * @addr: DMA address to unmap.
 * @len: length that is passed through to dma_unmap_single().
 * @dir: DMA data direction that is passed through to dma_unmap_single().
 *
 * Thin wrapper that forwards all arguments to dma_unmap_single().
 */
static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
enum dma_data_direction dir)
{
dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
}
static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
{
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
@ -7703,11 +7793,13 @@ static inline bool is_info_event(u32 event)
switch (event) {
case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
/* return in case of NIC status event - these events are received periodically and not as
 * an indication of an error.
*/
case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
return true;
default:
return false;
@ -7739,21 +7831,34 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
struct hl_eq_ecc_data *ecc_data)
{
u64 ecc_address = 0, ecc_syndrom = 0;
u64 ecc_address = 0, ecc_syndrome = 0;
u8 memory_wrapper_idx = 0;
bool has_block_id = false;
u16 block_id;
if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
has_block_id = true;
ecc_address = le64_to_cpu(ecc_data->ecc_address);
ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
memory_wrapper_idx = ecc_data->memory_wrapper_idx;
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
if (has_block_id) {
block_id = le16_to_cpu(ecc_data->block_id);
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
ecc_data->is_critical);
} else {
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
}
return !!ecc_data->is_critical;
}
static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
{
u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
u64 cq_ptr, arc_cq_ptr, cp_current_inst;
@ -7775,10 +7880,22 @@ static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
dev_info(hdev->dev,
"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
if (arc_cq_ptr) {
hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
} else {
hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
}
hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
}
}
static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
u64 qman_base, u32 qid_base)
u64 qman_base, u32 qid_base, u64 *event_mask)
{
u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
u64 glbl_sts_addr, arb_err_addr;
@ -7812,8 +7929,22 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
error_count++;
}
if (i == QMAN_STREAMS)
print_lower_qman_data_on_err(hdev, qman_base);
if (i == QMAN_STREAMS && error_count) {
/* check for undefined opcode */
if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
hdev->captured_err_info.undef_opcode.write_enable) {
memset(&hdev->captured_err_info.undef_opcode, 0,
sizeof(hdev->captured_err_info.undef_opcode));
hdev->captured_err_info.undef_opcode.write_enable = false;
hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
hdev->captured_err_info.undef_opcode.engine_id =
gaudi2_queue_id_to_engine_id[qid_base];
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
}
handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
}
}
arb_err_val = RREG32(arb_err_addr);
@ -7927,6 +8058,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
case RAZWI_ROT:
return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
case RAZWI_ARC_FARM:
return GAUDI2_ENGINE_ID_ARC_FARM;
default:
return GAUDI2_ENGINE_ID_SIZE;
}
@ -8036,6 +8170,11 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "ROT_%u", module_idx);
break;
case RAZWI_ARC_FARM:
lbw_rtr_id = DCORE1_RTR5;
hbw_rtr_id = DCORE1_RTR7;
sprintf(initiator_name, "ARC_FARM_%u", module_idx);
break;
default:
return;
}
@ -8149,11 +8288,11 @@ static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u
eng_id[num_of_eng] = razwi_info[i].eng_id;
base[num_of_eng] = razwi_info[i].rtr_ctrl;
if (!num_of_eng)
str_size += snprintf(eng_name + str_size,
str_size += scnprintf(eng_name + str_size,
PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
razwi_info[i].eng_name);
else
str_size += snprintf(eng_name + str_size,
str_size += scnprintf(eng_name + str_size,
PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
razwi_info[i].eng_name);
num_of_eng++;
@ -8475,7 +8614,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
return 0;
}
error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
qid_base, event_mask);
/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
@ -8488,7 +8628,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
return error_count;
}
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
@ -8510,6 +8650,7 @@ static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type
sts_clr_val);
}
gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
hl_check_for_glbl_errors(hdev);
return error_count;
@ -8649,21 +8790,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
return error_count;
}
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
u64 intr_cause_data)
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
{
int i, error_count = 0;
for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
if (intr_cause_data & BIT(i)) {
gaudi2_print_event(hdev, event_type, true,
"err cause: %s", guadi2_mme_sbte_error_cause[i]);
error_count++;
}
/*
* We have a single error cause here but the report mechanism is
* buggy. Hence there is no good reason to fetch the cause so we
* just check for glbl_errors and exit.
*/
hl_check_for_glbl_errors(hdev);
return error_count;
return GAUDI2_NA_EVENT_CAUSE;
}
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
@ -9460,6 +9596,176 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
}
}
/*
 * event_id_to_engine_id() - translate an async event ID to an engine ID.
 * @hdev: habanalabs device structure (not referenced in the function body).
 * @event_type: async event ID (GAUDI2_EVENT_*).
 *
 * The first switch handles events in two ways:
 * - KDMA, PDMA, NIC, ROTATOR and HDMA events return their engine ID directly.
 * - TPC, MME and DEC events are reduced to a block type plus a block index.
 * The second switch converts that (type, index) pair to the engine ID.
 *
 * Return: the matching engine ID, or U16_MAX when the event is not mapped to
 * any engine.
 */
static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
{
enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
/* index is assigned in every case that also assigns type, so it is never read unset */
u16 index;
switch (event_type) {
case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
type = GAUDI2_BLOCK_TYPE_TPC;
break;
case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
index = event_type - GAUDI2_EVENT_TPC0_QM;
type = GAUDI2_BLOCK_TYPE_TPC;
break;
case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME0_QM:
index = 0;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME1_QM:
index = 1;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME2_QM:
index = 2;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
case GAUDI2_EVENT_MME3_QM:
index = 3;
type = GAUDI2_BLOCK_TYPE_MME;
break;
case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
case GAUDI2_EVENT_KDMA_BM_SPMU:
case GAUDI2_EVENT_KDMA0_CORE:
return GAUDI2_ENGINE_ID_KDMA;
case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
case GAUDI2_EVENT_PDMA0_CORE:
case GAUDI2_EVENT_PDMA0_BM_SPMU:
case GAUDI2_EVENT_PDMA0_QM:
return GAUDI2_ENGINE_ID_PDMA_0;
case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
case GAUDI2_EVENT_PDMA1_CORE:
case GAUDI2_EVENT_PDMA1_BM_SPMU:
case GAUDI2_EVENT_PDMA1_QM:
return GAUDI2_ENGINE_ID_PDMA_1;
case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
type = GAUDI2_BLOCK_TYPE_DEC;
break;
case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
/* presumably two consecutive event IDs per decoder in this range - TODO confirm */
index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
type = GAUDI2_BLOCK_TYPE_DEC;
break;
case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
/* the offset is doubled; presumably one event per NIC and two engine IDs per NIC - TODO confirm */
index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
/* the offset within the QM event range is used directly as the engine ID offset */
index = event_type - GAUDI2_EVENT_NIC0_QM0;
return GAUDI2_ENGINE_ID_NIC0_0 + index;
case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
/*
 * NOTE(review): unlike the TPC and DEC BMON ranges, the offset is not halved
 * before it is doubled. If this range holds two event IDs per NIC, the result
 * would go past the NIC engine IDs - confirm against the event enum.
 */
index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
/* presumably two consecutive event IDs per TPC in this range - TODO confirm */
index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
type = GAUDI2_BLOCK_TYPE_TPC;
break;
case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
return GAUDI2_ENGINE_ID_ROT_0;
case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
return GAUDI2_ENGINE_ID_ROT_1;
case GAUDI2_EVENT_HDMA0_BM_SPMU:
case GAUDI2_EVENT_HDMA0_QM:
case GAUDI2_EVENT_HDMA0_CORE:
return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA1_BM_SPMU:
case GAUDI2_EVENT_HDMA1_QM:
case GAUDI2_EVENT_HDMA1_CORE:
return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
case GAUDI2_EVENT_HDMA2_BM_SPMU:
case GAUDI2_EVENT_HDMA2_QM:
case GAUDI2_EVENT_HDMA2_CORE:
return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA3_BM_SPMU:
case GAUDI2_EVENT_HDMA3_QM:
case GAUDI2_EVENT_HDMA3_CORE:
return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
case GAUDI2_EVENT_HDMA4_BM_SPMU:
case GAUDI2_EVENT_HDMA4_QM:
case GAUDI2_EVENT_HDMA4_CORE:
return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA5_BM_SPMU:
case GAUDI2_EVENT_HDMA5_QM:
case GAUDI2_EVENT_HDMA5_CORE:
return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
case GAUDI2_EVENT_HDMA6_BM_SPMU:
case GAUDI2_EVENT_HDMA6_QM:
case GAUDI2_EVENT_HDMA6_CORE:
return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
case GAUDI2_EVENT_HDMA7_BM_SPMU:
case GAUDI2_EVENT_HDMA7_QM:
case GAUDI2_EVENT_HDMA7_CORE:
return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
default:
/* type stays GAUDI2_BLOCK_TYPE_MAX, so the switch below falls to U16_MAX */
break;
}
/* Convert the block type and block index to the engine ID */
switch (type) {
case GAUDI2_BLOCK_TYPE_TPC:
switch (index) {
case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
default:
/* a TPC index outside the four DCORE ranges above has no engine ID here */
break;
}
break;
case GAUDI2_BLOCK_TYPE_MME:
switch (index) {
case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
default:
break;
}
break;
case GAUDI2_BLOCK_TYPE_DEC:
switch (index) {
case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
default:
break;
}
break;
default:
break;
}
/* No engine is associated with this event */
return U16_MAX;
}
/*
 * hl_eq_heartbeat_event_handle() - record that an EQ heartbeat event arrived.
 * @hdev: habanalabs device structure.
 *
 * Only sets hdev->eq_heartbeat_received; nothing in this function clears the flag.
 */
static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
{
hdev->eq_heartbeat_received = true;
}
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
@ -9501,7 +9807,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
break;
case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
@ -9724,8 +10030,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
@ -9875,6 +10180,21 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
is_critical = true;
break;
case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
error_count = GAUDI2_NA_EVENT_CAUSE;
dev_info_ratelimited(hdev->dev, "%s event received\n",
gaudi2_irq_map_table[event_type].name);
break;
case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
hl_eq_heartbeat_event_handle(hdev);
error_count = GAUDI2_NA_EVENT_CAUSE;
break;
default:
if (gaudi2_irq_map_table[event_type].valid) {
dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
@ -9883,6 +10203,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
}
}
if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
/* Make sure to dump an error in case no error cause was printed so far.
* Note that although we have counted the errors, we use this number as
* a boolean.
@ -10523,6 +10846,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
int rc;
if (ctx->asid == HL_KERNEL_ASID_ID)
return 0;
rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
if (rc)
return rc;
@ -11014,6 +11340,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64
/*
 * gaudi2_get_msi_info() - fill the MSI info table entries used by Gaudi2.
 * @table: array of little-endian 32-bit entries, indexed by CPUCP_*_MSI_TYPE.
 *
 * Writes two entries, each converted with cpu_to_le32():
 * - CPUCP_EVENT_QUEUE_MSI_TYPE gets GAUDI2_EVENT_QUEUE_MSIX_IDX.
 * - CPUCP_EVENT_QUEUE_ERR_MSI_TYPE gets GAUDI2_IRQ_NUM_EQ_ERROR.
 * Other entries of the table are left untouched.
 */
static void gaudi2_get_msi_info(__le32 *table)
{
table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
}
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
@ -11170,11 +11497,9 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.asic_dma_pool_free = gaudi2_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
.asic_dma_unmap_single = gaudi2_dma_unmap_single,
.asic_dma_map_single = gaudi2_dma_map_single,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = gaudi2_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = NULL,
.update_eq_ci = gaudi2_update_eq_ci,
.context_switch = gaudi2_context_switch,

View File

@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi2/gaudi2.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h"
@ -84,6 +84,7 @@
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */
#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000 /* 85s */
#define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */
@ -419,6 +420,7 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_EQ_ERROR,
GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,

View File

@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = {
[GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE,
[GAUDI2_STM_PCIE] = mmPCIE_STM_BASE,
[GAUDI2_STM_PSOC] = mmPSOC_STM_BASE,
[GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE,
[GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE,
[GAUDI2_STM_PSOC_ARC0_CS] = 0,
[GAUDI2_STM_PSOC_ARC1_CS] = 0,
[GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE,
[GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE,
[GAUDI2_STM_CPU] = mmCPU_STM_BASE,
@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = {
[GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE,
[GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE,
[GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC0_CS] = 0,
[GAUDI2_ETF_PSOC_ARC1_CS] = 0,
[GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE,
[GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE,
[GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE,
@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = {
[GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE,
[GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC0] = 0,
[GAUDI2_FUNNEL_PSOC_ARC1] = 0,
[GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE,
[GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
[GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE,
@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = {
[GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
[GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
[GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
[GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE,
[GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE,
[GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE,
[GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE,
[GAUDI2_BMON_PSOC_ARC0_0] = 0,
[GAUDI2_BMON_PSOC_ARC0_1] = 0,
[GAUDI2_BMON_PSOC_ARC1_0] = 0,
[GAUDI2_BMON_PSOC_ARC1_1] = 0,
[GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE,
[GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE,
[GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE,
@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = {
[GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE,
[GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE,
[GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC0_CS] = 0,
[GAUDI2_SPMU_PSOC_ARC1_CS] = 0,
[GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE,
[GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE,
[GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE,
@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (rc)
return -EIO;
val = RREG32(base_reg + mmETF_CTL_OFFSET);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + mmETF_FFCR_OFFSET);
val |= 0x1000;
WREG32(base_reg + mmETF_FFCR_OFFSET, val);
@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (!input)
return -EINVAL;
val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2;
if (val) {
val = ffs(val);
WREG32(base_reg + mmETF_PSCR_OFFSET, val);
} else {
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
}
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC);
WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode);
WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001);
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
WREG32(base_reg + mmETF_CTL_OFFSET, 1);
} else {
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0);
@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx,
if (rc)
return -EIO;
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);
@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa
* set enabled events mask based on input->event_types_num
*/
event_mask = 0x80000000;
event_mask |= GENMASK(input->event_types_num, 0);
if (input->event_types_num)
event_mask |= GENMASK(input->event_types_num - 1, 0);
WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask);
} else {

View File

@ -1601,6 +1601,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = {
mmDCORE0_TPC0_CFG_KERNEL_SRF_30,
mmDCORE0_TPC0_CFG_KERNEL_SRF_31,
mmDCORE0_TPC0_CFG_TPC_SB_L0CD,
mmDCORE0_TPC0_CFG_TPC_COUNT,
mmDCORE0_TPC0_CFG_TPC_ID,
mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC,
mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0,
@ -2907,7 +2908,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
* - range 11: NIC11_CFG + *_DBG (not including TPC_DBG)
*
* If F/W security is not enabled:
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP)
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF)
*/
u64 lbw_range_min_short[] = {
mmNIC0_TX_AXUSER_BASE,
@ -2923,7 +2924,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
mmNIC10_TX_AXUSER_BASE,
mmNIC11_TX_AXUSER_BASE,
mmPSOC_I2C_M0_BASE,
mmPSOC_EFUSE_BASE
mmPSOC_GPIO0_BASE
};
u64 lbw_range_max_short[] = {
mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE,
@ -3219,6 +3220,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev)
*/
static int gaudi2_init_protection_bits(struct hl_device *hdev)
{
u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg;
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 instance_offset;
int rc = 0;
@ -3389,11 +3391,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
/* PSOC.
* Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are
* protected by privileged RR.
* For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine
* cores to raise events towards F/W.
*/
engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE);
if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 &&
engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) {
user_regs_array = &engine_core_intr_reg;
user_regs_array_size = 1;
} else {
dev_err(hdev->dev,
"Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n",
engine_core_intr_reg);
}
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf),
NULL, HL_PB_NA);
user_regs_array, user_regs_array_size);
if (!hdev->asic_prop.fw_security_enabled)
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,

View File

@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
}
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
return 0;
@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = {
.asic_dma_pool_free = goya_dma_pool_free,
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = goya_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = goya_add_end_of_cb_packets,
.update_eq_ci = goya_update_eq_ci,
.context_switch = goya_context_switch,

View File

@ -9,8 +9,8 @@
#define GOYAP_H_
#include <uapi/drm/habanalabs_accel.h>
#include <linux/habanalabs/hl_boot_if.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include "../include/goya/goya_packets.h"
#include "../include/goya/goya.h"
#include "../include/goya/goya_async_events.h"

View File

@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev,
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x20);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + 0x304);
val |= 0x1000;
WREG32(base_reg + 0x304, val);
@ -386,6 +391,11 @@ static int goya_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);

View File

@ -44,38 +44,6 @@ struct eq_nic_sei_event {
__u8 pad[6];
};
/*
* struct gaudi_nic_status - describes the status of a NIC port.
* @port: NIC port index.
* @bad_format_cnt: e.g. CRC.
* @responder_out_of_sequence_psn_cnt: e.g. NAK.
* @high_ber_reinit: link reinit due to high BER.
* @correctable_err_cnt: e.g. bit-flip.
* @uncorrectable_err_cnt: e.g. MAC errors.
* @retraining_cnt: re-training counter.
* @up: is port up.
* @pcs_link: has PCS link.
* @phy_ready: is PHY ready.
* @auto_neg: is Autoneg enabled.
* @timeout_retransmission_cnt: timeout retransmission events.
* @high_ber_cnt: high BER events.
*/
struct gaudi_nic_status {
__u32 port;
__u32 bad_format_cnt;
__u32 responder_out_of_sequence_psn_cnt;
__u32 high_ber_reinit;
__u32 correctable_err_cnt;
__u32 uncorrectable_err_cnt;
__u32 retraining_cnt;
__u8 up;
__u8 pcs_link;
__u8 phy_ready;
__u8 auto_neg;
__u32 timeout_retransmission_cnt;
__u32 high_ber_cnt;
};
struct gaudi_cold_rst_data {
union {
struct {

View File

@ -959,6 +959,13 @@ enum gaudi2_async_event_id {
GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322,
GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323,
GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324,
GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325,
GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326,
GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327,
GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328,
GAUDI2_EVENT_SIZE,
};

View File

@ -1293,7 +1293,7 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
.name = "" },
{ .fc_id = 631, .cpu_id = 128, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "PCIE_P2P_MSIX" },
{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
.name = "PCIE_DRAIN_COMPLETE" },
{ .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
.name = "TPC0_BMON_SPMU" },
@ -2673,6 +2673,20 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
.name = "FP32_NOT_SUPPORTED" },
{ .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD,
.name = "DEV_RESET_REQ" },
{ .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_BRK_ENTRY" },
{ .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_BRK_EXT" },
{ .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE0" },
{ .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE1" },
{ .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE2" },
{ .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_PWR_RD_MODE3" },
{ .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
.name = "ARC_EQ_HEARTBEAT" },
};
#endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */

View File

@ -2,7 +2,6 @@
# Copyright (C) 2023 Intel Corporation
intel_vpu-y := \
ivpu_debugfs.o \
ivpu_drv.o \
ivpu_fw.o \
ivpu_fw_log.o \
@ -16,4 +15,6 @@ intel_vpu-y := \
ivpu_mmu_context.o \
ivpu_pm.o
intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o

View File

@ -1,11 +0,0 @@
- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler()
- Implement support for BLOB IDs
- Add debugfs support to improve debugging and testing
- Add tracing events for performance debugging
- Implement HW based scheduling support
- Use syncobjs for submit/sync
- Refactor IPC protocol to improve message latency
- Implement BO cache and MADVISE IOCTL
- Add support for user allocated buffers using prime import and dma-buf heaps
- Refactor struct ivpu_bo to use struct drm_gem_shmem_object
- Add driver/device documentation

View File

@ -17,20 +17,26 @@
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
/*
 * Map a debugfs seq_file to its ivpu device: @s->private is read as a
 * struct drm_debugfs_entry and that entry's ->dev is handed to
 * to_ivpu_device().
 */
static inline struct ivpu_device *seq_to_ivpu(struct seq_file *s)
{
	struct drm_debugfs_entry *dbg_entry;

	dbg_entry = s->private;

	return to_ivpu_device(dbg_entry->dev);
}
static int bo_list_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct drm_printer p = drm_seq_file_printer(s);
struct ivpu_device *vdev = seq_to_ivpu(s);
ivpu_bo_list(node->minor->dev, &p);
ivpu_bo_list(&vdev->drm, &p);
return 0;
}
static int fw_name_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%s\n", vdev->fw->name);
return 0;
@ -38,8 +44,7 @@ static int fw_name_show(struct seq_file *s, void *v)
static int fw_trace_capability_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
u64 trace_hw_component_mask;
u32 trace_destination_mask;
int ret;
@ -57,8 +62,7 @@ static int fw_trace_capability_show(struct seq_file *s, void *v)
static int fw_trace_config_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
/**
* WA: VPU_JSM_MSG_TRACE_GET_CONFIG command is not working yet,
* so we use values from vdev->fw instead of calling ivpu_jsm_trace_get_config()
@ -78,8 +82,7 @@ static int fw_trace_config_show(struct seq_file *s, void *v)
static int last_bootmode_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%s\n", (vdev->pm->is_warmboot) ? "warmboot" : "coldboot");
@ -88,8 +91,7 @@ static int last_bootmode_show(struct seq_file *s, void *v)
static int reset_counter_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_counter));
return 0;
@ -97,14 +99,13 @@ static int reset_counter_show(struct seq_file *s, void *v)
static int reset_pending_show(struct seq_file *s, void *v)
{
struct drm_info_node *node = (struct drm_info_node *)s->private;
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
struct ivpu_device *vdev = seq_to_ivpu(s);
seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
return 0;
}
static const struct drm_info_list vdev_debugfs_list[] = {
static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"bo_list", bo_list_show, 0},
{"fw_name", fw_name_show, 0},
{"fw_trace_capability", fw_trace_capability_show, 0},
@ -270,25 +271,24 @@ static const struct file_operations ivpu_reset_engine_fops = {
.write = ivpu_reset_engine_fn,
};
void ivpu_debugfs_init(struct drm_minor *minor)
void ivpu_debugfs_init(struct ivpu_device *vdev)
{
struct ivpu_device *vdev = to_ivpu_device(minor->dev);
struct dentry *debugfs_root = vdev->drm.debugfs_root;
drm_debugfs_create_files(vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list),
minor->debugfs_root, minor);
drm_debugfs_add_files(&vdev->drm, vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list));
debugfs_create_file("force_recovery", 0200, minor->debugfs_root, vdev,
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
&ivpu_force_recovery_fops);
debugfs_create_file("fw_log", 0644, minor->debugfs_root, vdev,
debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
&fw_log_fops);
debugfs_create_file("fw_trace_destination_mask", 0200, minor->debugfs_root, vdev,
debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
&fw_trace_destination_mask_fops);
debugfs_create_file("fw_trace_hw_comp_mask", 0200, minor->debugfs_root, vdev,
debugfs_create_file("fw_trace_hw_comp_mask", 0200, debugfs_root, vdev,
&fw_trace_hw_comp_mask_fops);
debugfs_create_file("fw_trace_level", 0200, minor->debugfs_root, vdev,
debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev,
&fw_trace_level_fops);
debugfs_create_file("reset_engine", 0200, minor->debugfs_root, vdev,
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
}

View File

@ -6,8 +6,12 @@
#ifndef __IVPU_DEBUGFS_H__
#define __IVPU_DEBUGFS_H__
struct drm_minor;
struct ivpu_device;
void ivpu_debugfs_init(struct drm_minor *minor);
#if defined(CONFIG_DEBUG_FS)
void ivpu_debugfs_init(struct ivpu_device *vdev);
#else
static inline void ivpu_debugfs_init(struct ivpu_device *vdev) { }
#endif
#endif /* __IVPU_DEBUGFS_H__ */

View File

@ -131,6 +131,22 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
return 0;
}
/*
 * ivpu_get_core_clock_rate() - store the core clock rate in @clk_rate.
 * @vdev: device to query.
 * @clk_rate: output; written only when the function returns 0.
 *
 * The result of ivpu_rpm_get_if_active() selects the path:
 *  - negative: returned to the caller unchanged, @clk_rate is not written;
 *  - zero: @clk_rate is set to 0 and no register read is attempted
 *    (presumably the device is not runtime-active - confirm against
 *    ivpu_rpm_get_if_active());
 *  - positive: @clk_rate is set from ivpu_hw_reg_pll_freq_get() and the
 *    reference is released afterwards with ivpu_rpm_put().
 *
 * Return: 0 on success, or the negative value from ivpu_rpm_get_if_active().
 */
static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
{
	int rpm_state = ivpu_rpm_get_if_active(vdev);

	if (rpm_state < 0)
		return rpm_state;

	if (rpm_state == 0) {
		*clk_rate = 0;
		return 0;
	}

	/* Read the rate first; only then drop the reference taken above. */
	*clk_rate = ivpu_hw_reg_pll_freq_get(vdev);
	ivpu_rpm_put(vdev);

	return 0;
}
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
@ -154,7 +170,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
args->value = ivpu_hw_reg_pll_freq_get(vdev);
ret = ivpu_get_core_clock_rate(vdev, &args->value);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@ -400,10 +416,6 @@ static const struct drm_driver driver = {
.postclose = ivpu_postclose,
.gem_prime_import = ivpu_gem_prime_import,
#if defined(CONFIG_DEBUG_FS)
.debugfs_init = ivpu_debugfs_init,
#endif
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
.fops = &ivpu_fops,
@ -523,78 +535,52 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
ret = ivpu_pci_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
ret = ivpu_irq_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
/* Init basic HW info based on buttress registers which are accessible before power up */
ret = ivpu_hw_info_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
if (ret)
goto err_xa_destroy;
}
ret = ivpu_mmu_global_context_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret);
if (ret)
goto err_power_down;
}
ret = ivpu_mmu_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret);
if (ret)
goto err_mmu_gctx_fini;
ret = ivpu_mmu_reserved_context_init(vdev);
if (ret)
goto err_mmu_gctx_fini;
}
ret = ivpu_fw_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret);
goto err_mmu_gctx_fini;
}
if (ret)
goto err_mmu_rctx_fini;
ret = ivpu_ipc_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret);
if (ret)
goto err_fw_fini;
}
ret = ivpu_pm_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize PM: %d\n", ret);
goto err_ipc_fini;
}
ivpu_pm_init(vdev);
ret = ivpu_job_done_thread_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
if (ret)
goto err_ipc_fini;
}
ret = ivpu_fw_load(vdev);
if (ret) {
ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
goto err_job_done_thread_fini;
}
ret = ivpu_boot(vdev);
if (ret) {
ivpu_err(vdev, "Failed to boot: %d\n", ret);
if (ret)
goto err_job_done_thread_fini;
}
ivpu_pm_enable(vdev);
@ -606,6 +592,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
ivpu_ipc_fini(vdev);
err_fw_fini:
ivpu_fw_fini(vdev);
err_mmu_rctx_fini:
ivpu_mmu_reserved_context_fini(vdev);
err_mmu_gctx_fini:
ivpu_mmu_global_context_fini(vdev);
err_power_down:
@ -629,6 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_ipc_fini(vdev);
ivpu_fw_fini(vdev);
ivpu_mmu_reserved_context_fini(vdev);
ivpu_mmu_global_context_fini(vdev);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
@ -657,10 +646,10 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, vdev);
ret = ivpu_dev_init(vdev);
if (ret) {
dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret);
if (ret)
return ret;
}
ivpu_debugfs_init(vdev);
ret = drm_dev_register(&vdev->drm, 0);
if (ret) {

View File

@ -29,12 +29,13 @@
#define IVPU_HW_37XX 37
#define IVPU_HW_40XX 40
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
/* SSID 1 is used by the VPU to represent invalid context */
#define IVPU_USER_CONTEXT_MIN_SSID 2
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
/* SSID 1 is used by the VPU to represent reserved context */
#define IVPU_RESERVED_CONTEXT_MMU_SSID 1
#define IVPU_USER_CONTEXT_MIN_SSID 2
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
#define IVPU_NUM_ENGINES 2
#define IVPU_NUM_ENGINES 2
#define IVPU_PLATFORM_SILICON 0
#define IVPU_PLATFORM_SIMICS 2
@ -76,6 +77,11 @@
#define IVPU_WA(wa_name) (vdev->wa.wa_name)
/*
 * IVPU_PRINT_WA(wa_name) - log "Using WA: <wa_name>" through ivpu_dbg() with
 * the MISC argument when IVPU_WA(wa_name) evaluates true.
 *
 * Both IVPU_WA() and the ivpu_dbg() call refer to a variable named vdev that
 * is not a macro parameter, so a suitable vdev must be in scope at the
 * expansion site. The do { } while (0) wrapper lets the macro be used as a
 * single statement.
 */
#define IVPU_PRINT_WA(wa_name) do { \
if (IVPU_WA(wa_name)) \
ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n"); \
} while (0)
struct ivpu_wa_table {
bool punit_disabled;
bool clear_runtime_mem;
@ -105,6 +111,7 @@ struct ivpu_device {
struct ivpu_pm_info *pm;
struct ivpu_mmu_context gctx;
struct ivpu_mmu_context rctx;
struct xarray context_xa;
struct xa_limit context_xa_limit;
@ -118,6 +125,7 @@ struct ivpu_device {
int jsm;
int tdr;
int reschedule_suspend;
int autosuspend;
} timeout;
};

View File

@ -301,6 +301,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
if (ret)
goto err_fw_release;
ivpu_fw_load(vdev);
return 0;
err_fw_release:
@ -314,25 +316,23 @@ void ivpu_fw_fini(struct ivpu_device *vdev)
ivpu_fw_release(vdev);
}
int ivpu_fw_load(struct ivpu_device *vdev)
void ivpu_fw_load(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
u64 image_end_offset = fw->image_load_offset + fw->image_size;
memset(fw->mem->kvaddr, 0, fw->image_load_offset);
memcpy(fw->mem->kvaddr + fw->image_load_offset,
memset(ivpu_bo_vaddr(fw->mem), 0, fw->image_load_offset);
memcpy(ivpu_bo_vaddr(fw->mem) + fw->image_load_offset,
fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size);
if (IVPU_WA(clear_runtime_mem)) {
u8 *start = fw->mem->kvaddr + image_end_offset;
u64 size = fw->mem->base.size - image_end_offset;
u8 *start = ivpu_bo_vaddr(fw->mem) + image_end_offset;
u64 size = ivpu_bo_size(fw->mem) - image_end_offset;
memset(start, 0, size);
}
wmb(); /* Flush WC buffers after writing fw->mem */
return 0;
}
static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@ -451,10 +451,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
vdev->hw->ranges.global.start;
boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr;
boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2;
boot_params->ipc_header_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2;
boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2;
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
@ -486,9 +486,9 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->trace_destination_mask = vdev->fw->trace_destination_mask;
boot_params->trace_hw_component_mask = vdev->fw->trace_hw_component_mask;
boot_params->crit_tracing_buff_addr = vdev->fw->mem_log_crit->vpu_addr;
boot_params->crit_tracing_buff_size = vdev->fw->mem_log_crit->base.size;
boot_params->crit_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_crit);
boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr;
boot_params->verbose_tracing_buff_size = vdev->fw->mem_log_verb->base.size;
boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb);
boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);

View File

@ -31,7 +31,7 @@ struct ivpu_fw_info {
int ivpu_fw_init(struct ivpu_device *vdev);
void ivpu_fw_fini(struct ivpu_device *vdev);
int ivpu_fw_load(struct ivpu_device *vdev);
void ivpu_fw_load(struct ivpu_device *vdev);
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)

View File

@ -31,10 +31,10 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
{
struct vpu_tracing_buffer_header *log;
if ((*offset + sizeof(*log)) > bo->base.size)
if ((*offset + sizeof(*log)) > ivpu_bo_size(bo))
return -EINVAL;
log = bo->kvaddr + *offset;
log = ivpu_bo_vaddr(bo) + *offset;
if (log->vpu_canary_start != VPU_TRACING_BUFFER_CANARY)
return -EINVAL;
@ -43,7 +43,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
ivpu_dbg(vdev, FW_BOOT, "Invalid header size 0x%x\n", log->header_size);
return -EINVAL;
}
if ((char *)log + log->size > (char *)bo->kvaddr + bo->base.size) {
if ((char *)log + log->size > (char *)ivpu_bo_vaddr(bo) + ivpu_bo_size(bo)) {
ivpu_dbg(vdev, FW_BOOT, "Invalid log size 0x%x\n", log->size);
return -EINVAL;
}

View File

@ -69,7 +69,7 @@ static const struct ivpu_bo_ops prime_ops = {
static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
{
int npages = bo->base.size >> PAGE_SHIFT;
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct page **pages;
pages = drm_gem_get_pages(&bo->base);
@ -88,7 +88,7 @@ static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
static void shmem_free_pages_locked(struct ivpu_bo *bo)
{
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
drm_gem_put_pages(&bo->base, bo->pages, true, false);
bo->pages = NULL;
@ -96,7 +96,7 @@ static void shmem_free_pages_locked(struct ivpu_bo *bo)
static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
{
int npages = bo->base.size >> PAGE_SHIFT;
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct sg_table *sgt;
int ret;
@ -142,7 +142,7 @@ static const struct ivpu_bo_ops shmem_ops = {
static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
struct page **pages;
int ret;
@ -171,10 +171,10 @@ static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
static void internal_free_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
for (i = 0; i < npages; i++)
put_page(bo->pages[i]);
@ -291,7 +291,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
}
mutex_lock(&ctx->lock);
ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
bo->vpu_addr = bo->mm_node.start;
@ -438,7 +438,7 @@ static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
if (obj->import_attach) {
/* Drop the reference drm_gem_mmap_obj() acquired.*/
@ -553,7 +553,7 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
drm_gem_object_put(&bo->base);
ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
return ret;
}
@ -590,22 +590,22 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
goto err_put;
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
if (bo->flags & DRM_IVPU_BO_WC)
set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
else if (bo->flags & DRM_IVPU_BO_UNCACHED)
set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
if (!bo->kvaddr) {
ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
goto err_put;
}
ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
bo->vpu_addr, bo->base.size, flags);
bo->vpu_addr, ivpu_bo_size(bo), flags);
return bo;
@ -718,7 +718,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
}

Some files were not shown because too many files have changed in this diff Show More