mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-07 13:53:24 +00:00
drm for 6.7-rc1
kernel: - add initial vmemdup-user-array core: - fix platform remove() to return void - drm_file owner updated to reflect owner - move size calcs to drm buddy allocator - let GPUVM build as a module - allow variable number of run-queues in scheduler edid: - handle bad h/v sync_end in EDIDs panfrost: - add Boris as maintainer fbdev: - use fb_ops helpers more - only allow logo use from fbcon - rename fb_pgproto to pgprot_framebuffer - add HPD state to drm_connector_oob_hotplug_event - convert to fbdev i/o mem helpers i915: - Enable meteorlake by default - Early Xe2 LPD/Lunarlake display enablement - Rework subplatforms into IP version checks - GuC based TLB invalidation for Meteorlake - Display rework for future Xe driver integration - LNL FBC features - LNL display feature capability reads - update recommended fw versions for DG2+ - drop fastboot module parameter - added deviceid for Arrowlake-S - drop preproduction workarounds - don't disable preemption for resets - cleanup inlines in headers - PXP firmware loading fix - Fix sg list lengths - DSC PPS state readout/verification - Add more RPL P/U PCI IDs - Add new DG2-G12 stepping - DP enhanced framing support to state checker - Improve shared link bandwidth management - stop using GEM macros in display code - refactor related code into display code - locally enable W=1 warnings - remove PSR watchdog timers on LNL amdgpu: - RAS/FRU EEPROM updatse - IP discovery updatses - GC 11.5 support - DCN 3.5 support - VPE 6.1 support - NBIO 7.11 support - DML2 support - lots of IP updates - use flexible arrays for bo list handling - W=1 fixes - Enable seamless boot in more cases - Enable context type property for HDMI - Rework GPUVM TLB flushing - VCN IB start/size alignment fixes amdkfd: - GC 10/11 fixes - GC 11.5 support - use partial migration in GPU faults radeon: - W=1 Fixes - fix some possible buffer overflow/NULL derefs nouveau: - update uapi for NO_PREFETCH - scheduler/fence fixes - rework suspend/resume for GSP-RM - 
rework display in preparation for GSP-RM habanalabs: - uapi: expose tsc clock - uapi: block access to eventfd through control device - uapi: force dma-buf export to PAGE_SIZE alignments - complete move to accel subsystem - move firmware interface include files - perform hard reset on PCIe AXI drain event - optimise user interrupt handling msm: - DP: use existing helpers for DPCD - DPU: interrupts reworked - gpu: a7xx (a730/a740) support - decouple msm_drv from kms for headless devices mediatek: - MT8188 dsi/dp/edp support - DDP GAMMA - 12 bit LUT support - connector dynamic selection capability rockchip: - rv1126 mipi-dsi/vop support - add planar formats ast: - rename constants panels: - Mitsubishi AA084XE01 - JDI LPM102A188A - LTK050H3148W-CTA6 ivpu: - power management fixes qaic: - add detach slice bo api komeda: - add NV12 writeback tegra: - support NVSYNC/NHSYNC - host1x suspend fixes ili9882t: - separate into own driver -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmVAgzYACgkQDHTzWXnE hr7ZEQ//UXne3tyGOsU3X8r+lstLFDMa90a3hvTg6hX+Q0MjHd/clwkKFkLpkipL n7gIZlaHl11dRs0FzrIZA5EVAAgjMLKmIl10NBDFec6ZFA3VERcggx8y61uifI15 VviMR1VbLHYZaCdyrQOK0A4wcktWnKXyoXp7cwy9crdc2GOBMUZkdIqtvD7jHxQx UMIFnzi1CyKUX/Fjt/JceYcNk9y2ZGkzakYO3sHcUdv4DPu9qX4kNzpjF691AZBP UeKWvCswTRVg2M0kuo/RYIBzqaTmOlk6dHLWBognIeZPyuyhCcaGC2d64c6tShwQ dtHdi+IgyQ8s2qb350ymKTQUP7xA/DfZBwH7LvrZALBxeQGYQN1CnsgDMOS2wcUc XrRFiS7PxEOtMMBctcPBnnoV5ttnsLLlPpzM9puh9sUFMn6CgLzcAMqXdqxzMajH +dz2aD1N0vMqq4varozOg9SC2QamgUiPN/TQfrulhCTCfQaXczy5x1OYiIz65+Sl mKoe2WASuP9Ve8do4N/wEwH5SZY2ItipBdUTRxttY9NTanmV0X5DjZBXH5b9XGci Zl5Ar613f9zwm5T5BVA5k6s3ZbGY6QcP5pDNTCPaSgitfFXIdReBZ2CaYzK3MPg/ Wit/TXrud9yT6VPpI1igboMyasf5QubV1MY1K83kOCWr9u8R2CM= =l79u -----END PGP SIGNATURE----- Merge tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm Pull drm updates from Dave Airlie: "Highlights: - AMD adds some more upcoming HW platforms - Intel made Meteorlake stable and started adding Lunarlake - 
nouveau has a bunch of display rework in prepartion for the NVIDIA GSP firmware support - msm adds a7xx support - habanalabs has finished migration to accel subsystem Detail summary: kernel: - add initial vmemdup-user-array core: - fix platform remove() to return void - drm_file owner updated to reflect owner - move size calcs to drm buddy allocator - let GPUVM build as a module - allow variable number of run-queues in scheduler edid: - handle bad h/v sync_end in EDIDs panfrost: - add Boris as maintainer fbdev: - use fb_ops helpers more - only allow logo use from fbcon - rename fb_pgproto to pgprot_framebuffer - add HPD state to drm_connector_oob_hotplug_event - convert to fbdev i/o mem helpers i915: - Enable meteorlake by default - Early Xe2 LPD/Lunarlake display enablement - Rework subplatforms into IP version checks - GuC based TLB invalidation for Meteorlake - Display rework for future Xe driver integration - LNL FBC features - LNL display feature capability reads - update recommended fw versions for DG2+ - drop fastboot module parameter - added deviceid for Arrowlake-S - drop preproduction workarounds - don't disable preemption for resets - cleanup inlines in headers - PXP firmware loading fix - Fix sg list lengths - DSC PPS state readout/verification - Add more RPL P/U PCI IDs - Add new DG2-G12 stepping - DP enhanced framing support to state checker - Improve shared link bandwidth management - stop using GEM macros in display code - refactor related code into display code - locally enable W=1 warnings - remove PSR watchdog timers on LNL amdgpu: - RAS/FRU EEPROM updatse - IP discovery updatses - GC 11.5 support - DCN 3.5 support - VPE 6.1 support - NBIO 7.11 support - DML2 support - lots of IP updates - use flexible arrays for bo list handling - W=1 fixes - Enable seamless boot in more cases - Enable context type property for HDMI - Rework GPUVM TLB flushing - VCN IB start/size alignment fixes amdkfd: - GC 10/11 fixes - GC 11.5 support - use partial migration 
in GPU faults radeon: - W=1 Fixes - fix some possible buffer overflow/NULL derefs nouveau: - update uapi for NO_PREFETCH - scheduler/fence fixes - rework suspend/resume for GSP-RM - rework display in preparation for GSP-RM habanalabs: - uapi: expose tsc clock - uapi: block access to eventfd through control device - uapi: force dma-buf export to PAGE_SIZE alignments - complete move to accel subsystem - move firmware interface include files - perform hard reset on PCIe AXI drain event - optimise user interrupt handling msm: - DP: use existing helpers for DPCD - DPU: interrupts reworked - gpu: a7xx (a730/a740) support - decouple msm_drv from kms for headless devices mediatek: - MT8188 dsi/dp/edp support - DDP GAMMA - 12 bit LUT support - connector dynamic selection capability rockchip: - rv1126 mipi-dsi/vop support - add planar formats ast: - rename constants panels: - Mitsubishi AA084XE01 - JDI LPM102A188A - LTK050H3148W-CTA6 ivpu: - power management fixes qaic: - add detach slice bo api komeda: - add NV12 writeback tegra: - support NVSYNC/NHSYNC - host1x suspend fixes ili9882t: - separate into own driver" * tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm: (1803 commits) drm/amdgpu: Remove unused variables from amdgpu_show_fdinfo drm/amdgpu: Remove duplicate fdinfo fields drm/amd/amdgpu: avoid to disable gfxhub interrupt when driver is unloaded drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems drm/amdgpu: Retrieve CE count from ce_count_lo_chip in EccInfo table drm/amdgpu: Identify data parity error corrected in replay mode drm/amdgpu: Fix typo in IP discovery parsing drm/amd/display: fix S/G display enablement drm/amdxcp: fix amdxcp unloads incompletely drm/amd/amdgpu: fix the GPU power print error in pm info drm/amdgpu: Use pcie domain of xcc acpi objects drm/amd: check num of link levels when update pcie param drm/amdgpu: Add a read to GFX v9.4.3 ring test drm/amd/pm: call smu_cmn_get_smc_version in is_mode1_reset_supported. 
drm/amdgpu: get RAS poison status from DF v4_6_2 drm/amdgpu: Use discovery table's subrevision drm/amd/display: 3.2.256 drm/amd/display: add interface to query SubVP status drm/amd/display: Read before writing Backlight Mode Set Register drm/amd/display: Disable SYMCLK32_SE RCO on DCN314 ...
This commit is contained in:
commit
7d461b291e
@ -1,4 +1,4 @@
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/addr
|
||||
What: /sys/kernel/debug/accel/<n>/addr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -8,34 +8,34 @@ Description: Sets the device address to be used for read or write through
|
||||
only when the IOMMU is disabled.
|
||||
The acceptable value is a string that starts with "0x"
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
|
||||
What: /sys/kernel/debug/accel/<n>/clk_gate
|
||||
Date: May 2020
|
||||
KernelVersion: 5.8
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: This setting is now deprecated as clock gating is handled solely by the f/w
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
|
||||
What: /sys/kernel/debug/accel/<n>/command_buffers
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays a list with information about the currently allocated
|
||||
command buffers
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission
|
||||
What: /sys/kernel/debug/accel/<n>/command_submission
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays a list with information about the currently active
|
||||
command submissions
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_submission_jobs
|
||||
What: /sys/kernel/debug/accel/<n>/command_submission_jobs
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays a list with detailed information about each JOB (CB) of
|
||||
each active command submission
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/data32
|
||||
What: /sys/kernel/debug/accel/<n>/data32
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -50,7 +50,7 @@ Description: Allows the root user to read or write directly through the
|
||||
If the IOMMU is disabled, it also allows the root user to read
|
||||
or write from the host a device VA of a host mapped memory
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/data64
|
||||
What: /sys/kernel/debug/accel/<n>/data64
|
||||
Date: Jan 2020
|
||||
KernelVersion: 5.6
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -65,7 +65,7 @@ Description: Allows the root user to read or write 64 bit data directly
|
||||
If the IOMMU is disabled, it also allows the root user to read
|
||||
or write from the host a device VA of a host mapped memory
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/data_dma
|
||||
What: /sys/kernel/debug/accel/<n>/data_dma
|
||||
Date: Apr 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -79,11 +79,11 @@ Description: Allows the root user to read from the device's internal
|
||||
a very long time.
|
||||
This interface doesn't support concurrency in the same device.
|
||||
In GAUDI and GOYA, this action can cause undefined behavior
|
||||
in case the it is done while the device is executing user
|
||||
in case it is done while the device is executing user
|
||||
workloads.
|
||||
Only supported on GAUDI at this stage.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/device
|
||||
What: /sys/kernel/debug/accel/<n>/device
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -91,14 +91,14 @@ Description: Enables the root user to set the device to specific state.
|
||||
Valid values are "disable", "enable", "suspend", "resume".
|
||||
User can read this property to see the valid values
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/device_release_watchdog_timeout
|
||||
What: /sys/kernel/debug/accel/<n>/device_release_watchdog_timeout
|
||||
Date: Oct 2022
|
||||
KernelVersion: 6.2
|
||||
Contact: ttayar@habana.ai
|
||||
Description: The watchdog timeout value in seconds for a device release upon
|
||||
certain error cases, after which the device is reset.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/dma_size
|
||||
What: /sys/kernel/debug/accel/<n>/dma_size
|
||||
Date: Apr 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -108,7 +108,7 @@ Description: Specify the size of the DMA transaction when using DMA to read
|
||||
When the write is finished, the user can read the "data_dma"
|
||||
blob
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/dump_razwi_events
|
||||
What: /sys/kernel/debug/accel/<n>/dump_razwi_events
|
||||
Date: Aug 2022
|
||||
KernelVersion: 5.20
|
||||
Contact: fkassabri@habana.ai
|
||||
@ -117,7 +117,7 @@ Description: Dumps all razwi events to dmesg if exist.
|
||||
the routine will clear the status register.
|
||||
Usage: cat dump_razwi_events
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
|
||||
What: /sys/kernel/debug/accel/<n>/dump_security_violations
|
||||
Date: Jan 2021
|
||||
KernelVersion: 5.12
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -125,14 +125,14 @@ Description: Dumps all security violations to dmesg. This will also ack
|
||||
all security violations, meaning those violations will not be
|
||||
dumped next time user calls this API
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/engines
|
||||
What: /sys/kernel/debug/accel/<n>/engines
|
||||
Date: Jul 2019
|
||||
KernelVersion: 5.3
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the status registers values of the device engines and
|
||||
their derived idle status
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
|
||||
What: /sys/kernel/debug/accel/<n>/i2c_addr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -140,7 +140,7 @@ Description: Sets I2C device address for I2C transaction that is generated
|
||||
by the device's CPU. Not available when device is loaded with secured
|
||||
firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
|
||||
What: /sys/kernel/debug/accel/<n>/i2c_bus
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -148,7 +148,7 @@ Description: Sets I2C bus address for I2C transaction that is generated by
|
||||
the device's CPU. Not available when device is loaded with secured
|
||||
firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_data
|
||||
What: /sys/kernel/debug/accel/<n>/i2c_data
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -157,7 +157,7 @@ Description: Triggers an I2C transaction that is generated by the device's
|
||||
reading from the file generates a read transaction. Not available
|
||||
when device is loaded with secured firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_len
|
||||
What: /sys/kernel/debug/accel/<n>/i2c_len
|
||||
Date: Dec 2021
|
||||
KernelVersion: 5.17
|
||||
Contact: obitton@habana.ai
|
||||
@ -165,7 +165,7 @@ Description: Sets I2C length in bytes for I2C transaction that is generated b
|
||||
the device's CPU. Not available when device is loaded with secured
|
||||
firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
|
||||
What: /sys/kernel/debug/accel/<n>/i2c_reg
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -173,35 +173,35 @@ Description: Sets I2C register id for I2C transaction that is generated by
|
||||
the device's CPU. Not available when device is loaded with secured
|
||||
firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/led0
|
||||
What: /sys/kernel/debug/accel/<n>/led0
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Sets the state of the first S/W led on the device. Not available
|
||||
when device is loaded with secured firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/led1
|
||||
What: /sys/kernel/debug/accel/<n>/led1
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Sets the state of the second S/W led on the device. Not available
|
||||
when device is loaded with secured firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/led2
|
||||
What: /sys/kernel/debug/accel/<n>/led2
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Sets the state of the third S/W led on the device. Not available
|
||||
when device is loaded with secured firmware
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub
|
||||
What: /sys/kernel/debug/accel/<n>/memory_scrub
|
||||
Date: May 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: dhirschfeld@habana.ai
|
||||
Description: Allows the root user to scrub the dram memory. The scrubbing
|
||||
value can be set using the debugfs file memory_scrub_val.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub_val
|
||||
What: /sys/kernel/debug/accel/<n>/memory_scrub_val
|
||||
Date: May 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: dhirschfeld@habana.ai
|
||||
@ -209,7 +209,7 @@ Description: The value to which the dram will be set to when the user
|
||||
scrubs the dram using 'memory_scrub' debugfs file and
|
||||
the scrubbing value when using module param 'memory_scrub'
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/mmu
|
||||
What: /sys/kernel/debug/accel/<n>/mmu
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -217,19 +217,19 @@ Description: Displays the hop values and physical address for a given ASID
|
||||
and virtual address. The user should write the ASID and VA into
|
||||
the file and then read the file to get the result.
|
||||
e.g. to display info about VA 0x1000 for ASID 1 you need to do:
|
||||
echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu
|
||||
echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/mmu_error
|
||||
What: /sys/kernel/debug/accel/<n>/mmu_error
|
||||
Date: Mar 2021
|
||||
KernelVersion: 5.12
|
||||
Contact: fkassabri@habana.ai
|
||||
Description: Check and display page fault or access violation mmu errors for
|
||||
all MMUs specified in mmu_cap_mask.
|
||||
e.g. to display error info for MMU hw cap bit 9, you need to do:
|
||||
echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
|
||||
cat /sys/kernel/debug/habanalabs/hl0/mmu_error
|
||||
echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
|
||||
cat /sys/kernel/debug/accel/0/mmu_error
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/monitor_dump
|
||||
What: /sys/kernel/debug/accel/<n>/monitor_dump
|
||||
Date: Mar 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: osharabi@habana.ai
|
||||
@ -243,7 +243,7 @@ Description: Allows the root user to dump monitors status from the device's
|
||||
This interface doesn't support concurrency in the same device.
|
||||
Only supported on GAUDI.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/monitor_dump_trig
|
||||
What: /sys/kernel/debug/accel/<n>/monitor_dump_trig
|
||||
Date: Mar 2022
|
||||
KernelVersion: 5.19
|
||||
Contact: osharabi@habana.ai
|
||||
@ -253,14 +253,14 @@ Description: Triggers dump of monitor data. The value to trigger the operatio
|
||||
When the write is finished, the user can read the "monitor_dump"
|
||||
blob
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/set_power_state
|
||||
What: /sys/kernel/debug/accel/<n>/set_power_state
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Sets the PCI power state. Valid values are "1" for D0 and "2"
|
||||
for D3Hot
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/skip_reset_on_timeout
|
||||
What: /sys/kernel/debug/accel/<n>/skip_reset_on_timeout
|
||||
Date: Jun 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: ynudelman@habana.ai
|
||||
@ -268,7 +268,7 @@ Description: Sets the skip reset on timeout option for the device. Value of
|
||||
"0" means device will be reset in case some CS has timed out,
|
||||
otherwise it will not be reset.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/state_dump
|
||||
What: /sys/kernel/debug/accel/<n>/state_dump
|
||||
Date: Oct 2021
|
||||
KernelVersion: 5.15
|
||||
Contact: ynudelman@habana.ai
|
||||
@ -279,7 +279,7 @@ Description: Gets the state dump occurring on a CS timeout or failure.
|
||||
Writing an integer X discards X state dumps, so that the
|
||||
next read would return X+1-st newest state dump.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
|
||||
What: /sys/kernel/debug/accel/<n>/stop_on_err
|
||||
Date: Mar 2020
|
||||
KernelVersion: 5.6
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -287,21 +287,21 @@ Description: Sets the stop-on_error option for the device engines. Value of
|
||||
"0" is for disable, otherwise enable.
|
||||
Relevant only for GOYA and GAUDI.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
|
||||
What: /sys/kernel/debug/accel/<n>/timeout_locked
|
||||
Date: Sep 2021
|
||||
KernelVersion: 5.16
|
||||
Contact: obitton@habana.ai
|
||||
Description: Sets the command submission timeout value in seconds.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/userptr
|
||||
What: /sys/kernel/debug/accel/<n>/userptr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays a list with information about the currently user
|
||||
Description: Displays a list with information about the current user
|
||||
pointers (user virtual addresses) that are pinned and mapped
|
||||
to DMA addresses
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
|
||||
What: /sys/kernel/debug/accel/<n>/userptr_lookup
|
||||
Date: Oct 2021
|
||||
KernelVersion: 5.15
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -309,7 +309,7 @@ Description: Allows to search for specific user pointers (user virtual
|
||||
addresses) that are pinned and mapped to DMA addresses, and see
|
||||
their resolution to the specific dma address.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/vm
|
||||
What: /sys/kernel/debug/accel/<n>/vm
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
|
@ -1,4 +1,4 @@
|
||||
What: /sys/class/habanalabs/hl<n>/armcp_kernel_ver
|
||||
What: /sys/class/accel/accel<n>/device/armcp_kernel_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -6,7 +6,7 @@ Description: Version of the Linux kernel running on the device's CPU.
|
||||
Will be DEPRECATED in Linux kernel version 5.10, and be
|
||||
replaced with cpucp_kernel_ver
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/armcp_ver
|
||||
What: /sys/class/accel/accel<n>/device/armcp_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -14,7 +14,7 @@ Description: Version of the application running on the device's CPU
|
||||
Will be DEPRECATED in Linux kernel version 5.10, and be
|
||||
replaced with cpucp_ver
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
|
||||
What: /sys/class/accel/accel<n>/device/clk_max_freq_mhz
|
||||
Date: Jun 2019
|
||||
KernelVersion: 5.7
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -24,58 +24,58 @@ Description: Allows the user to set the maximum clock frequency, in MHz.
|
||||
frequency value of the device clock. This property is valid
|
||||
only for the Gaudi ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
|
||||
What: /sys/class/accel/accel<n>/device/clk_cur_freq_mhz
|
||||
Date: Jun 2019
|
||||
KernelVersion: 5.7
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the current frequency, in MHz, of the device clock.
|
||||
This property is valid only for the Gaudi ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/cpld_ver
|
||||
What: /sys/class/accel/accel<n>/device/cpld_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Device's CPLD F/W
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
|
||||
What: /sys/class/accel/accel<n>/device/cpucp_kernel_ver
|
||||
Date: Oct 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Linux kernel running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/cpucp_ver
|
||||
What: /sys/class/accel/accel<n>/device/cpucp_ver
|
||||
Date: Oct 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the application running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/device_type
|
||||
What: /sys/class/accel/accel<n>/device/device_type
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the code name of the device according to its type.
|
||||
The supported values are: "GOYA"
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/eeprom
|
||||
What: /sys/class/accel/accel<n>/device/eeprom
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: A binary file attribute that contains the contents of the
|
||||
on-board EEPROM
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/fuse_ver
|
||||
What: /sys/class/accel/accel<n>/device/fuse_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the device's version from the eFuse
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/fw_os_ver
|
||||
What: /sys/class/accel/accel<n>/device/fw_os_ver
|
||||
Date: Dec 2021
|
||||
KernelVersion: 5.18
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the firmware OS running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/hard_reset
|
||||
What: /sys/class/accel/accel<n>/device/hard_reset
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -83,14 +83,14 @@ Description: Interface to trigger a hard-reset operation for the device.
|
||||
Hard-reset will reset ALL internal components of the device
|
||||
except for the PCI interface and the internal PLLs
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/hard_reset_cnt
|
||||
What: /sys/class/accel/accel<n>/device/hard_reset_cnt
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays how many times the device has undergone a hard-reset
|
||||
operation since the driver was loaded
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/high_pll
|
||||
What: /sys/class/accel/accel<n>/device/high_pll
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -98,7 +98,7 @@ Description: Allows the user to set the maximum clock frequency for MME, TPC
|
||||
and IC when the power management profile is set to "automatic".
|
||||
This property is valid only for the Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/ic_clk
|
||||
What: /sys/class/accel/accel<n>/device/ic_clk
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -110,27 +110,27 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
|
||||
frequency value of the IC. This property is valid only for the
|
||||
Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/ic_clk_curr
|
||||
What: /sys/class/accel/accel<n>/device/ic_clk_curr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the current clock frequency, in Hz, of the Interconnect
|
||||
fabric. This property is valid only for the Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/infineon_ver
|
||||
What: /sys/class/accel/accel<n>/device/infineon_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/max_power
|
||||
What: /sys/class/accel/accel<n>/device/max_power
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Allows the user to set the maximum power consumption of the
|
||||
device in milliwatts.
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/mme_clk
|
||||
What: /sys/class/accel/accel<n>/device/mme_clk
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -142,21 +142,21 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
|
||||
frequency value of the MME. This property is valid only for the
|
||||
Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/mme_clk_curr
|
||||
What: /sys/class/accel/accel<n>/device/mme_clk_curr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the current clock frequency, in Hz, of the MME compute
|
||||
engine. This property is valid only for the Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/pci_addr
|
||||
What: /sys/class/accel/accel<n>/device/pci_addr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the PCI address of the device. This is needed so the
|
||||
user would be able to open a device based on its PCI address
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/pm_mng_profile
|
||||
What: /sys/class/accel/accel<n>/device/pm_mng_profile
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -170,19 +170,19 @@ Description: Power management profile. Values are "auto", "manual". In "auto"
|
||||
ic_clk, mme_clk and tpc_clk. This property is valid only for
|
||||
the Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/preboot_btl_ver
|
||||
What: /sys/class/accel/accel<n>/device/preboot_btl_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the device's preboot F/W code
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/security_enabled
|
||||
What: /sys/class/accel/accel<n>/device/security_enabled
|
||||
Date: Oct 2022
|
||||
KernelVersion: 6.1
|
||||
Contact: obitton@habana.ai
|
||||
Description: Displays the device's security status
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/soft_reset
|
||||
What: /sys/class/accel/accel<n>/device/soft_reset
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -190,14 +190,14 @@ Description: Interface to trigger a soft-reset operation for the device.
|
||||
Soft-reset will reset only the compute and DMA engines of the
|
||||
device
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/soft_reset_cnt
|
||||
What: /sys/class/accel/accel<n>/device/soft_reset_cnt
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays how many times the device has undergone a soft-reset
|
||||
operation since the driver was loaded
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/status
|
||||
What: /sys/class/accel/accel<n>/device/status
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -215,13 +215,13 @@ Description: Status of the card:
|
||||
a compute-reset which is executed after a device release
|
||||
(relevant for Gaudi2 only).
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/thermal_ver
|
||||
What: /sys/class/accel/accel<n>/device/thermal_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Device's thermal daemon
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/tpc_clk
|
||||
What: /sys/class/accel/accel<n>/device/tpc_clk
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
@ -233,20 +233,20 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of
|
||||
frequency value of the TPC. This property is valid only for
|
||||
Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/tpc_clk_curr
|
||||
What: /sys/class/accel/accel<n>/device/tpc_clk_curr
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the current clock frequency, in Hz, of the TPC compute
|
||||
engines. This property is valid only for the Goya ASIC family
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/uboot_ver
|
||||
What: /sys/class/accel/accel<n>/device/uboot_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the u-boot running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/vrm_ver
|
||||
What: /sys/class/accel/accel<n>/device/vrm_ver
|
||||
Date: Jan 2022
|
||||
KernelVersion: 5.17
|
||||
Contact: ogabbay@kernel.org
|
||||
|
@ -123,6 +123,16 @@ DRM_IOCTL_QAIC_PART_DEV
|
||||
AIC100 device and can be used for limiting a process to some subset of
|
||||
resources.
|
||||
|
||||
DRM_IOCTL_QAIC_DETACH_SLICE_BO
|
||||
This IOCTL allows userspace to remove the slicing information from a BO that
|
||||
was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This
|
||||
is the inverse of DRM_IOCTL_QAIC_ATTACH_SLICE_BO. The BO must be idle for
|
||||
DRM_IOCTL_QAIC_DETACH_SLICE_BO to be called. After a successful detach slice
|
||||
operation the BO may have new slicing information attached with a new call
|
||||
to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. After detach slice, the BO cannot be
|
||||
executed until after a new attach slice operation. Combining attach slice
|
||||
and detach slice calls allows userspace to use a BO with multiple workloads.
|
||||
|
||||
Userspace Client Isolation
|
||||
==========================
|
||||
|
||||
|
@ -17,6 +17,7 @@ properties:
|
||||
- analogix,anx7808
|
||||
- analogix,anx7812
|
||||
- analogix,anx7814
|
||||
- analogix,anx7816
|
||||
- analogix,anx7818
|
||||
|
||||
reg:
|
||||
|
@ -0,0 +1,115 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/bridge/fsl,imx93-mipi-dsi.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Freescale i.MX93 specific extensions to Synopsys Designware MIPI DSI
|
||||
|
||||
maintainers:
|
||||
- Liu Ying <victor.liu@nxp.com>
|
||||
|
||||
description: |
|
||||
There is a Synopsys Designware MIPI DSI Host Controller and a Synopsys
|
||||
Designware MIPI DPHY embedded in Freescale i.MX93 SoC. Some configurations
|
||||
and extensions to them are controlled by i.MX93 media blk-ctrl.
|
||||
|
||||
allOf:
|
||||
- $ref: snps,dw-mipi-dsi.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: fsl,imx93-mipi-dsi
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: apb clock
|
||||
- description: pixel clock
|
||||
- description: PHY configuration clock
|
||||
- description: PHY reference clock
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: pclk
|
||||
- const: pix
|
||||
- const: phy_cfg
|
||||
- const: phy_ref
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
fsl,media-blk-ctrl:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description:
|
||||
i.MX93 media blk-ctrl, as a syscon, controls pixel component bit map
|
||||
configurations from LCDIF display controller to the MIPI DSI host
|
||||
controller and MIPI DPHY PLL related configurations through PLL SoC
|
||||
interface.
|
||||
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- interrupts
|
||||
- fsl,media-blk-ctrl
|
||||
- power-domains
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/imx93-clock.h>
|
||||
#include <dt-bindings/gpio/gpio.h>
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/power/fsl,imx93-power.h>
|
||||
|
||||
dsi@4ae10000 {
|
||||
compatible = "fsl,imx93-mipi-dsi";
|
||||
reg = <0x4ae10000 0x10000>;
|
||||
interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&clk IMX93_CLK_MIPI_DSI_GATE>,
|
||||
<&clk IMX93_CLK_MEDIA_DISP_PIX>,
|
||||
<&clk IMX93_CLK_MIPI_PHY_CFG>,
|
||||
<&clk IMX93_CLK_24M>;
|
||||
clock-names = "pclk", "pix", "phy_cfg", "phy_ref";
|
||||
fsl,media-blk-ctrl = <&media_blk_ctrl>;
|
||||
power-domains = <&media_blk_ctrl IMX93_MEDIABLK_PD_MIPI_DSI>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
panel@0 {
|
||||
compatible = "raydium,rm67191";
|
||||
reg = <0>;
|
||||
reset-gpios = <&adp5585gpio 6 GPIO_ACTIVE_LOW>;
|
||||
dsi-lanes = <4>;
|
||||
video-mode = <2>;
|
||||
|
||||
port {
|
||||
panel_in: endpoint {
|
||||
remote-endpoint = <&dsi_out>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
|
||||
dsi_to_lcdif: endpoint {
|
||||
remote-endpoint = <&lcdif_to_dsi>;
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
|
||||
dsi_out: endpoint {
|
||||
remote-endpoint = <&panel_in>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
@ -0,0 +1,84 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/lvds-data-mapping.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: LVDS Data Mapping
|
||||
|
||||
maintainers:
|
||||
- Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
|
||||
- Thierry Reding <thierry.reding@gmail.com>
|
||||
|
||||
description: |
|
||||
LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
|
||||
incompatible data link layers have been used over time to transmit image data
|
||||
to LVDS devices. This binding supports devices compatible with the following
|
||||
specifications.
|
||||
|
||||
[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
|
||||
1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
|
||||
[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
|
||||
Semiconductor
|
||||
[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
|
||||
Electronics Standards Association (VESA)
|
||||
|
||||
Devices compatible with those specifications have been marketed under the
|
||||
FPD-Link and FlatLink brands.
|
||||
|
||||
properties:
|
||||
data-mapping:
|
||||
enum:
|
||||
- jeida-18
|
||||
- jeida-24
|
||||
- vesa-24
|
||||
description: |
|
||||
The color signals mapping order.
|
||||
|
||||
LVDS data mappings are defined as follows.
|
||||
|
||||
- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and
|
||||
[VESA] specifications. Data are transferred as follows on 3 LVDS lanes.
|
||||
|
||||
Slot 0 1 2 3 4 5 6
|
||||
________________ _________________
|
||||
Clock \_______________________/
|
||||
______ ______ ______ ______ ______ ______ ______
|
||||
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
|
||||
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
|
||||
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
|
||||
|
||||
- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI]
|
||||
specifications. Data are transferred as follows on 4 LVDS lanes.
|
||||
|
||||
Slot 0 1 2 3 4 5 6
|
||||
________________ _________________
|
||||
Clock \_______________________/
|
||||
______ ______ ______ ______ ______ ______ ______
|
||||
DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__><
|
||||
DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__><
|
||||
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__><
|
||||
DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
|
||||
|
||||
- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification.
|
||||
Data are transferred as follows on 4 LVDS lanes.
|
||||
|
||||
Slot 0 1 2 3 4 5 6
|
||||
________________ _________________
|
||||
Clock \_______________________/
|
||||
______ ______ ______ ______ ______ ______ ______
|
||||
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
|
||||
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
|
||||
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
|
||||
DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
|
||||
|
||||
Control signals are mapped as follows.
|
||||
|
||||
CTL0: HSync
|
||||
CTL1: VSync
|
||||
CTL2: Data Enable
|
||||
CTL3: 0
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
...
|
@ -6,83 +6,24 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: LVDS Display Common Properties
|
||||
|
||||
allOf:
|
||||
- $ref: lvds-data-mapping.yaml#
|
||||
|
||||
maintainers:
|
||||
- Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
|
||||
- Thierry Reding <thierry.reding@gmail.com>
|
||||
|
||||
description: |+
|
||||
LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple
|
||||
incompatible data link layers have been used over time to transmit image data
|
||||
to LVDS devices. This bindings supports devices compatible with the following
|
||||
specifications.
|
||||
|
||||
[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February
|
||||
1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA)
|
||||
[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National
|
||||
Semiconductor
|
||||
[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video
|
||||
Electronics Standards Association (VESA)
|
||||
|
||||
Device compatible with those specifications have been marketed under the
|
||||
FPD-Link and FlatLink brands.
|
||||
description:
|
||||
This binding extends the data mapping defined in lvds-data-mapping.yaml.
|
||||
It supports reversing the bit order on the formats defined there in order
|
||||
to accommodate for even more specialized data formats, since a variety of
|
||||
data formats and layouts is used to drive LVDS displays.
|
||||
|
||||
properties:
|
||||
data-mapping:
|
||||
enum:
|
||||
- jeida-18
|
||||
- jeida-24
|
||||
- vesa-24
|
||||
description: |
|
||||
The color signals mapping order.
|
||||
|
||||
LVDS data mappings are defined as follows.
|
||||
|
||||
- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and
|
||||
[VESA] specifications. Data are transferred as follows on 3 LVDS lanes.
|
||||
|
||||
Slot 0 1 2 3 4 5 6
|
||||
________________ _________________
|
||||
Clock \_______________________/
|
||||
______ ______ ______ ______ ______ ______ ______
|
||||
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
|
||||
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
|
||||
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
|
||||
|
||||
- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI]
|
||||
specifications. Data are transferred as follows on 4 LVDS lanes.
|
||||
|
||||
Slot 0 1 2 3 4 5 6
|
||||
________________ _________________
|
||||
Clock \_______________________/
|
||||
______ ______ ______ ______ ______ ______ ______
|
||||
DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__><
|
||||
DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__><
|
||||
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__><
|
||||
DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__><
|
||||
|
||||
- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification.
|
||||
Data are transferred as follows on 4 LVDS lanes.
|
||||
|
||||
Slot 0 1 2 3 4 5 6
|
||||
________________ _________________
|
||||
Clock \_______________________/
|
||||
______ ______ ______ ______ ______ ______ ______
|
||||
DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__><
|
||||
DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__><
|
||||
DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__><
|
||||
DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__><
|
||||
|
||||
Control signals are mapped as follows.
|
||||
|
||||
CTL0: HSync
|
||||
CTL1: VSync
|
||||
CTL2: Data Enable
|
||||
CTL3: 0
|
||||
|
||||
data-mirror:
|
||||
type: boolean
|
||||
description:
|
||||
If set, reverse the bit order described in the data mappings below on all
|
||||
If set, reverse the bit order described in the data mappings on all
|
||||
data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6.
|
||||
|
||||
additionalProperties: true
|
||||
|
@ -21,6 +21,8 @@ description: |
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- mediatek,mt8188-dp-tx
|
||||
- mediatek,mt8188-edp-tx
|
||||
- mediatek,mt8195-dp-tx
|
||||
- mediatek,mt8195-edp-tx
|
||||
|
||||
|
@ -30,6 +30,7 @@ properties:
|
||||
- mediatek,mt8173-dsi
|
||||
- mediatek,mt8183-dsi
|
||||
- mediatek,mt8186-dsi
|
||||
- mediatek,mt8188-dsi
|
||||
- items:
|
||||
- enum:
|
||||
- mediatek,mt6795-dsi
|
||||
|
@ -114,6 +114,7 @@ properties:
|
||||
|
||||
port@1:
|
||||
$ref: /schemas/graph.yaml#/$defs/port-base
|
||||
unevaluatedProperties: false
|
||||
description: Output endpoint of the controller
|
||||
properties:
|
||||
endpoint:
|
||||
|
@ -21,7 +21,7 @@ properties:
|
||||
compatible:
|
||||
oneOf:
|
||||
- items:
|
||||
- pattern: '^qcom,adreno-gmu-6[0-9][0-9]\.[0-9]$'
|
||||
- pattern: '^qcom,adreno-gmu-[67][0-9][0-9]\.[0-9]$'
|
||||
- const: qcom,adreno-gmu
|
||||
- const: qcom,adreno-gmu-wrapper
|
||||
|
||||
@ -64,6 +64,10 @@ properties:
|
||||
iommus:
|
||||
maxItems: 1
|
||||
|
||||
qcom,qmp:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description: Reference to the AOSS side-channel message RAM
|
||||
|
||||
operating-points-v2: true
|
||||
|
||||
opp-table:
|
||||
@ -213,6 +217,47 @@ allOf:
|
||||
- const: axi
|
||||
- const: memnoc
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,adreno-gmu-730.1
|
||||
- qcom,adreno-gmu-740.1
|
||||
then:
|
||||
properties:
|
||||
reg:
|
||||
items:
|
||||
- description: Core GMU registers
|
||||
- description: Resource controller registers
|
||||
- description: GMU PDC registers
|
||||
reg-names:
|
||||
items:
|
||||
- const: gmu
|
||||
- const: rscc
|
||||
- const: gmu_pdc
|
||||
clocks:
|
||||
items:
|
||||
- description: GPU AHB clock
|
||||
- description: GMU clock
|
||||
- description: GPU CX clock
|
||||
- description: GPU AXI clock
|
||||
- description: GPU MEMNOC clock
|
||||
- description: GMU HUB clock
|
||||
- description: GPUSS DEMET clock
|
||||
clock-names:
|
||||
items:
|
||||
- const: ahb
|
||||
- const: gmu
|
||||
- const: cxo
|
||||
- const: axi
|
||||
- const: memnoc
|
||||
- const: hub
|
||||
- const: demet
|
||||
|
||||
required:
|
||||
- qcom,qmp
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
|
@ -23,7 +23,7 @@ properties:
|
||||
The driver is parsing the compat string for Adreno to
|
||||
figure out the gpu-id and patch level.
|
||||
items:
|
||||
- pattern: '^qcom,adreno-[3-6][0-9][0-9]\.[0-9]$'
|
||||
- pattern: '^qcom,adreno-[3-7][0-9][0-9]\.[0-9]$'
|
||||
- const: qcom,adreno
|
||||
- description: |
|
||||
The driver is parsing the compat string for Imageon to
|
||||
@ -203,7 +203,7 @@ allOf:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
pattern: '^qcom,adreno-6[0-9][0-9]\.[0-9]$'
|
||||
pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]$'
|
||||
|
||||
then: # Starting with A6xx, the clocks are usually defined in the GMU node
|
||||
properties:
|
||||
|
@ -38,12 +38,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,msm8998-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -52,6 +56,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-10nm-8998
|
||||
|
@ -44,18 +44,24 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,qcm2290-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-ctrl-6g-qcm2290
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-14nm-2290
|
||||
|
@ -44,18 +44,24 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sc7180-dpu
|
||||
|
||||
"^displayport-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sc7180-dp
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -64,6 +70,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-10nm
|
||||
|
@ -44,18 +44,24 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sc7280-dpu
|
||||
|
||||
"^displayport-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sc7280-dp
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -64,12 +70,16 @@ patternProperties:
|
||||
|
||||
"^edp@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sc7280-edp
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
|
@ -34,12 +34,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sc8280xp-dpu
|
||||
|
||||
"^displayport-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
|
@ -42,18 +42,24 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sdm845-dpu
|
||||
|
||||
"^displayport-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sdm845-dp
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -62,6 +68,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-10nm
|
||||
|
@ -32,12 +32,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6115-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
oneOf:
|
||||
@ -50,6 +54,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-14nm-2290
|
||||
|
@ -43,12 +43,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6125-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -57,6 +61,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6125-dsi-phy-14nm
|
||||
|
@ -43,12 +43,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6350-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -57,6 +61,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-10nm
|
||||
|
@ -43,12 +43,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6375-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -57,6 +61,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6375-dsi-phy-7nm
|
||||
|
@ -47,12 +47,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8150-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -61,6 +65,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-7nm
|
||||
|
@ -46,12 +46,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8250-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -60,6 +64,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,dsi-phy-7nm
|
||||
|
@ -48,18 +48,24 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8350-dpu
|
||||
|
||||
"^displayport-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8350-dp
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -68,6 +74,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8350-dsi-phy-5nm
|
||||
|
@ -38,12 +38,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8450-dpu
|
||||
|
||||
"^displayport-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -52,6 +56,8 @@ patternProperties:
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -60,6 +66,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8450-dsi-phy-5nm
|
||||
|
@ -38,12 +38,16 @@ properties:
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8550-dpu
|
||||
|
||||
"^displayport-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -52,6 +56,8 @@ patternProperties:
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
@ -60,6 +66,8 @@ patternProperties:
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm8550-dsi-phy-4nm
|
||||
|
@ -0,0 +1,94 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/panel/jdi,lpm102a188a.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: JDI LPM102A188A 2560x1800 10.2" DSI Panel
|
||||
|
||||
maintainers:
|
||||
- Diogo Ivo <diogo.ivo@tecnico.ulisboa.pt>
|
||||
|
||||
description: |
|
||||
This panel requires a dual-channel DSI host to operate. It supports two modes:
|
||||
- left-right: each channel drives the left or right half of the screen
|
||||
- even-odd: each channel drives the even or odd lines of the screen
|
||||
|
||||
Each of the DSI channels controls a separate DSI peripheral. The peripheral
|
||||
driven by the first link (DSI-LINK1) is considered the primary peripheral
|
||||
and controls the device. The 'link2' property contains a phandle to the
|
||||
peripheral driven by the second link (DSI-LINK2).
|
||||
|
||||
allOf:
|
||||
- $ref: panel-common.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: jdi,lpm102a188a
|
||||
|
||||
reg: true
|
||||
enable-gpios: true
|
||||
reset-gpios: true
|
||||
power-supply: true
|
||||
backlight: true
|
||||
|
||||
ddi-supply:
|
||||
description: The regulator that provides IOVCC (1.8V).
|
||||
|
||||
link2:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description: |
|
||||
phandle to the DSI peripheral on the secondary link. Note that the
|
||||
presence of this property marks the containing node as DSI-LINK1.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
if:
|
||||
required:
|
||||
- link2
|
||||
then:
|
||||
required:
|
||||
- power-supply
|
||||
- ddi-supply
|
||||
- enable-gpios
|
||||
- reset-gpios
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/gpio/gpio.h>
|
||||
#include <dt-bindings/gpio/tegra-gpio.h>
|
||||
|
||||
dsia: dsi@54300000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
reg = <0x0 0x54300000 0x0 0x00040000>;
|
||||
|
||||
link2: panel@0 {
|
||||
compatible = "jdi,lpm102a188a";
|
||||
reg = <0>;
|
||||
};
|
||||
};
|
||||
|
||||
dsib: dsi@54400000{
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
reg = <0x0 0x54400000 0x0 0x00040000>;
|
||||
nvidia,ganged-mode = <&dsia>;
|
||||
|
||||
link1: panel@0 {
|
||||
compatible = "jdi,lpm102a188a";
|
||||
reg = <0>;
|
||||
power-supply = <&pplcd_vdd>;
|
||||
ddi-supply = <&pp1800_lcdio>;
|
||||
enable-gpios = <&gpio TEGRA_GPIO(V, 1) GPIO_ACTIVE_HIGH>;
|
||||
reset-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
|
||||
link2 = <&link2>;
|
||||
backlight = <&backlight>;
|
||||
};
|
||||
};
|
||||
|
||||
...
|
@ -17,6 +17,7 @@ properties:
|
||||
enum:
|
||||
- leadtek,ltk050h3146w
|
||||
- leadtek,ltk050h3146w-a2
|
||||
- leadtek,ltk050h3148w
|
||||
reg: true
|
||||
backlight: true
|
||||
reset-gpios: true
|
||||
|
@ -7,9 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
title: NewVision NV3051D based LCD panel
|
||||
|
||||
description: |
|
||||
The NewVision NV3051D is a driver chip used to drive DSI panels. For now,
|
||||
this driver only supports the 640x480 panels found in the Anbernic RG353
|
||||
based devices.
|
||||
The NewVision NV3051D is a driver chip used to drive DSI panels.
|
||||
|
||||
maintainers:
|
||||
- Chris Morgan <macromorgan@hotmail.com>
|
||||
@ -21,6 +19,7 @@ properties:
|
||||
compatible:
|
||||
items:
|
||||
- enum:
|
||||
- anbernic,rg351v-panel
|
||||
- anbernic,rg353p-panel
|
||||
- anbernic,rg353v-panel
|
||||
- const: newvision,nv3051d
|
||||
|
@ -21,9 +21,9 @@ description: |
|
||||
|
||||
allOf:
|
||||
- $ref: panel-common.yaml#
|
||||
- $ref: ../lvds-data-mapping.yaml#
|
||||
|
||||
properties:
|
||||
|
||||
compatible:
|
||||
enum:
|
||||
# compatible must be listed in alphabetical order, ordered by compatible.
|
||||
@ -230,6 +230,8 @@ properties:
|
||||
- logictechno,lttd800480070-l6wh-rt
|
||||
# Mitsubishi AA070MC01 7.0" WVGA TFT LCD panel
|
||||
- mitsubishi,aa070mc01-ca1
|
||||
# Mitsubishi AA084XE01 8.4" XGA TFT LCD panel
|
||||
- mitsubishi,aa084xe01
|
||||
# Multi-Inno Technology Co.,Ltd MI0700S4T-6 7" 800x480 TFT Resistive Touch Module
|
||||
- multi-inno,mi0700s4t-6
|
||||
# Multi-Inno Technology Co.,Ltd MI0800FT-9 8" 800x600 TFT Resistive Touch Module
|
||||
@ -347,6 +349,17 @@ properties:
|
||||
power-supply: true
|
||||
no-hpd: true
|
||||
hpd-gpios: true
|
||||
data-mapping: true
|
||||
|
||||
if:
|
||||
not:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: innolux,g101ice-l01
|
||||
then:
|
||||
properties:
|
||||
data-mapping: false
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
@ -366,3 +379,16 @@ examples:
|
||||
};
|
||||
};
|
||||
};
|
||||
- |
|
||||
panel_lvds: panel-lvds {
|
||||
compatible = "innolux,g101ice-l01";
|
||||
power-supply = <&vcc_lcd_reg>;
|
||||
|
||||
data-mapping = "jeida-24";
|
||||
|
||||
port {
|
||||
panel_in_lvds: endpoint {
|
||||
remote-endpoint = <&ltdc_out_lvds>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
@ -0,0 +1,73 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/panel/raydium,rm692e5.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Raydium RM692E5 based DSI display panels
|
||||
|
||||
maintainers:
|
||||
- Konrad Dybcio <konradybcio@kernel.org>
|
||||
|
||||
description:
|
||||
The Raydium RM692E5 is a generic DSI Panel IC used to control
|
||||
AMOLED panels.
|
||||
|
||||
allOf:
|
||||
- $ref: panel-common.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- const: fairphone,fp5-rm692e5-boe
|
||||
- const: raydium,rm692e5
|
||||
|
||||
dvdd-supply:
|
||||
description: Digital voltage rail
|
||||
|
||||
vci-supply:
|
||||
description: Analog voltage rail
|
||||
|
||||
vddio-supply:
|
||||
description: I/O voltage rail
|
||||
|
||||
reg: true
|
||||
port: true
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- reset-gpios
|
||||
- dvdd-supply
|
||||
- vci-supply
|
||||
- vddio-supply
|
||||
- port
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/gpio/gpio.h>
|
||||
|
||||
dsi {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
panel@0 {
|
||||
compatible = "fairphone,fp5-rm692e5-boe", "raydium,rm692e5";
|
||||
reg = <0>;
|
||||
|
||||
reset-gpios = <&tlmm 44 GPIO_ACTIVE_LOW>;
|
||||
dvdd-supply = <&vreg_oled_vci>;
|
||||
vci-supply = <&vreg_l12c>;
|
||||
vddio-supply = <&vreg_oled_dvdd>;
|
||||
|
||||
port {
|
||||
panel_in_0: endpoint {
|
||||
remote-endpoint = <&dsi0_out>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
...
|
@ -22,6 +22,8 @@ properties:
|
||||
enum:
|
||||
# Anbernic RG353V-V2 5.0" 640x480 TFT LCD panel
|
||||
- anbernic,rg353v-panel-v2
|
||||
# Powkiddy RGB30 3.0" 720x720 TFT LCD panel
|
||||
- powkiddy,rgb30-panel
|
||||
# Rocktech JH057N00900 5.5" 720x1440 TFT LCD panel
|
||||
- rocktech,jh057n00900
|
||||
# Xingbangda XBD599 5.99" 720x1440 TFT LCD panel
|
||||
|
@ -18,6 +18,7 @@ properties:
|
||||
- rockchip,rk3288-mipi-dsi
|
||||
- rockchip,rk3399-mipi-dsi
|
||||
- rockchip,rk3568-mipi-dsi
|
||||
- rockchip,rv1126-mipi-dsi
|
||||
- const: snps,dw-mipi-dsi
|
||||
|
||||
interrupts:
|
||||
@ -77,6 +78,7 @@ allOf:
|
||||
enum:
|
||||
- rockchip,px30-mipi-dsi
|
||||
- rockchip,rk3568-mipi-dsi
|
||||
- rockchip,rv1126-mipi-dsi
|
||||
|
||||
then:
|
||||
properties:
|
||||
|
@ -31,6 +31,7 @@ properties:
|
||||
- rockchip,rk3368-vop
|
||||
- rockchip,rk3399-vop-big
|
||||
- rockchip,rk3399-vop-lit
|
||||
- rockchip,rv1126-vop
|
||||
|
||||
reg:
|
||||
minItems: 1
|
||||
|
@ -0,0 +1,42 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/solomon,ssd-common.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Common properties for Solomon OLED Display Controllers
|
||||
|
||||
maintainers:
|
||||
- Javier Martinez Canillas <javierm@redhat.com>
|
||||
|
||||
properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
reset-gpios:
|
||||
maxItems: 1
|
||||
|
||||
# Only required for SPI
|
||||
dc-gpios:
|
||||
description:
|
||||
GPIO connected to the controller's D/C# (Data/Command) pin,
|
||||
that is needed for 4-wire SPI to tell the controller if the
|
||||
data sent is for a command register or the display data RAM
|
||||
maxItems: 1
|
||||
|
||||
solomon,height:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description:
|
||||
Height in pixels of the screen driven by the controller.
|
||||
The default value is controller-dependent.
|
||||
|
||||
solomon,width:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description:
|
||||
Width in pixels of the screen driven by the controller.
|
||||
The default value is controller-dependent.
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/spi/spi-peripheral-props.yaml#
|
||||
|
||||
additionalProperties: true
|
@ -27,38 +27,12 @@ properties:
|
||||
- solomon,ssd1307
|
||||
- solomon,ssd1309
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
pwms:
|
||||
maxItems: 1
|
||||
|
||||
reset-gpios:
|
||||
maxItems: 1
|
||||
|
||||
# Only required for SPI
|
||||
dc-gpios:
|
||||
description:
|
||||
GPIO connected to the controller's D/C# (Data/Command) pin,
|
||||
that is needed for 4-wire SPI to tell the controller if the
|
||||
data sent is for a command register or the display data RAM
|
||||
maxItems: 1
|
||||
|
||||
vbat-supply:
|
||||
description: The supply for VBAT
|
||||
|
||||
solomon,height:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description:
|
||||
Height in pixel of the screen driven by the controller.
|
||||
The default value is controller-dependent.
|
||||
|
||||
solomon,width:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description:
|
||||
Width in pixel of the screen driven by the controller.
|
||||
The default value is controller-dependent.
|
||||
|
||||
solomon,page-offset:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
default: 1
|
||||
@ -148,7 +122,7 @@ required:
|
||||
- reg
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/spi/spi-peripheral-props.yaml#
|
||||
- $ref: solomon,ssd-common.yaml#
|
||||
|
||||
- if:
|
||||
properties:
|
||||
|
@ -0,0 +1,89 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/solomon,ssd132x.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Solomon SSD132x OLED Display Controllers
|
||||
|
||||
maintainers:
|
||||
- Javier Martinez Canillas <javierm@redhat.com>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- solomon,ssd1322
|
||||
- solomon,ssd1325
|
||||
- solomon,ssd1327
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
allOf:
|
||||
- $ref: solomon,ssd-common.yaml#
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: solomon,ssd1322
|
||||
then:
|
||||
properties:
|
||||
solomon,width:
|
||||
default: 480
|
||||
solomon,height:
|
||||
default: 128
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: solomon,ssd1325
|
||||
then:
|
||||
properties:
|
||||
solomon,width:
|
||||
default: 128
|
||||
solomon,height:
|
||||
default: 80
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: solomon,ssd1327
|
||||
then:
|
||||
properties:
|
||||
solomon,width:
|
||||
default: 128
|
||||
solomon,height:
|
||||
default: 128
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
i2c {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
oled@3c {
|
||||
compatible = "solomon,ssd1327";
|
||||
reg = <0x3c>;
|
||||
reset-gpios = <&gpio2 7>;
|
||||
};
|
||||
|
||||
};
|
||||
- |
|
||||
spi {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
oled@0 {
|
||||
compatible = "solomon,ssd1327";
|
||||
reg = <0x0>;
|
||||
reset-gpios = <&gpio2 7>;
|
||||
dc-gpios = <&gpio2 8>;
|
||||
spi-max-frequency = <10000000>;
|
||||
};
|
||||
};
|
@ -1085,6 +1085,8 @@ patternProperties:
|
||||
description: Powertip Tech. Corp.
|
||||
"^powervr,.*":
|
||||
description: PowerVR (deprecated, use img)
|
||||
"^powkiddy,.*":
|
||||
description: Powkiddy
|
||||
"^primux,.*":
|
||||
description: Primux Trading, S.L.
|
||||
"^probox2,.*":
|
||||
|
@ -5,14 +5,30 @@ The dma-buf subsystem provides the framework for sharing buffers for
|
||||
hardware (DMA) access across multiple device drivers and subsystems, and
|
||||
for synchronizing asynchronous hardware access.
|
||||
|
||||
This is used, for example, by drm "prime" multi-GPU support, but is of
|
||||
course not limited to GPU use cases.
|
||||
As an example, it is used extensively by the DRM subsystem to exchange
|
||||
buffers between processes, contexts, library APIs within the same
|
||||
process, and also to exchange buffers with other subsystems such as
|
||||
V4L2.
|
||||
|
||||
This document describes the way in which kernel subsystems can use and
|
||||
interact with the three main primitives offered by dma-buf:
|
||||
|
||||
- dma-buf, representing a sg_table and exposed to userspace as a file
|
||||
descriptor to allow passing between processes, subsystems, devices,
|
||||
etc;
|
||||
- dma-fence, providing a mechanism to signal when an asynchronous
|
||||
hardware operation has completed; and
|
||||
- dma-resv, which manages a set of dma-fences for a particular dma-buf
|
||||
allowing implicit (kernel-ordered) synchronization of work to
|
||||
preserve the illusion of coherent access
|
||||
|
||||
|
||||
Userspace API principles and use
|
||||
--------------------------------
|
||||
|
||||
For more details on how to design your subsystem's API for dma-buf use, please
|
||||
see Documentation/userspace-api/dma-buf-alloc-exchange.rst.
|
||||
|
||||
The three main components of this are: (1) dma-buf, representing a
|
||||
sg_table and exposed to userspace as a file descriptor to allow passing
|
||||
between devices, (2) fence, which provides a mechanism to signal when
|
||||
one device has finished access, and (3) reservation, which manages the
|
||||
shared or exclusive fence(s) associated with the buffer.
|
||||
|
||||
Shared DMA Buffers
|
||||
------------------
|
||||
|
@ -26,12 +26,30 @@ serial_number
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
|
||||
:doc: serial_number
|
||||
|
||||
fru_id
|
||||
-------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
|
||||
:doc: fru_id
|
||||
|
||||
manufacturer
|
||||
-------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
|
||||
:doc: manufacturer
|
||||
|
||||
unique_id
|
||||
---------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
:doc: unique_id
|
||||
|
||||
board_info
|
||||
----------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
|
||||
:doc: board_info
|
||||
|
||||
Accelerated Processing Units (APU) Info
|
||||
---------------------------------------
|
||||
|
||||
|
@ -64,6 +64,36 @@ gpu_metrics
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
:doc: gpu_metrics
|
||||
|
||||
fan_curve
|
||||
---------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
:doc: fan_curve
|
||||
|
||||
acoustic_limit_rpm_threshold
|
||||
----------------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
:doc: acoustic_limit_rpm_threshold
|
||||
|
||||
acoustic_target_rpm_threshold
|
||||
-----------------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
:doc: acoustic_target_rpm_threshold
|
||||
|
||||
fan_target_temperature
|
||||
----------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
:doc: fan_target_temperature
|
||||
|
||||
fan_minimum_pwm
|
||||
---------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
:doc: fan_minimum_pwm
|
||||
|
||||
GFXOFF
|
||||
======
|
||||
|
||||
|
@ -67,6 +67,19 @@ Lists the tests that for a given driver on a specific hardware revision are
|
||||
known to behave unreliably. These tests won't cause a job to fail regardless of
|
||||
the result. They will still be run.
|
||||
|
||||
Each new flake entry must be associated with a link to the email reporting the
|
||||
bug to the author of the affected driver, the board name or Device Tree name of
|
||||
the board, the first kernel version affected, and an approximation of the
|
||||
failure rate.
|
||||
|
||||
They should be provided under the following format::
|
||||
|
||||
# Bug Report: $LORE_OR_PATCHWORK_URL
|
||||
# Board Name: broken-board.dtb
|
||||
# Version: 6.6-rc1
|
||||
# Failure Rate: 100
|
||||
flaky-test
|
||||
|
||||
drivers/gpu/drm/ci/${DRIVER_NAME}-${HW_REVISION}-skips.txt
|
||||
-----------------------------------------------------------
|
||||
|
||||
@ -86,10 +99,13 @@ https://gitlab.freedesktop.org/janedoe/linux/-/settings/ci_cd), change the
|
||||
CI/CD configuration file from .gitlab-ci.yml to
|
||||
drivers/gpu/drm/ci/gitlab-ci.yml.
|
||||
|
||||
3. Next time you push to this repository, you will see a CI pipeline being
|
||||
3. Request to be added to the drm/ci-ok group so that your user has the
|
||||
necessary privileges to run the CI on https://gitlab.freedesktop.org/drm/ci-ok
|
||||
|
||||
4. Next time you push to this repository, you will see a CI pipeline being
|
||||
created (eg. https://gitlab.freedesktop.org/janedoe/linux/-/pipelines)
|
||||
|
||||
4. The various jobs will be run and when the pipeline is finished, all jobs
|
||||
5. The various jobs will be run and when the pipeline is finished, all jobs
|
||||
should be green unless a regression has been found.
|
||||
|
||||
|
||||
|
@ -18,6 +18,7 @@ GPU Driver Documentation
|
||||
xen-front
|
||||
afbc
|
||||
komeda-kms
|
||||
panfrost
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
@ -360,6 +360,8 @@ Format Functions Reference
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_fourcc.c
|
||||
:export:
|
||||
|
||||
.. _kms_dumb_buffer_objects:
|
||||
|
||||
Dumb Buffer Objects
|
||||
===================
|
||||
|
||||
|
@ -466,40 +466,40 @@ DRM MM Range Allocator Function References
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_mm.c
|
||||
:export:
|
||||
|
||||
DRM GPU VA Manager
|
||||
==================
|
||||
DRM GPUVM
|
||||
=========
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
|
||||
:doc: Overview
|
||||
|
||||
Split and Merge
|
||||
---------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
|
||||
:doc: Split and Merge
|
||||
|
||||
Locking
|
||||
-------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
|
||||
:doc: Locking
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
|
||||
:doc: Examples
|
||||
|
||||
DRM GPU VA Manager Function References
|
||||
--------------------------------------
|
||||
DRM GPUVM Function References
|
||||
-----------------------------
|
||||
|
||||
.. kernel-doc:: include/drm/drm_gpuva_mgr.h
|
||||
.. kernel-doc:: include/drm/drm_gpuvm.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
|
||||
:export:
|
||||
|
||||
DRM Buddy Allocator
|
||||
|
@ -285,6 +285,83 @@ for GPU1 and GPU2 from different vendors, and a third handler for
|
||||
mmapped regular files. Threads cause additional pain with signal
|
||||
handling as well.
|
||||
|
||||
Device reset
|
||||
============
|
||||
|
||||
The GPU stack is really complex and is prone to errors, from hardware bugs,
|
||||
faulty applications and everything in between the many layers. Some errors
|
||||
require resetting the device in order to make the device usable again. This
|
||||
section describes the expectations for DRM and usermode drivers when a
|
||||
device resets and how to propagate the reset status.
|
||||
|
||||
Device resets can not be disabled without tainting the kernel, which can lead to
|
||||
hanging the entire kernel through shrinkers/mmu_notifiers. Userspace role in
|
||||
device resets is to propagate the message to the application and apply any
|
||||
special policy for blocking guilty applications, if any. Corollary is that
|
||||
debugging a hung GPU context requires hardware support to be able to preempt such
|
||||
a GPU context while it's stopped.
|
||||
|
||||
Kernel Mode Driver
|
||||
------------------
|
||||
|
||||
The KMD is responsible for checking if the device needs a reset, and to perform
|
||||
it as needed. Usually a hang is detected when a job gets stuck executing. KMD
|
||||
should keep track of resets, because userspace can query any time about the
|
||||
reset status for a specific context. This is needed to propagate to the rest of
|
||||
the stack that a reset has happened. Currently, this is implemented by each
|
||||
driver separately, with no common DRM interface. Ideally this should be properly
|
||||
integrated at DRM scheduler to provide a common ground for all drivers. After a
|
||||
reset, KMD should reject new command submissions for affected contexts.
|
||||
|
||||
User Mode Driver
|
||||
----------------
|
||||
|
||||
After command submission, UMD should check if the submission was accepted or
|
||||
rejected. After a reset, KMD should reject submissions, and UMD can issue an
|
||||
ioctl to the KMD to check the reset status, and this can be checked more often
|
||||
if the UMD requires it. After detecting a reset, UMD will then proceed to report
|
||||
it to the application using the appropriate API error code, as explained in the
|
||||
section below about robustness.
|
||||
|
||||
Robustness
|
||||
----------
|
||||
|
||||
The only way to try to keep a graphical API context working after a reset is if
|
||||
it complies with the robustness aspects of the graphical API that it is using.
|
||||
|
||||
Graphical APIs provide ways for applications to deal with device resets. However,
|
||||
there is no guarantee that the app will use such features correctly, and a
|
||||
userspace that doesn't support robust interfaces (like a non-robust
|
||||
OpenGL context or API without any robustness support like libva) leaves the
|
||||
robustness handling entirely to the userspace driver. There is no strong
|
||||
community consensus on what the userspace driver should do in that case,
|
||||
since all reasonable approaches have some clear downsides.
|
||||
|
||||
OpenGL
|
||||
~~~~~~
|
||||
|
||||
Apps using OpenGL should use the available robust interfaces, like the
|
||||
extension ``GL_ARB_robustness`` (or ``GL_EXT_robustness`` for OpenGL ES). This
|
||||
interface tells if a reset has happened, and if so, all the context state is
|
||||
considered lost and the app proceeds by creating new ones. There's no consensus
|
||||
on what to do if robustness is not in use.
|
||||
|
||||
Vulkan
|
||||
~~~~~~
|
||||
|
||||
Apps using Vulkan should check for ``VK_ERROR_DEVICE_LOST`` for submissions.
|
||||
This error code means, among other things, that a device reset has happened and
|
||||
it needs to recreate the contexts to keep going.
|
||||
|
||||
Reporting causes of resets
|
||||
--------------------------
|
||||
|
||||
Apart from propagating the reset through the stack so apps can recover, it's
|
||||
really useful for driver developers to learn more about what caused the reset in
|
||||
the first place. DRM devices should make use of devcoredump to store relevant
|
||||
information about the reset, so this information can be added to user bug
|
||||
reports.
|
||||
|
||||
.. _drm_driver_ioctl:
|
||||
|
||||
IOCTL Support on Device Nodes
|
||||
@ -450,12 +527,12 @@ VBlank event handling
|
||||
|
||||
The DRM core exposes two vertical blank related ioctls:
|
||||
|
||||
DRM_IOCTL_WAIT_VBLANK
|
||||
:c:macro:`DRM_IOCTL_WAIT_VBLANK`
|
||||
This takes a struct drm_wait_vblank structure as its argument, and
|
||||
it is used to block or request a signal when a specified vblank
|
||||
event occurs.
|
||||
|
||||
DRM_IOCTL_MODESET_CTL
|
||||
:c:macro:`DRM_IOCTL_MODESET_CTL`
|
||||
This was only used for user-mode-setting drivers around modesetting
|
||||
changes to allow the kernel to update the vblank interrupt after
|
||||
mode setting, since on many devices the vertical blank counter is
|
||||
@ -478,11 +555,18 @@ The index is used in cases where a densely packed identifier for a CRTC is
|
||||
needed, for instance a bitmask of CRTC's. The member possible_crtcs of struct
|
||||
drm_mode_get_plane is an example.
|
||||
|
||||
DRM_IOCTL_MODE_GETRESOURCES populates a structure with an array of CRTC ID's,
|
||||
and the CRTC index is its position in this array.
|
||||
:c:macro:`DRM_IOCTL_MODE_GETRESOURCES` populates a structure with an array of
|
||||
CRTC ID's, and the CRTC index is its position in this array.
|
||||
|
||||
.. kernel-doc:: include/uapi/drm/drm.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: include/uapi/drm/drm_mode.h
|
||||
:internal:
|
||||
|
||||
|
||||
dma-buf interoperability
|
||||
========================
|
||||
|
||||
Please see Documentation/userspace-api/dma-buf-alloc-exchange.rst for
|
||||
information on how dma-buf is integrated and exposed within DRM.
|
||||
|
@ -169,3 +169,4 @@ Driver specific implementations
|
||||
-------------------------------
|
||||
|
||||
:ref:`i915-usage-stats`
|
||||
:ref:`panfrost-usage-stats`
|
||||
|
309
Documentation/gpu/drm-vm-bind-async.rst
Normal file
309
Documentation/gpu/drm-vm-bind-async.rst
Normal file
@ -0,0 +1,309 @@
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
|
||||
|
||||
====================
|
||||
Asynchronous VM_BIND
|
||||
====================
|
||||
|
||||
Nomenclature:
|
||||
=============
|
||||
|
||||
* ``VRAM``: On-device memory. Sometimes referred to as device local memory.
|
||||
|
||||
* ``gpu_vm``: A virtual GPU address space. Typically per process, but
|
||||
can be shared by multiple processes.
|
||||
|
||||
* ``VM_BIND``: An operation or a list of operations to modify a gpu_vm using
|
||||
an IOCTL. The operations include mapping and unmapping system- or
|
||||
VRAM memory.
|
||||
|
||||
* ``syncobj``: A container that abstracts synchronization objects. The
|
||||
synchronization objects can be either generic, like dma-fences or
|
||||
driver specific. A syncobj typically indicates the type of the
|
||||
underlying synchronization object.
|
||||
|
||||
* ``in-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation waits
|
||||
for these before starting.
|
||||
|
||||
* ``out-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation
|
||||
signals these when the bind operation is complete.
|
||||
|
||||
* ``dma-fence``: A cross-driver synchronization object. A basic
|
||||
understanding of dma-fences is required to digest this
|
||||
document. Please refer to the ``DMA Fences`` section of the
|
||||
:doc:`dma-buf doc </driver-api/dma-buf>`.
|
||||
|
||||
* ``memory fence``: A synchronization object, different from a dma-fence.
|
||||
A memory fence uses the value of a specified memory location to determine
|
||||
signaled status. A memory fence can be awaited and signaled by both
|
||||
the GPU and CPU. Memory fences are sometimes referred to as
|
||||
user-fences, userspace-fences or gpu futexes and do not necessarily obey
|
||||
the dma-fence rule of signaling within a "reasonable amount of time".
|
||||
The kernel should thus avoid waiting for memory fences with locks held.
|
||||
|
||||
* ``long-running workload``: A workload that may take more than the
|
||||
current stipulated dma-fence maximum signal delay to complete and
|
||||
which therefore needs to set the gpu_vm or the GPU execution context in
|
||||
a certain mode that disallows completion dma-fences.
|
||||
|
||||
* ``exec function``: An exec function is a function that revalidates all
|
||||
affected gpu_vmas, submits a GPU command batch and registers the
|
||||
dma_fence representing the GPU command's activity with all affected
|
||||
dma_resvs. For completeness, although not covered by this document,
|
||||
it's worth mentioning that an exec function may also be the
|
||||
revalidation worker that is used by some drivers in compute /
|
||||
long-running mode.
|
||||
|
||||
* ``bind context``: A context identifier used for the VM_BIND
|
||||
operation. VM_BIND operations that use the same bind context can be
|
||||
assumed, where it matters, to complete in order of submission. No such
|
||||
assumptions can be made for VM_BIND operations using separate bind contexts.
|
||||
|
||||
* ``UMD``: User-mode driver.
|
||||
|
||||
* ``KMD``: Kernel-mode driver.
|
||||
|
||||
|
||||
Synchronous / Asynchronous VM_BIND operation
|
||||
============================================
|
||||
|
||||
Synchronous VM_BIND
|
||||
___________________
|
||||
With Synchronous VM_BIND, the VM_BIND operations all complete before the
|
||||
IOCTL returns. A synchronous VM_BIND takes neither in-fences nor
|
||||
out-fences. Synchronous VM_BIND may block and wait for GPU operations;
|
||||
for example swap-in or clearing, or even previous binds.
|
||||
|
||||
Asynchronous VM_BIND
|
||||
____________________
|
||||
Asynchronous VM_BIND accepts both in-syncobjs and out-syncobjs. While the
|
||||
IOCTL may return immediately, the VM_BIND operations wait for the in-syncobjs
|
||||
before modifying the GPU page-tables, and signal the out-syncobjs when
|
||||
the modification is done in the sense that the next exec function that
|
||||
waits for the out-syncobjs will see the change. Errors are reported
|
||||
synchronously.
|
||||
In low-memory situations the implementation may block, performing the
|
||||
VM_BIND synchronously, because there might not be enough memory
|
||||
immediately available for preparing the asynchronous operation.
|
||||
|
||||
If the VM_BIND IOCTL takes a list or an array of operations as an argument,
|
||||
the in-syncobjs need to signal before the first operation starts to
|
||||
execute, and the out-syncobjs signal after the last operation
|
||||
completes. Operations in the operation list can be assumed, where it
|
||||
matters, to complete in order.
|
||||
|
||||
Since asynchronous VM_BIND operations may use dma-fences embedded in
|
||||
out-syncobjs and internally in KMD to signal bind completion, any
|
||||
memory fences given as VM_BIND in-fences need to be awaited
|
||||
synchronously before the VM_BIND ioctl returns, since dma-fences,
|
||||
required to signal in a reasonable amount of time, can never be made
|
||||
to depend on memory fences that don't have such a restriction.
|
||||
|
||||
The purpose of an Asynchronous VM_BIND operation is for user-mode
|
||||
drivers to be able to pipeline interleaved gpu_vm modifications and
|
||||
exec functions. For long-running workloads, such pipelining of a bind
|
||||
operation is not allowed and any in-fences need to be awaited
|
||||
synchronously. The reason for this is twofold. First, any memory
|
||||
fences gated by a long-running workload and used as in-syncobjs for the
|
||||
VM_BIND operation will need to be awaited synchronously anyway (see
|
||||
above). Second, any dma-fences used as in-syncobjs for VM_BIND
|
||||
operations for long-running workloads will not allow for pipelining
|
||||
anyway since long-running workloads don't allow for dma-fences as
|
||||
out-syncobjs, so while theoretically possible the use of them is
|
||||
questionable and should be rejected until there is a valuable use-case.
|
||||
Note that this is not a limitation imposed by dma-fence rules, but
|
||||
rather a limitation imposed to keep KMD implementation simple. It does
|
||||
not affect using dma-fences as dependencies for the long-running
|
||||
workload itself, which is allowed by dma-fence rules, but rather for
|
||||
the VM_BIND operation only.
|
||||
|
||||
An asynchronous VM_BIND operation may take substantial time to
|
||||
complete and signal the out_fence. In particular if the operation is
|
||||
deeply pipelined behind other VM_BIND operations and workloads
|
||||
submitted using exec functions. In that case, UMD might want to avoid a
|
||||
subsequent VM_BIND operation to be queued behind the first one if
|
||||
there are no explicit dependencies. In order to circumvent such a queue-up, a
|
||||
VM_BIND implementation may allow for VM_BIND contexts to be
|
||||
created. For each context, VM_BIND operations will be guaranteed to
|
||||
complete in the order they were submitted, but that is not the case
|
||||
for VM_BIND operations executing on separate VM_BIND contexts. Instead
|
||||
KMD will attempt to execute such VM_BIND operations in parallel but
|
||||
leaving no guarantee that they will actually be executed in
|
||||
parallel. There may be internal implicit dependencies that only KMD knows
|
||||
about, for example page-table structure changes. A way to attempt
|
||||
to avoid such internal dependencies is to have different VM_BIND
|
||||
contexts use separate regions of a VM.
|
||||
|
||||
Also for VM_BINDS for long-running gpu_vms the user-mode driver should typically
|
||||
select memory fences as out-fences since that gives greater flexibility for
|
||||
the kernel mode driver to inject other operations into the bind /
|
||||
unbind operations. Like for example inserting breakpoints into batch
|
||||
buffers. The workload execution can then easily be pipelined behind
|
||||
the bind completion using the memory out-fence as the signal condition
|
||||
for a GPU semaphore embedded by UMD in the workload.
|
||||
|
||||
There is no difference in the operations supported or in
|
||||
multi-operation support between asynchronous VM_BIND and synchronous VM_BIND.
|
||||
|
||||
Multi-operation VM_BIND IOCTL error handling and interrupts
|
||||
===========================================================
|
||||
|
||||
The VM_BIND operations of the IOCTL may error for various reasons, for
|
||||
example due to lack of resources to complete and due to interrupted
|
||||
waits.
|
||||
In these situations UMD should preferably restart the IOCTL after
|
||||
taking suitable action.
|
||||
If UMD has over-committed a memory resource, an -ENOSPC error will be
|
||||
returned, and UMD may then unbind resources that are not used at the
|
||||
moment and rerun the IOCTL. On -EINTR, UMD should simply rerun the
|
||||
IOCTL and on -ENOMEM user-space may either attempt to free known
|
||||
system memory resources or fail. In case of UMD deciding to fail a
|
||||
bind operation, due to an error return, no additional action is needed
|
||||
to clean up the failed operation, and the VM is left in the same state
|
||||
as it was before the failing IOCTL.
|
||||
Unbind operations are guaranteed not to return any errors due to
|
||||
resource constraints, but may return errors due to, for example,
|
||||
invalid arguments or the gpu_vm being banned.
|
||||
In the case an unexpected error happens during the asynchronous bind
|
||||
process, the gpu_vm will be banned, and attempts to use it after banning
|
||||
will return -ENOENT.
|
||||
|
||||
Example: The Xe VM_BIND uAPI
|
||||
============================
|
||||
|
||||
Starting with the VM_BIND operation struct, the IOCTL call can take
|
||||
zero, one or many such operations. A zero number means only the
|
||||
synchronization part of the IOCTL is carried out: an asynchronous
|
||||
VM_BIND updates the syncobjects, whereas a sync VM_BIND waits for the
|
||||
implicit dependencies to be fulfilled.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct drm_xe_vm_bind_op {
|
||||
/**
|
||||
* @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
|
||||
*/
|
||||
__u32 obj;
|
||||
|
||||
/** @pad: MBZ */
|
||||
__u32 pad;
|
||||
|
||||
union {
|
||||
/**
|
||||
* @obj_offset: Offset into the object for MAP.
|
||||
*/
|
||||
__u64 obj_offset;
|
||||
|
||||
/** @userptr: user virtual address for MAP_USERPTR */
|
||||
__u64 userptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
|
||||
*/
|
||||
__u64 range;
|
||||
|
||||
/** @addr: Address to operate on, MBZ for UNMAP_ALL */
|
||||
__u64 addr;
|
||||
|
||||
/**
|
||||
* @tile_mask: Mask for which tiles to create binds for, 0 == All tiles,
|
||||
* only applies to creating new VMAs
|
||||
*/
|
||||
__u64 tile_mask;
|
||||
|
||||
/* Map (parts of) an object into the GPU virtual address range. */
|
||||
#define XE_VM_BIND_OP_MAP 0x0
|
||||
/* Unmap a GPU virtual address range */
|
||||
#define XE_VM_BIND_OP_UNMAP 0x1
|
||||
/*
|
||||
* Map a CPU virtual address range into a GPU virtual
|
||||
* address range.
|
||||
*/
|
||||
#define XE_VM_BIND_OP_MAP_USERPTR 0x2
|
||||
/* Unmap a gem object from the VM. */
|
||||
#define XE_VM_BIND_OP_UNMAP_ALL 0x3
|
||||
/*
|
||||
* Make the backing memory of an address range resident if
|
||||
* possible. Note that this doesn't pin backing memory.
|
||||
*/
|
||||
#define XE_VM_BIND_OP_PREFETCH 0x4
|
||||
|
||||
/* Make the GPU map readonly. */
|
||||
#define XE_VM_BIND_FLAG_READONLY (0x1 << 16)
|
||||
/*
|
||||
* Valid on a faulting VM only, do the MAP operation immediately rather
|
||||
* than deferring the MAP to the page fault handler.
|
||||
*/
|
||||
#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 17)
|
||||
/*
|
||||
* When the NULL flag is set, the page tables are setup with a special
|
||||
* bit which indicates writes are dropped and all reads return zero. In
|
||||
* the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP
|
||||
* operations, the BO handle MBZ, and the BO offset MBZ. This flag is
|
||||
* intended to implement VK sparse bindings.
|
||||
*/
|
||||
#define XE_VM_BIND_FLAG_NULL (0x1 << 18)
|
||||
/** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
|
||||
__u32 op;
|
||||
|
||||
/** @region: Memory region to prefetch VMA to, instance not a mask */
|
||||
__u32 region;
|
||||
|
||||
/** @reserved: Reserved */
|
||||
__u64 reserved[2];
|
||||
};
|
||||
|
||||
|
||||
The VM_BIND IOCTL argument itself looks as follows. Note that for
|
||||
synchronous VM_BIND, the num_syncs and syncs fields must be zero. Here
|
||||
the ``exec_queue_id`` field is the VM_BIND context discussed previously
|
||||
that is used to facilitate out-of-order VM_BINDs.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct drm_xe_vm_bind {
|
||||
/** @extensions: Pointer to the first extension struct, if any */
|
||||
__u64 extensions;
|
||||
|
||||
/** @vm_id: The ID of the VM to bind to */
|
||||
__u32 vm_id;
|
||||
|
||||
/**
|
||||
* @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
|
||||
* and exec queue must have same vm_id. If zero, the default VM bind engine
|
||||
* is used.
|
||||
*/
|
||||
__u32 exec_queue_id;
|
||||
|
||||
/** @num_binds: number of binds in this IOCTL */
|
||||
__u32 num_binds;
|
||||
|
||||
/* If set, perform an async VM_BIND, if clear a sync VM_BIND */
|
||||
#define XE_VM_BIND_IOCTL_FLAG_ASYNC (0x1 << 0)
|
||||
|
||||
/** @flags: Flags controlling all operations in this ioctl. */
|
||||
__u32 flags;
|
||||
|
||||
union {
|
||||
/** @bind: used if num_binds == 1 */
|
||||
struct drm_xe_vm_bind_op bind;
|
||||
|
||||
/**
|
||||
* @vector_of_binds: userptr to array of struct
|
||||
* drm_xe_vm_bind_op if num_binds > 1
|
||||
*/
|
||||
__u64 vector_of_binds;
|
||||
};
|
||||
|
||||
/** @num_syncs: amount of syncs to wait for or to signal on completion. */
|
||||
__u32 num_syncs;
|
||||
|
||||
/** @pad2: MBZ */
|
||||
__u32 pad2;
|
||||
|
||||
/** @syncs: pointer to struct drm_xe_sync array */
|
||||
__u64 syncs;
|
||||
|
||||
/** @reserved: Reserved */
|
||||
__u64 reserved[2];
|
||||
};
|
@ -267,19 +267,22 @@ i915 driver.
|
||||
Intel GPU Basics
|
||||
----------------
|
||||
|
||||
An Intel GPU has multiple engines. There are several engine types.
|
||||
An Intel GPU has multiple engines. There are several engine types:
|
||||
|
||||
- RCS engine is for rendering 3D and performing compute, this is named
|
||||
`I915_EXEC_RENDER` in user space.
|
||||
- BCS is a blitting (copy) engine, this is named `I915_EXEC_BLT` in user
|
||||
space.
|
||||
- VCS is a video encode and decode engine, this is named `I915_EXEC_BSD`
|
||||
in user space
|
||||
- VECS is video enhancement engine, this is named `I915_EXEC_VEBOX` in user
|
||||
space.
|
||||
- The enumeration `I915_EXEC_DEFAULT` does not refer to specific engine;
|
||||
instead it is to be used by user space to specify a default rendering
|
||||
engine (for 3D) that may or may not be the same as RCS.
|
||||
- Render Command Streamer (RCS). An engine for rendering 3D and
|
||||
performing compute.
|
||||
- Blitting Command Streamer (BCS). An engine for performing blitting and/or
|
||||
copying operations.
|
||||
- Video Command Streamer (VCS). An engine used for video encoding and decoding. Also
|
||||
sometimes called 'BSD' in hardware documentation.
|
||||
- Video Enhancement Command Streamer (VECS). An engine for video enhancement.
|
||||
Also sometimes called 'VEBOX' in hardware documentation.
|
||||
- Compute Command Streamer (CCS). An engine that has access to the media and
|
||||
GPGPU pipelines, but not the 3D pipeline.
|
||||
- Graphics Security Controller (GSCCS). A dedicated engine for internal
|
||||
communication with GSC controller on security related tasks like
|
||||
High-bandwidth Digital Content Protection (HDCP), Protected Xe Path (PXP),
|
||||
and HuC firmware authentication.
|
||||
|
||||
The Intel GPU family is a family of integrated GPUs using Unified
|
||||
Memory Access. For having the GPU "do work", user space will feed the
|
||||
|
9
Documentation/gpu/implementation_guidelines.rst
Normal file
9
Documentation/gpu/implementation_guidelines.rst
Normal file
@ -0,0 +1,9 @@
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
|
||||
|
||||
===========================================================
|
||||
Misc DRM driver uAPI- and feature implementation guidelines
|
||||
===========================================================
|
||||
|
||||
.. toctree::
|
||||
|
||||
drm-vm-bind-async
|
@ -18,6 +18,7 @@ GPU Driver Developer's Guide
|
||||
vga-switcheroo
|
||||
vgaarbiter
|
||||
automated_testing
|
||||
implementation_guidelines
|
||||
todo
|
||||
rfc/index
|
||||
|
||||
|
40
Documentation/gpu/panfrost.rst
Normal file
40
Documentation/gpu/panfrost.rst
Normal file
@ -0,0 +1,40 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
=========================
|
||||
drm/Panfrost Mali Driver
|
||||
=========================
|
||||
|
||||
.. _panfrost-usage-stats:
|
||||
|
||||
Panfrost DRM client usage stats implementation
|
||||
==============================================
|
||||
|
||||
The drm/Panfrost driver implements the DRM client usage stats specification as
|
||||
documented in :ref:`drm-client-usage-stats`.
|
||||
|
||||
Example of the output showing the implemented key value pairs and entirety of
|
||||
the currently possible format options:
|
||||
|
||||
::
|
||||
pos: 0
|
||||
flags: 02400002
|
||||
mnt_id: 27
|
||||
ino: 531
|
||||
drm-driver: panfrost
|
||||
drm-client-id: 14
|
||||
drm-engine-fragment: 1846584880 ns
|
||||
drm-cycles-fragment: 1424359409
|
||||
drm-maxfreq-fragment: 799999987 Hz
|
||||
drm-curfreq-fragment: 799999987 Hz
|
||||
drm-engine-vertex-tiler: 71932239 ns
|
||||
drm-cycles-vertex-tiler: 52617357
|
||||
drm-maxfreq-vertex-tiler: 799999987 Hz
|
||||
drm-curfreq-vertex-tiler: 799999987 Hz
|
||||
drm-total-memory: 290 MiB
|
||||
drm-shared-memory: 0 MiB
|
||||
drm-active-memory: 226 MiB
|
||||
drm-resident-memory: 36496 KiB
|
||||
drm-purgeable-memory: 128 KiB
|
||||
|
||||
Possible `drm-engine-` key names are: `fragment` and `vertex-tiler`.
|
||||
`drm-curfreq-` values convey the current operating frequency for that engine.
|
@ -67,14 +67,8 @@ platforms.
|
||||
|
||||
When the time comes for Xe, the protection will be lifted on Xe and kept in i915.
|
||||
|
||||
Xe driver will be protected with both STAGING Kconfig and force_probe. Changes in
|
||||
the uAPI are expected while the driver is behind these protections. STAGING will
|
||||
be removed when the driver uAPI gets to a mature state where we can guarantee the
|
||||
‘no regression’ rule. Then force_probe will be lifted only for future platforms
|
||||
that will be productized with Xe driver, but not with i915.
|
||||
|
||||
Xe – Pre-Merge Goals
|
||||
====================
|
||||
Xe – Pre-Merge Goals - Work-in-Progress
|
||||
=======================================
|
||||
|
||||
Drm_scheduler
|
||||
-------------
|
||||
@ -94,41 +88,6 @@ depend on any other patch touching drm_scheduler itself that was not yet merged
|
||||
through drm-misc. This, by itself, already includes the reach of an agreement for
|
||||
uniform 1 to 1 relationship implementation / usage across drivers.
|
||||
|
||||
GPU VA
|
||||
------
|
||||
Two main goals of Xe are meeting together here:
|
||||
|
||||
1) Have an uAPI that aligns with modern UMD needs.
|
||||
|
||||
2) Early upstream engagement.
|
||||
|
||||
RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
|
||||
track of GPU virtual address mappings. This is still not merged upstream, but
|
||||
this aligns very well with our goals and with our VM_BIND. The engagement with
|
||||
upstream and the port of Xe towards GPUVA is already ongoing.
|
||||
|
||||
As a key measurable result, Xe needs to be aligned with the GPU VA and working in
|
||||
our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
|
||||
related patch should be independent and present on dri-devel or acked by
|
||||
maintainers to go along with the first Xe pull request towards drm-next.
|
||||
|
||||
DRM_VM_BIND
|
||||
-----------
|
||||
Nouveau, and Xe are all implementing ‘VM_BIND’ and new ‘Exec’ uAPIs in order to
|
||||
fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
|
||||
development of a common new drm_infrastructure. However, the Xe team needs to
|
||||
engage with the community to explore the options of a common API.
|
||||
|
||||
As a key measurable result, the DRM_VM_BIND needs to be documented in this file
|
||||
below, or this entire block deleted if the consensus is for independent drivers
|
||||
vm_bind ioctls.
|
||||
|
||||
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
|
||||
Xe merged, it is mandatory to enforce the overall locking scheme for all major
|
||||
structs and list (so vm and vma). So, a consensus is needed, and possibly some
|
||||
common helpers. If helpers are needed, they should be also documented in this
|
||||
document.
|
||||
|
||||
ASYNC VM_BIND
|
||||
-------------
|
||||
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
|
||||
@ -138,8 +97,8 @@ memory fences. Ideally with helper support so people don't get it wrong in all
|
||||
possible ways.
|
||||
|
||||
As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
|
||||
various flavors, error handling and a sample API should be documented here or in
|
||||
a separate document pointed to by this document.
|
||||
various flavors, error handling and sample API suggestions are documented in
|
||||
:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
|
||||
|
||||
Userptr integration and vm_bind
|
||||
-------------------------------
|
||||
@ -212,6 +171,14 @@ This item ties into the GPUVA, VM_BIND, and even long-running compute support.
|
||||
As a key measurable result, we need to have a community consensus documented in
|
||||
this document and the Xe driver prepared for the changes, if necessary.
|
||||
|
||||
Xe – uAPI high level overview
|
||||
=============================
|
||||
|
||||
...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
|
||||
|
||||
Xe – Pre-Merge Goals - Completed
|
||||
================================
|
||||
|
||||
Dev_coredump
|
||||
------------
|
||||
|
||||
@ -229,7 +196,37 @@ infrastructure with overall possible improvements, like multiple file support
|
||||
for better organization of the dumps, snapshot support, dmesg extra print,
|
||||
and whatever may make sense and help the overall infrastructure.
|
||||
|
||||
Xe – uAPI high level overview
|
||||
=============================
|
||||
DRM_VM_BIND
|
||||
-----------
|
||||
Nouveau, and Xe are all implementing ‘VM_BIND’ and new ‘Exec’ uAPIs in order to
|
||||
fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
|
||||
development of a common new drm_infrastructure. However, the Xe team needs to
|
||||
engage with the community to explore the options of a common API.
|
||||
|
||||
...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
|
||||
As a key measurable result, the DRM_VM_BIND needs to be documented in this file
|
||||
below, or this entire block deleted if the consensus is for independent drivers
|
||||
vm_bind ioctls.
|
||||
|
||||
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
|
||||
Xe merged, it is mandatory to enforce the overall locking scheme for all major
|
||||
structs and list (so vm and vma). So, a consensus is needed, and possibly some
|
||||
common helpers. If helpers are needed, they should be also documented in this
|
||||
document.
|
||||
|
||||
GPU VA
|
||||
------
|
||||
Two main goals of Xe are meeting together here:
|
||||
|
||||
1) Have a uAPI that aligns with modern UMD needs.
|
||||
|
||||
2) Early upstream engagement.
|
||||
|
||||
RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
|
||||
track of GPU virtual address mappings. This is still not merged upstream, but
|
||||
this aligns very well with our goals and with our VM_BIND. The engagement with
|
||||
upstream and the port of Xe towards GPUVA is already ongoing.
|
||||
|
||||
As a key measurable result, Xe needs to be aligned with the GPU VA and working in
|
||||
our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
|
||||
related patch should be independent and present on dri-devel or acked by
|
||||
maintainers to go along with the first Xe pull request towards drm-next.
|
||||
|
389
Documentation/userspace-api/dma-buf-alloc-exchange.rst
Normal file
389
Documentation/userspace-api/dma-buf-alloc-exchange.rst
Normal file
@ -0,0 +1,389 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. Copyright 2021-2023 Collabora Ltd.
|
||||
|
||||
========================
|
||||
Exchanging pixel buffers
|
||||
========================
|
||||
|
||||
As originally designed, the Linux graphics subsystem had extremely limited
|
||||
support for sharing pixel-buffer allocations between processes, devices, and
|
||||
subsystems. Modern systems require extensive integration between all three
|
||||
classes; this document details how applications and kernel subsystems should
|
||||
approach this sharing for two-dimensional image data.
|
||||
|
||||
It is written with reference to the DRM subsystem for GPU and display devices,
|
||||
V4L2 for media devices, and also to Vulkan, EGL and Wayland, for userspace
|
||||
support, however any other subsystems should also follow this design and advice.
|
||||
|
||||
|
||||
Glossary of terms
|
||||
=================
|
||||
|
||||
.. glossary::
|
||||
|
||||
image:
|
||||
Conceptually a two-dimensional array of pixels. The pixels may be stored
|
||||
in one or more memory buffers. Has width and height in pixels, pixel
|
||||
format and modifier (implicit or explicit).
|
||||
|
||||
row:
|
||||
A span along a single y-axis value, e.g. from co-ordinates (0,100) to
|
||||
(200,100).
|
||||
|
||||
scanline:
|
||||
Synonym for row.
|
||||
|
||||
column:
|
||||
A span along a single x-axis value, e.g. from co-ordinates (100,0) to
|
||||
(100,100).
|
||||
|
||||
memory buffer:
|
||||
A piece of memory for storing (parts of) pixel data. Has stride and size
|
||||
in bytes and at least one handle in some API. May contain one or more
|
||||
planes.
|
||||
|
||||
plane:
|
||||
A two-dimensional array of some or all of an image's color and alpha
|
||||
channel values.
|
||||
|
||||
pixel:
|
||||
A picture element. Has a single color value which is defined by one or
|
||||
more color channel values, e.g. R, G and B, or Y, Cb and Cr. May also
|
||||
have an alpha value as an additional channel.
|
||||
|
||||
pixel data:
|
||||
Bytes or bits that represent some or all of the color/alpha channel values
|
||||
of a pixel or an image. The data for one pixel may be spread over several
|
||||
planes or memory buffers depending on format and modifier.
|
||||
|
||||
color value:
|
||||
A tuple of numbers, representing a color. Each element in the tuple is a
|
||||
color channel value.
|
||||
|
||||
color channel:
|
||||
One of the dimensions in a color model. For example, RGB model has
|
||||
channels R, G, and B. Alpha channel is sometimes counted as a color
|
||||
channel as well.
|
||||
|
||||
pixel format:
|
||||
A description of how pixel data represents the pixel's color and alpha
|
||||
values.
|
||||
|
||||
modifier:
|
||||
A description of how pixel data is laid out in memory buffers.
|
||||
|
||||
alpha:
|
||||
A value that denotes the color coverage in a pixel. Sometimes used for
|
||||
translucency instead.
|
||||
|
||||
stride:
|
||||
A value that denotes the relationship between pixel-location co-ordinates
|
||||
and byte-offset values. Typically used as the byte offset between two
|
||||
pixels at the start of vertically-consecutive tiling blocks. For linear
|
||||
layouts, the byte offset between two vertically-adjacent pixels. For
|
||||
non-linear formats the stride must be computed in a consistent way, which
|
||||
usually is done as-if the layout was linear.
|
||||
|
||||
pitch:
|
||||
Synonym for stride.
|
||||
|
||||
|
||||
Formats and modifiers
|
||||
=====================
|
||||
|
||||
Each buffer must have an underlying format. This format describes the color
|
||||
values provided for each pixel. Although each subsystem has its own format
|
||||
descriptions (e.g. V4L2 and fbdev), the ``DRM_FORMAT_*`` tokens should be reused
|
||||
wherever possible, as they are the standard descriptions used for interchange.
|
||||
These tokens are described in the ``drm_fourcc.h`` file, which is a part of
|
||||
DRM's uAPI.
|
||||
|
||||
Each ``DRM_FORMAT_*`` token describes the translation between a pixel
|
||||
co-ordinate in an image, and the color values for that pixel contained within
|
||||
its memory buffers. The number and type of color channels are described:
|
||||
whether they are RGB or YUV, integer or floating-point, the size of each channel
|
||||
and their locations within the pixel memory, and the relationship between color
|
||||
planes.
|
||||
|
||||
For example, ``DRM_FORMAT_ARGB8888`` describes a format in which each pixel has
|
||||
a single 32-bit value in memory. Alpha, red, green, and blue color channels are
|
||||
available at 8-bit precision per channel, ordered respectively from most to
|
||||
least significant bits in little-endian storage. ``DRM_FORMAT_*`` is not
|
||||
affected by either CPU or device endianness; the byte pattern in memory is
|
||||
always as described in the format definition, which is usually little-endian.
|
||||
|
||||
As a more complex example, ``DRM_FORMAT_NV12`` describes a format in which luma
|
||||
and chroma YUV samples are stored in separate planes, where the chroma plane is
|
||||
stored at half the resolution in both dimensions (i.e. one U/V chroma
|
||||
sample is stored for each 2x2 pixel grouping).
|
||||
|
||||
Format modifiers describe a translation mechanism between these per-pixel memory
|
||||
samples, and the actual memory storage for the buffer. The most straightforward
|
||||
modifier is ``DRM_FORMAT_MOD_LINEAR``, describing a scheme in which each plane
|
||||
is laid out row-sequentially, from the top-left to the bottom-right corner.
|
||||
This is considered the baseline interchange format, and most convenient for CPU
|
||||
access.
|
||||
|
||||
Modern hardware employs much more sophisticated access mechanisms, typically
|
||||
making use of tiled access and possibly also compression. For example, the
|
||||
``DRM_FORMAT_MOD_VIVANTE_TILED`` modifier describes memory storage where pixels
|
||||
are stored in 4x4 blocks arranged in row-major ordering, i.e. the first tile in
|
||||
a plane stores pixels (0,0) to (3,3) inclusive, and the second tile in a plane
|
||||
stores pixels (4,0) to (7,3) inclusive.
|
||||
|
||||
Some modifiers may modify the number of planes required for an image; for
|
||||
example, the ``I915_FORMAT_MOD_Y_TILED_CCS`` modifier adds a second plane to RGB
|
||||
formats in which it stores data about the status of every tile, notably
|
||||
including whether the tile is fully populated with pixel data, or can be
|
||||
expanded from a single solid color.
|
||||
|
||||
These extended layouts are highly vendor-specific, and even specific to
|
||||
particular generations or configurations of devices per-vendor. For this reason,
|
||||
support of modifiers must be explicitly enumerated and negotiated by all users
|
||||
in order to ensure a compatible and optimal pipeline, as discussed below.
|
||||
|
||||
|
||||
Dimensions and size
|
||||
===================
|
||||
|
||||
Each pixel buffer must be accompanied by logical pixel dimensions. This refers
|
||||
to the number of unique samples which can be extracted from, or stored to, the
|
||||
underlying memory storage. For example, even though a 1920x1080
|
||||
``DRM_FORMAT_NV12`` buffer has a luma plane containing 1920x1080 samples for the Y
|
||||
component, and 960x540 samples for the U and V components, the overall buffer is
|
||||
still described as having dimensions of 1920x1080.
|
||||
|
||||
The in-memory storage of a buffer is not guaranteed to begin immediately at the
|
||||
base address of the underlying memory, nor is it guaranteed that the memory
|
||||
storage is tightly clipped to either dimension.
|
||||
|
||||
Each plane must therefore be described with an ``offset`` in bytes, which will be
|
||||
added to the base address of the memory storage before performing any per-pixel
|
||||
calculations. This may be used to combine multiple planes into a single memory
|
||||
buffer; for example, ``DRM_FORMAT_NV12`` may be stored in a single memory buffer
|
||||
where the luma plane's storage begins immediately at the start of the buffer
|
||||
with an offset of 0, and the chroma plane's storage follows within the same buffer
|
||||
beginning from the byte offset for that plane.
|
||||
|
||||
Each plane must also have a ``stride`` in bytes, expressing the offset in memory
|
||||
between two contiguous rows. For example, a ``DRM_FORMAT_MOD_LINEAR`` buffer
|
||||
with dimensions of 1000x1000 may have been allocated as if it were 1024x1000, in
|
||||
order to allow for aligned access patterns. In this case, the buffer will still
|
||||
be described with a width of 1000, however the stride will be ``1024 * bpp``,
|
||||
indicating that there are 24 pixels at the positive extreme of the x axis whose
|
||||
values are not significant.
|
||||
|
||||
Buffers may also be padded further in the y dimension, simply by allocating a
|
||||
larger area than would ordinarily be required. For example, many media decoders
|
||||
are not able to natively output buffers of height 1080, but instead require an
|
||||
effective height of 1088 pixels. In this case, the buffer continues to be
|
||||
described as having a height of 1080, with the memory allocation for each buffer
|
||||
being increased to account for the extra padding.
|
||||
|
||||
|
||||
Enumeration
|
||||
===========
|
||||
|
||||
Every user of pixel buffers must be able to enumerate a set of supported formats
|
||||
and modifiers, described together. Within KMS, this is achieved with the
|
||||
``IN_FORMATS`` property on each DRM plane, listing the supported DRM formats, and
|
||||
the modifiers supported for each format. In userspace, this is supported through
|
||||
the `EGL_EXT_image_dma_buf_import_modifiers`_ extension entrypoints for EGL, the
|
||||
`VK_EXT_image_drm_format_modifier`_ extension for Vulkan, and the
|
||||
`zwp_linux_dmabuf_v1`_ extension for Wayland.
|
||||
|
||||
Each of these interfaces allows users to query a set of supported
|
||||
format+modifier combinations.
|
||||
|
||||
|
||||
Negotiation
|
||||
===========
|
||||
|
||||
It is the responsibility of userspace to negotiate an acceptable format+modifier
|
||||
combination for its usage. This is performed through a simple intersection of
|
||||
lists. For example, if a user wants to use Vulkan to render an image to be
|
||||
displayed on a KMS plane, it must:
|
||||
|
||||
- query KMS for the ``IN_FORMATS`` property for the given plane
|
||||
- query Vulkan for the supported formats for its physical device, making sure
|
||||
to pass the ``VkImageUsageFlagBits`` and ``VkImageCreateFlagBits``
|
||||
corresponding to the intended rendering use
|
||||
- intersect these formats to determine the most appropriate one
|
||||
- for this format, intersect the lists of supported modifiers for both KMS and
|
||||
Vulkan, to obtain a final list of acceptable modifiers for that format
|
||||
|
||||
This intersection must be performed for all usages. For example, if the user
|
||||
also wishes to encode the image to a video stream, it must query the media API
|
||||
it intends to use for encoding for the set of modifiers it supports, and
|
||||
additionally intersect against this list.
|
||||
|
||||
If the intersection of all lists is an empty list, it is not possible to share
|
||||
buffers in this way, and an alternate strategy must be considered (e.g. using
|
||||
CPU access routines to copy data between the different uses, with the
|
||||
corresponding performance cost).
|
||||
|
||||
The resulting modifier list is unsorted; the order is not significant.
|
||||
|
||||
|
||||
Allocation
|
||||
==========
|
||||
|
||||
Once userspace has determined an appropriate format, and corresponding list of
|
||||
acceptable modifiers, it must allocate the buffer. As there is no universal
|
||||
buffer-allocation interface available at either kernel or userspace level, the
|
||||
client makes an arbitrary choice of allocation interface such as Vulkan, GBM, or
|
||||
a media API.
|
||||
|
||||
Each allocation request must take, at a minimum: the pixel format, a list of
|
||||
acceptable modifiers, and the buffer's width and height. Each API may extend
|
||||
this set of properties in different ways, such as allowing allocation in more
|
||||
than two dimensions, intended usage patterns, etc.
|
||||
|
||||
The component which allocates the buffer will make an arbitrary choice of what
|
||||
it considers the 'best' modifier within the acceptable list for the requested
|
||||
allocation, any padding required, and further properties of the underlying
|
||||
memory buffers such as whether they are stored in system or device-specific
|
||||
memory, whether or not they are physically contiguous, and their cache mode.
|
||||
These properties of the memory buffer are not visible to userspace, however the
|
||||
``dma-heaps`` API is an effort to address this.
|
||||
|
||||
After allocation, the client must query the allocator to determine the actual
|
||||
modifier selected for the buffer, as well as the per-plane offset and stride.
|
||||
Allocators are not permitted to vary the format in use, to select a modifier not
|
||||
provided within the acceptable list, nor to vary the pixel dimensions other than
|
||||
the padding expressed through offset, stride, and size.
|
||||
|
||||
Communicating additional constraints, such as alignment of stride or offset,
|
||||
placement within a particular memory area, etc., is out of scope of dma-buf,
|
||||
and is not solved by format and modifier tokens.
|
||||
|
||||
|
||||
Import
|
||||
======
|
||||
|
||||
To use a buffer within a different context, device, or subsystem, the user
|
||||
passes these parameters (format, modifier, width, height, and per-plane offset
|
||||
and stride) to an importing API.
|
||||
|
||||
Each memory buffer is referred to by a buffer handle, which may be unique or
|
||||
duplicated within an image. For example, a ``DRM_FORMAT_NV12`` buffer may have
|
||||
the luma and chroma buffers combined into a single memory buffer by use of the
|
||||
per-plane offset parameters, or they may be completely separate allocations in
|
||||
memory. For this reason, each import and allocation API must provide a separate
|
||||
handle for each plane.
|
||||
|
||||
Each kernel subsystem has its own types and interfaces for buffer management.
|
||||
DRM uses GEM buffer objects (BOs), V4L2 has its own references, etc. These types
|
||||
are not portable between contexts, processes, devices, or subsystems.
|
||||
|
||||
To address this, ``dma-buf`` handles are used as the universal interchange for
|
||||
buffers. Subsystem-specific operations are used to export native buffer handles
|
||||
to a ``dma-buf`` file descriptor, and to import those file descriptors into a
|
||||
native buffer handle. dma-buf file descriptors can be transferred between
|
||||
contexts, processes, devices, and subsystems.
|
||||
|
||||
For example, a Wayland media player may use V4L2 to decode a video frame into a
|
||||
``DRM_FORMAT_NV12`` buffer. This will result in two memory planes (luma and
|
||||
chroma) being dequeued by the user from V4L2. These planes are then exported to
|
||||
one dma-buf file descriptor per plane, these descriptors are then sent along
|
||||
with the metadata (format, modifier, width, height, per-plane offset and stride)
|
||||
to the Wayland server. The Wayland server will then import these file
|
||||
descriptors as an EGLImage for use through EGL/OpenGL (ES), a VkImage for use
|
||||
through Vulkan, or a KMS framebuffer object; each of these import operations
|
||||
will take the same metadata and convert the dma-buf file descriptors into their
|
||||
native buffer handles.
|
||||
|
||||
Having a non-empty intersection of supported modifiers does not guarantee that
|
||||
import will succeed into all consumers; they may have constraints beyond those
|
||||
implied by modifiers which must be satisfied.
|
||||
|
||||
|
||||
Implicit modifiers
|
||||
==================
|
||||
|
||||
The concept of modifiers post-dates all of the subsystems mentioned above. As
|
||||
such, it has been retrofitted into all of these APIs, and in order to ensure
|
||||
backwards compatibility, support is needed for drivers and userspace which do
|
||||
not (yet) support modifiers.
|
||||
|
||||
As an example, GBM is used to allocate buffers to be shared between EGL for
|
||||
rendering and KMS for display. It has two entrypoints for allocating buffers:
|
||||
``gbm_bo_create`` which only takes the format, width, height, and a usage token,
|
||||
and ``gbm_bo_create_with_modifiers`` which extends this with a list of modifiers.
|
||||
|
||||
In the latter case, the allocation is as discussed above, being provided with a
|
||||
list of acceptable modifiers that the implementation can choose from (or fail if
|
||||
it is not possible to allocate within those constraints). In the former case
|
||||
where modifiers are not provided, the GBM implementation must make its own
|
||||
choice as to what is likely to be the 'best' layout. Such a choice is entirely
|
||||
implementation-specific: some will internally use tiled layouts which are not
|
||||
CPU-accessible if the implementation decides that is a good idea through
|
||||
whatever heuristic. It is the implementation's responsibility to ensure that
|
||||
this choice is appropriate.
|
||||
|
||||
To support this case where the layout is not known because there is no awareness
|
||||
of modifiers, a special ``DRM_FORMAT_MOD_INVALID`` token has been defined. This
|
||||
pseudo-modifier declares that the layout is not known, and that the driver
|
||||
should use its own logic to determine what the underlying layout may be.
|
||||
|
||||
.. note::
|
||||
|
||||
``DRM_FORMAT_MOD_INVALID`` is a non-zero value. The modifier value zero is
|
||||
``DRM_FORMAT_MOD_LINEAR``, which is an explicit guarantee that the image
|
||||
has the linear layout. Care and attention should be taken to ensure that
|
||||
zero as a default value is not mixed up with either no modifier or the linear
|
||||
modifier. Also note that in some APIs the invalid modifier value is specified
|
||||
with an out-of-band flag, like in ``DRM_IOCTL_MODE_ADDFB2``.
|
||||
|
||||
There are four cases where this token may be used:
|
||||
- during enumeration, an interface may return ``DRM_FORMAT_MOD_INVALID``, either
|
||||
as the sole member of a modifier list to declare that explicit modifiers are
|
||||
not supported, or as part of a larger list to declare that implicit modifiers
|
||||
may be used
|
||||
- during allocation, a user may supply ``DRM_FORMAT_MOD_INVALID``, either as the
|
||||
sole member of a modifier list (equivalent to not supplying a modifier list
|
||||
at all) to declare that explicit modifiers are not supported and must not be
|
||||
used, or as part of a larger list to declare that an allocation using implicit
|
||||
modifiers is acceptable
|
||||
- in a post-allocation query, an implementation may return
|
||||
``DRM_FORMAT_MOD_INVALID`` as the modifier of the allocated buffer to declare
|
||||
that the underlying layout is implementation-defined and that an explicit
|
||||
modifier description is not available; per the above rules, this may only be
|
||||
returned when the user has included ``DRM_FORMAT_MOD_INVALID`` as part of the
|
||||
list of acceptable modifiers, or not provided a list
|
||||
- when importing a buffer, the user may supply ``DRM_FORMAT_MOD_INVALID`` as the
|
||||
buffer modifier (or not supply a modifier) to indicate that the modifier is
|
||||
unknown for whatever reason; this is only acceptable when the buffer has
|
||||
not been allocated with an explicit modifier
|
||||
|
||||
It follows from this that for any single buffer, the complete chain of operations
|
||||
formed by the producer and all the consumers must be either fully implicit or fully
|
||||
explicit. For example, if a user wishes to allocate a buffer for use between
|
||||
GPU, display, and media, but the media API does not support modifiers, then the
|
||||
user **must not** allocate the buffer with explicit modifiers and attempt to
|
||||
import the buffer into the media API with no modifier, but either perform the
|
||||
allocation using implicit modifiers, or allocate the buffer for media use
|
||||
separately and copy between the two buffers.
|
||||
|
||||
As one exception to the above, allocations may be 'upgraded' from implicit
|
||||
to explicit modifiers. For example, if the buffer is allocated with
|
||||
``gbm_bo_create`` (taking no modifiers), the user may then query the modifier with
|
||||
``gbm_bo_get_modifier`` and then use this modifier as an explicit modifier token
|
||||
if a valid modifier is returned.
|
||||
|
||||
When allocating buffers for exchange between different users and modifiers are
|
||||
not available, implementations are strongly encouraged to use
|
||||
``DRM_FORMAT_MOD_LINEAR`` for their allocation, as this is the universal baseline
|
||||
for exchange. However, it is not guaranteed that this will result in the correct
|
||||
interpretation of buffer content, as implicit modifier operation may still be
|
||||
subject to driver-specific heuristics.
|
||||
|
||||
Any new users - userspace programs and protocols, kernel subsystems, etc -
|
||||
wishing to exchange buffers must offer interoperability through dma-buf file
|
||||
descriptors for memory planes, DRM format tokens to describe the format, DRM
|
||||
format modifiers to describe the layout in memory, at least width and height for
|
||||
dimensions, and at least offset and stride for each memory plane.
|
||||
|
||||
.. _zwp_linux_dmabuf_v1: https://gitlab.freedesktop.org/wayland/wayland-protocols/-/blob/main/unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml
|
||||
.. _VK_EXT_image_drm_format_modifier: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_image_drm_format_modifier.html
|
||||
.. _EGL_EXT_image_dma_buf_import_modifiers: https://registry.khronos.org/EGL/extensions/EXT/EGL_EXT_image_dma_buf_import_modifiers.txt
|
@ -22,6 +22,7 @@ place where this information is gathered.
|
||||
unshare
|
||||
spec_ctrl
|
||||
accelerators/ocxl
|
||||
dma-buf-alloc-exchange
|
||||
ebpf/index
|
||||
ELF
|
||||
ioctl/index
|
||||
|
50
MAINTAINERS
50
MAINTAINERS
@ -1636,13 +1636,13 @@ F: drivers/gpu/drm/arm/display/include/
|
||||
F: drivers/gpu/drm/arm/display/komeda/
|
||||
|
||||
ARM MALI PANFROST DRM DRIVER
|
||||
M: Boris Brezillon <boris.brezillon@collabora.com>
|
||||
M: Rob Herring <robh@kernel.org>
|
||||
M: Tomeu Vizoso <tomeu.vizoso@collabora.com>
|
||||
R: Steven Price <steven.price@arm.com>
|
||||
R: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Supported
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/gpu/panfrost.rst
|
||||
F: drivers/gpu/drm/panfrost/
|
||||
F: include/uapi/drm/panfrost_drm.h
|
||||
|
||||
@ -6175,6 +6175,7 @@ L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/driver-api/dma-buf.rst
|
||||
F: Documentation/userspace-api/dma-buf-alloc-exchange.rst
|
||||
F: drivers/dma-buf/
|
||||
F: include/linux/*fence.h
|
||||
F: include/linux/dma-buf.h
|
||||
@ -6667,6 +6668,7 @@ S: Maintained
|
||||
B: https://gitlab.freedesktop.org/drm/msm/-/issues
|
||||
T: git https://gitlab.freedesktop.org/drm/msm.git
|
||||
F: Documentation/devicetree/bindings/display/msm/
|
||||
F: drivers/gpu/drm/ci/xfails/msm*
|
||||
F: drivers/gpu/drm/msm/
|
||||
F: include/uapi/drm/msm_drm.h
|
||||
|
||||
@ -6818,7 +6820,8 @@ DRM DRIVER FOR SOLOMON SSD130X OLED DISPLAYS
|
||||
M: Javier Martinez Canillas <javierm@redhat.com>
|
||||
S: Maintained
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml
|
||||
F: Documentation/devicetree/bindings/display/solomon,ssd-common.yaml
|
||||
F: Documentation/devicetree/bindings/display/solomon,ssd13*.yaml
|
||||
F: drivers/gpu/drm/solomon/ssd130x*
|
||||
|
||||
DRM DRIVER FOR ST-ERICSSON MCDE
|
||||
@ -6913,12 +6916,26 @@ M: Thomas Zimmermann <tzimmermann@suse.de>
|
||||
S: Maintained
|
||||
W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/devicetree/bindings/display/
|
||||
F: Documentation/devicetree/bindings/gpu/
|
||||
F: Documentation/gpu/
|
||||
F: drivers/gpu/drm/*
|
||||
F: drivers/gpu/drm/
|
||||
F: drivers/gpu/vga/
|
||||
F: include/drm/drm*
|
||||
F: include/drm/drm
|
||||
F: include/linux/vga*
|
||||
F: include/uapi/drm/drm*
|
||||
F: include/uapi/drm/
|
||||
X: drivers/gpu/drm/amd/
|
||||
X: drivers/gpu/drm/armada/
|
||||
X: drivers/gpu/drm/etnaviv/
|
||||
X: drivers/gpu/drm/exynos/
|
||||
X: drivers/gpu/drm/i915/
|
||||
X: drivers/gpu/drm/kmb/
|
||||
X: drivers/gpu/drm/mediatek/
|
||||
X: drivers/gpu/drm/msm/
|
||||
X: drivers/gpu/drm/nouveau/
|
||||
X: drivers/gpu/drm/radeon/
|
||||
X: drivers/gpu/drm/renesas/
|
||||
X: drivers/gpu/drm/tegra/
|
||||
|
||||
DRM DRIVERS FOR ALLWINNER A10
|
||||
M: Maxime Ripard <mripard@kernel.org>
|
||||
@ -6939,6 +6956,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
|
||||
F: Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
|
||||
F: Documentation/gpu/meson.rst
|
||||
F: drivers/gpu/drm/ci/xfails/meson*
|
||||
F: drivers/gpu/drm/meson/
|
||||
|
||||
DRM DRIVERS FOR ATMEL HLCDC
|
||||
@ -6962,7 +6980,9 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/devicetree/bindings/display/bridge/
|
||||
F: drivers/gpu/drm/bridge/
|
||||
F: drivers/gpu/drm/drm_bridge.c
|
||||
F: drivers/gpu/drm/drm_bridge_connector.c
|
||||
F: include/drm/drm_bridge.h
|
||||
F: include/drm/drm_bridge_connector.h
|
||||
|
||||
DRM DRIVERS FOR EXYNOS
|
||||
M: Inki Dae <inki.dae@samsung.com>
|
||||
@ -6986,10 +7006,12 @@ F: Documentation/devicetree/bindings/display/fsl,dcu.txt
|
||||
F: Documentation/devicetree/bindings/display/fsl,tcon.txt
|
||||
F: drivers/gpu/drm/fsl-dcu/
|
||||
|
||||
DRM DRIVERS FOR FREESCALE IMX
|
||||
DRM DRIVERS FOR FREESCALE IMX 5/6
|
||||
M: Philipp Zabel <p.zabel@pengutronix.de>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Maintained
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
T: git git://git.pengutronix.de/git/pza/linux
|
||||
F: Documentation/devicetree/bindings/display/imx/
|
||||
F: drivers/gpu/drm/imx/ipuv3/
|
||||
F: drivers/gpu/ipu-v3/
|
||||
@ -7008,7 +7030,7 @@ DRM DRIVERS FOR GMA500 (Poulsbo, Moorestown and derivative chipsets)
|
||||
M: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Maintained
|
||||
T: git git://github.com/patjak/drm-gma500
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: drivers/gpu/drm/gma500/
|
||||
|
||||
DRM DRIVERS FOR HISILICON
|
||||
@ -7047,6 +7069,7 @@ L: dri-devel@lists.freedesktop.org
|
||||
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
|
||||
S: Supported
|
||||
F: Documentation/devicetree/bindings/display/mediatek/
|
||||
F: drivers/gpu/drm/ci/xfails/mediatek*
|
||||
F: drivers/gpu/drm/mediatek/
|
||||
F: drivers/phy/mediatek/phy-mtk-dp.c
|
||||
F: drivers/phy/mediatek/phy-mtk-hdmi*
|
||||
@ -7087,6 +7110,7 @@ L: dri-devel@lists.freedesktop.org
|
||||
S: Maintained
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/devicetree/bindings/display/rockchip/
|
||||
F: drivers/gpu/drm/ci/xfails/rockchip*
|
||||
F: drivers/gpu/drm/rockchip/
|
||||
|
||||
DRM DRIVERS FOR STI
|
||||
@ -7183,7 +7207,7 @@ F: Documentation/devicetree/bindings/display/xlnx/
|
||||
F: drivers/gpu/drm/xlnx/
|
||||
|
||||
DRM GPU SCHEDULER
|
||||
M: Luben Tuikov <luben.tuikov@amd.com>
|
||||
M: Luben Tuikov <ltuikov89@gmail.com>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Maintained
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
@ -7192,6 +7216,7 @@ F: include/drm/gpu_scheduler.h
|
||||
|
||||
DRM PANEL DRIVERS
|
||||
M: Neil Armstrong <neil.armstrong@linaro.org>
|
||||
R: Jessica Zhang <quic_jesszhan@quicinc.com>
|
||||
R: Sam Ravnborg <sam@ravnborg.org>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Maintained
|
||||
@ -9129,6 +9154,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
|
||||
F: Documentation/ABI/testing/debugfs-driver-habanalabs
|
||||
F: Documentation/ABI/testing/sysfs-driver-habanalabs
|
||||
F: drivers/accel/habanalabs/
|
||||
F: include/linux/habanalabs/
|
||||
F: include/trace/events/habanalabs.h
|
||||
F: include/uapi/drm/habanalabs_accel.h
|
||||
|
||||
@ -10535,6 +10561,7 @@ C: irc://irc.oftc.net/intel-gfx
|
||||
T: git git://anongit.freedesktop.org/drm-intel
|
||||
F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
|
||||
F: Documentation/gpu/i915.rst
|
||||
F: drivers/gpu/drm/ci/xfails/i915*
|
||||
F: drivers/gpu/drm/i915/
|
||||
F: include/drm/i915*
|
||||
F: include/uapi/drm/i915_drm.h
|
||||
@ -13577,7 +13604,7 @@ F: drivers/usb/mtu3/
|
||||
|
||||
MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
|
||||
M: Peter Senna Tschudin <peter.senna@gmail.com>
|
||||
M: Martin Donnelly <martin.donnelly@ge.com>
|
||||
M: Ian Ray <ian.ray@ge.com>
|
||||
M: Martyn Welch <martyn.welch@collabora.co.uk>
|
||||
S: Maintained
|
||||
F: Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt
|
||||
@ -15414,6 +15441,7 @@ M: Laurentiu Palcu <laurentiu.palcu@oss.nxp.com>
|
||||
R: Lucas Stach <l.stach@pengutronix.de>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Maintained
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: Documentation/devicetree/bindings/display/imx/nxp,imx8mq-dcss.yaml
|
||||
F: drivers/gpu/drm/imx/dcss/
|
||||
|
||||
@ -17927,6 +17955,7 @@ C: irc://irc.oftc.net/radeon
|
||||
T: git https://gitlab.freedesktop.org/agd5f/linux.git
|
||||
F: Documentation/gpu/amdgpu/
|
||||
F: drivers/gpu/drm/amd/
|
||||
F: drivers/gpu/drm/ci/xfails/amd*
|
||||
F: drivers/gpu/drm/radeon/
|
||||
F: include/uapi/drm/amdgpu_drm.h
|
||||
F: include/uapi/drm/radeon_drm.h
|
||||
@ -22907,6 +22936,7 @@ L: dri-devel@lists.freedesktop.org
|
||||
L: virtualization@lists.linux-foundation.org
|
||||
S: Maintained
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
F: drivers/gpu/drm/ci/xfails/virtio*
|
||||
F: drivers/gpu/drm/virtio/
|
||||
F: include/uapi/linux/virtio_gpu.h
|
||||
|
||||
|
@ -835,6 +835,7 @@ CONFIG_DRM_PANEL_BOE_TV101WUM_NL6=m
|
||||
CONFIG_DRM_PANEL_LVDS=m
|
||||
CONFIG_DRM_PANEL_SIMPLE=m
|
||||
CONFIG_DRM_PANEL_EDP=m
|
||||
CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
|
||||
CONFIG_DRM_PANEL_MANTIX_MLAF057WE51=m
|
||||
CONFIG_DRM_PANEL_RAYDIUM_RM67191=m
|
||||
CONFIG_DRM_PANEL_SITRONIX_ST7703=m
|
||||
|
@ -8,17 +8,16 @@
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
struct file;
|
||||
|
||||
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
|
||||
unsigned long off)
|
||||
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
|
||||
unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long offset)
|
||||
{
|
||||
if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
|
||||
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
|
||||
if (efi_range_is_wc(vm_start, vm_end - vm_start))
|
||||
return pgprot_writecombine(prot);
|
||||
else
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
return pgprot_noncached(prot);
|
||||
}
|
||||
#define fb_pgprotect fb_pgprotect
|
||||
#define pgprot_framebuffer pgprot_framebuffer
|
||||
|
||||
static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
|
||||
{
|
||||
|
@ -5,26 +5,27 @@
|
||||
#include <asm/page.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
struct file;
|
||||
|
||||
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
|
||||
unsigned long off)
|
||||
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
|
||||
unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long offset)
|
||||
{
|
||||
#ifdef CONFIG_MMU
|
||||
#ifdef CONFIG_SUN3
|
||||
pgprot_val(vma->vm_page_prot) |= SUN3_PAGE_NOCACHE;
|
||||
pgprot_val(prot) |= SUN3_PAGE_NOCACHE;
|
||||
#else
|
||||
if (CPU_IS_020_OR_030)
|
||||
pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE030;
|
||||
pgprot_val(prot) |= _PAGE_NOCACHE030;
|
||||
if (CPU_IS_040_OR_060) {
|
||||
pgprot_val(vma->vm_page_prot) &= _CACHEMASK040;
|
||||
pgprot_val(prot) &= _CACHEMASK040;
|
||||
/* Use no-cache mode, serialized */
|
||||
pgprot_val(vma->vm_page_prot) |= _PAGE_NOCACHE_S;
|
||||
pgprot_val(prot) |= _PAGE_NOCACHE_S;
|
||||
}
|
||||
#endif /* CONFIG_SUN3 */
|
||||
#endif /* CONFIG_MMU */
|
||||
|
||||
return prot;
|
||||
}
|
||||
#define fb_pgprotect fb_pgprotect
|
||||
#define pgprot_framebuffer pgprot_framebuffer
|
||||
|
||||
#include <asm-generic/fb.h>
|
||||
|
||||
|
@ -3,14 +3,13 @@
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
struct file;
|
||||
|
||||
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
|
||||
unsigned long off)
|
||||
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
|
||||
unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long offset)
|
||||
{
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
return pgprot_noncached(prot);
|
||||
}
|
||||
#define fb_pgprotect fb_pgprotect
|
||||
#define pgprot_framebuffer pgprot_framebuffer
|
||||
|
||||
/*
|
||||
* MIPS doesn't define __raw_ I/O macros, so the helpers
|
||||
|
@ -2,18 +2,20 @@
|
||||
#ifndef _ASM_FB_H_
|
||||
#define _ASM_FB_H_
|
||||
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
|
||||
unsigned long off)
|
||||
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
|
||||
unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long offset)
|
||||
{
|
||||
vma->vm_page_prot = phys_mem_access_prot(file, off >> PAGE_SHIFT,
|
||||
vma->vm_end - vma->vm_start,
|
||||
vma->vm_page_prot);
|
||||
/*
|
||||
* PowerPC's implementation of phys_mem_access_prot() does
|
||||
* not use the file argument. Set it to NULL in preparation
|
||||
* of later updates to the interface.
|
||||
*/
|
||||
return phys_mem_access_prot(NULL, PHYS_PFN(offset), vm_end - vm_start, prot);
|
||||
}
|
||||
#define fb_pgprotect fb_pgprotect
|
||||
#define pgprot_framebuffer pgprot_framebuffer
|
||||
|
||||
#include <asm-generic/fb.h>
|
||||
|
||||
|
@ -4,15 +4,18 @@
|
||||
|
||||
#include <linux/io.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
struct fb_info;
|
||||
struct file;
|
||||
struct vm_area_struct;
|
||||
|
||||
#ifdef CONFIG_SPARC32
|
||||
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
|
||||
unsigned long off)
|
||||
{ }
|
||||
#define fb_pgprotect fb_pgprotect
|
||||
static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
|
||||
unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long offset)
|
||||
{
|
||||
return prot;
|
||||
}
|
||||
#define pgprot_framebuffer pgprot_framebuffer
|
||||
#endif
|
||||
|
||||
int fb_is_primary_device(struct fb_info *info);
|
||||
|
@ -2,12 +2,14 @@
|
||||
#ifndef _ASM_X86_FB_H
|
||||
#define _ASM_X86_FB_H
|
||||
|
||||
struct fb_info;
|
||||
struct file;
|
||||
struct vm_area_struct;
|
||||
#include <asm/page.h>
|
||||
|
||||
void fb_pgprotect(struct file *file, struct vm_area_struct *vma, unsigned long off);
|
||||
#define fb_pgprotect fb_pgprotect
|
||||
struct fb_info;
|
||||
|
||||
pgprot_t pgprot_framebuffer(pgprot_t prot,
|
||||
unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long offset);
|
||||
#define pgprot_framebuffer pgprot_framebuffer
|
||||
|
||||
int fb_is_primary_device(struct fb_info *info);
|
||||
#define fb_is_primary_device fb_is_primary_device
|
||||
|
@ -13,16 +13,17 @@
|
||||
#include <linux/vgaarb.h>
|
||||
#include <asm/fb.h>
|
||||
|
||||
void fb_pgprotect(struct file *file, struct vm_area_struct *vma, unsigned long off)
|
||||
pgprot_t pgprot_framebuffer(pgprot_t prot,
|
||||
unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long offset)
|
||||
{
|
||||
unsigned long prot;
|
||||
|
||||
prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK;
|
||||
pgprot_val(prot) &= ~_PAGE_CACHE_MASK;
|
||||
if (boot_cpu_data.x86 > 3)
|
||||
pgprot_val(vma->vm_page_prot) =
|
||||
prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
|
||||
pgprot_val(prot) |= cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
|
||||
|
||||
return prot;
|
||||
}
|
||||
EXPORT_SYMBOL(fb_pgprotect);
|
||||
EXPORT_SYMBOL(pgprot_framebuffer);
|
||||
|
||||
int fb_is_primary_device(struct fb_info *info)
|
||||
{
|
||||
|
@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock);
|
||||
static struct idr accel_minors_idr;
|
||||
|
||||
static struct dentry *accel_debugfs_root;
|
||||
static struct class *accel_class;
|
||||
|
||||
static struct device_type accel_sysfs_device_minor = {
|
||||
.name = "accel_minor"
|
||||
@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode)
|
||||
return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
|
||||
}
|
||||
|
||||
static const struct class accel_class = {
|
||||
.name = "accel",
|
||||
.devnode = accel_devnode,
|
||||
};
|
||||
|
||||
static int accel_sysfs_init(void)
|
||||
{
|
||||
accel_class = class_create("accel");
|
||||
if (IS_ERR(accel_class))
|
||||
return PTR_ERR(accel_class);
|
||||
|
||||
accel_class->devnode = accel_devnode;
|
||||
|
||||
return 0;
|
||||
return class_register(&accel_class);
|
||||
}
|
||||
|
||||
static void accel_sysfs_destroy(void)
|
||||
{
|
||||
if (IS_ERR_OR_NULL(accel_class))
|
||||
return;
|
||||
class_destroy(accel_class);
|
||||
accel_class = NULL;
|
||||
class_unregister(&accel_class);
|
||||
}
|
||||
|
||||
static int accel_name_info(struct seq_file *m, void *data)
|
||||
@ -79,29 +74,30 @@ static const struct drm_info_list accel_debugfs_list[] = {
|
||||
#define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
|
||||
|
||||
/**
|
||||
* accel_debugfs_init() - Initialize debugfs for accel minor
|
||||
* @minor: Pointer to the drm_minor instance.
|
||||
* @minor_id: The minor's id
|
||||
* accel_debugfs_init() - Initialize debugfs for device
|
||||
* @dev: Pointer to the device instance.
|
||||
*
|
||||
* This function initializes the drm minor's debugfs members and creates
|
||||
* a root directory for the minor in debugfs. It also creates common files
|
||||
* for accelerators and calls the driver's debugfs init callback.
|
||||
* This function creates a root directory for the device in debugfs.
|
||||
*/
|
||||
void accel_debugfs_init(struct drm_minor *minor, int minor_id)
|
||||
void accel_debugfs_init(struct drm_device *dev)
|
||||
{
|
||||
struct drm_device *dev = minor->dev;
|
||||
char name[64];
|
||||
drm_debugfs_dev_init(dev, accel_debugfs_root);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&minor->debugfs_list);
|
||||
mutex_init(&minor->debugfs_lock);
|
||||
sprintf(name, "%d", minor_id);
|
||||
minor->debugfs_root = debugfs_create_dir(name, accel_debugfs_root);
|
||||
/**
|
||||
* accel_debugfs_register() - Register debugfs for device
|
||||
* @dev: Pointer to the device instance.
|
||||
*
|
||||
* Creates common files for accelerators.
|
||||
*/
|
||||
void accel_debugfs_register(struct drm_device *dev)
|
||||
{
|
||||
struct drm_minor *minor = dev->accel;
|
||||
|
||||
minor->debugfs_root = dev->debugfs_root;
|
||||
|
||||
drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES,
|
||||
minor->debugfs_root, minor);
|
||||
|
||||
if (dev->driver->debugfs_init)
|
||||
dev->driver->debugfs_init(minor);
|
||||
dev->debugfs_root, minor);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -116,7 +112,7 @@ void accel_debugfs_init(struct drm_minor *minor, int minor_id)
|
||||
void accel_set_device_instance_params(struct device *kdev, int index)
|
||||
{
|
||||
kdev->devt = MKDEV(ACCEL_MAJOR, index);
|
||||
kdev->class = accel_class;
|
||||
kdev->class = &accel_class;
|
||||
kdev->type = &accel_sysfs_device_minor;
|
||||
}
|
||||
|
||||
|
@ -361,10 +361,11 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
|
||||
return rc;
|
||||
}
|
||||
|
||||
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
|
||||
{
|
||||
union hl_cb_args *args = data;
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
union hl_cb_args *args = data;
|
||||
u64 handle = 0, device_va = 0;
|
||||
enum hl_device_status status;
|
||||
u32 usage_cnt = 0;
|
||||
|
@ -31,6 +31,24 @@ enum hl_cs_wait_status {
|
||||
CS_WAIT_STATUS_GONE
|
||||
};
|
||||
|
||||
/*
|
||||
* Data used while handling wait/timestamp nodes.
|
||||
* The purpose of this struct is to store the needed data for both operations
|
||||
* in one variable instead of passing a large number of arguments to functions.
|
||||
*/
|
||||
struct wait_interrupt_data {
|
||||
struct hl_user_interrupt *interrupt;
|
||||
struct hl_mmap_mem_buf *buf;
|
||||
struct hl_mem_mgr *mmg;
|
||||
struct hl_cb *cq_cb;
|
||||
u64 ts_handle;
|
||||
u64 ts_offset;
|
||||
u64 cq_handle;
|
||||
u64 cq_offset;
|
||||
u64 target_value;
|
||||
u64 intr_timeout_us;
|
||||
};
|
||||
|
||||
static void job_wq_completion(struct work_struct *work);
|
||||
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
|
||||
enum hl_cs_wait_status *status, s64 *timestamp);
|
||||
@ -1079,19 +1097,22 @@ static void
|
||||
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend, *temp;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock(&interrupt->wait_list_lock);
|
||||
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
|
||||
if (pend->ts_reg_info.buf) {
|
||||
list_del(&pend->wait_list_node);
|
||||
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
|
||||
hl_cb_put(pend->ts_reg_info.cq_cb);
|
||||
} else {
|
||||
pend->fence.error = -EIO;
|
||||
complete_all(&pend->fence.completion);
|
||||
}
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
|
||||
pend->fence.error = -EIO;
|
||||
complete_all(&pend->fence.completion);
|
||||
}
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
|
||||
list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
|
||||
list_del(&pend->list_node);
|
||||
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
|
||||
hl_cb_put(pend->ts_reg_info.cq_cb);
|
||||
}
|
||||
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
|
||||
}
|
||||
|
||||
void hl_release_pending_user_interrupts(struct hl_device *hdev)
|
||||
@ -1730,16 +1751,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
|
||||
/* Need to wait for restore completion before execution phase */
|
||||
if (num_chunks) {
|
||||
enum hl_cs_wait_status status;
|
||||
wait_again:
|
||||
|
||||
ret = _hl_cs_wait_ioctl(hdev, ctx,
|
||||
jiffies_to_usecs(hdev->timeout_jiffies),
|
||||
*cs_seq, &status, NULL);
|
||||
if (ret) {
|
||||
if (ret == -ERESTARTSYS) {
|
||||
usleep_range(100, 200);
|
||||
goto wait_again;
|
||||
}
|
||||
|
||||
dev_err(hdev->dev,
|
||||
"Restore CS for context %d failed to complete %d\n",
|
||||
ctx->asid, ret);
|
||||
@ -2539,8 +2555,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
|
||||
{
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
union hl_cs_args *args = data;
|
||||
enum hl_cs_type cs_type = 0;
|
||||
u64 cs_seq = ULONG_MAX;
|
||||
@ -3197,166 +3214,241 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
|
||||
struct hl_cb *cq_cb,
|
||||
u64 ts_offset, u64 cq_offset, u64 target_value,
|
||||
spinlock_t *wait_list_lock,
|
||||
struct hl_user_pending_interrupt **pend)
|
||||
static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
|
||||
struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
|
||||
{
|
||||
struct hl_ts_buff *ts_buff = buf->private;
|
||||
struct hl_user_pending_interrupt *requested_offset_record =
|
||||
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
|
||||
ts_offset;
|
||||
struct hl_user_pending_interrupt *cb_last =
|
||||
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
|
||||
record->ts_reg_info.cq_cb = cq_cb;
|
||||
record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
|
||||
record->cq_target_value = target_value;
|
||||
}
|
||||
|
||||
static int validate_and_get_ts_record(struct device *dev,
|
||||
struct hl_ts_buff *ts_buff, u64 ts_offset,
|
||||
struct hl_user_pending_interrupt **req_event_record)
|
||||
{
|
||||
struct hl_user_pending_interrupt *ts_cb_last;
|
||||
|
||||
*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
|
||||
ts_offset;
|
||||
ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
|
||||
(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
|
||||
unsigned long iter_counter = 0;
|
||||
u64 current_cq_counter;
|
||||
ktime_t timestamp;
|
||||
|
||||
/* Validate ts_offset not exceeding last max */
|
||||
if (requested_offset_record >= cb_last) {
|
||||
dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
|
||||
(u64)(uintptr_t)cb_last);
|
||||
if (*req_event_record >= ts_cb_last) {
|
||||
dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
|
||||
ts_offset, (u64)(uintptr_t)ts_cb_last);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
timestamp = ktime_get();
|
||||
|
||||
start_over:
|
||||
spin_lock(wait_list_lock);
|
||||
|
||||
/* Unregister only if we didn't reach the target value
|
||||
* since in this case there will be no handling in irq context
|
||||
* and then it's safe to delete the node out of the interrupt list
|
||||
* then re-use it on other interrupt
|
||||
*/
|
||||
if (requested_offset_record->ts_reg_info.in_use) {
|
||||
current_cq_counter = *requested_offset_record->cq_kernel_addr;
|
||||
if (current_cq_counter < requested_offset_record->cq_target_value) {
|
||||
list_del(&requested_offset_record->wait_list_node);
|
||||
spin_unlock(wait_list_lock);
|
||||
|
||||
hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
|
||||
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
|
||||
|
||||
dev_dbg(buf->mmg->dev,
|
||||
"ts node removed from interrupt list now can re-use\n");
|
||||
} else {
|
||||
dev_dbg(buf->mmg->dev,
|
||||
"ts node in middle of irq handling\n");
|
||||
|
||||
/* irq thread handling in the middle give it time to finish */
|
||||
spin_unlock(wait_list_lock);
|
||||
usleep_range(100, 1000);
|
||||
if (++iter_counter == MAX_TS_ITER_NUM) {
|
||||
dev_err(buf->mmg->dev,
|
||||
"Timestamp offset processing reached timeout of %lld ms\n",
|
||||
ktime_ms_delta(ktime_get(), timestamp));
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
goto start_over;
|
||||
}
|
||||
} else {
|
||||
/* Fill up the new registration node info */
|
||||
requested_offset_record->ts_reg_info.buf = buf;
|
||||
requested_offset_record->ts_reg_info.cq_cb = cq_cb;
|
||||
requested_offset_record->ts_reg_info.timestamp_kernel_addr =
|
||||
(u64 *) ts_buff->user_buff_address + ts_offset;
|
||||
requested_offset_record->cq_kernel_addr =
|
||||
(u64 *) cq_cb->kernel_address + cq_offset;
|
||||
requested_offset_record->cq_target_value = target_value;
|
||||
|
||||
spin_unlock(wait_list_lock);
|
||||
}
|
||||
|
||||
*pend = requested_offset_record;
|
||||
|
||||
dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
|
||||
requested_offset_record);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
|
||||
u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
|
||||
u64 target_value, struct hl_user_interrupt *interrupt,
|
||||
bool register_ts_record, u64 ts_handle, u64 ts_offset,
|
||||
static void unregister_timestamp_node(struct hl_device *hdev,
|
||||
struct hl_user_pending_interrupt *record, bool need_lock)
|
||||
{
|
||||
struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
|
||||
bool ts_rec_found = false;
|
||||
unsigned long flags;
|
||||
|
||||
if (need_lock)
|
||||
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
|
||||
|
||||
if (record->ts_reg_info.in_use) {
|
||||
record->ts_reg_info.in_use = false;
|
||||
list_del(&record->list_node);
|
||||
ts_rec_found = true;
|
||||
}
|
||||
|
||||
if (need_lock)
|
||||
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
|
||||
|
||||
/* Put refcounts that were taken when we registered the event */
|
||||
if (ts_rec_found) {
|
||||
hl_mmap_mem_buf_put(record->ts_reg_info.buf);
|
||||
hl_cb_put(record->ts_reg_info.cq_cb);
|
||||
}
|
||||
}
|
||||
|
||||
static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
struct wait_interrupt_data *data, unsigned long *flags,
|
||||
struct hl_user_pending_interrupt **pend)
|
||||
{
|
||||
struct hl_user_pending_interrupt *req_offset_record;
|
||||
struct hl_ts_buff *ts_buff = data->buf->private;
|
||||
bool need_lock = false;
|
||||
int rc;
|
||||
|
||||
rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
|
||||
&req_offset_record);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* In case the node is already registered, need to unregister it first and then re-use it */
|
||||
if (req_offset_record->ts_reg_info.in_use) {
|
||||
dev_dbg(data->buf->mmg->dev,
|
||||
"Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
|
||||
req_offset_record,
|
||||
req_offset_record->ts_reg_info.interrupt->interrupt_id,
|
||||
req_offset_record->ts_reg_info.timestamp_kernel_addr,
|
||||
data->interrupt->interrupt_id);
|
||||
/*
|
||||
* Since the interrupt here can be different from the one the node is currently registered
|
||||
* on, and we don't want to lock two lists while we're doing unregister, so
|
||||
* unlock the new interrupt wait list here and acquire the lock again after you are done
|
||||
*/
|
||||
if (data->interrupt->interrupt_id !=
|
||||
req_offset_record->ts_reg_info.interrupt->interrupt_id) {
|
||||
|
||||
need_lock = true;
|
||||
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
|
||||
}
|
||||
|
||||
unregister_timestamp_node(hdev, req_offset_record, need_lock);
|
||||
|
||||
if (need_lock)
|
||||
spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
|
||||
}
|
||||
|
||||
/* Fill up the new registration node info and add it to the list */
|
||||
req_offset_record->ts_reg_info.in_use = true;
|
||||
req_offset_record->ts_reg_info.buf = data->buf;
|
||||
req_offset_record->ts_reg_info.timestamp_kernel_addr =
|
||||
(u64 *) ts_buff->user_buff_address + data->ts_offset;
|
||||
req_offset_record->ts_reg_info.interrupt = data->interrupt;
|
||||
set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
|
||||
data->target_value);
|
||||
|
||||
*pend = req_offset_record;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
struct wait_interrupt_data *data,
|
||||
u32 *status, u64 *timestamp)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend;
|
||||
struct hl_mmap_mem_buf *buf;
|
||||
struct hl_cb *cq_cb;
|
||||
unsigned long timeout;
|
||||
long completion_rc;
|
||||
unsigned long flags;
|
||||
int rc = 0;
|
||||
|
||||
timeout = hl_usecs64_to_jiffies(timeout_us);
|
||||
|
||||
hl_ctx_get(ctx);
|
||||
|
||||
cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
|
||||
if (!cq_cb) {
|
||||
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
|
||||
if (!data->cq_cb) {
|
||||
rc = -EINVAL;
|
||||
goto put_ctx;
|
||||
}
|
||||
|
||||
/* Validate the cq offset */
|
||||
if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >=
|
||||
((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) {
|
||||
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
|
||||
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
|
||||
rc = -EINVAL;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
|
||||
if (register_ts_record) {
|
||||
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
|
||||
interrupt->interrupt_id, ts_offset, cq_counters_offset);
|
||||
buf = hl_mmap_mem_buf_get(mmg, ts_handle);
|
||||
if (!buf) {
|
||||
rc = -EINVAL;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
|
||||
data->interrupt->interrupt_id, data->ts_handle,
|
||||
data->ts_offset, data->cq_offset);
|
||||
|
||||
/* get ts buffer record */
|
||||
rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
|
||||
cq_counters_offset, target_value,
|
||||
&interrupt->wait_list_lock, &pend);
|
||||
if (rc)
|
||||
goto put_ts_buff;
|
||||
} else {
|
||||
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
|
||||
if (!pend) {
|
||||
rc = -ENOMEM;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
hl_fence_init(&pend->fence, ULONG_MAX);
|
||||
pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
|
||||
pend->cq_target_value = target_value;
|
||||
data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
|
||||
if (!data->buf) {
|
||||
rc = -EINVAL;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
|
||||
spin_lock(&interrupt->wait_list_lock);
|
||||
spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
|
||||
|
||||
/* get ts buffer record */
|
||||
rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
|
||||
if (rc) {
|
||||
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
|
||||
goto put_ts_buff;
|
||||
}
|
||||
|
||||
/* We check for completion value as interrupt could have been received
|
||||
* before we added the node to the wait list
|
||||
* before we add the timestamp node to the ts list.
|
||||
*/
|
||||
if (*pend->cq_kernel_addr >= target_value) {
|
||||
if (register_ts_record)
|
||||
pend->ts_reg_info.in_use = 0;
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
if (*pend->cq_kernel_addr >= data->target_value) {
|
||||
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
|
||||
|
||||
dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
|
||||
pend, data->ts_offset, data->interrupt->interrupt_id);
|
||||
|
||||
pend->ts_reg_info.in_use = 0;
|
||||
*status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
|
||||
|
||||
goto put_ts_buff;
|
||||
}
|
||||
|
||||
list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
|
||||
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
|
||||
|
||||
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
return rc;
|
||||
|
||||
put_ts_buff:
|
||||
hl_mmap_mem_buf_put(data->buf);
|
||||
put_cq_cb:
|
||||
hl_cb_put(data->cq_cb);
|
||||
put_ctx:
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
struct wait_interrupt_data *data,
|
||||
u32 *status, u64 *timestamp)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend;
|
||||
unsigned long timeout, flags;
|
||||
long completion_rc;
|
||||
int rc = 0;
|
||||
|
||||
timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
|
||||
|
||||
hl_ctx_get(ctx);
|
||||
|
||||
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
|
||||
if (!data->cq_cb) {
|
||||
rc = -EINVAL;
|
||||
goto put_ctx;
|
||||
}
|
||||
|
||||
/* Validate the cq offset */
|
||||
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
|
||||
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
|
||||
rc = -EINVAL;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
|
||||
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
|
||||
if (!pend) {
|
||||
rc = -ENOMEM;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
|
||||
hl_fence_init(&pend->fence, ULONG_MAX);
|
||||
pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
|
||||
pend->cq_target_value = data->target_value;
|
||||
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
|
||||
|
||||
|
||||
/* We check for completion value as interrupt could have been received
|
||||
* before we add the wait node to the wait list.
|
||||
*/
|
||||
if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
|
||||
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
|
||||
|
||||
if (*pend->cq_kernel_addr >= data->target_value)
|
||||
*status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
else
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
|
||||
if (register_ts_record) {
|
||||
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
|
||||
goto put_ts_buff;
|
||||
} else {
|
||||
pend->fence.timestamp = ktime_get();
|
||||
goto set_timestamp;
|
||||
}
|
||||
} else if (!timeout_us) {
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
pend->fence.timestamp = ktime_get();
|
||||
goto set_timestamp;
|
||||
}
|
||||
@ -3366,55 +3458,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
* Note that we cannot have sorted list by target value,
|
||||
* in order to shorten the list pass loop, since
|
||||
* same list could have nodes for different cq counter handle.
|
||||
* Note:
|
||||
* Mark ts buff offset as in use here in the spinlock protection area
|
||||
* to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
|
||||
* before adding the node to the list. this scenario might happen when
|
||||
* multiple threads are racing on same offset and one thread could
|
||||
* set the ts buff in ts_buff_get_kernel_ts_record then the other thread
|
||||
* takes over and get to ts_buff_get_kernel_ts_record and then we will try
|
||||
* to re-use the same ts buff offset, and will try to delete a non existing
|
||||
* node from the list.
|
||||
*/
|
||||
if (register_ts_record)
|
||||
pend->ts_reg_info.in_use = 1;
|
||||
|
||||
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
|
||||
if (register_ts_record) {
|
||||
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
goto ts_registration_exit;
|
||||
}
|
||||
list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
|
||||
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
|
||||
|
||||
/* Wait for interrupt handler to signal completion */
|
||||
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
|
||||
timeout);
|
||||
if (completion_rc > 0) {
|
||||
*status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
if (pend->fence.error == -EIO) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
|
||||
pend->fence.error);
|
||||
rc = -EIO;
|
||||
*status = HL_WAIT_CS_STATUS_ABORTED;
|
||||
} else {
|
||||
*status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
}
|
||||
} else {
|
||||
if (completion_rc == -ERESTARTSYS) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"user process got signal while waiting for interrupt ID %d\n",
|
||||
interrupt->interrupt_id);
|
||||
data->interrupt->interrupt_id);
|
||||
rc = -EINTR;
|
||||
*status = HL_WAIT_CS_STATUS_ABORTED;
|
||||
} else {
|
||||
if (pend->fence.error == -EIO) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
|
||||
pend->fence.error);
|
||||
rc = -EIO;
|
||||
*status = HL_WAIT_CS_STATUS_ABORTED;
|
||||
} else {
|
||||
/* The wait has timed-out. We don't know anything beyond that
|
||||
* because the workload wasn't submitted through the driver.
|
||||
* Therefore, from driver's perspective, the workload is still
|
||||
* executing.
|
||||
*/
|
||||
rc = 0;
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
}
|
||||
/* The wait has timed-out. We don't know anything beyond that
|
||||
* because the workload was not submitted through the driver.
|
||||
* Therefore, from driver's perspective, the workload is still
|
||||
* executing.
|
||||
*/
|
||||
rc = 0;
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3424,23 +3499,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
* for ts record, the node will be deleted in the irq handler after
|
||||
* we reach the target value.
|
||||
*/
|
||||
spin_lock(&interrupt->wait_list_lock);
|
||||
list_del(&pend->wait_list_node);
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
|
||||
list_del(&pend->list_node);
|
||||
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
|
||||
|
||||
set_timestamp:
|
||||
*timestamp = ktime_to_ns(pend->fence.timestamp);
|
||||
kfree(pend);
|
||||
hl_cb_put(cq_cb);
|
||||
ts_registration_exit:
|
||||
hl_cb_put(data->cq_cb);
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
return rc;
|
||||
|
||||
put_ts_buff:
|
||||
hl_mmap_mem_buf_put(buf);
|
||||
put_cq_cb:
|
||||
hl_cb_put(cq_cb);
|
||||
hl_cb_put(data->cq_cb);
|
||||
put_ctx:
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
@ -3454,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
|
||||
u64 *timestamp)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend;
|
||||
unsigned long timeout;
|
||||
unsigned long timeout, flags;
|
||||
u64 completion_value;
|
||||
long completion_rc;
|
||||
int rc = 0;
|
||||
@ -3474,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
|
||||
/* Add pending user interrupt to relevant list for the interrupt
|
||||
* handler to monitor
|
||||
*/
|
||||
spin_lock(&interrupt->wait_list_lock);
|
||||
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
list_add_tail(&pend->list_node, &interrupt->wait_list_head);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
/* We check for completion value as interrupt could have been received
|
||||
* before we added the node to the wait list
|
||||
@ -3507,14 +3579,14 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
|
||||
* If comparison fails, keep waiting until timeout expires
|
||||
*/
|
||||
if (completion_rc > 0) {
|
||||
spin_lock(&interrupt->wait_list_lock);
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
/* reinit_completion must be called before we check for user
|
||||
* completion value, otherwise, if interrupt is received after
|
||||
* the comparison and before the next wait_for_completion,
|
||||
* we will reach timeout and fail
|
||||
*/
|
||||
reinit_completion(&pend->fence.completion);
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
|
||||
dev_err(hdev->dev, "Failed to copy completion value from user\n");
|
||||
@ -3551,9 +3623,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
|
||||
}
|
||||
|
||||
remove_pending_user_interrupt:
|
||||
spin_lock(&interrupt->wait_list_lock);
|
||||
list_del(&pend->wait_list_node);
|
||||
spin_unlock(&interrupt->wait_list_lock);
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
list_del(&pend->list_node);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
*timestamp = ktime_to_ns(pend->fence.timestamp);
|
||||
|
||||
@ -3611,19 +3683,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
|
||||
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
|
||||
args->in.interrupt_timeout_us, args->in.cq_counters_handle,
|
||||
args->in.cq_counters_offset,
|
||||
args->in.target, interrupt,
|
||||
!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
|
||||
args->in.timestamp_handle, args->in.timestamp_offset,
|
||||
&status, &timestamp);
|
||||
else
|
||||
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
|
||||
struct wait_interrupt_data wait_intr_data = {0};
|
||||
|
||||
wait_intr_data.interrupt = interrupt;
|
||||
wait_intr_data.mmg = &hpriv->mem_mgr;
|
||||
wait_intr_data.cq_handle = args->in.cq_counters_handle;
|
||||
wait_intr_data.cq_offset = args->in.cq_counters_offset;
|
||||
wait_intr_data.ts_handle = args->in.timestamp_handle;
|
||||
wait_intr_data.ts_offset = args->in.timestamp_offset;
|
||||
wait_intr_data.target_value = args->in.target;
|
||||
wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
|
||||
|
||||
if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
|
||||
/*
|
||||
* Allow only one registration at a time. This is needed in order to prevent
|
||||
* issues while handling the flow of re-use of the same offset.
|
||||
* Since the registration flow is protected only by the interrupt lock,
|
||||
* re-use flow might request to move ts node to another interrupt list,
|
||||
* and in such case we're not protected.
|
||||
*/
|
||||
mutex_lock(&hpriv->ctx->ts_reg_lock);
|
||||
|
||||
rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
|
||||
&status, &timestamp);
|
||||
|
||||
mutex_unlock(&hpriv->ctx->ts_reg_lock);
|
||||
} else
|
||||
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
|
||||
&status, &timestamp);
|
||||
} else {
|
||||
rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
|
||||
args->in.interrupt_timeout_us, args->in.addr,
|
||||
args->in.target, interrupt, &status,
|
||||
&timestamp);
|
||||
}
|
||||
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -3638,8 +3733,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
|
||||
{
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
union hl_wait_cs_args *args = data;
|
||||
u32 flags = args->in.flags;
|
||||
|
@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
|
||||
kfree(ctx->cs_pending);
|
||||
|
||||
if (ctx->asid != HL_KERNEL_ASID_ID) {
|
||||
dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid);
|
||||
dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid);
|
||||
|
||||
/* The engines are stopped as there is no executing CS, but the
|
||||
* Coresight might be still working by accessing addresses
|
||||
@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
|
||||
hl_vm_ctx_fini(ctx);
|
||||
hl_asid_free(hdev, ctx->asid);
|
||||
hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
|
||||
mutex_destroy(&ctx->ts_reg_lock);
|
||||
} else {
|
||||
dev_dbg(hdev->dev, "closing kernel context\n");
|
||||
hdev->asic_funcs->ctx_fini(ctx);
|
||||
@ -198,6 +199,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
|
||||
|
||||
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
|
||||
{
|
||||
char task_comm[TASK_COMM_LEN];
|
||||
int rc = 0, i;
|
||||
|
||||
ctx->hdev = hdev;
|
||||
@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
|
||||
|
||||
hl_encaps_sig_mgr_init(&ctx->sig_mgr);
|
||||
|
||||
dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
|
||||
mutex_init(&ctx->ts_reg_lock);
|
||||
|
||||
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
|
||||
get_task_comm(task_comm, current), ctx->asid);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -18,8 +18,6 @@
|
||||
#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
|
||||
#define I2C_MAX_TRANSACTION_LEN 8
|
||||
|
||||
static struct dentry *hl_debug_root;
|
||||
|
||||
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
|
||||
u8 i2c_reg, u8 i2c_len, u64 *val)
|
||||
{
|
||||
@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
|
||||
{
|
||||
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
|
||||
|
||||
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root);
|
||||
dev_entry->root = hdev->drm.accel->debugfs_root;
|
||||
|
||||
add_files_to_device(hdev, dev_entry, dev_entry->root);
|
||||
|
||||
if (!hdev->asic_prop.fw_security_enabled)
|
||||
add_secured_nodes(dev_entry, dev_entry->root);
|
||||
}
|
||||
|
||||
/*
 * hl_debugfs_remove_device() - remove the debugfs directory of a device.
 * @hdev: pointer to habanalabs device structure.
 *
 * Recursively removes the directory stored in hdev->hl_debugfs.root.
 */
void hl_debugfs_remove_device(struct hl_device *hdev)
|
||||
{
|
||||
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
|
||||
|
||||
debugfs_remove_recursive(entry->root);
|
||||
}
|
||||
|
||||
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
|
||||
{
|
||||
struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
|
||||
@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
|
||||
|
||||
up_write(&dev_entry->state_dump_sem);
|
||||
}
|
||||
|
||||
/*
 * hl_debugfs_init() - create the driver-wide "habanalabs" debugfs directory.
 *
 * The directory is created with a NULL parent and kept in hl_debug_root.
 */
void __init hl_debugfs_init(void)
|
||||
{
|
||||
hl_debug_root = debugfs_create_dir("habanalabs", NULL);
|
||||
}
|
||||
|
||||
/*
 * hl_debugfs_fini() - recursively remove the directory held in hl_debug_root.
 */
void hl_debugfs_fini(void)
|
||||
{
|
||||
debugfs_remove_recursive(hl_debug_root);
|
||||
}
|
||||
|
@ -14,11 +14,14 @@
|
||||
#include <linux/hwmon.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <drm/drm_accel.h>
|
||||
#include <drm/drm_drv.h>
|
||||
|
||||
#include <trace/events/habanalabs.h>
|
||||
|
||||
#define HL_RESET_DELAY_USEC 10000 /* 10ms */
|
||||
|
||||
#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5
|
||||
#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 30
|
||||
|
||||
enum dma_alloc_type {
|
||||
DMA_ALLOC_COHERENT,
|
||||
@ -185,7 +188,36 @@ void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *
|
||||
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr);
|
||||
}
|
||||
|
||||
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
|
||||
/*
 * hl_dma_map_sgtable_caller() - DMA-map an SG table and trace each mapped entry.
 * @hdev: pointer to habanalabs device structure.
 * @sgt: SG table to map.
 * @dir: DMA data direction; handed to the ASIC callback and to the trace event.
 * @caller: string recorded in every trace event (presumably the name of the
 *          calling function - confirm against the call sites).
 *
 * Return: 0 on success, otherwise the error code returned by the ASIC-specific
 * dma_map_sgtable() callback.
 */
int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir, const char *caller)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct scatterlist *sg;
|
||||
int rc, i;
|
||||
|
||||
rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* The walk below only feeds the tracepoint, so skip it when tracing is off */
if (!trace_habanalabs_dma_map_page_enabled())
|
||||
return 0;
|
||||
|
||||
for_each_sgtable_dma_sg(sgt, sg, i)
|
||||
trace_habanalabs_dma_map_page(hdev->dev,
|
||||
page_to_phys(sg_page(sg)),
|
||||
/* Traced DMA address has the device's host-access offset subtracted */
sg->dma_address - prop->device_dma_offset_for_host_access,
|
||||
/* The length field that is read depends on CONFIG_NEED_SG_DMA_LENGTH */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
|
||||
sg->dma_length,
|
||||
#else
|
||||
sg->length,
|
||||
#endif
|
||||
dir, caller);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct scatterlist *sg;
|
||||
@ -203,7 +235,30 @@ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_da
|
||||
return 0;
|
||||
}
|
||||
|
||||
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
|
||||
/*
 * hl_dma_unmap_sgtable_caller() - DMA-unmap an SG table and trace each entry.
 * @hdev: pointer to habanalabs device structure.
 * @sgt: SG table to unmap.
 * @dir: DMA data direction; handed to the ASIC callback and to the trace event.
 * @caller: string recorded in every trace event (presumably the name of the
 *          calling function - confirm against the call sites).
 *
 * The ASIC-specific dma_unmap_sgtable() callback is always invoked; the table
 * is walked afterwards only when the unmap tracepoint is enabled.
 */
void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir, const char *caller)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir);
|
||||
|
||||
/* The walk below only feeds the tracepoint */
if (trace_habanalabs_dma_unmap_page_enabled()) {
|
||||
for_each_sgtable_dma_sg(sgt, sg, i)
|
||||
trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)),
|
||||
/* Traced DMA address has the device's host-access offset subtracted */
sg->dma_address - prop->device_dma_offset_for_host_access,
|
||||
/* The length field that is read depends on CONFIG_NEED_SG_DMA_LENGTH */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
|
||||
sg->dma_length,
|
||||
#else
|
||||
sg->length,
|
||||
#endif
|
||||
dir, caller);
|
||||
}
|
||||
}
|
||||
|
||||
void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct scatterlist *sg;
|
||||
@ -315,7 +370,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
|
||||
{
|
||||
enum hl_device_status status;
|
||||
|
||||
if (hdev->reset_info.in_reset) {
|
||||
if (hdev->device_fini_pending) {
|
||||
status = HL_DEVICE_STATUS_MALFUNCTION;
|
||||
} else if (hdev->reset_info.in_reset) {
|
||||
if (hdev->reset_info.in_compute_reset)
|
||||
status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
|
||||
else
|
||||
@ -343,9 +400,9 @@ bool hl_device_operational(struct hl_device *hdev,
|
||||
*status = current_status;
|
||||
|
||||
switch (current_status) {
|
||||
case HL_DEVICE_STATUS_MALFUNCTION:
|
||||
case HL_DEVICE_STATUS_IN_RESET:
|
||||
case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
|
||||
case HL_DEVICE_STATUS_MALFUNCTION:
|
||||
case HL_DEVICE_STATUS_NEEDS_RESET:
|
||||
return false;
|
||||
case HL_DEVICE_STATUS_OPERATIONAL:
|
||||
@ -406,8 +463,6 @@ static void hpriv_release(struct kref *ref)
|
||||
|
||||
hdev->asic_funcs->send_device_activity(hdev, false);
|
||||
|
||||
put_pid(hpriv->taskpid);
|
||||
|
||||
hl_debugfs_remove_file(hpriv);
|
||||
|
||||
mutex_destroy(&hpriv->ctx_lock);
|
||||
@ -424,7 +479,7 @@ static void hpriv_release(struct kref *ref)
|
||||
/* Check the device idle status and reset if not idle.
|
||||
* Skip it if already in reset, or if device is going to be reset in any case.
|
||||
*/
|
||||
if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
|
||||
if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm)
|
||||
device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
|
||||
HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
|
||||
if (!device_is_idle) {
|
||||
@ -446,14 +501,18 @@ static void hpriv_release(struct kref *ref)
|
||||
list_del(&hpriv->dev_node);
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
|
||||
put_pid(hpriv->taskpid);
|
||||
|
||||
if (reset_device) {
|
||||
hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
|
||||
} else {
|
||||
/* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
|
||||
int rc = hdev->asic_funcs->scrub_device_mem(hdev);
|
||||
|
||||
if (rc)
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
|
||||
@ -516,24 +575,20 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_device_release - release function for habanalabs device
|
||||
*
|
||||
* @inode: pointer to inode structure
|
||||
* @filp: pointer to file structure
|
||||
* hl_device_release() - release function for habanalabs device.
|
||||
* @ddev: pointer to DRM device structure.
|
||||
* @file: pointer to DRM file private data structure.
|
||||
*
|
||||
* Called when process closes an habanalabs device
|
||||
*/
|
||||
static int hl_device_release(struct inode *inode, struct file *filp)
|
||||
void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
|
||||
{
|
||||
struct hl_fpriv *hpriv = filp->private_data;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
|
||||
filp->private_data = NULL;
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
struct hl_device *hdev = to_hl_device(ddev);
|
||||
|
||||
if (!hdev) {
|
||||
pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
|
||||
put_pid(hpriv->taskpid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
|
||||
@ -551,8 +606,6 @@ static int hl_device_release(struct inode *inode, struct file *filp)
|
||||
}
|
||||
|
||||
hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
|
||||
@ -571,11 +624,6 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
|
||||
list_del(&hpriv->dev_node);
|
||||
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
|
||||
out:
|
||||
/* release the eventfd */
|
||||
if (hpriv->notifier_event.eventfd)
|
||||
eventfd_ctx_put(hpriv->notifier_event.eventfd);
|
||||
|
||||
mutex_destroy(&hpriv->notifier_event.lock);
|
||||
put_pid(hpriv->taskpid);
|
||||
|
||||
kfree(hpriv);
|
||||
@ -583,18 +631,8 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_mmap - mmap function for habanalabs device
|
||||
*
|
||||
* @*filp: pointer to file structure
|
||||
* @*vma: pointer to vm_area_struct of the process
|
||||
*
|
||||
* Called when process does an mmap on habanalabs device. Call the relevant mmap
|
||||
* function at the end of the common code.
|
||||
*/
|
||||
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
|
||||
{
|
||||
struct hl_fpriv *hpriv = filp->private_data;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
unsigned long vm_pgoff;
|
||||
|
||||
@ -617,14 +655,22 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * File operations table wiring the habanalabs open, release, mmap and ioctl
 * handlers. The same hl_ioctl handler serves both unlocked_ioctl and
 * compat_ioctl.
 */
static const struct file_operations hl_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = hl_device_open,
|
||||
.release = hl_device_release,
|
||||
.mmap = hl_mmap,
|
||||
.unlocked_ioctl = hl_ioctl,
|
||||
.compat_ioctl = hl_ioctl
|
||||
};
|
||||
/*
|
||||
* hl_mmap - mmap function for habanalabs device
|
||||
*
|
||||
* @filp: pointer to file structure
|
||||
* @vma: pointer to vm_area_struct of the process
|
||||
*
|
||||
* Called when a process does an mmap on a habanalabs device. Extracts the driver's
|
||||
* per-file private data (hl_fpriv) from the DRM file and forwards it, together
* with the vma, to __hl_mmap().
*
* Return: the value returned by __hl_mmap().
|
||||
*/
|
||||
int hl_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
/* private_data of the file is the DRM file; its driver_priv is our hl_fpriv */
struct drm_file *file_priv = filp->private_data;
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
|
||||
return __hl_mmap(hpriv, vma);
|
||||
}
|
||||
|
||||
static const struct file_operations hl_ctrl_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
@ -645,14 +691,14 @@ static void device_release_func(struct device *dev)
|
||||
* @hdev: pointer to habanalabs device structure
|
||||
* @class: pointer to the class object of the device
|
||||
* @minor: minor number of the specific device
|
||||
* @fpos: file operations to install for this device
|
||||
* @fops: file operations to install for this device
|
||||
* @name: name of the device as it will appear in the filesystem
|
||||
* @cdev: pointer to the char device object that will be initialized
|
||||
* @dev: pointer to the device object that will be initialized
|
||||
*
|
||||
* Initialize a cdev and a Linux device for habanalabs's device.
|
||||
*/
|
||||
static int device_init_cdev(struct hl_device *hdev, struct class *class,
|
||||
static int device_init_cdev(struct hl_device *hdev, const struct class *class,
|
||||
int minor, const struct file_operations *fops,
|
||||
char *name, struct cdev *cdev,
|
||||
struct device **dev)
|
||||
@ -676,23 +722,26 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class,
|
||||
|
||||
static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
|
||||
{
|
||||
const struct class *accel_class = hdev->drm.accel->kdev->class;
|
||||
char name[32];
|
||||
int rc;
|
||||
|
||||
rc = cdev_device_add(&hdev->cdev, hdev->dev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to add a char device to the system\n");
|
||||
hdev->cdev_idx = hdev->drm.accel->index;
|
||||
|
||||
/* Initialize cdev and device structures for the control device */
|
||||
snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx);
|
||||
rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name,
|
||||
&hdev->cdev_ctrl, &hdev->dev_ctrl);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to add a control char device to the system\n");
|
||||
goto delete_cdev_device;
|
||||
dev_err(hdev->dev_ctrl,
|
||||
"failed to add an accel control char device to the system\n");
|
||||
goto free_ctrl_device;
|
||||
}
|
||||
|
||||
/* hl_sysfs_init() must be done after adding the device to the system */
|
||||
rc = hl_sysfs_init(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to initialize sysfs\n");
|
||||
@ -707,23 +756,19 @@ static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
|
||||
|
||||
delete_ctrl_cdev_device:
|
||||
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
|
||||
delete_cdev_device:
|
||||
cdev_device_del(&hdev->cdev, hdev->dev);
|
||||
free_ctrl_device:
|
||||
put_device(hdev->dev_ctrl);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void cdev_sysfs_debugfs_remove(struct hl_device *hdev)
|
||||
{
|
||||
if (!hdev->cdev_sysfs_debugfs_created)
|
||||
goto put_devices;
|
||||
return;
|
||||
|
||||
hl_debugfs_remove_device(hdev);
|
||||
hl_sysfs_fini(hdev);
|
||||
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
|
||||
cdev_device_del(&hdev->cdev, hdev->dev);
|
||||
|
||||
put_devices:
|
||||
put_device(hdev->dev);
|
||||
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
|
||||
put_device(hdev->dev_ctrl);
|
||||
}
|
||||
|
||||
@ -996,6 +1041,20 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
|
||||
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
|
||||
}
|
||||
|
||||
/*
 * hl_device_eq_heartbeat() - verify that an EQ heartbeat indication was received.
 * @hdev: pointer to habanalabs device structure.
 *
 * No-op when cpucp_info.eq_health_check_supported is not set. Otherwise, a set
 * eq_heartbeat_received flag is consumed (cleared), so a fresh indication is
 * required before the next call; if the flag is not set, a conditional hard
 * reset is requested.
 */
static void hl_device_eq_heartbeat(struct hl_device *hdev)
|
||||
{
|
||||
/* Event mask handed to hl_device_cond_reset() below */
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
|
||||
if (!prop->cpucp_info.eq_health_check_supported)
|
||||
return;
|
||||
|
||||
/* Clear the flag so that it must be set again before the next check */
if (hdev->eq_heartbeat_received)
|
||||
hdev->eq_heartbeat_received = false;
|
||||
else
|
||||
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
|
||||
}
|
||||
|
||||
static void hl_device_heartbeat(struct work_struct *work)
|
||||
{
|
||||
struct hl_device *hdev = container_of(work, struct hl_device,
|
||||
@ -1003,9 +1062,16 @@ static void hl_device_heartbeat(struct work_struct *work)
|
||||
struct hl_info_fw_err_info info = {0};
|
||||
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
|
||||
|
||||
if (!hl_device_operational(hdev, NULL))
|
||||
/* Start heartbeat checks only after driver has enabled events from FW */
|
||||
if (!hl_device_operational(hdev, NULL) || !hdev->init_done)
|
||||
goto reschedule;
|
||||
|
||||
/*
|
||||
* For the EQ health check, we need to check whether the driver received the
|
||||
* heartbeat EQ event, in order to validate that the EQ is working.
|
||||
*/
|
||||
hl_device_eq_heartbeat(hdev);
|
||||
|
||||
if (!hdev->asic_funcs->send_heartbeat(hdev))
|
||||
goto reschedule;
|
||||
|
||||
@ -1062,7 +1128,15 @@ static int device_late_init(struct hl_device *hdev)
|
||||
hdev->high_pll = hdev->asic_prop.high_pll;
|
||||
|
||||
if (hdev->heartbeat) {
|
||||
/*
|
||||
* Before scheduling the heartbeat, the driver checks whether an EQ event has been received.
|
||||
* For the first schedule we need to set the indication to true; then, for the next
|
||||
* one this indication will be true only if eq event was sent by FW.
|
||||
*/
|
||||
hdev->eq_heartbeat_received = true;
|
||||
|
||||
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
|
||||
|
||||
schedule_delayed_work(&hdev->work_heartbeat,
|
||||
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
|
||||
}
|
||||
@ -1302,18 +1376,18 @@ int hl_device_resume(struct hl_device *hdev)
|
||||
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
|
||||
{
|
||||
struct task_struct *task = NULL;
|
||||
struct list_head *fd_list;
|
||||
struct hl_fpriv *hpriv;
|
||||
struct mutex *fd_lock;
|
||||
struct list_head *hpriv_list;
|
||||
struct hl_fpriv *hpriv;
|
||||
struct mutex *hpriv_lock;
|
||||
u32 pending_cnt;
|
||||
|
||||
fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
|
||||
fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
|
||||
hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
|
||||
hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
|
||||
|
||||
/* Giving time for user to close FD, and for processes that are inside
|
||||
* hl_device_open to finish
|
||||
*/
|
||||
if (!list_empty(fd_list))
|
||||
if (!list_empty(hpriv_list))
|
||||
ssleep(1);
|
||||
|
||||
if (timeout) {
|
||||
@ -1329,12 +1403,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(fd_lock);
|
||||
mutex_lock(hpriv_lock);
|
||||
|
||||
/* This section must be protected because we are dereferencing
|
||||
* pointers that are freed if the process exits
|
||||
*/
|
||||
list_for_each_entry(hpriv, fd_list, dev_node) {
|
||||
list_for_each_entry(hpriv, hpriv_list, dev_node) {
|
||||
task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
|
||||
if (task) {
|
||||
dev_info(hdev->dev, "Killing user process pid=%d\n",
|
||||
@ -1344,17 +1418,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
|
||||
|
||||
put_task_struct(task);
|
||||
} else {
|
||||
/*
|
||||
* If we got here, it means that process was killed from outside the driver
|
||||
* right after it started looping on fd_list and before get_pid_task, thus
|
||||
* we don't need to kill it.
|
||||
*/
|
||||
dev_dbg(hdev->dev,
|
||||
"Can't get task struct for user process, assuming process was killed from outside the driver\n");
|
||||
"Can't get task struct for user process %d, process was killed from outside the driver\n",
|
||||
pid_nr(hpriv->taskpid));
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(fd_lock);
|
||||
mutex_unlock(hpriv_lock);
|
||||
|
||||
/*
|
||||
* We killed the open users, but that doesn't mean they are closed.
|
||||
@ -1366,7 +1436,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
|
||||
*/
|
||||
|
||||
wait_for_processes:
|
||||
while ((!list_empty(fd_list)) && (pending_cnt)) {
|
||||
while ((!list_empty(hpriv_list)) && (pending_cnt)) {
|
||||
dev_dbg(hdev->dev,
|
||||
"Waiting for all unmap operations to finish before hard reset\n");
|
||||
|
||||
@ -1376,7 +1446,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
|
||||
}
|
||||
|
||||
/* All processes exited successfully */
|
||||
if (list_empty(fd_list))
|
||||
if (list_empty(hpriv_list))
|
||||
return 0;
|
||||
|
||||
/* Give up waiting for processes to exit */
|
||||
@ -1390,17 +1460,17 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
|
||||
|
||||
static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
|
||||
{
|
||||
struct list_head *fd_list;
|
||||
struct list_head *hpriv_list;
|
||||
struct hl_fpriv *hpriv;
|
||||
struct mutex *fd_lock;
|
||||
struct mutex *hpriv_lock;
|
||||
|
||||
fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
|
||||
fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
|
||||
hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
|
||||
hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
|
||||
|
||||
mutex_lock(fd_lock);
|
||||
list_for_each_entry(hpriv, fd_list, dev_node)
|
||||
mutex_lock(hpriv_lock);
|
||||
list_for_each_entry(hpriv, hpriv_list, dev_node)
|
||||
hpriv->hdev = NULL;
|
||||
mutex_unlock(fd_lock);
|
||||
mutex_unlock(hpriv_lock);
|
||||
}
|
||||
|
||||
static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
|
||||
@ -1916,7 +1986,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
|
||||
}
|
||||
|
||||
ctx = hl_get_compute_ctx(hdev);
|
||||
if (!ctx || !ctx->hpriv->notifier_event.eventfd)
|
||||
if (!ctx)
|
||||
goto device_reset;
|
||||
|
||||
/*
|
||||
* There is no point in postponing the reset if user is not registered for events.
|
||||
* However if no eventfd_ctx exists but the device release watchdog is already scheduled, it
|
||||
* just implies that user has unregistered as part of handling a previous event. In this
|
||||
* case an immediate reset is not required.
|
||||
*/
|
||||
if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active)
|
||||
goto device_reset;
|
||||
|
||||
/* Schedule the device release watchdog work unless reset is already in progress or if the
|
||||
@ -1928,8 +2007,10 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
|
||||
goto device_reset;
|
||||
}
|
||||
|
||||
if (hdev->reset_info.watchdog_active)
|
||||
if (hdev->reset_info.watchdog_active) {
|
||||
hdev->device_release_watchdog_work.flags |= flags;
|
||||
goto out;
|
||||
}
|
||||
|
||||
hdev->device_release_watchdog_work.flags = flags;
|
||||
dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
|
||||
@ -1990,59 +2071,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
|
||||
hl_notifier_event_send(&hpriv->notifier_event, event_mask);
|
||||
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
|
||||
/* control device */
|
||||
mutex_lock(&hdev->fpriv_ctrl_list_lock);
|
||||
|
||||
list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
|
||||
hl_notifier_event_send(&hpriv->notifier_event, event_mask);
|
||||
|
||||
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
|
||||
}
|
||||
|
||||
static int create_cdev(struct hl_device *hdev)
|
||||
{
|
||||
char *name;
|
||||
int rc;
|
||||
|
||||
hdev->cdev_idx = hdev->id / 2;
|
||||
|
||||
name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
|
||||
if (!name) {
|
||||
rc = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Initialize cdev and device structures */
|
||||
rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name,
|
||||
&hdev->cdev, &hdev->dev);
|
||||
|
||||
kfree(name);
|
||||
|
||||
if (rc)
|
||||
goto out_err;
|
||||
|
||||
name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
|
||||
if (!name) {
|
||||
rc = -ENOMEM;
|
||||
goto free_dev;
|
||||
}
|
||||
|
||||
/* Initialize cdev and device structures for control device */
|
||||
rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
|
||||
name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
|
||||
|
||||
kfree(name);
|
||||
|
||||
if (rc)
|
||||
goto free_dev;
|
||||
|
||||
return 0;
|
||||
|
||||
free_dev:
|
||||
put_device(hdev->dev);
|
||||
out_err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2057,16 +2085,14 @@ static int create_cdev(struct hl_device *hdev)
|
||||
int hl_device_init(struct hl_device *hdev)
|
||||
{
|
||||
int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
|
||||
struct hl_ts_free_jobs *free_jobs_data;
|
||||
bool expose_interfaces_on_err = false;
|
||||
|
||||
rc = create_cdev(hdev);
|
||||
if (rc)
|
||||
goto out_disabled;
|
||||
void *p;
|
||||
|
||||
/* Initialize ASIC function pointers and perform early init */
|
||||
rc = device_early_init(hdev);
|
||||
if (rc)
|
||||
goto free_dev;
|
||||
goto out_disabled;
|
||||
|
||||
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
|
||||
hdev->asic_prop.user_interrupt_count;
|
||||
@ -2078,15 +2104,43 @@ int hl_device_init(struct hl_device *hdev)
|
||||
rc = -ENOMEM;
|
||||
goto early_fini;
|
||||
}
|
||||
|
||||
/* Timestamp records supported only if CQ supported in device */
|
||||
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
|
||||
for (i = 0 ; i < user_interrupt_cnt ; i++) {
|
||||
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
|
||||
sizeof(struct timestamp_reg_free_node));
|
||||
if (!p) {
|
||||
rc = -ENOMEM;
|
||||
goto free_usr_intr_mem;
|
||||
}
|
||||
free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data;
|
||||
free_jobs_data->free_nodes_pool = p;
|
||||
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
|
||||
free_jobs_data->next_avail_free_node_idx = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data;
|
||||
p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
|
||||
sizeof(struct timestamp_reg_free_node));
|
||||
if (!p) {
|
||||
rc = -ENOMEM;
|
||||
goto free_usr_intr_mem;
|
||||
}
|
||||
|
||||
free_jobs_data->free_nodes_pool = p;
|
||||
free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
|
||||
free_jobs_data->next_avail_free_node_idx = 0;
|
||||
|
||||
/*
|
||||
* Start calling ASIC initialization. First S/W then H/W and finally
|
||||
* late init
|
||||
*/
|
||||
rc = hdev->asic_funcs->sw_init(hdev);
|
||||
if (rc)
|
||||
goto free_usr_intr_mem;
|
||||
goto free_common_usr_intr_mem;
|
||||
|
||||
|
||||
/* initialize completion structure for multi CS wait */
|
||||
@ -2253,6 +2307,14 @@ int hl_device_init(struct hl_device *hdev)
|
||||
* From here there is no need to expose them in case of an error.
|
||||
*/
|
||||
expose_interfaces_on_err = false;
|
||||
|
||||
rc = drm_dev_register(&hdev->drm, 0);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc);
|
||||
rc = 0;
|
||||
goto out_disabled;
|
||||
}
|
||||
|
||||
rc = cdev_sysfs_debugfs_add(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n");
|
||||
@ -2284,8 +2346,6 @@ int hl_device_init(struct hl_device *hdev)
|
||||
"Successfully added device %s to habanalabs driver\n",
|
||||
dev_name(&(hdev)->pdev->dev));
|
||||
|
||||
hdev->init_done = true;
|
||||
|
||||
/* After initialization is done, we are ready to receive events from
|
||||
* the F/W. We can't do it before because we will ignore events and if
|
||||
* those events are fatal, we won't know about it and the device will
|
||||
@ -2293,6 +2353,8 @@ int hl_device_init(struct hl_device *hdev)
|
||||
*/
|
||||
hdev->asic_funcs->enable_events_from_fw(hdev);
|
||||
|
||||
hdev->init_done = true;
|
||||
|
||||
return 0;
|
||||
|
||||
cb_pool_fini:
|
||||
@ -2317,19 +2379,27 @@ int hl_device_init(struct hl_device *hdev)
|
||||
hl_hw_queues_destroy(hdev);
|
||||
sw_fini:
|
||||
hdev->asic_funcs->sw_fini(hdev);
|
||||
free_common_usr_intr_mem:
|
||||
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
|
||||
free_usr_intr_mem:
|
||||
kfree(hdev->user_interrupt);
|
||||
if (user_interrupt_cnt) {
|
||||
for (i = 0 ; i < user_interrupt_cnt ; i++) {
|
||||
if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool)
|
||||
break;
|
||||
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
|
||||
}
|
||||
kfree(hdev->user_interrupt);
|
||||
}
|
||||
early_fini:
|
||||
device_early_fini(hdev);
|
||||
free_dev:
|
||||
put_device(hdev->dev_ctrl);
|
||||
put_device(hdev->dev);
|
||||
out_disabled:
|
||||
hdev->disabled = true;
|
||||
if (expose_interfaces_on_err)
|
||||
if (expose_interfaces_on_err) {
|
||||
drm_dev_register(&hdev->drm, 0);
|
||||
cdev_sysfs_debugfs_add(hdev);
|
||||
dev_err(&hdev->pdev->dev,
|
||||
"Failed to initialize hl%d. Device %s is NOT usable !\n",
|
||||
}
|
||||
|
||||
pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n",
|
||||
hdev->cdev_idx, dev_name(&hdev->pdev->dev));
|
||||
|
||||
return rc;
|
||||
@ -2344,12 +2414,13 @@ int hl_device_init(struct hl_device *hdev)
|
||||
*/
|
||||
void hl_device_fini(struct hl_device *hdev)
|
||||
{
|
||||
u32 user_interrupt_cnt;
|
||||
bool device_in_reset;
|
||||
ktime_t timeout;
|
||||
u64 reset_sec;
|
||||
int i, rc;
|
||||
|
||||
dev_info(hdev->dev, "Removing device\n");
|
||||
dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev));
|
||||
|
||||
hdev->device_fini_pending = 1;
|
||||
flush_delayed_work(&hdev->device_reset_work.reset_work);
|
||||
@ -2425,14 +2496,14 @@ void hl_device_fini(struct hl_device *hdev)
|
||||
hdev->process_kill_trial_cnt = 0;
|
||||
rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false);
|
||||
if (rc) {
|
||||
dev_crit(hdev->dev, "Failed to kill all open processes\n");
|
||||
dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc);
|
||||
device_disable_open_processes(hdev, false);
|
||||
}
|
||||
|
||||
hdev->process_kill_trial_cnt = 0;
|
||||
rc = device_kill_open_processes(hdev, 0, true);
|
||||
if (rc) {
|
||||
dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
|
||||
dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc);
|
||||
device_disable_open_processes(hdev, true);
|
||||
}
|
||||
|
||||
@ -2464,7 +2535,20 @@ void hl_device_fini(struct hl_device *hdev)
|
||||
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
|
||||
hl_cq_fini(hdev, &hdev->completion_queue[i]);
|
||||
kfree(hdev->completion_queue);
|
||||
kfree(hdev->user_interrupt);
|
||||
|
||||
user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
|
||||
hdev->asic_prop.user_interrupt_count;
|
||||
|
||||
if (user_interrupt_cnt) {
|
||||
if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
|
||||
for (i = 0 ; i < user_interrupt_cnt ; i++)
|
||||
vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
|
||||
}
|
||||
|
||||
kfree(hdev->user_interrupt);
|
||||
}
|
||||
|
||||
vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
|
||||
|
||||
hl_hw_queues_destroy(hdev);
|
||||
|
||||
@ -2475,6 +2559,7 @@ void hl_device_fini(struct hl_device *hdev)
|
||||
|
||||
/* Hide devices and sysfs/debugfs files from user */
|
||||
cdev_sysfs_debugfs_remove(hdev);
|
||||
drm_dev_unregister(&hdev->drm);
|
||||
|
||||
hl_debugfs_device_fini(hdev);
|
||||
|
||||
@ -2690,6 +2775,20 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info)
|
||||
*info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
|
||||
}
|
||||
|
||||
/*
 * hl_capture_engine_err - record the details of an engine error
 * @hdev: pointer to the habanalabs device structure
 * @engine_id: id of the engine that reported the error
 * @error_count: error count to store with the event
 *
 * Only the first engine error is captured: event_detected is switched from
 * 0 to 1 atomically, and any call that finds it already set returns without
 * touching the stored event.
 */
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count)
{
	struct engine_err_info *err_info = &hdev->captured_err_info.engine_err;

	/* atomic_cmpxchg() returns the old value; non-zero means already captured */
	if (atomic_cmpxchg(&err_info->event_detected, 0, 1) != 0)
		return;

	err_info->event.timestamp = ktime_to_ns(ktime_get());
	err_info->event.engine_id = engine_id;
	err_info->event.error_count = error_count;

	/* Set last, after all event fields have been filled in */
	err_info->event_info_available = true;
}
|
||||
|
||||
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
|
||||
{
|
||||
vfree(captured_err_info->page_fault_info.user_mappings);
|
||||
|
@ -6,7 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "habanalabs.h"
|
||||
#include "../include/common/hl_boot_if.h"
|
||||
#include <linux/habanalabs/hl_boot_if.h>
|
||||
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/crc32.h>
|
||||
@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
|
||||
err_exists = true;
|
||||
}
|
||||
|
||||
if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
|
||||
dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
|
||||
err_exists = true;
|
||||
}
|
||||
|
||||
if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
|
||||
/* Ignore this bit, don't prevent driver loading */
|
||||
dev_dbg(hdev->dev, "device unusable status is set\n");
|
||||
@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
|
||||
dev_err(hdev->dev,
|
||||
"Device boot progress - Stuck in preboot after security initialization\n");
|
||||
break;
|
||||
case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
|
||||
dev_err(hdev->dev,
|
||||
"Device boot progress - Stuck in preparation for shutdown\n");
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev,
|
||||
"Device boot progress - Invalid or unexpected status code %d\n", status);
|
||||
@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
|
||||
int hl_fw_wait_preboot_ready(struct hl_device *hdev)
|
||||
{
|
||||
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
|
||||
u32 status;
|
||||
int rc;
|
||||
u32 status = 0, timeout;
|
||||
int rc, tries = 1;
|
||||
bool preboot_still_runs;
|
||||
|
||||
/* Need to check two possible scenarios:
|
||||
*
|
||||
@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
|
||||
* All other status values - for older firmwares where the uboot was
|
||||
* loaded from the FLASH
|
||||
*/
|
||||
timeout = pre_fw_load->wait_for_preboot_timeout;
|
||||
retry:
|
||||
rc = hl_poll_timeout(
|
||||
hdev,
|
||||
pre_fw_load->cpu_boot_status_reg,
|
||||
@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
|
||||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
|
||||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
|
||||
hdev->fw_poll_interval_usec,
|
||||
pre_fw_load->wait_for_preboot_timeout);
|
||||
timeout);
|
||||
/*
|
||||
* if F/W reports "security-ready" it means preboot might take longer.
|
||||
* If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again
|
||||
* with that timeout
|
||||
*/
|
||||
preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
|
||||
status == CPU_BOOT_STATUS_IN_PREBOOT ||
|
||||
status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
|
||||
status == CPU_BOOT_STATUS_DRAM_RDY);
|
||||
|
||||
if (rc && tries && preboot_still_runs) {
|
||||
tries--;
|
||||
if (pre_fw_load->wait_for_preboot_extended_timeout) {
|
||||
timeout = pre_fw_load->wait_for_preboot_extended_timeout;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
if (rc) {
|
||||
detect_cpu_boot_status(hdev, status);
|
||||
@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
|
||||
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
|
||||
struct lkd_fw_binning_info *binning_info;
|
||||
|
||||
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
|
||||
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
|
||||
sizeof(struct lkd_msg_comms));
|
||||
if (rc)
|
||||
goto protocol_err;
|
||||
|
||||
@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
|
||||
hdev->decoder_binning, hdev->rotator_binning);
|
||||
}
|
||||
|
||||
if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
|
||||
hdev->asic_prop.reserved_fw_mem_size =
|
||||
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Copyright 2016-2022 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2023 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*/
|
||||
@ -8,7 +8,7 @@
|
||||
#ifndef HABANALABSP_H_
|
||||
#define HABANALABSP_H_
|
||||
|
||||
#include "../include/common/cpucp_if.h"
|
||||
#include <linux/habanalabs/cpucp_if.h>
|
||||
#include "../include/common/qman_if.h"
|
||||
#include "../include/hw_ip/mmu/mmu_general.h"
|
||||
#include <uapi/drm/habanalabs_accel.h>
|
||||
@ -29,6 +29,9 @@
|
||||
#include <linux/coresight.h>
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_file.h>
|
||||
|
||||
#include "security.h"
|
||||
|
||||
#define HL_NAME "habanalabs"
|
||||
@ -82,8 +85,6 @@ struct hl_fpriv;
|
||||
|
||||
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
|
||||
|
||||
#define HL_SIM_MAX_TIMEOUT_US 100000000 /* 100s */
|
||||
|
||||
#define HL_INVALID_QUEUE UINT_MAX
|
||||
|
||||
#define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF
|
||||
@ -103,6 +104,8 @@ struct hl_fpriv;
|
||||
/* MMU */
|
||||
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
|
||||
|
||||
#define TIMESTAMP_FREE_NODES_NUM 512
|
||||
|
||||
/**
|
||||
* enum hl_mmu_page_table_location - mmu page table location
|
||||
* @MMU_DR_PGT: page-table is located on device DRAM.
|
||||
@ -154,6 +157,11 @@ enum hl_mmu_page_table_location {
|
||||
#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
|
||||
hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
|
||||
|
||||
#define hl_dma_map_sgtable(hdev, sgt, dir) \
|
||||
hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__)
|
||||
#define hl_dma_unmap_sgtable(hdev, sgt, dir) \
|
||||
hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__)
|
||||
|
||||
/*
|
||||
* Reset Flags
|
||||
*
|
||||
@ -545,8 +553,7 @@ struct hl_hints_range {
|
||||
* allocated with huge pages.
|
||||
* @hints_dram_reserved_va_range: dram hint addresses reserved range.
|
||||
* @hints_host_reserved_va_range: host hint addresses reserved range.
|
||||
* @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
|
||||
* range.
|
||||
* @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range.
|
||||
* @sram_base_address: SRAM physical start address.
|
||||
* @sram_end_address: SRAM physical end address.
|
||||
* @sram_user_base_address - SRAM physical start address for user access.
|
||||
@ -585,7 +592,7 @@ struct hl_hints_range {
|
||||
* @mmu_pte_size: PTE size in MMU page tables.
|
||||
* @mmu_hop_table_size: MMU hop table size.
|
||||
* @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
|
||||
* @dram_page_size: page size for MMU DRAM allocation.
|
||||
* @dram_page_size: The DRAM physical page size.
|
||||
* @cfg_size: configuration space size on SRAM.
|
||||
* @sram_size: total size of SRAM.
|
||||
* @max_asid: maximum number of open contexts (ASIDs).
|
||||
@ -641,6 +648,7 @@ struct hl_hints_range {
|
||||
* @glbl_err_cause_num: global err cause number.
|
||||
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
|
||||
* not supported.
|
||||
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
|
||||
* @collective_first_sob: first sync object available for collective use
|
||||
* @collective_first_mon: first monitor available for collective use
|
||||
* @sync_stream_first_sob: first sync object available for sync stream use
|
||||
@ -686,9 +694,10 @@ struct hl_hints_range {
|
||||
* @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
|
||||
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
|
||||
* @supports_user_set_page_size: true if user can set the allocation page size.
|
||||
* @dma_mask: the dma mask to be set for this device
|
||||
* @dma_mask: the dma mask to be set for this device.
|
||||
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
|
||||
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
|
||||
* @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
|
||||
*/
|
||||
struct asic_fixed_properties {
|
||||
struct hw_queue_properties *hw_queues_props;
|
||||
@ -772,6 +781,7 @@ struct asic_fixed_properties {
|
||||
u32 num_of_special_blocks;
|
||||
u32 glbl_err_cause_num;
|
||||
u32 hbw_flush_reg;
|
||||
u32 reserved_fw_mem_size;
|
||||
u16 collective_first_sob;
|
||||
u16 collective_first_mon;
|
||||
u16 sync_stream_first_sob;
|
||||
@ -808,6 +818,7 @@ struct asic_fixed_properties {
|
||||
u8 dma_mask;
|
||||
u8 supports_advanced_cpucp_rc;
|
||||
u8 supports_engine_modes;
|
||||
u8 support_dynamic_resereved_fw_size;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1097,20 +1108,42 @@ enum hl_user_interrupt_type {
|
||||
HL_USR_INTERRUPT_UNEXPECTED
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_ts_free_jobs - holds user interrupt ts free nodes related data
|
||||
* @free_nodes_pool: pool of nodes to be used for free timestamp jobs
|
||||
* @free_nodes_length: number of nodes in free_nodes_pool
|
||||
* @next_avail_free_node_idx: index of the next free node in the pool
|
||||
*
|
||||
* the free nodes pool must be protected by the user interrupt lock
|
||||
* to avoid race between different interrupts which are using the same
|
||||
* ts buffer with different offsets.
|
||||
*/
|
||||
struct hl_ts_free_jobs {
|
||||
struct timestamp_reg_free_node *free_nodes_pool;
|
||||
u32 free_nodes_length;
|
||||
u32 next_avail_free_node_idx;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_user_interrupt - holds user interrupt information
|
||||
* @hdev: pointer to the device structure
|
||||
* @ts_free_jobs_data: timestamp free jobs related data
|
||||
* @type: user interrupt type
|
||||
* @wait_list_head: head to the list of user threads pending on this interrupt
|
||||
* @ts_list_head: head to the list of timestamp records
|
||||
* @wait_list_lock: protects wait_list_head
|
||||
* @ts_list_lock: protects ts_list_head
|
||||
* @timestamp: last timestamp taken upon interrupt
|
||||
* @interrupt_id: msix interrupt id
|
||||
*/
|
||||
struct hl_user_interrupt {
|
||||
struct hl_device *hdev;
|
||||
struct hl_ts_free_jobs ts_free_jobs_data;
|
||||
enum hl_user_interrupt_type type;
|
||||
struct list_head wait_list_head;
|
||||
struct list_head ts_list_head;
|
||||
spinlock_t wait_list_lock;
|
||||
spinlock_t ts_list_lock;
|
||||
ktime_t timestamp;
|
||||
u32 interrupt_id;
|
||||
};
|
||||
@ -1120,11 +1153,15 @@ struct hl_user_interrupt {
|
||||
* @free_objects_node: node in the list free_obj_jobs
|
||||
* @cq_cb: pointer to cq command buffer to be freed
|
||||
* @buf: pointer to timestamp buffer to be freed
|
||||
* @in_use: indicates whether the node still in use in workqueue thread.
|
||||
* @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler
|
||||
*/
|
||||
struct timestamp_reg_free_node {
|
||||
struct list_head free_objects_node;
|
||||
struct hl_cb *cq_cb;
|
||||
struct hl_mmap_mem_buf *buf;
|
||||
atomic_t in_use;
|
||||
u8 dynamic_alloc;
|
||||
};
|
||||
|
||||
/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
|
||||
@ -1133,17 +1170,21 @@ struct timestamp_reg_free_node {
|
||||
* @free_obj: workqueue object to free timestamp registration node objects
|
||||
* @hdev: pointer to the device structure
|
||||
* @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
|
||||
* @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the
|
||||
* interrupt handler.
|
||||
*/
|
||||
struct timestamp_reg_work_obj {
|
||||
struct work_struct free_obj;
|
||||
struct hl_device *hdev;
|
||||
struct list_head *free_obj_head;
|
||||
struct list_head *dynamic_alloc_free_obj_head;
|
||||
};
|
||||
|
||||
/* struct timestamp_reg_info - holds the timestamp registration related data.
|
||||
* @buf: pointer to the timestamp buffer which include both user/kernel buffers.
|
||||
* relevant only when doing timestamps records registration.
|
||||
* @cq_cb: pointer to CQ counter CB.
|
||||
* @interrupt: the interrupt on whose wait list the node is queued.
|
||||
* @timestamp_kernel_addr: timestamp handle address, where to set timestamp
|
||||
* relevant only when doing timestamps records
|
||||
* registration.
|
||||
@ -1153,17 +1194,18 @@ struct timestamp_reg_work_obj {
|
||||
* allocating records dynamically.
|
||||
*/
|
||||
struct timestamp_reg_info {
|
||||
struct hl_mmap_mem_buf *buf;
|
||||
struct hl_cb *cq_cb;
|
||||
u64 *timestamp_kernel_addr;
|
||||
u8 in_use;
|
||||
struct hl_mmap_mem_buf *buf;
|
||||
struct hl_cb *cq_cb;
|
||||
struct hl_user_interrupt *interrupt;
|
||||
u64 *timestamp_kernel_addr;
|
||||
bool in_use;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_user_pending_interrupt - holds a context to a user thread
|
||||
* pending on an interrupt
|
||||
* @ts_reg_info: holds the timestamps registration nodes info
|
||||
* @wait_list_node: node in the list of user threads pending on an interrupt
|
||||
* @list_node: node in the list of user threads pending on an interrupt or timestamp
|
||||
* @fence: hl fence object for interrupt completion
|
||||
* @cq_target_value: CQ target value
|
||||
* @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
|
||||
@ -1171,7 +1213,7 @@ struct timestamp_reg_info {
|
||||
*/
|
||||
struct hl_user_pending_interrupt {
|
||||
struct timestamp_reg_info ts_reg_info;
|
||||
struct list_head wait_list_node;
|
||||
struct list_head list_node;
|
||||
struct hl_fence fence;
|
||||
u64 cq_target_value;
|
||||
u64 *cq_kernel_addr;
|
||||
@ -1370,6 +1412,8 @@ struct dynamic_fw_load_mgr {
|
||||
* @boot_err0_reg: boot_err0 register address
|
||||
* @boot_err1_reg: boot_err1 register address
|
||||
* @wait_for_preboot_timeout: timeout to poll for preboot ready
|
||||
* @wait_for_preboot_extended_timeout: timeout to poll for preboot ready in cases where we know
|
||||
* preboot needs longer time.
|
||||
*/
|
||||
struct pre_fw_load_props {
|
||||
u32 cpu_boot_status_reg;
|
||||
@ -1378,6 +1422,7 @@ struct pre_fw_load_props {
|
||||
u32 boot_err0_reg;
|
||||
u32 boot_err1_reg;
|
||||
u32 wait_for_preboot_timeout;
|
||||
u32 wait_for_preboot_extended_timeout;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1477,11 +1522,9 @@ struct engines_data {
|
||||
* @asic_dma_pool_free: free small DMA allocation from pool.
|
||||
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
|
||||
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
|
||||
* @asic_dma_unmap_single: unmap a single DMA buffer
|
||||
* @asic_dma_map_single: map a single buffer to a DMA
|
||||
* @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
|
||||
* @dma_unmap_sgtable: DMA unmap scatter-gather table.
|
||||
* @dma_map_sgtable: DMA map scatter-gather table.
|
||||
* @cs_parser: parse Command Submission.
|
||||
* @asic_dma_map_sgtable: DMA map scatter-gather table.
|
||||
* @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
|
||||
* @update_eq_ci: update event queue CI.
|
||||
* @context_switch: called upon ASID context switch.
|
||||
@ -1602,18 +1645,11 @@ struct hl_asic_funcs {
|
||||
size_t size, dma_addr_t *dma_handle);
|
||||
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
|
||||
size_t size, void *vaddr);
|
||||
void (*asic_dma_unmap_single)(struct hl_device *hdev,
|
||||
dma_addr_t dma_addr, int len,
|
||||
void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir);
|
||||
dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev,
|
||||
void *addr, int len,
|
||||
enum dma_data_direction dir);
|
||||
void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
|
||||
struct sg_table *sgt,
|
||||
int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir);
|
||||
int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
|
||||
int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir);
|
||||
void (*add_end_of_cb_packets)(struct hl_device *hdev,
|
||||
void *kernel_address, u32 len,
|
||||
u32 original_len,
|
||||
@ -1771,16 +1807,19 @@ struct hl_cs_counters_atomic {
|
||||
* @phys_pg_pack: pointer to physical page pack if the dma-buf was exported
|
||||
* where virtual memory is supported.
|
||||
* @memhash_hnode: pointer to the memhash node. this object holds the export count.
|
||||
* @device_address: physical address of the device's memory. Relevant only
|
||||
* if phys_pg_pack is NULL (dma-buf was exported from address).
|
||||
* The total size can be taken from the dmabuf object.
|
||||
* @offset: the offset into the buffer from which the memory is exported.
|
||||
* Relevant only if virtual memory is supported and phys_pg_pack is being used.
|
||||
* device_phys_addr: physical address of the device's memory. Relevant only
|
||||
* if phys_pg_pack is NULL (dma-buf was exported from address).
|
||||
* The total size can be taken from the dmabuf object.
|
||||
*/
|
||||
struct hl_dmabuf_priv {
|
||||
struct dma_buf *dmabuf;
|
||||
struct hl_ctx *ctx;
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
struct hl_vm_hash_node *memhash_hnode;
|
||||
uint64_t device_address;
|
||||
u64 offset;
|
||||
u64 device_phys_addr;
|
||||
};
|
||||
|
||||
#define HL_CS_OUTCOME_HISTORY_LEN 256
|
||||
@ -1835,6 +1874,7 @@ struct hl_cs_outcome_store {
|
||||
* @va_range: holds available virtual addresses for host and dram mappings.
|
||||
* @mem_hash_lock: protects the mem_hash.
|
||||
* @hw_block_list_lock: protects the HW block memory list.
|
||||
* @ts_reg_lock: timestamp registration ioctls lock.
|
||||
* @debugfs_list: node in debugfs list of contexts.
|
||||
* @hw_block_mem_list: list of HW block virtual mapped addresses.
|
||||
* @cs_counters: context command submission counters.
|
||||
@ -1871,6 +1911,7 @@ struct hl_ctx {
|
||||
struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
|
||||
struct mutex mem_hash_lock;
|
||||
struct mutex hw_block_list_lock;
|
||||
struct mutex ts_reg_lock;
|
||||
struct list_head debugfs_list;
|
||||
struct list_head hw_block_mem_list;
|
||||
struct hl_cs_counters_atomic cs_counters;
|
||||
@ -1917,17 +1958,17 @@ struct hl_ctx_mgr {
|
||||
* @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
|
||||
*/
|
||||
struct hl_userptr {
|
||||
enum vm_type vm_type; /* must be first */
|
||||
struct list_head job_node;
|
||||
struct page **pages;
|
||||
unsigned int npages;
|
||||
struct sg_table *sgt;
|
||||
enum dma_data_direction dir;
|
||||
struct list_head debugfs_list;
|
||||
pid_t pid;
|
||||
u64 addr;
|
||||
u64 size;
|
||||
u8 dma_mapped;
|
||||
enum vm_type vm_type; /* must be first */
|
||||
struct list_head job_node;
|
||||
struct page **pages;
|
||||
unsigned int npages;
|
||||
struct sg_table *sgt;
|
||||
enum dma_data_direction dir;
|
||||
struct list_head debugfs_list;
|
||||
pid_t pid;
|
||||
u64 addr;
|
||||
u64 size;
|
||||
u8 dma_mapped;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -2148,7 +2189,6 @@ struct hl_vm_hw_block_list_node {
|
||||
* @pages: the physical page array.
|
||||
* @npages: num physical pages in the pack.
|
||||
* @total_size: total size of all the pages in this list.
|
||||
* @exported_size: buffer exported size.
|
||||
* @node: used to attach to deletion list that is used when all the allocations are cleared
|
||||
* at the teardown of the context.
|
||||
* @mapping_cnt: number of shared mappings.
|
||||
@ -2165,7 +2205,6 @@ struct hl_vm_phys_pg_pack {
|
||||
u64 *pages;
|
||||
u64 npages;
|
||||
u64 total_size;
|
||||
u64 exported_size;
|
||||
struct list_head node;
|
||||
atomic_t mapping_cnt;
|
||||
u32 asid;
|
||||
@ -2250,7 +2289,7 @@ struct hl_notifier_event {
|
||||
/**
|
||||
* struct hl_fpriv - process information stored in FD private data.
|
||||
* @hdev: habanalabs device structure.
|
||||
* @filp: pointer to the given file structure.
|
||||
* @file_priv: pointer to the DRM file private data structure.
|
||||
* @taskpid: current process ID.
|
||||
* @ctx: current executing context. TODO: remove for multiple ctx per process
|
||||
* @ctx_mgr: context manager to handle multiple context for this FD.
|
||||
@ -2265,7 +2304,7 @@ struct hl_notifier_event {
|
||||
*/
|
||||
struct hl_fpriv {
|
||||
struct hl_device *hdev;
|
||||
struct file *filp;
|
||||
struct drm_file *file_priv;
|
||||
struct pid *taskpid;
|
||||
struct hl_ctx *ctx;
|
||||
struct hl_ctx_mgr ctx_mgr;
|
||||
@ -2706,6 +2745,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
||||
usr_intr.type = intr_type; \
|
||||
INIT_LIST_HEAD(&usr_intr.wait_list_head); \
|
||||
spin_lock_init(&usr_intr.wait_list_lock); \
|
||||
INIT_LIST_HEAD(&usr_intr.ts_list_head); \
|
||||
spin_lock_init(&usr_intr.ts_list_lock); \
|
||||
})
|
||||
|
||||
struct hwmon_chip_info;
|
||||
@ -3054,6 +3095,20 @@ struct fw_err_info {
|
||||
bool event_info_available;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct engine_err_info - engine error information.
|
||||
* @event: holds information on the event.
|
||||
* @event_detected: if set as 1, then an engine event was discovered for the
|
||||
* first time after the driver has finished booting-up.
|
||||
* @event_info_available: indicates that an engine event info is now available.
|
||||
*/
|
||||
struct engine_err_info {
|
||||
struct hl_info_engine_err_event event;
|
||||
atomic_t event_detected;
|
||||
bool event_info_available;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* struct hl_error_info - holds information collected during an error.
|
||||
* @cs_timeout: CS timeout error information.
|
||||
@ -3062,6 +3117,7 @@ struct fw_err_info {
|
||||
* @page_fault_info: page fault information.
|
||||
* @hw_err: (fatal) hardware error information.
|
||||
* @fw_err: firmware error information.
|
||||
* @engine_err: engine error information.
|
||||
*/
|
||||
struct hl_error_info {
|
||||
struct cs_timeout_info cs_timeout;
|
||||
@ -3070,6 +3126,7 @@ struct hl_error_info {
|
||||
struct page_fault_info page_fault_info;
|
||||
struct hw_err_info hw_err;
|
||||
struct fw_err_info fw_err;
|
||||
struct engine_err_info engine_err;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -3117,8 +3174,7 @@ struct hl_reset_info {
|
||||
* (required only for PCI address match mode)
|
||||
* @pcie_bar: array of available PCIe bars virtual addresses.
|
||||
* @rmmio: configuration area address on SRAM.
|
||||
* @hclass: pointer to the habanalabs class.
|
||||
* @cdev: related char device.
|
||||
* @drm: related DRM device.
|
||||
* @cdev_ctrl: char device for control operations only (INFO IOCTL)
|
||||
* @dev: related kernel basic device structure.
|
||||
* @dev_ctrl: related kernel device structure for the control device
|
||||
@ -3245,8 +3301,7 @@ struct hl_reset_info {
|
||||
* @rotator_binning: contains mask of rotators engines that is received from the f/w
|
||||
* which indicates which rotator engines are binned-out(Gaudi3 and above).
|
||||
* @id: device minor.
|
||||
* @id_control: minor of the control device.
|
||||
* @cdev_idx: char device index. Used for setting its name.
|
||||
* @cdev_idx: char device index.
|
||||
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
|
||||
* addresses.
|
||||
* @is_in_dram_scrub: true if dram scrub operation is on going.
|
||||
@ -3289,6 +3344,7 @@ struct hl_reset_info {
|
||||
* device.
|
||||
* @supports_ctx_switch: true if a ctx switch is required upon first submission.
|
||||
* @support_preboot_binning: true if we support read binning info from preboot.
|
||||
* @eq_heartbeat_received: indication that eq heartbeat event has received from FW.
|
||||
* @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing.
|
||||
* @fw_components: Controls which f/w components to load to the device. There are multiple f/w
|
||||
* stages and sometimes we want to stop at a certain stage. Used only for testing.
|
||||
@ -3308,8 +3364,7 @@ struct hl_device {
|
||||
u64 pcie_bar_phys[HL_PCI_NUM_BARS];
|
||||
void __iomem *pcie_bar[HL_PCI_NUM_BARS];
|
||||
void __iomem *rmmio;
|
||||
struct class *hclass;
|
||||
struct cdev cdev;
|
||||
struct drm_device drm;
|
||||
struct cdev cdev_ctrl;
|
||||
struct device *dev;
|
||||
struct device *dev_ctrl;
|
||||
@ -3418,7 +3473,6 @@ struct hl_device {
|
||||
u32 device_release_watchdog_timeout_sec;
|
||||
u32 rotator_binning;
|
||||
u16 id;
|
||||
u16 id_control;
|
||||
u16 cdev_idx;
|
||||
u16 cpu_pci_msb_addr;
|
||||
u8 is_in_dram_scrub;
|
||||
@ -3451,6 +3505,7 @@ struct hl_device {
|
||||
u8 reset_upon_device_release;
|
||||
u8 supports_ctx_switch;
|
||||
u8 support_preboot_binning;
|
||||
u8 eq_heartbeat_received;
|
||||
|
||||
/* Parameters for bring-up to be upstreamed */
|
||||
u64 nic_ports_mask;
|
||||
@ -3582,6 +3637,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size,
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline struct hl_device *to_hl_device(struct drm_device *ddev)
|
||||
{
|
||||
return container_of(ddev, struct hl_device, drm);
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
|
||||
* @address: The start address of the area we want to validate.
|
||||
@ -3611,8 +3671,13 @@ void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t
|
||||
dma_addr_t *dma_handle, const char *caller);
|
||||
void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
|
||||
const char *caller);
|
||||
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
|
||||
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
|
||||
int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir, const char *caller);
|
||||
void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir, const char *caller);
|
||||
int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir);
|
||||
void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
|
||||
enum dma_data_direction dir);
|
||||
int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
|
||||
enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar);
|
||||
@ -3620,7 +3685,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
|
||||
enum debugfs_access_type acc_type);
|
||||
int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
|
||||
u64 addr, u64 *val, enum debugfs_access_type acc_type);
|
||||
int hl_device_open(struct inode *inode, struct file *filp);
|
||||
|
||||
int hl_mmap(struct file *filp, struct vm_area_struct *vma);
|
||||
|
||||
int hl_device_open(struct drm_device *drm, struct drm_file *file_priv);
|
||||
void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv);
|
||||
|
||||
int hl_device_open_ctrl(struct inode *inode, struct file *filp);
|
||||
bool hl_device_operational(struct hl_device *hdev,
|
||||
enum hl_device_status *status);
|
||||
@ -3652,8 +3722,9 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
|
||||
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
|
||||
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
|
||||
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
|
||||
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
|
||||
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg);
|
||||
irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
|
||||
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg);
|
||||
u32 hl_cq_inc_ptr(u32 ptr);
|
||||
|
||||
int hl_asid_init(struct hl_device *hdev);
|
||||
@ -3944,16 +4015,14 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_
|
||||
u64 *event_mask);
|
||||
void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask);
|
||||
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
|
||||
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
|
||||
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
void hl_debugfs_init(void);
|
||||
void hl_debugfs_fini(void);
|
||||
int hl_debugfs_device_init(struct hl_device *hdev);
|
||||
void hl_debugfs_device_fini(struct hl_device *hdev);
|
||||
void hl_debugfs_add_device(struct hl_device *hdev);
|
||||
void hl_debugfs_remove_device(struct hl_device *hdev);
|
||||
void hl_debugfs_add_file(struct hl_fpriv *hpriv);
|
||||
void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
|
||||
void hl_debugfs_add_cb(struct hl_cb *cb);
|
||||
@ -3972,14 +4041,6 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
|
||||
|
||||
#else
|
||||
|
||||
static inline void __init hl_debugfs_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void hl_debugfs_fini(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int hl_debugfs_device_init(struct hl_device *hdev)
|
||||
{
|
||||
return 0;
|
||||
@ -3993,10 +4054,6 @@ static inline void hl_debugfs_add_device(struct hl_device *hdev)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void hl_debugfs_remove_device(struct hl_device *hdev)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
|
||||
{
|
||||
}
|
||||
@ -4108,11 +4165,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset,
|
||||
const u32 pb_blocks[], u32 blocks_array_size);
|
||||
|
||||
/* IOCTLs */
|
||||
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
|
||||
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
|
||||
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
|
||||
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
|
||||
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data);
|
||||
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
|
||||
int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
|
||||
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
|
||||
int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
|
||||
int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
|
||||
int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
|
||||
int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
|
||||
|
||||
#endif /* HABANALABSP_H_ */
|
||||
|
@ -14,6 +14,11 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#include <drm/drm_accel.h>
|
||||
#include <drm/drm_drv.h>
|
||||
#include <drm/drm_ioctl.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/habanalabs.h>
|
||||
@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
static int hl_major;
|
||||
static struct class *hl_class;
|
||||
static DEFINE_IDR(hl_devs_idr);
|
||||
static DEFINE_MUTEX(hl_devs_idr_lock);
|
||||
|
||||
@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = {
|
||||
};
|
||||
MODULE_DEVICE_TABLE(pci, ids);
|
||||
|
||||
static const struct drm_ioctl_desc hl_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0),
|
||||
};
|
||||
|
||||
static const struct file_operations hl_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = accel_open,
|
||||
.release = drm_release,
|
||||
.unlocked_ioctl = drm_ioctl,
|
||||
.compat_ioctl = drm_compat_ioctl,
|
||||
.llseek = noop_llseek,
|
||||
.mmap = hl_mmap
|
||||
};
|
||||
|
||||
static const struct drm_driver hl_driver = {
|
||||
.driver_features = DRIVER_COMPUTE_ACCEL,
|
||||
|
||||
.name = HL_NAME,
|
||||
.desc = HL_DRIVER_DESC,
|
||||
.major = LINUX_VERSION_MAJOR,
|
||||
.minor = LINUX_VERSION_PATCHLEVEL,
|
||||
.patchlevel = LINUX_VERSION_SUBLEVEL,
|
||||
.date = "20190505",
|
||||
|
||||
.fops = &hl_fops,
|
||||
.open = hl_device_open,
|
||||
.postclose = hl_device_release,
|
||||
.ioctls = hl_drm_ioctls,
|
||||
.num_ioctls = ARRAY_SIZE(hl_drm_ioctls)
|
||||
};
|
||||
|
||||
/*
|
||||
* get_asic_type - translate device id to asic type
|
||||
*
|
||||
@ -123,43 +163,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_device_open - open function for habanalabs device
|
||||
*
|
||||
* @inode: pointer to inode structure
|
||||
* @filp: pointer to file structure
|
||||
* hl_device_open() - open function for habanalabs device.
|
||||
* @ddev: pointer to DRM device structure.
|
||||
* @file: pointer to DRM file private data structure.
|
||||
*
|
||||
* Called when process opens an habanalabs device.
|
||||
*/
|
||||
int hl_device_open(struct inode *inode, struct file *filp)
|
||||
int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
|
||||
{
|
||||
struct hl_device *hdev = to_hl_device(ddev);
|
||||
enum hl_device_status status;
|
||||
struct hl_device *hdev;
|
||||
struct hl_fpriv *hpriv;
|
||||
int rc;
|
||||
|
||||
mutex_lock(&hl_devs_idr_lock);
|
||||
hdev = idr_find(&hl_devs_idr, iminor(inode));
|
||||
mutex_unlock(&hl_devs_idr_lock);
|
||||
|
||||
if (!hdev) {
|
||||
pr_err("Couldn't find device %d:%d\n",
|
||||
imajor(inode), iminor(inode));
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
|
||||
if (!hpriv)
|
||||
return -ENOMEM;
|
||||
|
||||
hpriv->hdev = hdev;
|
||||
filp->private_data = hpriv;
|
||||
hpriv->filp = filp;
|
||||
|
||||
mutex_init(&hpriv->notifier_event.lock);
|
||||
mutex_init(&hpriv->restore_phase_mutex);
|
||||
mutex_init(&hpriv->ctx_lock);
|
||||
kref_init(&hpriv->refcount);
|
||||
nonseekable_open(inode, filp);
|
||||
|
||||
hl_ctx_mgr_init(&hpriv->ctx_mgr);
|
||||
hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
|
||||
@ -225,6 +250,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
hdev->last_successful_open_jif = jiffies;
|
||||
hdev->last_successful_open_ktime = ktime_get();
|
||||
|
||||
file_priv->driver_priv = hpriv;
|
||||
hpriv->file_priv = file_priv;
|
||||
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
@ -232,7 +260,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
hl_mem_mgr_fini(&hpriv->mem_mgr);
|
||||
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
|
||||
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
|
||||
filp->private_data = NULL;
|
||||
mutex_destroy(&hpriv->ctx_lock);
|
||||
mutex_destroy(&hpriv->restore_phase_mutex);
|
||||
mutex_destroy(&hpriv->notifier_event.lock);
|
||||
@ -268,9 +295,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
|
||||
*/
|
||||
hpriv->hdev = hdev;
|
||||
filp->private_data = hpriv;
|
||||
hpriv->filp = filp;
|
||||
|
||||
mutex_init(&hpriv->notifier_event.lock);
|
||||
nonseekable_open(inode, filp);
|
||||
|
||||
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
|
||||
@ -317,7 +342,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
|
||||
hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
|
||||
|
||||
hdev->major = hl_major;
|
||||
hdev->hclass = hl_class;
|
||||
hdev->memory_scrub = memory_scrub;
|
||||
hdev->reset_on_lockup = reset_on_lockup;
|
||||
hdev->boot_error_status_mask = boot_error_status_mask;
|
||||
@ -383,6 +407,31 @@ static int fixup_device_params(struct hl_device *hdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int allocate_device_id(struct hl_device *hdev)
|
||||
{
|
||||
int id;
|
||||
|
||||
mutex_lock(&hl_devs_idr_lock);
|
||||
id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
|
||||
mutex_unlock(&hl_devs_idr_lock);
|
||||
|
||||
if (id < 0) {
|
||||
if (id == -ENOSPC)
|
||||
pr_err("too many devices in the system\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
hdev->id = id;
|
||||
|
||||
/*
|
||||
* Firstly initialized with the internal device ID.
|
||||
* Will be updated later after the DRM device registration to hold the minor ID.
|
||||
*/
|
||||
hdev->cdev_idx = hdev->id;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* create_hdev - create habanalabs device instance
|
||||
*
|
||||
@ -395,27 +444,29 @@ static int fixup_device_params(struct hl_device *hdev)
|
||||
*/
|
||||
static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
|
||||
{
|
||||
int main_id, ctrl_id = 0, rc = 0;
|
||||
struct hl_device *hdev;
|
||||
int rc;
|
||||
|
||||
*dev = NULL;
|
||||
|
||||
hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
|
||||
if (!hdev)
|
||||
return -ENOMEM;
|
||||
hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
|
||||
if (IS_ERR(hdev))
|
||||
return PTR_ERR(hdev);
|
||||
|
||||
hdev->dev = hdev->drm.dev;
|
||||
|
||||
/* Will be NULL in case of simulator device */
|
||||
hdev->pdev = pdev;
|
||||
|
||||
/* Assign status description string */
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
|
||||
"in device creation", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
|
||||
"in reset after device release", HL_STR_MAX);
|
||||
strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
|
||||
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
|
||||
strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
|
||||
strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
|
||||
strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
|
||||
"in device creation", HL_STR_MAX);
|
||||
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
|
||||
"in reset after device release", HL_STR_MAX);
|
||||
|
||||
|
||||
/* First, we must find out which ASIC are we handling. This is needed
|
||||
@ -425,7 +476,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
|
||||
if (hdev->asic_type == ASIC_INVALID) {
|
||||
dev_err(&pdev->dev, "Unsupported ASIC\n");
|
||||
rc = -ENODEV;
|
||||
goto free_hdev;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
copy_kernel_module_params_to_device(hdev);
|
||||
@ -434,42 +485,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
|
||||
|
||||
fixup_device_params(hdev);
|
||||
|
||||
mutex_lock(&hl_devs_idr_lock);
|
||||
|
||||
/* Always save 2 numbers, 1 for main device and 1 for control.
|
||||
* They must be consecutive
|
||||
*/
|
||||
main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
|
||||
|
||||
if (main_id >= 0)
|
||||
ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
|
||||
main_id + 2, GFP_KERNEL);
|
||||
|
||||
mutex_unlock(&hl_devs_idr_lock);
|
||||
|
||||
if ((main_id < 0) || (ctrl_id < 0)) {
|
||||
if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
|
||||
pr_err("too many devices in the system\n");
|
||||
|
||||
if (main_id >= 0) {
|
||||
mutex_lock(&hl_devs_idr_lock);
|
||||
idr_remove(&hl_devs_idr, main_id);
|
||||
mutex_unlock(&hl_devs_idr_lock);
|
||||
}
|
||||
|
||||
rc = -EBUSY;
|
||||
goto free_hdev;
|
||||
}
|
||||
|
||||
hdev->id = main_id;
|
||||
hdev->id_control = ctrl_id;
|
||||
rc = allocate_device_id(hdev);
|
||||
if (rc)
|
||||
goto out_err;
|
||||
|
||||
*dev = hdev;
|
||||
|
||||
return 0;
|
||||
|
||||
free_hdev:
|
||||
kfree(hdev);
|
||||
out_err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -484,10 +508,8 @@ static void destroy_hdev(struct hl_device *hdev)
|
||||
/* Remove device from the device list */
|
||||
mutex_lock(&hl_devs_idr_lock);
|
||||
idr_remove(&hl_devs_idr, hdev->id);
|
||||
idr_remove(&hl_devs_idr, hdev->id_control);
|
||||
mutex_unlock(&hl_devs_idr_lock);
|
||||
|
||||
kfree(hdev);
|
||||
}
|
||||
|
||||
static int hl_pmops_suspend(struct device *dev)
|
||||
@ -691,28 +713,16 @@ static int __init hl_init(void)
|
||||
|
||||
hl_major = MAJOR(dev);
|
||||
|
||||
hl_class = class_create(HL_NAME);
|
||||
if (IS_ERR(hl_class)) {
|
||||
pr_err("failed to allocate class\n");
|
||||
rc = PTR_ERR(hl_class);
|
||||
goto remove_major;
|
||||
}
|
||||
|
||||
hl_debugfs_init();
|
||||
|
||||
rc = pci_register_driver(&hl_pci_driver);
|
||||
if (rc) {
|
||||
pr_err("failed to register pci device\n");
|
||||
goto remove_debugfs;
|
||||
goto remove_major;
|
||||
}
|
||||
|
||||
pr_debug("driver loaded\n");
|
||||
|
||||
return 0;
|
||||
|
||||
remove_debugfs:
|
||||
hl_debugfs_fini();
|
||||
class_destroy(hl_class);
|
||||
remove_major:
|
||||
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
|
||||
return rc;
|
||||
@ -725,14 +735,6 @@ static void __exit hl_exit(void)
|
||||
{
|
||||
pci_unregister_driver(&hl_pci_driver);
|
||||
|
||||
/*
|
||||
* Removing debugfs must be after all devices or simulator devices
|
||||
* have been removed because otherwise we get a bug in the
|
||||
* debugfs module for referencing NULL objects
|
||||
*/
|
||||
hl_debugfs_fini();
|
||||
|
||||
class_destroy(hl_class);
|
||||
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
|
||||
|
||||
idr_destroy(&hl_devs_idr);
|
||||
|
@ -17,6 +17,8 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <asm/msr.h>
|
||||
|
||||
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
|
||||
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
|
||||
[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
|
||||
@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
|
||||
|
||||
time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
|
||||
time_sync.host_time = ktime_get_raw_ns();
|
||||
time_sync.tsc_time = rdtsc();
|
||||
|
||||
return copy_to_user(out, &time_sync,
|
||||
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
|
||||
@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
return rc ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
u32 user_buf_size = args->return_size;
|
||||
struct engine_err_info *info;
|
||||
int rc;
|
||||
|
||||
if (!user_buf)
|
||||
return -EINVAL;
|
||||
|
||||
info = &hdev->captured_err_info.engine_err;
|
||||
if (!info->event_info_available)
|
||||
return 0;
|
||||
|
||||
if (user_buf_size < sizeof(struct hl_info_engine_err_event))
|
||||
return -ENOMEM;
|
||||
|
||||
rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event));
|
||||
return rc ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
|
||||
{
|
||||
void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
|
||||
@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
|
||||
case HL_INFO_FW_ERR_EVENT:
|
||||
return fw_err_info(hpriv, args);
|
||||
|
||||
case HL_INFO_USER_ENGINE_ERR_EVENT:
|
||||
return engine_err_info(hpriv, args);
|
||||
|
||||
case HL_INFO_DRAM_USAGE:
|
||||
return dram_usage_info(hpriv, args);
|
||||
default:
|
||||
@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
|
||||
{
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
|
||||
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
|
||||
}
|
||||
|
||||
static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
|
||||
{
|
||||
struct hl_info_args *args = data;
|
||||
|
||||
switch (args->op) {
|
||||
case HL_INFO_GET_EVENTS:
|
||||
case HL_INFO_UNREGISTER_EVENTFD:
|
||||
case HL_INFO_REGISTER_EVENTFD:
|
||||
return -EOPNOTSUPP;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
|
||||
}
|
||||
|
||||
static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
|
||||
{
|
||||
struct hl_debug_args *args = data;
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_debug_args *args = data;
|
||||
enum hl_device_status status;
|
||||
|
||||
int rc = 0;
|
||||
@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
}
|
||||
|
||||
#define HL_IOCTL_DEF(ioctl, _func) \
|
||||
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
|
||||
|
||||
static const struct hl_ioctl_desc hl_ioctls[] = {
|
||||
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
|
||||
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
|
||||
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
|
||||
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
|
||||
HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
|
||||
HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
|
||||
};
|
||||
[_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func}
|
||||
|
||||
static const struct hl_ioctl_desc hl_ioctls_control[] = {
|
||||
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
|
||||
HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control)
|
||||
};
|
||||
|
||||
static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
|
||||
const struct hl_ioctl_desc *ioctl, struct device *dev)
|
||||
static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
|
||||
const struct hl_ioctl_desc *ioctl, struct device *dev)
|
||||
{
|
||||
struct hl_fpriv *hpriv = filep->private_data;
|
||||
unsigned int nr = _IOC_NR(cmd);
|
||||
char stack_kdata[128] = {0};
|
||||
char *kdata = NULL;
|
||||
@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
|
||||
retcode = -EFAULT;
|
||||
|
||||
out_err:
|
||||
if (retcode)
|
||||
dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
|
||||
task_pid_nr(current), cmd, nr);
|
||||
if (retcode) {
|
||||
char task_comm[TASK_COMM_LEN];
|
||||
|
||||
dev_dbg_ratelimited(dev,
|
||||
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
|
||||
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
|
||||
}
|
||||
|
||||
if (kdata != stack_kdata)
|
||||
kfree(kdata);
|
||||
@ -1204,29 +1240,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
|
||||
return retcode;
|
||||
}
|
||||
|
||||
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct hl_fpriv *hpriv = filep->private_data;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
const struct hl_ioctl_desc *ioctl = NULL;
|
||||
unsigned int nr = _IOC_NR(cmd);
|
||||
|
||||
if (!hdev) {
|
||||
pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
|
||||
ioctl = &hl_ioctls[nr];
|
||||
} else {
|
||||
dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
|
||||
task_pid_nr(current), nr);
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
|
||||
}
|
||||
|
||||
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct hl_fpriv *hpriv = filep->private_data;
|
||||
@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (nr == _IOC_NR(HL_IOCTL_INFO)) {
|
||||
ioctl = &hl_ioctls_control[nr];
|
||||
if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
|
||||
ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
|
||||
} else {
|
||||
dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
|
||||
task_pid_nr(current), nr);
|
||||
char task_comm[TASK_COMM_LEN];
|
||||
|
||||
dev_dbg_ratelimited(hdev->dev_ctrl,
|
||||
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
|
||||
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
|
||||
return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl);
|
||||
}
|
||||
|
@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work)
|
||||
{
|
||||
struct timestamp_reg_work_obj *job =
|
||||
container_of(work, struct timestamp_reg_work_obj, free_obj);
|
||||
struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
|
||||
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
|
||||
struct list_head *free_list_head = job->free_obj_head;
|
||||
|
||||
struct hl_device *hdev = job->hdev;
|
||||
|
||||
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
|
||||
@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work)
|
||||
|
||||
hl_mmap_mem_buf_put(free_obj->buf);
|
||||
hl_cb_put(free_obj->cq_cb);
|
||||
kfree(free_obj);
|
||||
atomic_set(&free_obj->in_use, 0);
|
||||
}
|
||||
|
||||
kfree(free_list_head);
|
||||
|
||||
if (dynamic_alloc_free_list_head) {
|
||||
list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
|
||||
free_objects_node) {
|
||||
dev_dbg(hdev->dev,
|
||||
"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
|
||||
free_obj->buf,
|
||||
free_obj->cq_cb);
|
||||
|
||||
hl_mmap_mem_buf_put(free_obj->buf);
|
||||
hl_cb_put(free_obj->cq_cb);
|
||||
list_del(&free_obj->free_objects_node);
|
||||
kfree(free_obj);
|
||||
}
|
||||
|
||||
kfree(dynamic_alloc_free_list_head);
|
||||
}
|
||||
|
||||
kfree(job);
|
||||
}
|
||||
|
||||
@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work)
|
||||
* list to a dedicated workqueue to do the actual put.
|
||||
*/
|
||||
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
|
||||
struct list_head **free_list, ktime_t now)
|
||||
struct list_head **free_list,
|
||||
struct list_head **dynamic_alloc_list,
|
||||
struct hl_user_interrupt *intr)
|
||||
{
|
||||
struct hl_ts_free_jobs *ts_free_jobs_data;
|
||||
struct timestamp_reg_free_node *free_node;
|
||||
u32 free_node_index;
|
||||
u64 timestamp;
|
||||
|
||||
ts_free_jobs_data = &intr->ts_free_jobs_data;
|
||||
free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
|
||||
|
||||
if (!(*free_list)) {
|
||||
/* Alloc/Init the timestamp registration free objects list */
|
||||
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
|
||||
@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
|
||||
INIT_LIST_HEAD(*free_list);
|
||||
}
|
||||
|
||||
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
|
||||
if (!free_node)
|
||||
return -ENOMEM;
|
||||
free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
|
||||
if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
|
||||
dev_dbg(hdev->dev,
|
||||
"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
|
||||
pend->ts_reg_info.buf,
|
||||
pend,
|
||||
intr->interrupt_id);
|
||||
|
||||
timestamp = ktime_to_ns(now);
|
||||
if (!(*dynamic_alloc_list)) {
|
||||
*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
|
||||
if (!(*dynamic_alloc_list))
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(*dynamic_alloc_list);
|
||||
}
|
||||
|
||||
free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
|
||||
if (!free_node)
|
||||
return -ENOMEM;
|
||||
|
||||
free_node->dynamic_alloc = 1;
|
||||
}
|
||||
|
||||
timestamp = ktime_to_ns(intr->timestamp);
|
||||
|
||||
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
|
||||
|
||||
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
|
||||
pend->ts_reg_info.timestamp_kernel_addr,
|
||||
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
|
||||
dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
|
||||
pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
|
||||
|
||||
list_del(&pend->wait_list_node);
|
||||
|
||||
/* Mark kernel CB node as free */
|
||||
pend->ts_reg_info.in_use = 0;
|
||||
list_del(&pend->list_node);
|
||||
|
||||
/* Putting the refcount for ts_buff and cq_cb objects will be handled
|
||||
* in workqueue context, just add job to free_list.
|
||||
*/
|
||||
free_node->buf = pend->ts_reg_info.buf;
|
||||
free_node->cq_cb = pend->ts_reg_info.cq_cb;
|
||||
list_add(&free_node->free_objects_node, *free_list);
|
||||
|
||||
if (free_node->dynamic_alloc) {
|
||||
list_add(&free_node->free_objects_node, *dynamic_alloc_list);
|
||||
} else {
|
||||
ts_free_jobs_data->next_avail_free_node_idx =
|
||||
(++free_node_index) % ts_free_jobs_data->free_nodes_length;
|
||||
list_add(&free_node->free_objects_node, *free_list);
|
||||
}
|
||||
|
||||
/* Mark TS record as free */
|
||||
pend->ts_reg_info.in_use = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr)
|
||||
static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
|
||||
{
|
||||
struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
|
||||
struct hl_user_pending_interrupt *pend, *temp_pend;
|
||||
struct list_head *ts_reg_free_list_head = NULL;
|
||||
struct timestamp_reg_work_obj *job;
|
||||
bool reg_node_handle_fail = false;
|
||||
unsigned long flags;
|
||||
int rc;
|
||||
|
||||
/* For registration nodes:
|
||||
@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
|
||||
* or in irq handler context at all (since release functions are long and
|
||||
* might sleep), so we will need to handle that part in workqueue context.
|
||||
* To avoid handling kmalloc failure which compels us rolling back actions
|
||||
* and move nodes hanged on the free list back to the interrupt wait list
|
||||
* and move nodes hanged on the free list back to the interrupt ts list
|
||||
* we always alloc the job of the WQ at the beginning.
|
||||
*/
|
||||
job = kmalloc(sizeof(*job), GFP_ATOMIC);
|
||||
if (!job)
|
||||
return;
|
||||
|
||||
spin_lock(&intr->wait_list_lock);
|
||||
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
|
||||
spin_lock_irqsave(&intr->ts_list_lock, flags);
|
||||
list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
|
||||
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
|
||||
!pend->cq_kernel_addr) {
|
||||
if (pend->ts_reg_info.buf) {
|
||||
if (!reg_node_handle_fail) {
|
||||
rc = handle_registration_node(hdev, pend,
|
||||
&ts_reg_free_list_head, intr->timestamp);
|
||||
if (rc)
|
||||
reg_node_handle_fail = true;
|
||||
}
|
||||
} else {
|
||||
/* Handle wait target value node */
|
||||
pend->fence.timestamp = intr->timestamp;
|
||||
complete_all(&pend->fence.completion);
|
||||
if (!reg_node_handle_fail) {
|
||||
rc = handle_registration_node(hdev, pend,
|
||||
&ts_reg_free_list_head,
|
||||
&dynamic_alloc_list_head, intr);
|
||||
if (rc)
|
||||
reg_node_handle_fail = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&intr->wait_list_lock);
|
||||
spin_unlock_irqrestore(&intr->ts_list_lock, flags);
|
||||
|
||||
if (ts_reg_free_list_head) {
|
||||
INIT_WORK(&job->free_obj, hl_ts_free_objects);
|
||||
job->free_obj_head = ts_reg_free_list_head;
|
||||
job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
|
||||
job->hdev = hdev;
|
||||
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
|
||||
} else {
|
||||
@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
|
||||
}
|
||||
}
|
||||
|
||||
static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend, *temp_pend;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&intr->wait_list_lock, flags);
|
||||
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, list_node) {
|
||||
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
|
||||
!pend->cq_kernel_addr) {
|
||||
/* Handle wait target value node */
|
||||
pend->fence.timestamp = intr->timestamp;
|
||||
complete_all(&pend->fence.completion);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&intr->wait_list_lock, flags);
|
||||
}
|
||||
|
||||
static void handle_tpc_interrupt(struct hl_device *hdev)
|
||||
{
|
||||
u64 event_mask;
|
||||
@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_irq_handler_user_interrupt - irq handler for user interrupts
|
||||
* hl_irq_user_interrupt_handler - irq handler for user interrupts.
|
||||
*
|
||||
* @irq: irq number
|
||||
* @arg: pointer to user interrupt structure
|
||||
*
|
||||
*/
|
||||
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
|
||||
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
|
||||
{
|
||||
struct hl_user_interrupt *user_int = arg;
|
||||
struct hl_device *hdev = user_int->hdev;
|
||||
|
||||
user_int->timestamp = ktime_get();
|
||||
switch (user_int->type) {
|
||||
case HL_USR_INTERRUPT_CQ:
|
||||
/* First handle user waiters threads */
|
||||
handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
|
||||
handle_user_interrupt_wait_list(hdev, user_int);
|
||||
|
||||
return IRQ_WAKE_THREAD;
|
||||
/* Second handle user timestamp registrations */
|
||||
handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt);
|
||||
handle_user_interrupt_ts_list(hdev, user_int);
|
||||
break;
|
||||
case HL_USR_INTERRUPT_DECODER:
|
||||
handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
|
||||
|
||||
/* Handle decoder interrupt registered on this specific irq */
|
||||
handle_user_interrupt_wait_list(hdev, user_int);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
|
||||
struct hl_user_interrupt *user_int = arg;
|
||||
struct hl_device *hdev = user_int->hdev;
|
||||
|
||||
user_int->timestamp = ktime_get();
|
||||
switch (user_int->type) {
|
||||
case HL_USR_INTERRUPT_CQ:
|
||||
handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt);
|
||||
|
||||
/* Handle user cq interrupt registered on this specific irq */
|
||||
handle_user_interrupt(hdev, user_int);
|
||||
break;
|
||||
case HL_USR_INTERRUPT_DECODER:
|
||||
handle_user_interrupt(hdev, &hdev->common_decoder_interrupt);
|
||||
|
||||
/* Handle decoder interrupt registered on this specific irq */
|
||||
handle_user_interrupt(hdev, user_int);
|
||||
break;
|
||||
case HL_USR_INTERRUPT_TPC:
|
||||
handle_tpc_interrupt(hdev);
|
||||
break;
|
||||
@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
|
||||
{
|
||||
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
|
||||
struct hl_device *hdev = arg;
|
||||
|
||||
dev_err(hdev->dev, "EQ error interrupt received\n");
|
||||
|
||||
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_irq_handler_eq - irq handler for event queue
|
||||
*
|
||||
|
@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
|
||||
|
||||
*p_userptr = userptr;
|
||||
|
||||
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
|
||||
rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
|
||||
goto dma_map_err;
|
||||
@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
* physical pages
|
||||
*
|
||||
* This function does the following:
|
||||
* - Pin the physical pages related to the given virtual block.
|
||||
* - Create a physical page pack from the physical pages related to the given
|
||||
* virtual block.
|
||||
*/
|
||||
@ -1532,24 +1531,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
|
||||
}
|
||||
|
||||
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
|
||||
u64 page_size, u64 exported_size,
|
||||
u64 page_size, u64 exported_size, u64 offset,
|
||||
struct device *dev, enum dma_data_direction dir)
|
||||
{
|
||||
u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
|
||||
struct asic_fixed_properties *prop;
|
||||
int rc, i, j, nents, cur_page;
|
||||
u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
|
||||
left_size_in_dma_seg, device_address, bar_address, start_page;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct scatterlist *sg;
|
||||
unsigned int nents, i;
|
||||
struct sg_table *sgt;
|
||||
bool next_sg_entry;
|
||||
int rc;
|
||||
|
||||
prop = &hdev->asic_prop;
|
||||
|
||||
dma_max_seg_size = dma_get_max_seg_size(dev);
|
||||
|
||||
/* We would like to align the max segment size to PAGE_SIZE, so the
|
||||
* SGL will contain aligned addresses that can be easily mapped to
|
||||
* an MMU
|
||||
*/
|
||||
dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
|
||||
/* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
|
||||
dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
|
||||
if (dma_max_seg_size < PAGE_SIZE) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
|
||||
@ -1561,121 +1556,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
|
||||
if (!sgt)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* remove export size restrictions in case not explicitly defined */
|
||||
cur_size_to_export = exported_size ? exported_size : (npages * page_size);
|
||||
/* Use the offset to move to the actual first page that is exported */
|
||||
for (start_page = 0 ; start_page < npages ; ++start_page) {
|
||||
if (offset < page_size)
|
||||
break;
|
||||
|
||||
/* If the size of each page is larger than the dma max segment size,
|
||||
* then we can't combine pages and the number of entries in the SGL
|
||||
* will just be the
|
||||
* <number of pages> * <chunks of max segment size in each page>
|
||||
*/
|
||||
if (page_size > dma_max_seg_size) {
|
||||
/* we should limit number of pages according to the exported size */
|
||||
cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
|
||||
nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
|
||||
} else {
|
||||
cur_npages = npages;
|
||||
/* The offset value was validated so there can't be an underflow */
|
||||
offset -= page_size;
|
||||
}
|
||||
|
||||
/* Get number of non-contiguous chunks */
|
||||
for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
|
||||
if (pages[i - 1] + page_size != pages[i] ||
|
||||
chunk_size + page_size > dma_max_seg_size) {
|
||||
nents++;
|
||||
chunk_size = page_size;
|
||||
continue;
|
||||
}
|
||||
/* Calculate the required number of entries for the SG table */
|
||||
curr_page = start_page;
|
||||
nents = 1;
|
||||
left_size_to_export = exported_size;
|
||||
left_size_in_page = page_size - offset;
|
||||
left_size_in_dma_seg = dma_max_seg_size;
|
||||
next_sg_entry = false;
|
||||
|
||||
chunk_size += page_size;
|
||||
while (true) {
|
||||
size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
|
||||
left_size_to_export -= size;
|
||||
left_size_in_page -= size;
|
||||
left_size_in_dma_seg -= size;
|
||||
|
||||
if (!left_size_to_export)
|
||||
break;
|
||||
|
||||
if (!left_size_in_page) {
|
||||
/* left_size_to_export is not zero so there must be another page */
|
||||
if (pages[curr_page] + page_size != pages[curr_page + 1])
|
||||
next_sg_entry = true;
|
||||
|
||||
++curr_page;
|
||||
left_size_in_page = page_size;
|
||||
}
|
||||
|
||||
if (!left_size_in_dma_seg) {
|
||||
next_sg_entry = true;
|
||||
left_size_in_dma_seg = dma_max_seg_size;
|
||||
}
|
||||
|
||||
if (next_sg_entry) {
|
||||
++nents;
|
||||
next_sg_entry = false;
|
||||
}
|
||||
}
|
||||
|
||||
rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
|
||||
if (rc)
|
||||
goto error_free;
|
||||
goto err_free_sgt;
|
||||
|
||||
cur_page = 0;
|
||||
/* Prepare the SG table entries */
|
||||
curr_page = start_page;
|
||||
device_address = pages[curr_page] + offset;
|
||||
left_size_to_export = exported_size;
|
||||
left_size_in_page = page_size - offset;
|
||||
left_size_in_dma_seg = dma_max_seg_size;
|
||||
next_sg_entry = false;
|
||||
|
||||
if (page_size > dma_max_seg_size) {
|
||||
u64 size_left, cur_device_address = 0;
|
||||
for_each_sgtable_dma_sg(sgt, sg, i) {
|
||||
bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
|
||||
chunk_size = 0;
|
||||
|
||||
size_left = page_size;
|
||||
for ( ; curr_page < npages ; ++curr_page) {
|
||||
size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
|
||||
chunk_size += size;
|
||||
left_size_to_export -= size;
|
||||
left_size_in_page -= size;
|
||||
left_size_in_dma_seg -= size;
|
||||
|
||||
/* Need to split each page into the number of chunks of
|
||||
* dma_max_seg_size
|
||||
*/
|
||||
for_each_sgtable_dma_sg(sgt, sg, i) {
|
||||
if (size_left == page_size)
|
||||
cur_device_address =
|
||||
pages[cur_page] - prop->dram_base_address;
|
||||
else
|
||||
cur_device_address += dma_max_seg_size;
|
||||
if (!left_size_to_export)
|
||||
break;
|
||||
|
||||
/* make sure not to export over exported size */
|
||||
chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
|
||||
if (!left_size_in_page) {
|
||||
/* left_size_to_export is not zero so there must be another page */
|
||||
if (pages[curr_page] + page_size != pages[curr_page + 1]) {
|
||||
device_address = pages[curr_page + 1];
|
||||
next_sg_entry = true;
|
||||
}
|
||||
|
||||
bar_address = hdev->dram_pci_bar_start + cur_device_address;
|
||||
|
||||
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
|
||||
if (rc)
|
||||
goto error_unmap;
|
||||
|
||||
cur_size_to_export -= chunk_size;
|
||||
|
||||
if (size_left > dma_max_seg_size) {
|
||||
size_left -= dma_max_seg_size;
|
||||
} else {
|
||||
cur_page++;
|
||||
size_left = page_size;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Merge pages and put them into the scatterlist */
|
||||
for_each_sgtable_dma_sg(sgt, sg, i) {
|
||||
chunk_size = page_size;
|
||||
for (j = cur_page + 1 ; j < cur_npages ; j++) {
|
||||
if (pages[j - 1] + page_size != pages[j] ||
|
||||
chunk_size + page_size > dma_max_seg_size)
|
||||
break;
|
||||
|
||||
chunk_size += page_size;
|
||||
left_size_in_page = page_size;
|
||||
}
|
||||
|
||||
bar_address = hdev->dram_pci_bar_start +
|
||||
(pages[cur_page] - prop->dram_base_address);
|
||||
if (!left_size_in_dma_seg) {
|
||||
/*
|
||||
* Skip setting a new device address if already moving to a page
|
||||
* which is not contiguous with the current page.
|
||||
*/
|
||||
if (!next_sg_entry) {
|
||||
device_address += chunk_size;
|
||||
next_sg_entry = true;
|
||||
}
|
||||
|
||||
/* make sure not to export over exported size */
|
||||
chunk_size = min(chunk_size, cur_size_to_export);
|
||||
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
|
||||
if (rc)
|
||||
goto error_unmap;
|
||||
left_size_in_dma_seg = dma_max_seg_size;
|
||||
}
|
||||
|
||||
cur_size_to_export -= chunk_size;
|
||||
cur_page = j;
|
||||
if (next_sg_entry) {
|
||||
next_sg_entry = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
|
||||
if (rc)
|
||||
goto err_unmap;
|
||||
}
|
||||
|
||||
/* Because we are not going to include a CPU list we want to have some
|
||||
* chance that other users will detect this by setting the orig_nents
|
||||
* to 0 and using only nents (length of DMA list) when going over the
|
||||
* sgl
|
||||
/* There should be nothing left to export exactly after looping over all SG elements */
|
||||
if (left_size_to_export) {
|
||||
dev_err(hdev->dev,
|
||||
"left size to export %#llx after initializing %u SG elements\n",
|
||||
left_size_to_export, sgt->nents);
|
||||
rc = -ENOMEM;
|
||||
goto err_unmap;
|
||||
}
|
||||
|
||||
/*
|
||||
* Because we are not going to include a CPU list, we want to have some chance that other
|
||||
* users will detect this when going over SG table, by setting the orig_nents to 0 and using
|
||||
* only nents (length of DMA list).
|
||||
*/
|
||||
sgt->orig_nents = 0;
|
||||
|
||||
dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n",
|
||||
nents, dev_name(dev));
|
||||
for_each_sgtable_dma_sg(sgt, sg, i)
|
||||
dev_dbg(hdev->dev,
|
||||
"SG entry %d: address %#llx, length %#x\n",
|
||||
i, sg_dma_address(sg), sg_dma_len(sg));
|
||||
|
||||
return sgt;
|
||||
|
||||
error_unmap:
|
||||
err_unmap:
|
||||
for_each_sgtable_dma_sg(sgt, sg, i) {
|
||||
if (!sg_dma_len(sg))
|
||||
continue;
|
||||
|
||||
dma_unmap_resource(dev, sg_dma_address(sg),
|
||||
sg_dma_len(sg), dir,
|
||||
dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
|
||||
DMA_ATTR_SKIP_CPU_SYNC);
|
||||
}
|
||||
|
||||
sg_free_table(sgt);
|
||||
|
||||
error_free:
|
||||
err_free_sgt:
|
||||
kfree(sgt);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
@ -1700,6 +1723,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
|
||||
static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
u64 *pages, npages, page_size, exported_size, offset;
|
||||
struct dma_buf *dma_buf = attachment->dmabuf;
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
struct hl_dmabuf_priv *hl_dmabuf;
|
||||
@ -1708,30 +1732,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
|
||||
|
||||
hl_dmabuf = dma_buf->priv;
|
||||
hdev = hl_dmabuf->ctx->hdev;
|
||||
phys_pg_pack = hl_dmabuf->phys_pg_pack;
|
||||
|
||||
if (!attachment->peer2peer) {
|
||||
dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
|
||||
return ERR_PTR(-EPERM);
|
||||
}
|
||||
|
||||
if (phys_pg_pack)
|
||||
sgt = alloc_sgt_from_device_pages(hdev,
|
||||
phys_pg_pack->pages,
|
||||
phys_pg_pack->npages,
|
||||
phys_pg_pack->page_size,
|
||||
phys_pg_pack->exported_size,
|
||||
attachment->dev,
|
||||
dir);
|
||||
else
|
||||
sgt = alloc_sgt_from_device_pages(hdev,
|
||||
&hl_dmabuf->device_address,
|
||||
1,
|
||||
hl_dmabuf->dmabuf->size,
|
||||
0,
|
||||
attachment->dev,
|
||||
dir);
|
||||
exported_size = hl_dmabuf->dmabuf->size;
|
||||
offset = hl_dmabuf->offset;
|
||||
phys_pg_pack = hl_dmabuf->phys_pg_pack;
|
||||
|
||||
if (phys_pg_pack) {
|
||||
pages = phys_pg_pack->pages;
|
||||
npages = phys_pg_pack->npages;
|
||||
page_size = phys_pg_pack->page_size;
|
||||
} else {
|
||||
pages = &hl_dmabuf->device_phys_addr;
|
||||
npages = 1;
|
||||
page_size = hl_dmabuf->dmabuf->size;
|
||||
}
|
||||
|
||||
sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset,
|
||||
attachment->dev, dir);
|
||||
if (IS_ERR(sgt))
|
||||
dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
|
||||
|
||||
@ -1818,7 +1840,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
/* Paired with get_file() in export_dmabuf() */
|
||||
fput(ctx->hpriv->filp);
|
||||
fput(ctx->hpriv->file_priv->filp);
|
||||
|
||||
kfree(hl_dmabuf);
|
||||
}
|
||||
@ -1864,7 +1886,7 @@ static int export_dmabuf(struct hl_ctx *ctx,
|
||||
* released first and only then the compute device.
|
||||
* Paired with fput() in hl_release_dmabuf().
|
||||
*/
|
||||
get_file(ctx->hpriv->filp);
|
||||
get_file(ctx->hpriv->file_priv->filp);
|
||||
|
||||
*dmabuf_fd = fd;
|
||||
|
||||
@ -1876,22 +1898,29 @@ static int export_dmabuf(struct hl_ctx *ctx,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
|
||||
static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
|
||||
{
|
||||
if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
|
||||
if (!PAGE_ALIGNED(addr)) {
|
||||
dev_dbg(hdev->dev,
|
||||
"exported device memory address 0x%llx should be aligned to 0x%lx\n",
|
||||
device_addr, PAGE_SIZE);
|
||||
"exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
|
||||
addr, PAGE_SIZE);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (size < PAGE_SIZE) {
|
||||
if (!size || !PAGE_ALIGNED(size)) {
|
||||
dev_dbg(hdev->dev,
|
||||
"exported device memory size %llu should be equal to or greater than %lu\n",
|
||||
"exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n",
|
||||
size, PAGE_SIZE);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!PAGE_ALIGNED(offset)) {
|
||||
dev_dbg(hdev->dev,
|
||||
"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
|
||||
offset, PAGE_SIZE);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1901,13 +1930,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr
|
||||
u64 bar_address;
|
||||
int rc;
|
||||
|
||||
rc = validate_export_params_common(hdev, device_addr, size);
|
||||
rc = validate_export_params_common(hdev, device_addr, size, 0);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (device_addr < prop->dram_user_base_address ||
|
||||
(device_addr + size) > prop->dram_end_address ||
|
||||
(device_addr + size) < device_addr) {
|
||||
(device_addr + size) > prop->dram_end_address ||
|
||||
(device_addr + size) < device_addr) {
|
||||
dev_dbg(hdev->dev,
|
||||
"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
|
||||
device_addr, size);
|
||||
@ -1934,29 +1963,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
|
||||
u64 bar_address;
|
||||
int i, rc;
|
||||
|
||||
rc = validate_export_params_common(hdev, device_addr, size);
|
||||
rc = validate_export_params_common(hdev, device_addr, size, offset);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if ((offset + size) > phys_pg_pack->total_size) {
|
||||
dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
|
||||
offset, size, phys_pg_pack->total_size);
|
||||
offset, size, phys_pg_pack->total_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
|
||||
|
||||
bar_address = hdev->dram_pci_bar_start +
|
||||
(phys_pg_pack->pages[i] - prop->dram_base_address);
|
||||
(phys_pg_pack->pages[i] - prop->dram_base_address);
|
||||
|
||||
if ((bar_address + phys_pg_pack->page_size) >
|
||||
(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
|
||||
(bar_address + phys_pg_pack->page_size) < bar_address) {
|
||||
dev_dbg(hdev->dev,
|
||||
"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
|
||||
phys_pg_pack->pages[i],
|
||||
phys_pg_pack->page_size);
|
||||
|
||||
phys_pg_pack->pages[i], phys_pg_pack->page_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
@ -2012,7 +2038,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
|
||||
struct asic_fixed_properties *prop;
|
||||
struct hl_dmabuf_priv *hl_dmabuf;
|
||||
struct hl_device *hdev;
|
||||
u64 export_addr;
|
||||
int rc;
|
||||
|
||||
hdev = ctx->hdev;
|
||||
@ -2024,8 +2049,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
export_addr = addr + offset;
|
||||
|
||||
hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
|
||||
if (!hl_dmabuf)
|
||||
return -ENOMEM;
|
||||
@ -2041,20 +2064,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
|
||||
rc = PTR_ERR(phys_pg_pack);
|
||||
goto dec_memhash_export_cnt;
|
||||
}
|
||||
rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
|
||||
rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack);
|
||||
if (rc)
|
||||
goto dec_memhash_export_cnt;
|
||||
|
||||
phys_pg_pack->exported_size = size;
|
||||
hl_dmabuf->phys_pg_pack = phys_pg_pack;
|
||||
hl_dmabuf->memhash_hnode = hnode;
|
||||
hl_dmabuf->offset = offset;
|
||||
} else {
|
||||
rc = validate_export_params_no_mmu(hdev, export_addr, size);
|
||||
rc = validate_export_params_no_mmu(hdev, addr, size);
|
||||
if (rc)
|
||||
goto err_free_dmabuf_wrapper;
|
||||
}
|
||||
|
||||
hl_dmabuf->device_address = export_addr;
|
||||
hl_dmabuf->device_phys_addr = addr;
|
||||
}
|
||||
|
||||
rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
|
||||
if (rc)
|
||||
@ -2171,8 +2194,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
|
||||
{
|
||||
struct hl_fpriv *hpriv = file_priv->driver_priv;
|
||||
enum hl_device_status status;
|
||||
union hl_mem_args *args = data;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
@ -2420,7 +2444,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
|
||||
hl_debugfs_remove_userptr(hdev, userptr);
|
||||
|
||||
if (userptr->dma_mapped)
|
||||
hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
|
||||
hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
|
||||
|
||||
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
|
||||
kvfree(userptr->pages);
|
||||
|
@ -63,6 +63,10 @@
|
||||
#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
|
||||
#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
|
||||
|
||||
MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
|
||||
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
|
||||
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
|
||||
|
||||
#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
|
||||
|
||||
#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
|
||||
@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
|
||||
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
|
||||
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
|
||||
|
||||
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
|
||||
@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
|
||||
static int gaudi_scrub_device_mem(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
|
||||
min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
|
||||
u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
|
||||
u64 addr, size, val = hdev->memory_scrub_val;
|
||||
ktime_t timeout;
|
||||
int rc = 0;
|
||||
@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
|
||||
|
||||
list_add_tail(&userptr->job_node, parser->job_userptr_list);
|
||||
|
||||
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
|
||||
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
|
||||
goto unpin_memory;
|
||||
@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
|
||||
return rc;
|
||||
|
||||
if (!strlen(prop->cpucp_info.card_name))
|
||||
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
|
||||
@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
|
||||
.asic_dma_pool_free = gaudi_dma_pool_free,
|
||||
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
|
||||
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
|
||||
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
|
||||
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
|
||||
.cs_parser = gaudi_cs_parser,
|
||||
.asic_dma_map_sgtable = hl_dma_map_sgtable,
|
||||
.dma_map_sgtable = hl_asic_dma_map_sgtable,
|
||||
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
|
||||
.update_eq_ci = gaudi_update_eq_ci,
|
||||
.context_switch = gaudi_context_switch,
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
#include <uapi/drm/habanalabs_accel.h>
|
||||
#include "../common/habanalabs.h"
|
||||
#include "../include/common/hl_boot_if.h"
|
||||
#include <linux/habanalabs/hl_boot_if.h>
|
||||
#include "../include/gaudi/gaudi_packets.h"
|
||||
#include "../include/gaudi/gaudi.h"
|
||||
#include "../include/gaudi/gaudi_async_events.h"
|
||||
|
@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev,
|
||||
|
||||
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
|
||||
|
||||
val = RREG32(base_reg + 0x20);
|
||||
|
||||
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
|
||||
return 0;
|
||||
|
||||
val = RREG32(base_reg + 0x304);
|
||||
val |= 0x1000;
|
||||
WREG32(base_reg + 0x304, val);
|
||||
@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev,
|
||||
|
||||
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
|
||||
|
||||
val = RREG32(mmPSOC_ETR_CTL);
|
||||
|
||||
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
|
||||
return 0;
|
||||
|
||||
|
||||
|
||||
val = RREG32(mmPSOC_ETR_FFCR);
|
||||
val |= 0x1000;
|
||||
WREG32(mmPSOC_ETR_FFCR, val);
|
||||
|
@ -66,7 +66,6 @@
|
||||
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
|
||||
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
|
||||
#define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
|
||||
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5
|
||||
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
|
||||
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
|
||||
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
|
||||
@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
|
||||
"sbte_prtn_intr_4",
|
||||
};
|
||||
|
||||
static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
|
||||
"i0",
|
||||
"i1",
|
||||
"i2",
|
||||
"i3",
|
||||
"i4",
|
||||
};
|
||||
|
||||
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
|
||||
"WBC ERR RESP_0",
|
||||
"WBC ERR RESP_1",
|
||||
@ -993,6 +984,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
|
||||
"TLP is blocked by RR"
|
||||
};
|
||||
|
||||
static const int gaudi2_queue_id_to_engine_id[] = {
|
||||
[GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
|
||||
[GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_MME,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_MME,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_MME,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_MME,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_TPC_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_TPC_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_TPC_2,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_TPC_3,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_TPC_4,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_TPC_5,
|
||||
[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
|
||||
GAUDI2_DCORE0_ENGINE_ID_TPC_6,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_TPC_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_TPC_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_TPC_2,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_TPC_3,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_TPC_4,
|
||||
[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
|
||||
GAUDI2_DCORE1_ENGINE_ID_TPC_5,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_TPC_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_TPC_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_TPC_2,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_TPC_3,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_TPC_4,
|
||||
[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
|
||||
GAUDI2_DCORE2_ENGINE_ID_TPC_5,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_TPC_0,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_TPC_1,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_TPC_2,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_TPC_3,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_TPC_4,
|
||||
[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
|
||||
GAUDI2_DCORE3_ENGINE_ID_TPC_5,
|
||||
[GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
|
||||
[GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
|
||||
[GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
|
||||
[GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
|
||||
[GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
|
||||
};
|
||||
|
||||
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
|
||||
[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
|
||||
[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
|
||||
@ -2001,7 +2097,8 @@ enum razwi_event_sources {
|
||||
RAZWI_PDMA,
|
||||
RAZWI_NIC,
|
||||
RAZWI_DEC,
|
||||
RAZWI_ROT
|
||||
RAZWI_ROT,
|
||||
RAZWI_ARC_FARM
|
||||
};
|
||||
|
||||
struct hbm_mc_error_causes {
|
||||
@ -2431,7 +2528,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
|
||||
prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
|
||||
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
|
||||
|
||||
strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
|
||||
strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
|
||||
|
||||
prop->mme_master_slave_mode = 1;
|
||||
|
||||
@ -2884,7 +2981,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
if (!strlen(prop->cpucp_info.card_name))
|
||||
strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
|
||||
strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
/* Overwrite binning masks with the actual binning values from F/W */
|
||||
hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
|
||||
@ -4077,6 +4175,8 @@ static const char *gaudi2_irq_name(u16 irq_number)
|
||||
return "gaudi2 unexpected error";
|
||||
case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
|
||||
return "gaudi2 user completion";
|
||||
case GAUDI2_IRQ_NUM_EQ_ERROR:
|
||||
return "gaudi2 eq error";
|
||||
default:
|
||||
return "invalid";
|
||||
}
|
||||
@ -4127,9 +4227,7 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev)
|
||||
rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
|
||||
gaudi2_irq_name(i), (void *) dec);
|
||||
} else {
|
||||
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
|
||||
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
|
||||
gaudi2_irq_name(i),
|
||||
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
|
||||
(void *) &hdev->user_interrupt[dec->core_id]);
|
||||
}
|
||||
|
||||
@ -4187,17 +4285,17 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
|
||||
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
|
||||
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
|
||||
gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
|
||||
rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
|
||||
gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
|
||||
&hdev->tpc_interrupt);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
|
||||
goto free_dec_irq;
|
||||
}
|
||||
|
||||
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
|
||||
rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
|
||||
gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
|
||||
rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
|
||||
gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
|
||||
&hdev->unexpected_error_interrupt);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
|
||||
@ -4209,16 +4307,23 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
|
||||
i++, j++, user_irq_init_cnt++) {
|
||||
|
||||
irq = pci_irq_vector(hdev->pdev, i);
|
||||
rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
|
||||
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
|
||||
gaudi2_irq_name(i), &hdev->user_interrupt[j]);
|
||||
|
||||
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
|
||||
&hdev->user_interrupt[j]);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
|
||||
goto free_user_irq;
|
||||
}
|
||||
}
|
||||
|
||||
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
|
||||
rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
|
||||
IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
|
||||
hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
|
||||
goto free_user_irq;
|
||||
}
|
||||
|
||||
gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
|
||||
|
||||
return 0;
|
||||
@ -4278,6 +4383,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
|
||||
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
|
||||
}
|
||||
|
||||
static void gaudi2_disable_msix(struct hl_device *hdev)
|
||||
@ -4314,6 +4420,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
|
||||
cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
|
||||
free_irq(irq, cq);
|
||||
|
||||
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
|
||||
free_irq(irq, hdev);
|
||||
|
||||
pci_free_irq_vectors(hdev->pdev);
|
||||
|
||||
gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
|
||||
@ -4716,6 +4825,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
|
||||
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
|
||||
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
|
||||
pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
|
||||
pre_fw_load->wait_for_preboot_extended_timeout =
|
||||
GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
|
||||
}
|
||||
|
||||
static void gaudi2_init_firmware_loader(struct hl_device *hdev)
|
||||
@ -6157,17 +6268,14 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_
|
||||
static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
|
||||
u32 poll_timeout_us)
|
||||
{
|
||||
struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
|
||||
int rc = 0;
|
||||
int rc;
|
||||
|
||||
if (!driver_performs_reset) {
|
||||
if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
|
||||
/* set SP to indicate reset request sent to FW */
|
||||
if (dyn_regs->cpu_rst_status)
|
||||
WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
|
||||
else
|
||||
WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
|
||||
WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
|
||||
WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
|
||||
|
||||
WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
|
||||
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
|
||||
|
||||
/* wait for f/w response */
|
||||
@ -6623,24 +6731,6 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s
|
||||
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
|
||||
}
|
||||
|
||||
static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
dma_addr_t dma_addr;
|
||||
|
||||
dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
|
||||
if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
|
||||
return 0;
|
||||
|
||||
return dma_addr;
|
||||
}
|
||||
|
||||
static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
|
||||
}
|
||||
|
||||
static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
|
||||
{
|
||||
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
|
||||
@ -7703,11 +7793,13 @@ static inline bool is_info_event(u32 event)
|
||||
switch (event) {
|
||||
case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
|
||||
case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
|
||||
case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
|
||||
|
||||
/* return in case of NIC status event - these events are received periodically and not as
|
||||
* an indication to an error.
|
||||
*/
|
||||
case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
|
||||
case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -7739,21 +7831,34 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
|
||||
static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
|
||||
struct hl_eq_ecc_data *ecc_data)
|
||||
{
|
||||
u64 ecc_address = 0, ecc_syndrom = 0;
|
||||
u64 ecc_address = 0, ecc_syndrome = 0;
|
||||
u8 memory_wrapper_idx = 0;
|
||||
bool has_block_id = false;
|
||||
u16 block_id;
|
||||
|
||||
if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
|
||||
has_block_id = true;
|
||||
|
||||
ecc_address = le64_to_cpu(ecc_data->ecc_address);
|
||||
ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
|
||||
ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
|
||||
memory_wrapper_idx = ecc_data->memory_wrapper_idx;
|
||||
|
||||
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
|
||||
"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
|
||||
ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
|
||||
if (has_block_id) {
|
||||
block_id = le16_to_cpu(ecc_data->block_id);
|
||||
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
|
||||
"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
|
||||
ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
|
||||
ecc_data->is_critical);
|
||||
} else {
|
||||
gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
|
||||
"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
|
||||
ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
|
||||
}
|
||||
|
||||
return !!ecc_data->is_critical;
|
||||
}
|
||||
|
||||
static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
|
||||
static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
|
||||
{
|
||||
u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
|
||||
u64 cq_ptr, arc_cq_ptr, cp_current_inst;
|
||||
@ -7775,10 +7880,22 @@ static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
|
||||
dev_info(hdev->dev,
|
||||
"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
|
||||
cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
|
||||
|
||||
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
|
||||
if (arc_cq_ptr) {
|
||||
hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
|
||||
hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
|
||||
} else {
|
||||
hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
|
||||
hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
|
||||
}
|
||||
|
||||
hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
|
||||
}
|
||||
}
|
||||
|
||||
static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
|
||||
u64 qman_base, u32 qid_base)
|
||||
u64 qman_base, u32 qid_base, u64 *event_mask)
|
||||
{
|
||||
u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
|
||||
u64 glbl_sts_addr, arb_err_addr;
|
||||
@ -7812,8 +7929,22 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
|
||||
error_count++;
|
||||
}
|
||||
|
||||
if (i == QMAN_STREAMS)
|
||||
print_lower_qman_data_on_err(hdev, qman_base);
|
||||
if (i == QMAN_STREAMS && error_count) {
|
||||
/* check for undefined opcode */
|
||||
if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
|
||||
hdev->captured_err_info.undef_opcode.write_enable) {
|
||||
memset(&hdev->captured_err_info.undef_opcode, 0,
|
||||
sizeof(hdev->captured_err_info.undef_opcode));
|
||||
|
||||
hdev->captured_err_info.undef_opcode.write_enable = false;
|
||||
hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
|
||||
hdev->captured_err_info.undef_opcode.engine_id =
|
||||
gaudi2_queue_id_to_engine_id[qid_base];
|
||||
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
|
||||
}
|
||||
|
||||
handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
|
||||
}
|
||||
}
|
||||
|
||||
arb_err_val = RREG32(arb_err_addr);
|
||||
@ -7927,6 +8058,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
|
||||
case RAZWI_ROT:
|
||||
return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
|
||||
|
||||
case RAZWI_ARC_FARM:
|
||||
return GAUDI2_ENGINE_ID_ARC_FARM;
|
||||
|
||||
default:
|
||||
return GAUDI2_ENGINE_ID_SIZE;
|
||||
}
|
||||
@ -8036,6 +8170,11 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
|
||||
lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
|
||||
sprintf(initiator_name, "ROT_%u", module_idx);
|
||||
break;
|
||||
case RAZWI_ARC_FARM:
|
||||
lbw_rtr_id = DCORE1_RTR5;
|
||||
hbw_rtr_id = DCORE1_RTR7;
|
||||
sprintf(initiator_name, "ARC_FARM_%u", module_idx);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
@ -8149,11 +8288,11 @@ static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u
|
||||
eng_id[num_of_eng] = razwi_info[i].eng_id;
|
||||
base[num_of_eng] = razwi_info[i].rtr_ctrl;
|
||||
if (!num_of_eng)
|
||||
str_size += snprintf(eng_name + str_size,
|
||||
str_size += scnprintf(eng_name + str_size,
|
||||
PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
|
||||
razwi_info[i].eng_name);
|
||||
else
|
||||
str_size += snprintf(eng_name + str_size,
|
||||
str_size += scnprintf(eng_name + str_size,
|
||||
PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
|
||||
razwi_info[i].eng_name);
|
||||
num_of_eng++;
|
||||
@ -8475,7 +8614,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
|
||||
return 0;
|
||||
}
|
||||
|
||||
error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
|
||||
error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
|
||||
qid_base, event_mask);
|
||||
|
||||
/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
|
||||
if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
|
||||
@ -8488,7 +8628,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
|
||||
return error_count;
|
||||
}
|
||||
|
||||
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
|
||||
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
|
||||
{
|
||||
u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
|
||||
|
||||
@ -8510,6 +8650,7 @@ static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type
|
||||
sts_clr_val);
|
||||
}
|
||||
|
||||
gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
|
||||
hl_check_for_glbl_errors(hdev);
|
||||
|
||||
return error_count;
|
||||
@ -8649,21 +8790,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
|
||||
return error_count;
|
||||
}
|
||||
|
||||
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
|
||||
u64 intr_cause_data)
|
||||
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
|
||||
{
|
||||
int i, error_count = 0;
|
||||
|
||||
for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
|
||||
if (intr_cause_data & BIT(i)) {
|
||||
gaudi2_print_event(hdev, event_type, true,
|
||||
"err cause: %s", guadi2_mme_sbte_error_cause[i]);
|
||||
error_count++;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have a single error cause here but the report mechanism is
|
||||
* buggy. Hence there is no good reason to fetch the cause so we
|
||||
* just check for glbl_errors and exit.
|
||||
*/
|
||||
hl_check_for_glbl_errors(hdev);
|
||||
|
||||
return error_count;
|
||||
return GAUDI2_NA_EVENT_CAUSE;
|
||||
}
|
||||
|
||||
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
|
||||
@ -9460,6 +9596,176 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Map a TPC index to its engine id. Returns U16_MAX when the index is in none
 * of the four DCOREx TPC0 ... TPC5 ranges.
 *
 * NOTE(review): only TPC0 ... TPC5 of each DCORE are handled here, while the
 * queue-id table in this file also references GAUDI2_DCORE0_ENGINE_ID_TPC_6.
 * An index for a seventh DCORE0 TPC, if one can reach this point, yields
 * U16_MAX - confirm that is intended.
 */
static u16 engine_id_from_tpc_index(u16 tpc_idx)
{
	switch (tpc_idx) {
	case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
		return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + tpc_idx;
	case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
		return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + tpc_idx - TPC_ID_DCORE1_TPC0;
	case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
		return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + tpc_idx - TPC_ID_DCORE2_TPC0;
	case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
		return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + tpc_idx - TPC_ID_DCORE3_TPC0;
	default:
		return U16_MAX;
	}
}

/* Map an MME index to its engine id; U16_MAX for an unknown index. */
static u16 engine_id_from_mme_index(u16 mme_idx)
{
	switch (mme_idx) {
	case MME_ID_DCORE0:
		return GAUDI2_DCORE0_ENGINE_ID_MME;
	case MME_ID_DCORE1:
		return GAUDI2_DCORE1_ENGINE_ID_MME;
	case MME_ID_DCORE2:
		return GAUDI2_DCORE2_ENGINE_ID_MME;
	case MME_ID_DCORE3:
		return GAUDI2_DCORE3_ENGINE_ID_MME;
	default:
		return U16_MAX;
	}
}

/* Map a decoder index to its engine id; U16_MAX for an unknown index. */
static u16 engine_id_from_dec_index(u16 dec_idx)
{
	switch (dec_idx) {
	case DEC_ID_DCORE0_DEC0:
		return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
	case DEC_ID_DCORE0_DEC1:
		return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
	case DEC_ID_DCORE1_DEC0:
		return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
	case DEC_ID_DCORE1_DEC1:
		return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
	case DEC_ID_DCORE2_DEC0:
		return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
	case DEC_ID_DCORE2_DEC1:
		return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
	case DEC_ID_DCORE3_DEC0:
		return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
	case DEC_ID_DCORE3_DEC1:
		return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
	case DEC_ID_PCIE_VDEC0:
		return GAUDI2_PCIE_ENGINE_ID_DEC_0;
	case DEC_ID_PCIE_VDEC1:
		return GAUDI2_PCIE_ENGINE_ID_DEC_1;
	default:
		return U16_MAX;
	}
}

/*
 * event_id_to_engine_id() - translate an event type into an engine id.
 * @hdev: habanalabs device structure (not used by the translation itself).
 * @event_type: the event type to translate.
 *
 * TPC, MME and decoder events are first reduced to a per-type index and then
 * mapped through the helpers above; all other supported events map directly.
 *
 * Return: the matching engine id, or U16_MAX when the event type has no
 *         engine associated with it here.
 */
static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
{
	u16 idx;

	switch (event_type) {
	/* TPC: one event id per TPC in these two ranges */
	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		idx = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
		return engine_id_from_tpc_index(idx);
	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
		idx = event_type - GAUDI2_EVENT_TPC0_QM;
		return engine_id_from_tpc_index(idx);
	/* TPC: every two consecutive event ids in this range share one index */
	case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
		idx = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
		return engine_id_from_tpc_index(idx);

	/* MME */
	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME0_QM:
		return engine_id_from_mme_index(0);
	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME1_QM:
		return engine_id_from_mme_index(1);
	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME2_QM:
		return engine_id_from_mme_index(2);
	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME3_QM:
		return engine_id_from_mme_index(3);

	/* Decoders */
	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
		idx = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
		return engine_id_from_dec_index(idx);
	/* every two consecutive event ids in this range share one index */
	case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
		idx = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
		return engine_id_from_dec_index(idx);

	/* KDMA / PDMA */
	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_KDMA_BM_SPMU:
	case GAUDI2_EVENT_KDMA0_CORE:
		return GAUDI2_ENGINE_ID_KDMA;
	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA0_CORE:
	case GAUDI2_EVENT_PDMA0_BM_SPMU:
	case GAUDI2_EVENT_PDMA0_QM:
		return GAUDI2_ENGINE_ID_PDMA_0;
	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA1_CORE:
	case GAUDI2_EVENT_PDMA1_BM_SPMU:
	case GAUDI2_EVENT_PDMA1_QM:
		return GAUDI2_ENGINE_ID_PDMA_1;

	/* NIC: the result is an offset from the first NIC engine id */
	case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
		idx = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
		return GAUDI2_ENGINE_ID_NIC0_0 + (idx * 2);
	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
		idx = event_type - GAUDI2_EVENT_NIC0_QM0;
		return GAUDI2_ENGINE_ID_NIC0_0 + idx;
	case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
		idx = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
		return GAUDI2_ENGINE_ID_NIC0_0 + (idx * 2);

	/* Rotators */
	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
		return GAUDI2_ENGINE_ID_ROT_0;
	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
		return GAUDI2_ENGINE_ID_ROT_1;

	/* HDMA events map to the EDMA engine ids, two per DCORE */
	case GAUDI2_EVENT_HDMA0_BM_SPMU:
	case GAUDI2_EVENT_HDMA0_QM:
	case GAUDI2_EVENT_HDMA0_CORE:
		return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA1_BM_SPMU:
	case GAUDI2_EVENT_HDMA1_QM:
	case GAUDI2_EVENT_HDMA1_CORE:
		return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA2_BM_SPMU:
	case GAUDI2_EVENT_HDMA2_QM:
	case GAUDI2_EVENT_HDMA2_CORE:
		return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA3_BM_SPMU:
	case GAUDI2_EVENT_HDMA3_QM:
	case GAUDI2_EVENT_HDMA3_CORE:
		return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA4_BM_SPMU:
	case GAUDI2_EVENT_HDMA4_QM:
	case GAUDI2_EVENT_HDMA4_CORE:
		return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA5_BM_SPMU:
	case GAUDI2_EVENT_HDMA5_QM:
	case GAUDI2_EVENT_HDMA5_CORE:
		return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA6_BM_SPMU:
	case GAUDI2_EVENT_HDMA6_QM:
	case GAUDI2_EVENT_HDMA6_CORE:
		return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA7_BM_SPMU:
	case GAUDI2_EVENT_HDMA7_QM:
	case GAUDI2_EVENT_HDMA7_CORE:
		return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;

	default:
		/* no engine is associated with this event type */
		return U16_MAX;
	}
}
|
||||
|
||||
static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
|
||||
{
|
||||
hdev->eq_heartbeat_received = true;
|
||||
}
|
||||
|
||||
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
|
||||
{
|
||||
struct gaudi2_device *gaudi2 = hdev->asic_specific;
|
||||
@ -9501,7 +9807,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
break;
|
||||
|
||||
case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
|
||||
error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
|
||||
error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
|
||||
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
|
||||
break;
|
||||
|
||||
@ -9724,8 +10030,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
|
||||
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
|
||||
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
|
||||
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
|
||||
le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
|
||||
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
|
||||
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
|
||||
break;
|
||||
case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
|
||||
@ -9875,6 +10180,21 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
is_critical = true;
|
||||
break;
|
||||
|
||||
case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
|
||||
case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
|
||||
case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
|
||||
case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
|
||||
case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
|
||||
case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
|
||||
error_count = GAUDI2_NA_EVENT_CAUSE;
|
||||
dev_info_ratelimited(hdev->dev, "%s event received\n",
|
||||
gaudi2_irq_map_table[event_type].name);
|
||||
break;
|
||||
|
||||
case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
|
||||
hl_eq_heartbeat_event_handle(hdev);
|
||||
error_count = GAUDI2_NA_EVENT_CAUSE;
|
||||
break;
|
||||
default:
|
||||
if (gaudi2_irq_map_table[event_type].valid) {
|
||||
dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
|
||||
@ -9883,6 +10203,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
}
|
||||
}
|
||||
|
||||
if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
|
||||
hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
|
||||
|
||||
/* Make sure to dump an error in case no error cause was printed so far.
|
||||
* Note that although we have counted the errors, we use this number as
|
||||
* a boolean.
|
||||
@ -10523,6 +10846,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (ctx->asid == HL_KERNEL_ASID_ID)
|
||||
return 0;
|
||||
|
||||
rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
|
||||
if (rc)
|
||||
return rc;
|
||||
@ -11014,6 +11340,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64
|
||||
static void gaudi2_get_msi_info(__le32 *table)
|
||||
{
|
||||
table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
|
||||
table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
|
||||
}
|
||||
|
||||
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
|
||||
@ -11170,11 +11497,9 @@ static const struct hl_asic_funcs gaudi2_funcs = {
|
||||
.asic_dma_pool_free = gaudi2_dma_pool_free,
|
||||
.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
|
||||
.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
|
||||
.asic_dma_unmap_single = gaudi2_dma_unmap_single,
|
||||
.asic_dma_map_single = gaudi2_dma_map_single,
|
||||
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
|
||||
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
|
||||
.cs_parser = gaudi2_cs_parser,
|
||||
.asic_dma_map_sgtable = hl_dma_map_sgtable,
|
||||
.dma_map_sgtable = hl_asic_dma_map_sgtable,
|
||||
.add_end_of_cb_packets = NULL,
|
||||
.update_eq_ci = gaudi2_update_eq_ci,
|
||||
.context_switch = gaudi2_context_switch,
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
#include <uapi/drm/habanalabs_accel.h>
|
||||
#include "../common/habanalabs.h"
|
||||
#include "../include/common/hl_boot_if.h"
|
||||
#include <linux/habanalabs/hl_boot_if.h>
|
||||
#include "../include/gaudi2/gaudi2.h"
|
||||
#include "../include/gaudi2/gaudi2_packets.h"
|
||||
#include "../include/gaudi2/gaudi2_fw_if.h"
|
||||
@ -84,6 +84,7 @@
|
||||
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
|
||||
|
||||
#define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */
|
||||
#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000 /* 85s */
|
||||
|
||||
#define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */
|
||||
|
||||
@ -419,6 +420,7 @@ enum gaudi2_irq_num {
|
||||
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
|
||||
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
|
||||
GAUDI2_IRQ_NUM_TPC_ASSERT,
|
||||
GAUDI2_IRQ_NUM_EQ_ERROR,
|
||||
GAUDI2_IRQ_NUM_RESERVED_FIRST,
|
||||
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
|
||||
GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
|
||||
|
@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = {
|
||||
[GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE,
|
||||
[GAUDI2_STM_PCIE] = mmPCIE_STM_BASE,
|
||||
[GAUDI2_STM_PSOC] = mmPSOC_STM_BASE,
|
||||
[GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE,
|
||||
[GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE,
|
||||
[GAUDI2_STM_PSOC_ARC0_CS] = 0,
|
||||
[GAUDI2_STM_PSOC_ARC1_CS] = 0,
|
||||
[GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE,
|
||||
[GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE,
|
||||
[GAUDI2_STM_CPU] = mmCPU_STM_BASE,
|
||||
@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = {
|
||||
[GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE,
|
||||
[GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE,
|
||||
[GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE,
|
||||
[GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE,
|
||||
[GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE,
|
||||
[GAUDI2_ETF_PSOC_ARC0_CS] = 0,
|
||||
[GAUDI2_ETF_PSOC_ARC1_CS] = 0,
|
||||
[GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE,
|
||||
[GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE,
|
||||
[GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE,
|
||||
@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = {
|
||||
[GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE,
|
||||
[GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE,
|
||||
[GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
|
||||
[GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE,
|
||||
[GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE,
|
||||
[GAUDI2_FUNNEL_PSOC_ARC0] = 0,
|
||||
[GAUDI2_FUNNEL_PSOC_ARC1] = 0,
|
||||
[GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE,
|
||||
[GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
|
||||
[GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE,
|
||||
@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = {
|
||||
[GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
|
||||
[GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
|
||||
[GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
|
||||
[GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE,
|
||||
[GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE,
|
||||
[GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE,
|
||||
[GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE,
|
||||
[GAUDI2_BMON_PSOC_ARC0_0] = 0,
|
||||
[GAUDI2_BMON_PSOC_ARC0_1] = 0,
|
||||
[GAUDI2_BMON_PSOC_ARC1_0] = 0,
|
||||
[GAUDI2_BMON_PSOC_ARC1_1] = 0,
|
||||
[GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE,
|
||||
[GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE,
|
||||
[GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE,
|
||||
@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = {
|
||||
[GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE,
|
||||
[GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE,
|
||||
[GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE,
|
||||
[GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE,
|
||||
[GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE,
|
||||
[GAUDI2_SPMU_PSOC_ARC0_CS] = 0,
|
||||
[GAUDI2_SPMU_PSOC_ARC1_CS] = 0,
|
||||
[GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE,
|
||||
[GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE,
|
||||
[GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE,
|
||||
@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
|
||||
if (rc)
|
||||
return -EIO;
|
||||
|
||||
val = RREG32(base_reg + mmETF_CTL_OFFSET);
|
||||
|
||||
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
|
||||
return 0;
|
||||
|
||||
val = RREG32(base_reg + mmETF_FFCR_OFFSET);
|
||||
val |= 0x1000;
|
||||
WREG32(base_reg + mmETF_FFCR_OFFSET, val);
|
||||
@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
|
||||
if (!input)
|
||||
return -EINVAL;
|
||||
|
||||
val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2;
|
||||
if (val) {
|
||||
val = ffs(val);
|
||||
WREG32(base_reg + mmETF_PSCR_OFFSET, val);
|
||||
} else {
|
||||
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
|
||||
}
|
||||
|
||||
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC);
|
||||
WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode);
|
||||
WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001);
|
||||
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
|
||||
WREG32(base_reg + mmETF_CTL_OFFSET, 1);
|
||||
} else {
|
||||
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0);
|
||||
@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
if (rc)
|
||||
return -EIO;
|
||||
|
||||
val = RREG32(mmPSOC_ETR_CTL);
|
||||
|
||||
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
|
||||
return 0;
|
||||
|
||||
val = RREG32(mmPSOC_ETR_FFCR);
|
||||
val |= 0x1000;
|
||||
WREG32(mmPSOC_ETR_FFCR, val);
|
||||
@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa
|
||||
* set enabled events mask based on input->event_types_num
|
||||
*/
|
||||
event_mask = 0x80000000;
|
||||
event_mask |= GENMASK(input->event_types_num, 0);
|
||||
if (input->event_types_num)
|
||||
event_mask |= GENMASK(input->event_types_num - 1, 0);
|
||||
|
||||
WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask);
|
||||
} else {
|
||||
|
@ -1601,6 +1601,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = {
|
||||
mmDCORE0_TPC0_CFG_KERNEL_SRF_30,
|
||||
mmDCORE0_TPC0_CFG_KERNEL_SRF_31,
|
||||
mmDCORE0_TPC0_CFG_TPC_SB_L0CD,
|
||||
mmDCORE0_TPC0_CFG_TPC_COUNT,
|
||||
mmDCORE0_TPC0_CFG_TPC_ID,
|
||||
mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC,
|
||||
mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0,
|
||||
@ -2907,7 +2908,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
|
||||
* - range 11: NIC11_CFG + *_DBG (not including TPC_DBG)
|
||||
*
|
||||
* If F/W security is not enabled:
|
||||
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP)
|
||||
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF)
|
||||
*/
|
||||
u64 lbw_range_min_short[] = {
|
||||
mmNIC0_TX_AXUSER_BASE,
|
||||
@ -2923,7 +2924,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
|
||||
mmNIC10_TX_AXUSER_BASE,
|
||||
mmNIC11_TX_AXUSER_BASE,
|
||||
mmPSOC_I2C_M0_BASE,
|
||||
mmPSOC_EFUSE_BASE
|
||||
mmPSOC_GPIO0_BASE
|
||||
};
|
||||
u64 lbw_range_max_short[] = {
|
||||
mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE,
|
||||
@ -3219,6 +3220,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev)
|
||||
*/
|
||||
static int gaudi2_init_protection_bits(struct hl_device *hdev)
|
||||
{
|
||||
u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u32 instance_offset;
|
||||
int rc = 0;
|
||||
@ -3389,11 +3391,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
|
||||
/* PSOC.
|
||||
* Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are
|
||||
* protected by privileged RR.
|
||||
* For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine
|
||||
* cores to raise events towards F/W.
|
||||
*/
|
||||
engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE);
|
||||
if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 &&
|
||||
engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) {
|
||||
user_regs_array = &engine_core_intr_reg;
|
||||
user_regs_array_size = 1;
|
||||
} else {
|
||||
dev_err(hdev->dev,
|
||||
"Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n",
|
||||
engine_core_intr_reg);
|
||||
}
|
||||
|
||||
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
|
||||
HL_PB_SINGLE_INSTANCE, HL_PB_NA,
|
||||
gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf),
|
||||
NULL, HL_PB_NA);
|
||||
user_regs_array, user_regs_array_size);
|
||||
|
||||
if (!hdev->asic_prop.fw_security_enabled)
|
||||
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
|
||||
|
@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
|
||||
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
|
||||
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
|
||||
|
||||
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
|
||||
@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
|
||||
|
||||
list_add_tail(&userptr->job_node, parser->job_userptr_list);
|
||||
|
||||
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
|
||||
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
|
||||
goto unpin_memory;
|
||||
@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
|
||||
}
|
||||
|
||||
if (!strlen(prop->cpucp_info.card_name))
|
||||
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
|
||||
CARD_NAME_MAX_LEN);
|
||||
|
||||
return 0;
|
||||
@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = {
|
||||
.asic_dma_pool_free = goya_dma_pool_free,
|
||||
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
|
||||
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
|
||||
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
|
||||
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
|
||||
.cs_parser = goya_cs_parser,
|
||||
.asic_dma_map_sgtable = hl_dma_map_sgtable,
|
||||
.dma_map_sgtable = hl_asic_dma_map_sgtable,
|
||||
.add_end_of_cb_packets = goya_add_end_of_cb_packets,
|
||||
.update_eq_ci = goya_update_eq_ci,
|
||||
.context_switch = goya_context_switch,
|
||||
|
@ -9,8 +9,8 @@
|
||||
#define GOYAP_H_
|
||||
|
||||
#include <uapi/drm/habanalabs_accel.h>
|
||||
#include <linux/habanalabs/hl_boot_if.h>
|
||||
#include "../common/habanalabs.h"
|
||||
#include "../include/common/hl_boot_if.h"
|
||||
#include "../include/goya/goya_packets.h"
|
||||
#include "../include/goya/goya.h"
|
||||
#include "../include/goya/goya_async_events.h"
|
||||
|
@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev,
|
||||
|
||||
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
|
||||
|
||||
val = RREG32(base_reg + 0x20);
|
||||
|
||||
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
|
||||
return 0;
|
||||
|
||||
val = RREG32(base_reg + 0x304);
|
||||
val |= 0x1000;
|
||||
WREG32(base_reg + 0x304, val);
|
||||
@ -386,6 +391,11 @@ static int goya_config_etr(struct hl_device *hdev,
|
||||
|
||||
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
|
||||
|
||||
val = RREG32(mmPSOC_ETR_CTL);
|
||||
|
||||
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
|
||||
return 0;
|
||||
|
||||
val = RREG32(mmPSOC_ETR_FFCR);
|
||||
val |= 0x1000;
|
||||
WREG32(mmPSOC_ETR_FFCR, val);
|
||||
|
@ -44,38 +44,6 @@ struct eq_nic_sei_event {
|
||||
__u8 pad[6];
|
||||
};
|
||||
|
||||
/*
|
||||
* struct gaudi_nic_status - describes the status of a NIC port.
|
||||
* @port: NIC port index.
|
||||
* @bad_format_cnt: e.g. CRC.
|
||||
* @responder_out_of_sequence_psn_cnt: e.g NAK.
|
||||
* @high_ber_reinit_cnt: link reinit due to high BER.
|
||||
* @correctable_err_cnt: e.g. bit-flip.
|
||||
* @uncorrectable_err_cnt: e.g. MAC errors.
|
||||
* @retraining_cnt: re-training counter.
|
||||
* @up: is port up.
|
||||
* @pcs_link: has PCS link.
|
||||
* @phy_ready: is PHY ready.
|
||||
* @auto_neg: is Autoneg enabled.
|
||||
* @timeout_retransmission_cnt: timeout retransmission events
|
||||
* @high_ber_cnt: high ber events
|
||||
*/
|
||||
struct gaudi_nic_status {
|
||||
__u32 port;
|
||||
__u32 bad_format_cnt;
|
||||
__u32 responder_out_of_sequence_psn_cnt;
|
||||
__u32 high_ber_reinit;
|
||||
__u32 correctable_err_cnt;
|
||||
__u32 uncorrectable_err_cnt;
|
||||
__u32 retraining_cnt;
|
||||
__u8 up;
|
||||
__u8 pcs_link;
|
||||
__u8 phy_ready;
|
||||
__u8 auto_neg;
|
||||
__u32 timeout_retransmission_cnt;
|
||||
__u32 high_ber_cnt;
|
||||
};
|
||||
|
||||
struct gaudi_cold_rst_data {
|
||||
union {
|
||||
struct {
|
||||
|
@ -959,6 +959,13 @@ enum gaudi2_async_event_id {
|
||||
GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
|
||||
GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
|
||||
GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
|
||||
GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322,
|
||||
GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323,
|
||||
GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324,
|
||||
GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325,
|
||||
GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326,
|
||||
GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327,
|
||||
GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328,
|
||||
GAUDI2_EVENT_SIZE,
|
||||
};
|
||||
|
||||
|
@ -1293,7 +1293,7 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
|
||||
.name = "" },
|
||||
{ .fc_id = 631, .cpu_id = 128, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "PCIE_P2P_MSIX" },
|
||||
{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
|
||||
{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
|
||||
.name = "PCIE_DRAIN_COMPLETE" },
|
||||
{ .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "TPC0_BMON_SPMU" },
|
||||
@ -2673,6 +2673,20 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
|
||||
.name = "FP32_NOT_SUPPORTED" },
|
||||
{ .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD,
|
||||
.name = "DEV_RESET_REQ" },
|
||||
{ .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "ARC_PWR_BRK_ENTRY" },
|
||||
{ .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "ARC_PWR_BRK_EXT" },
|
||||
{ .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "ARC_PWR_RD_MODE0" },
|
||||
{ .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "ARC_PWR_RD_MODE1" },
|
||||
{ .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "ARC_PWR_RD_MODE2" },
|
||||
{ .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "ARC_PWR_RD_MODE3" },
|
||||
{ .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
|
||||
.name = "ARC_EQ_HEARTBEAT" },
|
||||
};
|
||||
|
||||
#endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */
|
||||
|
@ -2,7 +2,6 @@
|
||||
# Copyright (C) 2023 Intel Corporation
|
||||
|
||||
intel_vpu-y := \
|
||||
ivpu_debugfs.o \
|
||||
ivpu_drv.o \
|
||||
ivpu_fw.o \
|
||||
ivpu_fw_log.o \
|
||||
@ -16,4 +15,6 @@ intel_vpu-y := \
|
||||
ivpu_mmu_context.o \
|
||||
ivpu_pm.o
|
||||
|
||||
intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
|
||||
|
||||
obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
|
||||
|
@ -1,11 +0,0 @@
|
||||
- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler()
|
||||
- Implement support for BLOB IDs
|
||||
- Add debugfs support to improve debugging and testing
|
||||
- Add tracing events for performance debugging
|
||||
- Implement HW based scheduling support
|
||||
- Use syncobjs for submit/sync
|
||||
- Refactor IPC protocol to improve message latency
|
||||
- Implement BO cache and MADVISE IOCTL
|
||||
- Add support for user allocated buffers using prime import and dma-buf heaps
|
||||
- Refactor struct ivpu_bo to use struct drm_gem_shmem_object
|
||||
- Add driver/device documentation
|
@ -17,20 +17,26 @@
|
||||
#include "ivpu_jsm_msg.h"
|
||||
#include "ivpu_pm.h"
|
||||
|
||||
static inline struct ivpu_device *seq_to_ivpu(struct seq_file *s)
|
||||
{
|
||||
struct drm_debugfs_entry *entry = s->private;
|
||||
|
||||
return to_ivpu_device(entry->dev);
|
||||
}
|
||||
|
||||
static int bo_list_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)s->private;
|
||||
struct drm_printer p = drm_seq_file_printer(s);
|
||||
struct ivpu_device *vdev = seq_to_ivpu(s);
|
||||
|
||||
ivpu_bo_list(node->minor->dev, &p);
|
||||
ivpu_bo_list(&vdev->drm, &p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fw_name_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)s->private;
|
||||
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
|
||||
struct ivpu_device *vdev = seq_to_ivpu(s);
|
||||
|
||||
seq_printf(s, "%s\n", vdev->fw->name);
|
||||
return 0;
|
||||
@ -38,8 +44,7 @@ static int fw_name_show(struct seq_file *s, void *v)
|
||||
|
||||
static int fw_trace_capability_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)s->private;
|
||||
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
|
||||
struct ivpu_device *vdev = seq_to_ivpu(s);
|
||||
u64 trace_hw_component_mask;
|
||||
u32 trace_destination_mask;
|
||||
int ret;
|
||||
@ -57,8 +62,7 @@ static int fw_trace_capability_show(struct seq_file *s, void *v)
|
||||
|
||||
static int fw_trace_config_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)s->private;
|
||||
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
|
||||
struct ivpu_device *vdev = seq_to_ivpu(s);
|
||||
/**
|
||||
* WA: VPU_JSM_MSG_TRACE_GET_CONFIG command is not working yet,
|
||||
* so we use values from vdev->fw instead of calling ivpu_jsm_trace_get_config()
|
||||
@ -78,8 +82,7 @@ static int fw_trace_config_show(struct seq_file *s, void *v)
|
||||
|
||||
static int last_bootmode_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)s->private;
|
||||
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
|
||||
struct ivpu_device *vdev = seq_to_ivpu(s);
|
||||
|
||||
seq_printf(s, "%s\n", (vdev->pm->is_warmboot) ? "warmboot" : "coldboot");
|
||||
|
||||
@ -88,8 +91,7 @@ static int last_bootmode_show(struct seq_file *s, void *v)
|
||||
|
||||
static int reset_counter_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)s->private;
|
||||
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
|
||||
struct ivpu_device *vdev = seq_to_ivpu(s);
|
||||
|
||||
seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_counter));
|
||||
return 0;
|
||||
@ -97,14 +99,13 @@ static int reset_counter_show(struct seq_file *s, void *v)
|
||||
|
||||
static int reset_pending_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct drm_info_node *node = (struct drm_info_node *)s->private;
|
||||
struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
|
||||
struct ivpu_device *vdev = seq_to_ivpu(s);
|
||||
|
||||
seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct drm_info_list vdev_debugfs_list[] = {
|
||||
static const struct drm_debugfs_info vdev_debugfs_list[] = {
|
||||
{"bo_list", bo_list_show, 0},
|
||||
{"fw_name", fw_name_show, 0},
|
||||
{"fw_trace_capability", fw_trace_capability_show, 0},
|
||||
@ -270,25 +271,24 @@ static const struct file_operations ivpu_reset_engine_fops = {
|
||||
.write = ivpu_reset_engine_fn,
|
||||
};
|
||||
|
||||
void ivpu_debugfs_init(struct drm_minor *minor)
|
||||
void ivpu_debugfs_init(struct ivpu_device *vdev)
|
||||
{
|
||||
struct ivpu_device *vdev = to_ivpu_device(minor->dev);
|
||||
struct dentry *debugfs_root = vdev->drm.debugfs_root;
|
||||
|
||||
drm_debugfs_create_files(vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list),
|
||||
minor->debugfs_root, minor);
|
||||
drm_debugfs_add_files(&vdev->drm, vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list));
|
||||
|
||||
debugfs_create_file("force_recovery", 0200, minor->debugfs_root, vdev,
|
||||
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
|
||||
&ivpu_force_recovery_fops);
|
||||
|
||||
debugfs_create_file("fw_log", 0644, minor->debugfs_root, vdev,
|
||||
debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
|
||||
&fw_log_fops);
|
||||
debugfs_create_file("fw_trace_destination_mask", 0200, minor->debugfs_root, vdev,
|
||||
debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
|
||||
&fw_trace_destination_mask_fops);
|
||||
debugfs_create_file("fw_trace_hw_comp_mask", 0200, minor->debugfs_root, vdev,
|
||||
debugfs_create_file("fw_trace_hw_comp_mask", 0200, debugfs_root, vdev,
|
||||
&fw_trace_hw_comp_mask_fops);
|
||||
debugfs_create_file("fw_trace_level", 0200, minor->debugfs_root, vdev,
|
||||
debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev,
|
||||
&fw_trace_level_fops);
|
||||
|
||||
debugfs_create_file("reset_engine", 0200, minor->debugfs_root, vdev,
|
||||
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
|
||||
&ivpu_reset_engine_fops);
|
||||
}
|
||||
|
@ -6,8 +6,12 @@
|
||||
#ifndef __IVPU_DEBUGFS_H__
|
||||
#define __IVPU_DEBUGFS_H__
|
||||
|
||||
struct drm_minor;
|
||||
struct ivpu_device;
|
||||
|
||||
void ivpu_debugfs_init(struct drm_minor *minor);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
void ivpu_debugfs_init(struct ivpu_device *vdev);
|
||||
#else
|
||||
static inline void ivpu_debugfs_init(struct ivpu_device *vdev) { }
|
||||
#endif
|
||||
|
||||
#endif /* __IVPU_DEBUGFS_H__ */
|
||||
|
@ -131,6 +131,22 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ivpu_rpm_get_if_active(vdev);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
*clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
|
||||
|
||||
if (ret)
|
||||
ivpu_rpm_put(vdev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
|
||||
{
|
||||
struct ivpu_file_priv *file_priv = file->driver_priv;
|
||||
@ -154,7 +170,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
|
||||
args->value = vdev->platform;
|
||||
break;
|
||||
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
|
||||
args->value = ivpu_hw_reg_pll_freq_get(vdev);
|
||||
ret = ivpu_get_core_clock_rate(vdev, &args->value);
|
||||
break;
|
||||
case DRM_IVPU_PARAM_NUM_CONTEXTS:
|
||||
args->value = ivpu_get_context_count(vdev);
|
||||
@ -400,10 +416,6 @@ static const struct drm_driver driver = {
|
||||
.postclose = ivpu_postclose,
|
||||
.gem_prime_import = ivpu_gem_prime_import,
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
.debugfs_init = ivpu_debugfs_init,
|
||||
#endif
|
||||
|
||||
.ioctls = ivpu_drm_ioctls,
|
||||
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
|
||||
.fops = &ivpu_fops,
|
||||
@ -523,78 +535,52 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
|
||||
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
|
||||
|
||||
ret = ivpu_pci_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_xa_destroy;
|
||||
}
|
||||
|
||||
ret = ivpu_irq_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_xa_destroy;
|
||||
}
|
||||
|
||||
/* Init basic HW info based on buttress registers which are accessible before power up */
|
||||
ret = ivpu_hw_info_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_xa_destroy;
|
||||
}
|
||||
|
||||
/* Power up early so the rest of init code can access VPU registers */
|
||||
ret = ivpu_hw_power_up(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_xa_destroy;
|
||||
}
|
||||
|
||||
ret = ivpu_mmu_global_context_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_power_down;
|
||||
}
|
||||
|
||||
ret = ivpu_mmu_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_mmu_gctx_fini;
|
||||
|
||||
ret = ivpu_mmu_reserved_context_init(vdev);
|
||||
if (ret)
|
||||
goto err_mmu_gctx_fini;
|
||||
}
|
||||
|
||||
ret = ivpu_fw_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret);
|
||||
goto err_mmu_gctx_fini;
|
||||
}
|
||||
if (ret)
|
||||
goto err_mmu_rctx_fini;
|
||||
|
||||
ret = ivpu_ipc_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_fw_fini;
|
||||
}
|
||||
|
||||
ret = ivpu_pm_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize PM: %d\n", ret);
|
||||
goto err_ipc_fini;
|
||||
}
|
||||
ivpu_pm_init(vdev);
|
||||
|
||||
ret = ivpu_job_done_thread_init(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_ipc_fini;
|
||||
}
|
||||
|
||||
ret = ivpu_fw_load(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
|
||||
goto err_job_done_thread_fini;
|
||||
}
|
||||
|
||||
ret = ivpu_boot(vdev);
|
||||
if (ret) {
|
||||
ivpu_err(vdev, "Failed to boot: %d\n", ret);
|
||||
if (ret)
|
||||
goto err_job_done_thread_fini;
|
||||
}
|
||||
|
||||
ivpu_pm_enable(vdev);
|
||||
|
||||
@ -606,6 +592,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
|
||||
ivpu_ipc_fini(vdev);
|
||||
err_fw_fini:
|
||||
ivpu_fw_fini(vdev);
|
||||
err_mmu_rctx_fini:
|
||||
ivpu_mmu_reserved_context_fini(vdev);
|
||||
err_mmu_gctx_fini:
|
||||
ivpu_mmu_global_context_fini(vdev);
|
||||
err_power_down:
|
||||
@ -629,6 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
|
||||
|
||||
ivpu_ipc_fini(vdev);
|
||||
ivpu_fw_fini(vdev);
|
||||
ivpu_mmu_reserved_context_fini(vdev);
|
||||
ivpu_mmu_global_context_fini(vdev);
|
||||
|
||||
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
|
||||
@ -657,10 +646,10 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
pci_set_drvdata(pdev, vdev);
|
||||
|
||||
ret = ivpu_dev_init(vdev);
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ivpu_debugfs_init(vdev);
|
||||
|
||||
ret = drm_dev_register(&vdev->drm, 0);
|
||||
if (ret) {
|
||||
|
@ -29,12 +29,13 @@
|
||||
#define IVPU_HW_37XX 37
|
||||
#define IVPU_HW_40XX 40
|
||||
|
||||
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
|
||||
/* SSID 1 is used by the VPU to represent invalid context */
|
||||
#define IVPU_USER_CONTEXT_MIN_SSID 2
|
||||
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
|
||||
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
|
||||
/* SSID 1 is used by the VPU to represent reserved context */
|
||||
#define IVPU_RESERVED_CONTEXT_MMU_SSID 1
|
||||
#define IVPU_USER_CONTEXT_MIN_SSID 2
|
||||
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
|
||||
|
||||
#define IVPU_NUM_ENGINES 2
|
||||
#define IVPU_NUM_ENGINES 2
|
||||
|
||||
#define IVPU_PLATFORM_SILICON 0
|
||||
#define IVPU_PLATFORM_SIMICS 2
|
||||
@ -76,6 +77,11 @@
|
||||
|
||||
#define IVPU_WA(wa_name) (vdev->wa.wa_name)
|
||||
|
||||
#define IVPU_PRINT_WA(wa_name) do { \
|
||||
if (IVPU_WA(wa_name)) \
|
||||
ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n"); \
|
||||
} while (0)
|
||||
|
||||
struct ivpu_wa_table {
|
||||
bool punit_disabled;
|
||||
bool clear_runtime_mem;
|
||||
@ -105,6 +111,7 @@ struct ivpu_device {
|
||||
struct ivpu_pm_info *pm;
|
||||
|
||||
struct ivpu_mmu_context gctx;
|
||||
struct ivpu_mmu_context rctx;
|
||||
struct xarray context_xa;
|
||||
struct xa_limit context_xa_limit;
|
||||
|
||||
@ -118,6 +125,7 @@ struct ivpu_device {
|
||||
int jsm;
|
||||
int tdr;
|
||||
int reschedule_suspend;
|
||||
int autosuspend;
|
||||
} timeout;
|
||||
};
|
||||
|
||||
|
@ -301,6 +301,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
|
||||
if (ret)
|
||||
goto err_fw_release;
|
||||
|
||||
ivpu_fw_load(vdev);
|
||||
|
||||
return 0;
|
||||
|
||||
err_fw_release:
|
||||
@ -314,25 +316,23 @@ void ivpu_fw_fini(struct ivpu_device *vdev)
|
||||
ivpu_fw_release(vdev);
|
||||
}
|
||||
|
||||
int ivpu_fw_load(struct ivpu_device *vdev)
|
||||
void ivpu_fw_load(struct ivpu_device *vdev)
|
||||
{
|
||||
struct ivpu_fw_info *fw = vdev->fw;
|
||||
u64 image_end_offset = fw->image_load_offset + fw->image_size;
|
||||
|
||||
memset(fw->mem->kvaddr, 0, fw->image_load_offset);
|
||||
memcpy(fw->mem->kvaddr + fw->image_load_offset,
|
||||
memset(ivpu_bo_vaddr(fw->mem), 0, fw->image_load_offset);
|
||||
memcpy(ivpu_bo_vaddr(fw->mem) + fw->image_load_offset,
|
||||
fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size);
|
||||
|
||||
if (IVPU_WA(clear_runtime_mem)) {
|
||||
u8 *start = fw->mem->kvaddr + image_end_offset;
|
||||
u64 size = fw->mem->base.size - image_end_offset;
|
||||
u8 *start = ivpu_bo_vaddr(fw->mem) + image_end_offset;
|
||||
u64 size = ivpu_bo_size(fw->mem) - image_end_offset;
|
||||
|
||||
memset(start, 0, size);
|
||||
}
|
||||
|
||||
wmb(); /* Flush WC buffers after writing fw->mem */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
|
||||
@ -451,10 +451,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
|
||||
vdev->hw->ranges.global.start;
|
||||
|
||||
boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr;
|
||||
boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2;
|
||||
boot_params->ipc_header_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
|
||||
|
||||
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2;
|
||||
boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2;
|
||||
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2;
|
||||
boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
|
||||
|
||||
boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
|
||||
boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
|
||||
@ -486,9 +486,9 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
|
||||
boot_params->trace_destination_mask = vdev->fw->trace_destination_mask;
|
||||
boot_params->trace_hw_component_mask = vdev->fw->trace_hw_component_mask;
|
||||
boot_params->crit_tracing_buff_addr = vdev->fw->mem_log_crit->vpu_addr;
|
||||
boot_params->crit_tracing_buff_size = vdev->fw->mem_log_crit->base.size;
|
||||
boot_params->crit_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_crit);
|
||||
boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr;
|
||||
boot_params->verbose_tracing_buff_size = vdev->fw->mem_log_verb->base.size;
|
||||
boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb);
|
||||
|
||||
boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
|
||||
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
|
||||
|
@ -31,7 +31,7 @@ struct ivpu_fw_info {
|
||||
|
||||
int ivpu_fw_init(struct ivpu_device *vdev);
|
||||
void ivpu_fw_fini(struct ivpu_device *vdev);
|
||||
int ivpu_fw_load(struct ivpu_device *vdev);
|
||||
void ivpu_fw_load(struct ivpu_device *vdev);
|
||||
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
|
||||
|
||||
static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
|
||||
|
@ -31,10 +31,10 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
|
||||
{
|
||||
struct vpu_tracing_buffer_header *log;
|
||||
|
||||
if ((*offset + sizeof(*log)) > bo->base.size)
|
||||
if ((*offset + sizeof(*log)) > ivpu_bo_size(bo))
|
||||
return -EINVAL;
|
||||
|
||||
log = bo->kvaddr + *offset;
|
||||
log = ivpu_bo_vaddr(bo) + *offset;
|
||||
|
||||
if (log->vpu_canary_start != VPU_TRACING_BUFFER_CANARY)
|
||||
return -EINVAL;
|
||||
@ -43,7 +43,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
|
||||
ivpu_dbg(vdev, FW_BOOT, "Invalid header size 0x%x\n", log->header_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
if ((char *)log + log->size > (char *)bo->kvaddr + bo->base.size) {
|
||||
if ((char *)log + log->size > (char *)ivpu_bo_vaddr(bo) + ivpu_bo_size(bo)) {
|
||||
ivpu_dbg(vdev, FW_BOOT, "Invalid log size 0x%x\n", log->size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ static const struct ivpu_bo_ops prime_ops = {
|
||||
|
||||
static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
|
||||
{
|
||||
int npages = bo->base.size >> PAGE_SHIFT;
|
||||
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
|
||||
struct page **pages;
|
||||
|
||||
pages = drm_gem_get_pages(&bo->base);
|
||||
@ -88,7 +88,7 @@ static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
|
||||
static void shmem_free_pages_locked(struct ivpu_bo *bo)
|
||||
{
|
||||
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
|
||||
set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
|
||||
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
|
||||
|
||||
drm_gem_put_pages(&bo->base, bo->pages, true, false);
|
||||
bo->pages = NULL;
|
||||
@ -96,7 +96,7 @@ static void shmem_free_pages_locked(struct ivpu_bo *bo)
|
||||
|
||||
static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
|
||||
{
|
||||
int npages = bo->base.size >> PAGE_SHIFT;
|
||||
int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
|
||||
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
|
||||
struct sg_table *sgt;
|
||||
int ret;
|
||||
@ -142,7 +142,7 @@ static const struct ivpu_bo_ops shmem_ops = {
|
||||
|
||||
static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
|
||||
{
|
||||
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
|
||||
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
|
||||
struct page **pages;
|
||||
int ret;
|
||||
|
||||
@ -171,10 +171,10 @@ static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
|
||||
|
||||
static void internal_free_pages_locked(struct ivpu_bo *bo)
|
||||
{
|
||||
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
|
||||
unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
|
||||
|
||||
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
|
||||
set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
|
||||
set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
|
||||
|
||||
for (i = 0; i < npages; i++)
|
||||
put_page(bo->pages[i]);
|
||||
@ -291,7 +291,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
|
||||
}
|
||||
|
||||
mutex_lock(&ctx->lock);
|
||||
ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
|
||||
ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
|
||||
if (!ret) {
|
||||
bo->ctx = ctx;
|
||||
bo->vpu_addr = bo->mm_node.start;
|
||||
@ -438,7 +438,7 @@ static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
|
||||
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
|
||||
|
||||
ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
|
||||
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
|
||||
bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
|
||||
|
||||
if (obj->import_attach) {
|
||||
/* Drop the reference drm_gem_mmap_obj() acquired.*/
|
||||
@ -553,7 +553,7 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
|
||||
drm_gem_object_put(&bo->base);
|
||||
|
||||
ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
|
||||
file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
|
||||
file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -590,22 +590,22 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
|
||||
goto err_put;
|
||||
|
||||
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
|
||||
drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
|
||||
drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
|
||||
|
||||
if (bo->flags & DRM_IVPU_BO_WC)
|
||||
set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
|
||||
set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
|
||||
else if (bo->flags & DRM_IVPU_BO_UNCACHED)
|
||||
set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
|
||||
set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
|
||||
|
||||
prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
|
||||
bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
|
||||
bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
|
||||
if (!bo->kvaddr) {
|
||||
ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
|
||||
bo->vpu_addr, bo->base.size, flags);
|
||||
bo->vpu_addr, ivpu_bo_size(bo), flags);
|
||||
|
||||
return bo;
|
||||
|
||||
@ -718,7 +718,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
|
||||
dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
|
||||
|
||||
drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
|
||||
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
|
||||
bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
|
||||
kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user