Merge drm/drm-next into drm-misc-next

Backmerging from drm-next to get commit e24e6d695377 ("drm/i915/display:
Implement fb_mmap callback function").

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
This commit is contained in:
Thomas Zimmermann 2023-06-01 11:31:39 +02:00
commit 33675759a5
321 changed files with 7062 additions and 3127 deletions

View File

@ -18,7 +18,6 @@ Block
kyber-iosched
null_blk
pr
request
stat
switching-sched
writeback_cache_control

View File

@ -1,99 +0,0 @@
============================
struct request documentation
============================
Jens Axboe <jens.axboe@oracle.com> 27/05/02
.. FIXME:
No idea about what does mean - seems just some noise, so comment it
1.0
Index
2.0 Struct request members classification
2.1 struct request members explanation
3.0
2.0
Short explanation of request members
====================================
Classification flags:
= ====================
D driver member
B block layer member
I I/O scheduler member
= ====================
Unless an entry contains a D classification, a device driver must not access
this member. Some members may contain D classifications, but should only be
access through certain macros or functions (eg ->flags).
<linux/blkdev.h>
=============================== ======= =======================================
Member Flag Comment
=============================== ======= =======================================
struct list_head queuelist BI Organization on various internal
queues
``void *elevator_private`` I I/O scheduler private data
unsigned char cmd[16] D Driver can use this for setting up
a cdb before execution, see
blk_queue_prep_rq
unsigned long flags DBI Contains info about data direction,
request type, etc.
int rq_status D Request status bits
kdev_t rq_dev DBI Target device
int errors DB Error counts
sector_t sector DBI Target location
unsigned long hard_nr_sectors B Used to keep sector sane
unsigned long nr_sectors DBI Total number of sectors in request
unsigned long hard_nr_sectors B Used to keep nr_sectors sane
unsigned short nr_phys_segments DB Number of physical scatter gather
segments in a request
unsigned short nr_hw_segments DB Number of hardware scatter gather
segments in a request
unsigned int current_nr_sectors DB Number of sectors in first segment
of request
unsigned int hard_cur_sectors B Used to keep current_nr_sectors sane
int tag DB TCQ tag, if assigned
``void *special`` D Free to be used by driver
``char *buffer`` D Map of first segment, also see
section on bouncing SECTION
``struct completion *waiting`` D Can be used by driver to get signalled
on request completion
``struct bio *bio`` DBI First bio in request
``struct bio *biotail`` DBI Last bio in request
``struct request_queue *q`` DB Request queue this request belongs to
``struct request_list *rl`` B Request list this request came from
=============================== ======= =======================================

View File

@ -49,6 +49,7 @@ properties:
properties:
data-lanes:
minItems: 1
maxItems: 2
required:

View File

@ -17,20 +17,11 @@ description:
properties:
clocks:
minItems: 3
items:
- description: PCIe bridge clock.
- description: PCIe bus clock.
- description: PCIe PHY clock.
- description: Additional required clock entry for imx6sx-pcie,
imx6sx-pcie-ep, imx8mq-pcie, imx8mq-pcie-ep.
maxItems: 4
clock-names:
minItems: 3
items:
- const: pcie
- const: pcie_bus
- enum: [ pcie_phy, pcie_aux ]
- enum: [ pcie_inbound_axi, pcie_aux ]
maxItems: 4
num-lanes:
const: 1

View File

@ -31,6 +31,19 @@ properties:
- const: dbi
- const: addr_space
clocks:
minItems: 3
items:
- description: PCIe bridge clock.
- description: PCIe bus clock.
- description: PCIe PHY clock.
- description: Additional required clock entry for imx6sx-pcie,
imx6sx-pcie-ep, imx8mq-pcie, imx8mq-pcie-ep.
clock-names:
minItems: 3
maxItems: 4
interrupts:
items:
- description: builtin eDMA interrupter.
@ -49,6 +62,31 @@ required:
allOf:
- $ref: /schemas/pci/snps,dw-pcie-ep.yaml#
- $ref: /schemas/pci/fsl,imx6q-pcie-common.yaml#
- if:
properties:
compatible:
enum:
- fsl,imx8mq-pcie-ep
then:
properties:
clocks:
minItems: 4
clock-names:
items:
- const: pcie
- const: pcie_bus
- const: pcie_phy
- const: pcie_aux
else:
properties:
clocks:
maxItems: 3
clock-names:
items:
- const: pcie
- const: pcie_bus
- const: pcie_aux
unevaluatedProperties: false

View File

@ -40,6 +40,19 @@ properties:
- const: dbi
- const: config
clocks:
minItems: 3
items:
- description: PCIe bridge clock.
- description: PCIe bus clock.
- description: PCIe PHY clock.
- description: Additional required clock entry for imx6sx-pcie,
imx6sx-pcie-ep, imx8mq-pcie, imx8mq-pcie-ep.
clock-names:
minItems: 3
maxItems: 4
interrupts:
items:
- description: builtin MSI controller.
@ -77,6 +90,70 @@ required:
allOf:
- $ref: /schemas/pci/snps,dw-pcie.yaml#
- $ref: /schemas/pci/fsl,imx6q-pcie-common.yaml#
- if:
properties:
compatible:
enum:
- fsl,imx6sx-pcie
then:
properties:
clocks:
minItems: 4
clock-names:
items:
- const: pcie
- const: pcie_bus
- const: pcie_phy
- const: pcie_inbound_axi
- if:
properties:
compatible:
enum:
- fsl,imx8mq-pcie
then:
properties:
clocks:
minItems: 4
clock-names:
items:
- const: pcie
- const: pcie_bus
- const: pcie_phy
- const: pcie_aux
- if:
properties:
compatible:
enum:
- fsl,imx6q-pcie
- fsl,imx6qp-pcie
- fsl,imx7d-pcie
then:
properties:
clocks:
maxItems: 3
clock-names:
items:
- const: pcie
- const: pcie_bus
- const: pcie_phy
- if:
properties:
compatible:
enum:
- fsl,imx8mm-pcie
- fsl,imx8mp-pcie
then:
properties:
clocks:
maxItems: 3
clock-names:
items:
- const: pcie
- const: pcie_bus
- const: pcie_aux
unevaluatedProperties: false

View File

@ -776,10 +776,11 @@ peer_notif_delay
Specify the delay, in milliseconds, between each peer
notification (gratuitous ARP and unsolicited IPv6 Neighbor
Advertisement) when they are issued after a failover event.
This delay should be a multiple of the link monitor interval
(arp_interval or miimon, whichever is active). The default
value is 0 which means to match the value of the link monitor
interval.
This delay should be a multiple of the MII link monitor interval
(miimon).
The valid range is 0 - 300000. The default value is 0, which means
to match the value of the MII link monitor interval.
prio
Slave priority. A higher number means higher priority.

View File

@ -116,8 +116,8 @@ Contents:
udplite
vrf
vxlan
x25-iface
x25
x25-iface
xfrm_device
xfrm_proc
xfrm_sync

View File

@ -1,8 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
============================-
X.25 Device Driver Interface
============================-
============================
Version 1.1

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 4
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc2
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*

View File

@ -308,6 +308,29 @@ static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
return URC_OK;
}
static unsigned long unwind_decode_uleb128(struct unwind_ctrl_block *ctrl)
{
unsigned long bytes = 0;
unsigned long insn;
unsigned long result = 0;
/*
* unwind_get_byte() will advance `ctrl` one instruction at a time, so
* loop until we get an instruction byte where bit 7 is not set.
*
* Note: This decodes a maximum of 4 bytes to output 28 bits data where
* max is 0xfffffff: that will cover a vsp increment of 1073742336, hence
* it is sufficient for unwinding the stack.
*/
do {
insn = unwind_get_byte(ctrl);
result |= (insn & 0x7f) << (bytes * 7);
bytes++;
} while (!!(insn & 0x80) && (bytes != sizeof(result)));
return result;
}
/*
* Execute the current unwind instruction.
*/
@ -361,7 +384,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
if (ret)
goto error;
} else if (insn == 0xb2) {
unsigned long uleb128 = unwind_get_byte(ctrl);
unsigned long uleb128 = unwind_decode_uleb128(ctrl);
ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
} else {

View File

@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/**
/*
* arch/arm/mac-sa1100/jornada720_ssp.c
*
* Copyright (C) 2006/2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
@ -26,6 +26,7 @@ static unsigned long jornada_ssp_flags;
/**
* jornada_ssp_reverse - reverses input byte
* @byte: input byte to reverse
*
* we need to reverse all data we receive from the mcu due to its physical location
* returns : 01110111 -> 11101110
@ -46,6 +47,7 @@ EXPORT_SYMBOL(jornada_ssp_reverse);
/**
* jornada_ssp_byte - waits for ready ssp bus and sends byte
* @byte: input byte to transmit
*
* waits for fifo buffer to clear and then transmits, if it doesn't then we will
* timeout after <timeout> rounds. Needs mcu running before its called.
@ -77,6 +79,7 @@ EXPORT_SYMBOL(jornada_ssp_byte);
/**
* jornada_ssp_inout - decide if input is command or trading byte
* @byte: input byte to send (may be %TXDUMMY)
*
* returns : (jornada_ssp_byte(byte)) on success
* : %-ETIMEDOUT on timeout failure

View File

@ -23,6 +23,9 @@
@
ENTRY(do_vfp)
mov r1, r10
mov r3, r9
b vfp_entry
str lr, [sp, #-8]!
add r3, sp, #4
str r9, [r3]
bl vfp_entry
ldr pc, [sp], #8
ENDPROC(do_vfp)

View File

@ -172,13 +172,14 @@ vfp_hw_state_valid:
@ out before setting an FPEXC that
@ stops us reading stuff
VFPFMXR FPEXC, r1 @ Restore FPEXC last
mov sp, r3 @ we think we have handled things
pop {lr}
sub r2, r2, #4 @ Retry current instruction - if Thumb
str r2, [sp, #S_PC] @ mode it's two 16-bit instructions,
@ else it's one 32-bit instruction, so
@ always subtract 4 from the following
@ instruction address.
mov lr, r3 @ we think we have handled things
local_bh_enable_and_ret:
adr r0, .
mov r1, #SOFTIRQ_DISABLE_OFFSET
@ -209,8 +210,9 @@ skip:
process_exception:
DBGSTR "bounce"
mov sp, r3 @ setup for a return to the user code.
pop {lr}
mov r2, sp @ nothing stacked - regdump is at TOS
mov lr, r3 @ setup for a return to the user code.
@ Now call the C code to package up the bounce to the support code
@ r0 holds the trigger instruction

View File

@ -413,12 +413,12 @@ extern void paging_init (void);
* For the 64bit version, the offset is extended by 32bit.
*/
#define __swp_type(x) ((x).val & 0x1f)
#define __swp_offset(x) ( (((x).val >> 6) & 0x7) | \
(((x).val >> 8) & ~0x7) )
#define __swp_offset(x) ( (((x).val >> 5) & 0x7) | \
(((x).val >> 10) << 3) )
#define __swp_entry(type, offset) ((swp_entry_t) { \
((type) & 0x1f) | \
((offset & 0x7) << 6) | \
((offset & ~0x7) << 8) })
((offset & 0x7) << 5) | \
((offset >> 3) << 10) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })

View File

@ -4,6 +4,8 @@
#include <linux/console.h>
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>

View File

@ -22,7 +22,7 @@ KCOV_INSTRUMENT := n
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
--remove-section=.note.gnu.property \
--prefix-alloc-sections=.init
--prefix-alloc-sections=.init.pi
$(obj)/%.pi.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)

View File

@ -84,11 +84,8 @@ SECTIONS
__init_data_begin = .;
INIT_DATA_SECTION(16)
/* Those sections result from the compilation of kernel/pi/string.c */
.init.pidata : {
*(.init.srodata.cst8*)
*(.init__bug_table*)
*(.init.sdata*)
.init.pi : {
*(.init.pi*)
}
.init.bss : {

View File

@ -1703,10 +1703,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (has_branch_stack(event))
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);

View File

@ -1229,12 +1229,14 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct perf_event *event, bool add)
{
struct pmu *pmu = event->pmu;
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
* that does not hurt:
*/
bool update = cpuc->n_pebs == 1;
if (cpuc->n_pebs == 1)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
if (!needed_cb)
@ -1242,7 +1244,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
else
perf_sched_cb_dec(pmu);
update = true;
cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
}
/*
@ -1252,24 +1254,13 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
if (x86_pmu.intel_cap.pebs_baseline && add) {
u64 pebs_data_cfg;
/* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
if (cpuc->n_pebs == 1) {
cpuc->pebs_data_cfg = 0;
cpuc->pebs_record_size = sizeof(struct pebs_basic);
}
pebs_data_cfg = pebs_update_adaptive_cfg(event);
/* Update pebs_record_size if new event requires more data. */
if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
cpuc->pebs_data_cfg |= pebs_data_cfg;
adaptive_pebs_record_size_update();
update = true;
/*
* Be sure to update the thresholds when we change the record.
*/
if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
}
}
if (update)
pebs_update_threshold(cpuc);
}
void intel_pmu_pebs_add(struct perf_event *event)
@ -1326,9 +1317,17 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
wrmsrl(base + idx, value);
}
static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
{
if (cpuc->n_pebs == cpuc->n_large_pebs &&
cpuc->n_pebs != cpuc->n_pebs_via_pt)
intel_pmu_drain_pebs_buffer();
}
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;
unsigned int idx = hwc->idx;
@ -1344,11 +1343,22 @@ void intel_pmu_pebs_enable(struct perf_event *event)
if (x86_pmu.intel_cap.pebs_baseline) {
hwc->config |= ICL_EVENTSEL_ADAPTIVE;
if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
/*
* drain_pebs() assumes uniform record size;
* hence we need to drain when changing said
* size.
*/
intel_pmu_drain_large_pebs(cpuc);
adaptive_pebs_record_size_update();
wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
cpuc->active_pebs_data_cfg = pebs_data_cfg;
}
}
if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
cpuc->pebs_data_cfg = pebs_data_cfg;
pebs_update_threshold(cpuc);
}
if (idx >= INTEL_PMC_IDX_FIXED) {
if (x86_pmu.intel_cap.pebs_format < 5)
@ -1391,9 +1401,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
if (cpuc->n_pebs == cpuc->n_large_pebs &&
cpuc->n_pebs != cpuc->n_pebs_via_pt)
intel_pmu_drain_pebs_buffer();
intel_pmu_drain_large_pebs(cpuc);
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

View File

@ -121,6 +121,9 @@
#define PEBS_DATACFG_LBRS BIT_ULL(3)
#define PEBS_DATACFG_LBR_SHIFT 24
/* Steal the highest bit of pebs_data_cfg for SW usage */
#define PEBS_UPDATE_DS_SW BIT_ULL(63)
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:

View File

@ -36,6 +36,7 @@
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
@ -79,6 +80,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
{}
};

View File

@ -144,8 +144,8 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
*/
.align 64
.skip 63, 0xcc
SYM_FUNC_START_NOALIGN(zen_untrain_ret);
SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
/*
* As executed from zen_untrain_ret, this is:
*

View File

@ -1666,7 +1666,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
return -EIO;
dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
if (!dir) {
if (IS_ERR(dir)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
nbd_name(nbd));
return -EIO;
@ -1692,7 +1692,7 @@ static int nbd_dbg_init(void)
struct dentry *dbg_dir;
dbg_dir = debugfs_create_dir("nbd", NULL);
if (!dbg_dir)
if (IS_ERR(dbg_dir))
return -EIO;
nbd_dbg_dir = dbg_dir;

View File

@ -241,7 +241,7 @@ static inline blk_opf_t rnbd_to_bio_flags(u32 rnbd_opf)
bio_opf = REQ_OP_WRITE;
break;
case RNBD_OP_FLUSH:
bio_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
bio_opf = REQ_OP_WRITE | REQ_PREFLUSH;
break;
case RNBD_OP_DISCARD:
bio_opf = REQ_OP_DISCARD;

View File

@ -1281,7 +1281,7 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
{
u32 ioc_type = _IOC_TYPE(cmd_op);
if (IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u')
if (!IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u')
return -EOPNOTSUPP;
if (ioc_type != 'u' && ioc_type != 0)

View File

@ -571,6 +571,7 @@ void read_cdat_data(struct cxl_port *port)
/* Don't leave table data allocated on error */
devm_kfree(dev, cdat_table);
dev_err(dev, "CDAT data read error\n");
return;
}
port->cdat.table = cdat_table + sizeof(__le32);

View File

@ -706,21 +706,22 @@ static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r,
int rcode;
if (destination == IEEE1394_ALL_NODES) {
kfree(r);
return;
}
if (offset != dev->handler.offset)
// Although the response to the broadcast packet is not necessarily required, the
// fw_send_response() function should still be called to maintain the reference
// counting of the object. In the case, the call of function just releases the
// object as a result to decrease the reference counting.
rcode = RCODE_COMPLETE;
} else if (offset != dev->handler.offset) {
rcode = RCODE_ADDRESS_ERROR;
else if (tcode != TCODE_WRITE_BLOCK_REQUEST)
} else if (tcode != TCODE_WRITE_BLOCK_REQUEST) {
rcode = RCODE_TYPE_ERROR;
else if (fwnet_incoming_packet(dev, payload, length,
} else if (fwnet_incoming_packet(dev, payload, length,
source, generation, false) != 0) {
dev_err(&dev->netdev->dev, "incoming packet failure\n");
rcode = RCODE_CONFLICT_ERROR;
} else
} else {
rcode = RCODE_COMPLETE;
}
fw_send_response(card, r, rcode);
}

View File

@ -51,7 +51,8 @@ __init bool sysfb_parse_mode(const struct screen_info *si,
*
* It's not easily possible to fix this in struct screen_info,
* as this could break UAPI. The best solution is to compute
* bits_per_pixel here and ignore lfb_depth. In the loop below,
* bits_per_pixel from the color bits, reserved bits and
* reported lfb_depth, whichever is highest. In the loop below,
* ignore simplefb formats with alpha bits, as EFI and VESA
* don't specify alpha channels.
*/
@ -60,6 +61,7 @@ __init bool sysfb_parse_mode(const struct screen_info *si,
si->green_size + si->green_pos,
si->blue_size + si->blue_pos),
si->rsvd_size + si->rsvd_pos);
bits_per_pixel = max_t(u32, bits_per_pixel, si->lfb_depth);
} else {
bits_per_pixel = si->lfb_depth;
}

View File

@ -3757,6 +3757,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
* internal path natively support atomics, set have_atomics_support to true.
*/
else if ((adev->flags & AMD_IS_APU) &&
(adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
adev->have_atomics_support = true;
else
adev->have_atomics_support =
!pci_enable_atomic_ops_to_root(adev->pdev,
@ -4506,7 +4512,11 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
dev_info(adev->dev, "recover vram bo from shadow start\n");
mutex_lock(&adev->shadow_list_lock);
list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
shadow = &vmbo->bo;
/* If vm is compute context or adev is APU, shadow will be NULL */
if (!vmbo->shadow)
continue;
shadow = vmbo->shadow;
/* No need to recover an evicted BO */
if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||

View File

@ -687,9 +687,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
if (r)
return r;
if (adev->gfx.cp_ecc_error_irq.funcs) {
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)
goto late_fini;
}
} else {
amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
}

View File

@ -1315,13 +1315,6 @@ static int gfx_v11_0_sw_init(void *handle)
if (r)
return r;
/* ECC error */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_11_0_0__SRCID__CP_ECC_ERROR,
&adev->gfx.cp_ecc_error_irq);
if (r)
return r;
/* FED error */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
@ -4444,7 +4437,6 @@ static int gfx_v11_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@ -5897,36 +5889,6 @@ static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev
}
}
#define CP_ME1_PIPE_INST_ADDR_INTERVAL 0x1
#define SET_ECC_ME_PIPE_STATE(reg_addr, state) \
do { \
uint32_t tmp = RREG32_SOC15_IP(GC, reg_addr); \
tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, state); \
WREG32_SOC15_IP(GC, reg_addr, tmp); \
} while (0)
static int gfx_v11_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
uint32_t ecc_irq_state = 0;
uint32_t pipe0_int_cntl_addr = 0;
int i = 0;
ecc_irq_state = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
pipe0_int_cntl_addr = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, ecc_irq_state);
for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++)
SET_ECC_ME_PIPE_STATE(pipe0_int_cntl_addr + i * CP_ME1_PIPE_INST_ADDR_INTERVAL,
ecc_irq_state);
return 0;
}
static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@ -6341,11 +6303,6 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
.process = gfx_v11_0_priv_inst_irq,
};
static const struct amdgpu_irq_src_funcs gfx_v11_0_cp_ecc_error_irq_funcs = {
.set = gfx_v11_0_set_cp_ecc_error_state,
.process = amdgpu_gfx_cp_ecc_error_irq,
};
static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
.process = gfx_v11_0_rlc_gc_fed_irq,
};
@ -6361,9 +6318,6 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
adev->gfx.cp_ecc_error_irq.num_types = 1; /* CP ECC error */
adev->gfx.cp_ecc_error_irq.funcs = &gfx_v11_0_cp_ecc_error_irq_funcs;
adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;

View File

@ -3764,6 +3764,7 @@ static int gfx_v9_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

View File

@ -54,6 +54,7 @@ static int jpeg_v3_0_early_init(void *handle)
switch (adev->ip_versions[UVD_HWIP][0]) {
case IP_VERSION(3, 1, 1):
case IP_VERSION(3, 1, 2):
break;
default:
harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);

View File

@ -98,6 +98,16 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =
};
/* Sienna Cichlid */
static const struct amdgpu_video_codec_info sc_video_codecs_encode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
};
static const struct amdgpu_video_codecs sc_video_codecs_encode = {
.codec_count = ARRAY_SIZE(sc_video_codecs_encode_array),
.codec_array = sc_video_codecs_encode_array,
};
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
@ -136,8 +146,8 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 =
/* SRIOV Sienna Cichlid, not const since data is controlled by host */
static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] =
@ -237,12 +247,12 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
} else {
if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
if (encode)
*codecs = &nv_video_codecs_encode;
*codecs = &sc_video_codecs_encode;
else
*codecs = &sc_video_codecs_decode_vcn1;
} else {
if (encode)
*codecs = &nv_video_codecs_encode;
*codecs = &sc_video_codecs_encode;
else
*codecs = &sc_video_codecs_decode_vcn0;
}
@ -251,14 +261,14 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
case IP_VERSION(3, 0, 16):
case IP_VERSION(3, 0, 2):
if (encode)
*codecs = &nv_video_codecs_encode;
*codecs = &sc_video_codecs_encode;
else
*codecs = &sc_video_codecs_decode_vcn0;
return 0;
case IP_VERSION(3, 1, 1):
case IP_VERSION(3, 1, 2):
if (encode)
*codecs = &nv_video_codecs_encode;
*codecs = &sc_video_codecs_encode;
else
*codecs = &yc_video_codecs_decode;
return 0;

View File

@ -1917,10 +1917,12 @@ static int sdma_v4_0_hw_fini(void *handle)
return 0;
}
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
AMDGPU_SDMA_IRQ_INSTANCE0 + i);
}
}
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);

View File

@ -711,7 +711,7 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x1;
adev->external_rev_id = adev->rev_id + 0x80;
break;
default:

View File

@ -423,3 +423,68 @@ void dcn314_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool
PERF_TRACE();
}
static void apply_symclk_on_tx_off_wa(struct dc_link *link)
{
/* There are use cases where SYMCLK is referenced by OTG. For instance
* for TMDS signal, OTG relies SYMCLK even if TX video output is off.
* However current link interface will power off PHY when disabling link
* output. This will turn off SYMCLK generated by PHY. The workaround is
* to identify such case where SYMCLK is still in use by OTG when we
* power off PHY. When this is detected, we will temporarily power PHY
* back on and move PHY's SYMCLK state to SYMCLK_ON_TX_OFF by calling
* program_pix_clk interface. When OTG is disabled, we will then power
* off PHY by calling disable link output again.
*
* In future dcn generations, we plan to rework transmitter control
* interface so that we could have an option to set SYMCLK ON TX OFF
* state in one step without this workaround
*/
struct dc *dc = link->ctx->dc;
struct pipe_ctx *pipe_ctx = NULL;
uint8_t i;
if (link->phy_state.symclk_ref_cnts.otg > 0) {
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream && pipe_ctx->stream->link == link && pipe_ctx->top_pipe == NULL) {
pipe_ctx->clock_source->funcs->program_pix_clk(
pipe_ctx->clock_source,
&pipe_ctx->stream_res.pix_clk_params,
dc->link_srv->dp_get_encoding_format(
&pipe_ctx->link_config.dp_link_settings),
&pipe_ctx->pll_settings);
link->phy_state.symclk_state = SYMCLK_ON_TX_OFF;
break;
}
}
}
}
void dcn314_disable_link_output(struct dc_link *link,
const struct link_resource *link_res,
enum signal_type signal)
{
struct dc *dc = link->ctx->dc;
const struct link_hwss *link_hwss = get_link_hwss(link, link_res);
struct dmcu *dmcu = dc->res_pool->dmcu;
if (signal == SIGNAL_TYPE_EDP &&
link->dc->hwss.edp_backlight_control)
link->dc->hwss.edp_backlight_control(link, false);
else if (dmcu != NULL && dmcu->funcs->lock_phy)
dmcu->funcs->lock_phy(dmcu);
link_hwss->disable_link_output(link, link_res, signal);
link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF;
/*
* Add the logic to extract BOTH power up and power down sequences
* from enable/disable link output and only call edp panel control
* in enable_link_dp and disable_link_dp once.
*/
if (dmcu != NULL && dmcu->funcs->lock_phy)
dmcu->funcs->unlock_phy(dmcu);
dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
apply_symclk_on_tx_off_wa(link);
}

View File

@ -45,4 +45,6 @@ void dcn314_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool
void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on);
void dcn314_disable_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal);
#endif /* __DC_HWSS_DCN314_H__ */

View File

@ -105,7 +105,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.enable_lvds_link_output = dce110_enable_lvds_link_output,
.enable_tmds_link_output = dce110_enable_tmds_link_output,
.enable_dp_link_output = dce110_enable_dp_link_output,
.disable_link_output = dce110_disable_link_output,
.disable_link_output = dcn314_disable_link_output,
.z10_restore = dcn31_z10_restore,
.z10_save_init = dcn31_z10_save_init,
.set_disp_pattern_generator = dcn30_set_disp_pattern_generator,

View File

@ -810,7 +810,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->SwathHeightY[k],
v->SwathHeightC[k],
TWait,
v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ?
(v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ||
v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= MIN_DCFCLK_FREQ_MHZ) ?
mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
/* Output */
&v->DSTXAfterScaler[k],
@ -3310,7 +3311,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->swath_width_chroma_ub_this_state[k],
v->SwathHeightYThisState[k],
v->SwathHeightCThisState[k], v->TWait,
v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ ?
(v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= MIN_DCFCLK_FREQ_MHZ) ?
mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
/* Output */

View File

@ -53,6 +53,7 @@
#define BPP_BLENDED_PIPE 0xffffffff
#define MEM_STROBE_FREQ_MHZ 1600
#define MIN_DCFCLK_FREQ_MHZ 200
#define MEM_STROBE_MAX_DELIVERY_TIME_US 60.0
struct display_mode_lib;

View File

@ -36,6 +36,8 @@
#define amdgpu_dpm_enable_bapm(adev, e) \
((adev)->powerplay.pp_funcs->enable_bapm((adev)->powerplay.pp_handle, (e)))
#define amdgpu_dpm_is_legacy_dpm(adev) ((adev)->powerplay.pp_handle == (adev))
int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
{
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
@ -1460,15 +1462,24 @@ int amdgpu_dpm_get_smu_prv_buf_details(struct amdgpu_device *adev,
int amdgpu_dpm_is_overdrive_supported(struct amdgpu_device *adev)
{
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
if (is_support_sw_smu(adev)) {
struct smu_context *smu = adev->powerplay.pp_handle;
if ((is_support_sw_smu(adev) && smu->od_enabled) ||
(is_support_sw_smu(adev) && smu->is_apu) ||
(!is_support_sw_smu(adev) && hwmgr->od_enabled))
return true;
return (smu->od_enabled || smu->is_apu);
} else {
struct pp_hwmgr *hwmgr;
/*
* dpm on some legacy asics don't carry od_enabled member
* as its pp_handle is casted directly from adev.
*/
if (amdgpu_dpm_is_legacy_dpm(adev))
return false;
hwmgr = (struct pp_hwmgr *)adev->powerplay.pp_handle;
return hwmgr->od_enabled;
}
}
int amdgpu_dpm_set_pp_table(struct amdgpu_device *adev,

View File

@ -425,11 +425,12 @@ struct ast_device *ast_device_create(const struct drm_driver *drv,
return ERR_PTR(-EIO);
/*
* If we don't have IO space at all, use MMIO now and
* assume the chip has MMIO enabled by default (rev 0x20
* and higher).
* After AST2500, MMIO is enabled by default, and it should be adopted
* to be compatible with Arm.
*/
if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
if (pdev->revision >= 0x40) {
ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
} else if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
drm_info(dev, "platform has no IO space, trying MMIO\n");
ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
}

View File

@ -641,19 +641,27 @@ static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y,
static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, size_t len,
struct drm_rect *clip)
{
u32 line_length = info->fix.line_length;
u32 fb_height = info->var.yres;
off_t end = off + len;
u32 x1 = 0;
u32 y1 = off / info->fix.line_length;
u32 y1 = off / line_length;
u32 x2 = info->var.xres;
u32 y2 = DIV_ROUND_UP(end, info->fix.line_length);
u32 y2 = DIV_ROUND_UP(end, line_length);
/* Don't allow any of them beyond the bottom bound of display area */
if (y1 > fb_height)
y1 = fb_height;
if (y2 > fb_height)
y2 = fb_height;
if ((y2 - y1) == 1) {
/*
* We've only written to a single scanline. Try to reduce
* the number of horizontal pixels that need an update.
*/
off_t bit_off = (off % info->fix.line_length) * 8;
off_t bit_end = (end % info->fix.line_length) * 8;
off_t bit_off = (off % line_length) * 8;
off_t bit_end = (end % line_length) * 8;
x1 = bit_off / info->var.bits_per_pixel;
x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel);

View File

@ -221,7 +221,7 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host,
return dsi;
}
dsi->dev.of_node = info->node;
device_set_node(&dsi->dev, of_fwnode_handle(info->node));
dsi->channel = info->channel;
strlcpy(dsi->name, info->type, sizeof(dsi->name));

View File

@ -62,10 +62,11 @@ config DRM_I915_FORCE_PROBE
This is the default value for the i915.force_probe module
parameter. Using the module parameter overrides this option.
Force probe the i915 for Intel graphics devices that are
recognized but not properly supported by this kernel version. It is
recommended to upgrade to a kernel version with proper support as soon
as it is available.
Force probe the i915 driver for Intel graphics devices that are
recognized but not properly supported by this kernel version. Force
probing an unsupported device taints the kernel. It is recommended to
upgrade to a kernel version with proper support as soon as it is
available.
It can also be used to block the probe of recognized and fully
supported devices.
@ -75,7 +76,8 @@ config DRM_I915_FORCE_PROBE
Use "<pci-id>[,<pci-id>,...]" to force probe the i915 for listed
devices. For example, "4500" or "4500,4571".
Use "*" to force probe the driver for all known devices.
Use "*" to force probe the driver for all known devices. Not
recommended.
Use "!" right before the ID to block the probe of the device. For
example, "4500,!4571" forces the probe of 4500 and blocks the probe of

View File

@ -194,6 +194,7 @@ i915-y += \
# general-purpose microcontroller (GuC) support
i915-y += \
gt/uc/intel_gsc_fw.o \
gt/uc/intel_gsc_proxy.o \
gt/uc/intel_gsc_uc.o \
gt/uc/intel_gsc_uc_heci_cmd_submit.o\
gt/uc/intel_guc.o \
@ -338,6 +339,7 @@ i915-y += \
i915-$(CONFIG_DRM_I915_PXP) += \
pxp/intel_pxp_cmd.o \
pxp/intel_pxp_debugfs.o \
pxp/intel_pxp_gsccs.o \
pxp/intel_pxp_irq.o \
pxp/intel_pxp_pm.o \
pxp/intel_pxp_session.o
@ -373,7 +375,7 @@ obj-$(CONFIG_DRM_I915_GVT_KVMGT) += kvmgt.o
#
# Enable locally for CONFIG_DRM_I915_WERROR=y. See also scripts/Makefile.build
ifdef CONFIG_DRM_I915_WERROR
cmd_checkdoc = $(srctree)/scripts/kernel-doc -none $<
cmd_checkdoc = $(srctree)/scripts/kernel-doc -none -Werror $<
endif
# header test
@ -388,7 +390,7 @@ always-$(CONFIG_DRM_I915_WERROR) += \
quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
cmd_hdrtest = $(CC) $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; \
$(srctree)/scripts/kernel-doc -none $<; touch $@
$(srctree)/scripts/kernel-doc -none -Werror $<; touch $@
$(obj)/%.hdrtest: $(src)/%.h FORCE
$(call if_changed_dep,hdrtest)

View File

@ -1028,7 +1028,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
int ret;
if (old_obj) {
const struct intel_crtc_state *crtc_state =
const struct intel_crtc_state *new_crtc_state =
intel_atomic_get_new_crtc_state(state,
to_intel_crtc(old_plane_state->hw.crtc));
@ -1043,7 +1043,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
* This should only fail upon a hung GPU, in which case we
* can safely continue.
*/
if (intel_crtc_needs_modeset(crtc_state)) {
if (new_crtc_state && intel_crtc_needs_modeset(new_crtc_state)) {
ret = i915_sw_fence_await_reservation(&state->commit_ready,
old_obj->base.resv,
false, 0,

View File

@ -1601,6 +1601,11 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
pipe_config->dsc.slice_count =
drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd,
true);
if (!pipe_config->dsc.slice_count) {
drm_dbg_kms(&dev_priv->drm, "Unsupported Slice Count %d\n",
pipe_config->dsc.slice_count);
return -EINVAL;
}
} else {
u16 dsc_max_output_bpp = 0;
u8 dsc_dp_slice_count;

View File

@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
vm->pte_encode(addr, level, flags));
vm->pte_encode(addr, pat_index, flags));
}
static void dpt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
static void dpt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
u32 pte_flags;
@ -98,7 +98,7 @@ static void dpt_bind_vma(struct i915_address_space *vm,
if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
vm->insert_entries(vm, vma_res, cache_level, pte_flags);
vm->insert_entries(vm, vma_res, pat_index, pte_flags);
vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma = dpt_bind_vma;
vm->vma_ops.unbind_vma = dpt_unbind_vma;
vm->pte_encode = gen8_ggtt_pte_encode;
vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
dpt->obj = dpt_obj;
dpt->obj->is_dpt = true;

View File

@ -1190,7 +1190,8 @@ bool intel_fb_needs_pot_stride_remap(const struct intel_framebuffer *fb)
{
struct drm_i915_private *i915 = to_i915(fb->base.dev);
return IS_ALDERLAKE_P(i915) && intel_fb_uses_dpt(&fb->base);
return (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) &&
intel_fb_uses_dpt(&fb->base);
}
static int intel_fb_pitch(const struct intel_framebuffer *fb, int color_plane, unsigned int rotation)
@ -1326,9 +1327,10 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane,
unsigned int tile_width,
unsigned int src_stride_tiles, unsigned int dst_stride_tiles)
{
struct drm_i915_private *i915 = to_i915(fb->base.dev);
unsigned int stride_tiles;
if (IS_ALDERLAKE_P(to_i915(fb->base.dev)))
if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14)
stride_tiles = src_stride_tiles;
else
stride_tiles = dst_stride_tiles;
@ -1522,7 +1524,8 @@ static void intel_fb_view_init(struct drm_i915_private *i915, struct intel_fb_vi
memset(view, 0, sizeof(*view));
view->gtt.type = view_type;
if (view_type == I915_GTT_VIEW_REMAPPED && IS_ALDERLAKE_P(i915))
if (view_type == I915_GTT_VIEW_REMAPPED &&
(IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14))
view->gtt.remapped.plane_alignment = SZ_2M / PAGE_SIZE;
}

View File

@ -40,8 +40,10 @@
#include <drm/drm_crtc.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_mman.h"
#include "i915_drv.h"
#include "intel_display_types.h"
@ -67,6 +69,11 @@ struct intel_fbdev {
struct mutex hpd_lock;
};
static struct intel_fbdev *to_intel_fbdev(struct drm_fb_helper *fb_helper)
{
return container_of(fb_helper, struct intel_fbdev, helper);
}
static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev)
{
return ifbdev->fb->frontbuffer;
@ -79,9 +86,7 @@ static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev)
static int intel_fbdev_set_par(struct fb_info *info)
{
struct drm_fb_helper *fb_helper = info->par;
struct intel_fbdev *ifbdev =
container_of(fb_helper, struct intel_fbdev, helper);
struct intel_fbdev *ifbdev = to_intel_fbdev(info->par);
int ret;
ret = drm_fb_helper_set_par(info);
@ -93,9 +98,7 @@ static int intel_fbdev_set_par(struct fb_info *info)
static int intel_fbdev_blank(int blank, struct fb_info *info)
{
struct drm_fb_helper *fb_helper = info->par;
struct intel_fbdev *ifbdev =
container_of(fb_helper, struct intel_fbdev, helper);
struct intel_fbdev *ifbdev = to_intel_fbdev(info->par);
int ret;
ret = drm_fb_helper_blank(blank, info);
@ -108,9 +111,7 @@ static int intel_fbdev_blank(int blank, struct fb_info *info)
static int intel_fbdev_pan_display(struct fb_var_screeninfo *var,
struct fb_info *info)
{
struct drm_fb_helper *fb_helper = info->par;
struct intel_fbdev *ifbdev =
container_of(fb_helper, struct intel_fbdev, helper);
struct intel_fbdev *ifbdev = to_intel_fbdev(info->par);
int ret;
ret = drm_fb_helper_pan_display(var, info);
@ -120,6 +121,15 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var,
return ret;
}
static int intel_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma)
{
struct intel_fbdev *fbdev = to_intel_fbdev(info->par);
struct drm_gem_object *bo = drm_gem_fb_get_obj(&fbdev->fb->base, 0);
struct drm_i915_gem_object *obj = to_intel_bo(bo);
return i915_gem_fb_mmap(obj, vma);
}
static const struct fb_ops intelfb_ops = {
.owner = THIS_MODULE,
DRM_FB_HELPER_DEFAULT_OPS,
@ -131,13 +141,13 @@ static const struct fb_ops intelfb_ops = {
.fb_imageblit = drm_fb_helper_cfb_imageblit,
.fb_pan_display = intel_fbdev_pan_display,
.fb_blank = intel_fbdev_blank,
.fb_mmap = intel_fbdev_mmap,
};
static int intelfb_alloc(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
{
struct intel_fbdev *ifbdev =
container_of(helper, struct intel_fbdev, helper);
struct intel_fbdev *ifbdev = to_intel_fbdev(helper);
struct drm_framebuffer *fb;
struct drm_device *dev = helper->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
@ -163,7 +173,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
obj = ERR_PTR(-ENODEV);
if (HAS_LMEM(dev_priv)) {
obj = i915_gem_object_create_lmem(dev_priv, size,
I915_BO_ALLOC_CONTIGUOUS);
I915_BO_ALLOC_CONTIGUOUS |
I915_BO_ALLOC_USER);
} else {
/*
* If the FB is too big, just don't use it since fbdev is not very
@ -193,8 +204,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
static int intelfb_create(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
{
struct intel_fbdev *ifbdev =
container_of(helper, struct intel_fbdev, helper);
struct intel_fbdev *ifbdev = to_intel_fbdev(helper);
struct intel_framebuffer *intel_fb = ifbdev->fb;
struct drm_device *dev = helper->dev;
struct drm_i915_private *dev_priv = to_i915(dev);

View File

@ -110,7 +110,9 @@ initial_plane_vma(struct drm_i915_private *i915,
size * 2 > i915->dsm.usable_size)
return NULL;
obj = i915_gem_object_create_region_at(mem, phys_base, size, 0);
obj = i915_gem_object_create_region_at(mem, phys_base, size,
I915_BO_ALLOC_USER |
I915_BO_PREALLOC);
if (IS_ERR(obj))
return NULL;

View File

@ -27,8 +27,15 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (IS_DGFX(i915))
return false;
return !(obj->cache_level == I915_CACHE_NONE ||
obj->cache_level == I915_CACHE_WT);
/*
* For objects created by userspace through GEM_CREATE with pat_index
* set by set_pat extension, i915_gem_object_has_cache_level() will
* always return true, because the coherency of such object is managed
* by userspace. Othereise the call here would fall back to checking
* whether the object is un-cached or write-through.
*/
return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@ -267,7 +274,13 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
{
int ret;
if (obj->cache_level == cache_level)
/*
* For objects created by userspace through GEM_CREATE with pat_index
* set by set_pat extension, simply return 0 here without touching
* the cache setting, because such objects should have an immutable
* cache setting by desgin and always managed by userspace.
*/
if (i915_gem_object_has_cache_level(obj, cache_level))
return 0;
ret = i915_gem_object_wait(obj,
@ -278,10 +291,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
return ret;
/* Always invalidate stale cachelines */
if (obj->cache_level != cache_level) {
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true;
}
/* The cache-level will be applied when each vma is rebound. */
return i915_gem_object_unbind(obj,
@ -306,20 +317,22 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
goto out;
}
switch (obj->cache_level) {
case I915_CACHE_LLC:
case I915_CACHE_L3_LLC:
args->caching = I915_CACHING_CACHED;
break;
case I915_CACHE_WT:
args->caching = I915_CACHING_DISPLAY;
break;
default:
args->caching = I915_CACHING_NONE;
break;
/*
* This ioctl should be disabled for the objects with pat_index
* set by user space.
*/
if (obj->pat_set_by_user) {
err = -EOPNOTSUPP;
goto out;
}
if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
args->caching = I915_CACHING_CACHED;
else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
args->caching = I915_CACHING_DISPLAY;
else
args->caching = I915_CACHING_NONE;
out:
rcu_read_unlock();
return err;
@ -337,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
if (IS_DGFX(i915))
return -ENODEV;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
return -EOPNOTSUPP;
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
@ -364,6 +380,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
if (!obj)
return -ENOENT;
/*
* This ioctl should be disabled for the objects with pat_index
* set by user space.
*/
if (obj->pat_set_by_user) {
ret = -EOPNOTSUPP;
goto out;
}
/*
* The caching mode of proxy object is handled by its generator, and
* not allowed to be changed by userspace.

View File

@ -640,9 +640,15 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
return false;
/*
* For objects created by userspace through GEM_CREATE with pat_index
* set by set_pat extension, i915_gem_object_has_cache_level() always
* return true, otherwise the call would fall back to checking whether
* the object is un-cached.
*/
return (cache->has_llc ||
obj->cache_dirty ||
obj->cache_level != I915_CACHE_NONE);
!i915_gem_object_has_cache_level(obj, I915_CACHE_NONE));
}
static int eb_reserve_vma(struct i915_execbuffer *eb,
@ -1324,7 +1330,10 @@ static void *reloc_iomap(struct i915_vma *batch,
if (drm_mm_node_allocated(&cache->node)) {
ggtt->vm.insert_page(&ggtt->vm,
i915_gem_object_get_dma_address(obj, page),
offset, I915_CACHE_NONE, 0);
offset,
i915_gem_get_pat_index(ggtt->vm.i915,
I915_CACHE_NONE),
0);
} else {
offset += page << PAGE_SHIFT;
}
@ -1464,7 +1473,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
reloc_cache_unmap(&eb->reloc_cache);
mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
target->vma->obj->cache_level,
target->vma->obj->pat_index,
PIN_GLOBAL, NULL, NULL);
mutex_unlock(&vma->vm->mutex);
reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);

View File

@ -383,7 +383,16 @@ retry:
}
/* Access to snoopable pages through the GTT is incoherent. */
if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
/*
* For objects created by userspace through GEM_CREATE with pat_index
* set by set_pat extension, coherency is managed by userspace, make
* sure we don't fail handling the vm fault by calling
* i915_gem_object_has_cache_level() which always return true for such
* objects. Otherwise this helper function would fall back to checking
* whether the object is un-cached.
*/
if (!(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
HAS_LLC(i915))) {
ret = -EFAULT;
goto err_unpin;
}
@ -927,53 +936,15 @@ static struct file *mmap_singleton(struct drm_i915_private *i915)
return file;
}
/*
* This overcomes the limitation in drm_gem_mmap's assignment of a
* drm_gem_object as the vma->vm_private_data. Since we need to
* be able to resolve multiple mmap offsets which could be tied
* to a single gem object.
*/
int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
static int
i915_gem_object_mmap(struct drm_i915_gem_object *obj,
struct i915_mmap_offset *mmo,
struct vm_area_struct *vma)
{
struct drm_vma_offset_node *node;
struct drm_file *priv = filp->private_data;
struct drm_device *dev = priv->minor->dev;
struct drm_i915_gem_object *obj = NULL;
struct i915_mmap_offset *mmo = NULL;
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct drm_device *dev = &i915->drm;
struct file *anon;
if (drm_dev_is_unplugged(dev))
return -ENODEV;
rcu_read_lock();
drm_vma_offset_lock_lookup(dev->vma_offset_manager);
node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
vma->vm_pgoff,
vma_pages(vma));
if (node && drm_vma_node_is_allowed(node, priv)) {
/*
* Skip 0-refcnted objects as it is in the process of being
* destroyed and will be invalid when the vma manager lock
* is released.
*/
if (!node->driver_private) {
mmo = container_of(node, struct i915_mmap_offset, vma_node);
obj = i915_gem_object_get_rcu(mmo->obj);
GEM_BUG_ON(obj && obj->ops->mmap_ops);
} else {
obj = i915_gem_object_get_rcu
(container_of(node, struct drm_i915_gem_object,
base.vma_node));
GEM_BUG_ON(obj && !obj->ops->mmap_ops);
}
}
drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
rcu_read_unlock();
if (!obj)
return node ? -EACCES : -EINVAL;
if (i915_gem_object_is_readonly(obj)) {
if (vma->vm_flags & VM_WRITE) {
i915_gem_object_put(obj);
@ -1005,7 +976,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
if (obj->ops->mmap_ops) {
vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
vma->vm_ops = obj->ops->mmap_ops;
vma->vm_private_data = node->driver_private;
vma->vm_private_data = obj->base.vma_node.driver_private;
return 0;
}
@ -1043,6 +1014,91 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
/*
* This overcomes the limitation in drm_gem_mmap's assignment of a
* drm_gem_object as the vma->vm_private_data. Since we need to
* be able to resolve multiple mmap offsets which could be tied
* to a single gem object.
*/
int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct drm_vma_offset_node *node;
struct drm_file *priv = filp->private_data;
struct drm_device *dev = priv->minor->dev;
struct drm_i915_gem_object *obj = NULL;
struct i915_mmap_offset *mmo = NULL;
if (drm_dev_is_unplugged(dev))
return -ENODEV;
rcu_read_lock();
drm_vma_offset_lock_lookup(dev->vma_offset_manager);
node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
vma->vm_pgoff,
vma_pages(vma));
if (node && drm_vma_node_is_allowed(node, priv)) {
/*
* Skip 0-refcnted objects as it is in the process of being
* destroyed and will be invalid when the vma manager lock
* is released.
*/
if (!node->driver_private) {
mmo = container_of(node, struct i915_mmap_offset, vma_node);
obj = i915_gem_object_get_rcu(mmo->obj);
GEM_BUG_ON(obj && obj->ops->mmap_ops);
} else {
obj = i915_gem_object_get_rcu
(container_of(node, struct drm_i915_gem_object,
base.vma_node));
GEM_BUG_ON(obj && !obj->ops->mmap_ops);
}
}
drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
rcu_read_unlock();
if (!obj)
return node ? -EACCES : -EINVAL;
return i915_gem_object_mmap(obj, mmo, vma);
}
int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct drm_device *dev = &i915->drm;
struct i915_mmap_offset *mmo = NULL;
enum i915_mmap_type mmap_type;
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
if (drm_dev_is_unplugged(dev))
return -ENODEV;
/* handle ttm object */
if (obj->ops->mmap_ops) {
/*
* ttm fault handler, ttm_bo_vm_fault_reserved() uses fake offset
* to calculate page offset so set that up.
*/
vma->vm_pgoff += drm_vma_node_start(&obj->base.vma_node);
} else {
/* handle stolen and smem objects */
mmap_type = i915_ggtt_has_aperture(ggtt) ? I915_MMAP_TYPE_GTT : I915_MMAP_TYPE_WC;
mmo = mmap_offset_attach(obj, mmap_type, NULL);
if (!mmo)
return -ENODEV;
}
/*
* When we install vm_ops for mmap we are too late for
* the vm_ops->open() which increases the ref_count of
* this obj and then it gets decreased by the vm_ops->close().
* To balance this increase the obj ref_count here.
*/
obj = i915_gem_object_get(obj);
return i915_gem_object_mmap(obj, mmo, vma);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_mman.c"
#endif

View File

@ -29,5 +29,5 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj);
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma);
#endif

View File

@ -45,6 +45,33 @@ static struct kmem_cache *slab_objects;
static const struct drm_gem_object_funcs i915_gem_object_funcs;
unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
enum i915_cache_level level)
{
if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
return 0;
return INTEL_INFO(i915)->cachelevel_to_pat[level];
}
bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
enum i915_cache_level lvl)
{
/*
* In case the pat_index is set by user space, this kernel mode
* driver should leave the coherency to be managed by user space,
* simply return true here.
*/
if (obj->pat_set_by_user)
return true;
/*
* Otherwise the pat_index should have been converted from cache_level
* so that the following comparison is valid.
*/
return obj->pat_index == i915_gem_get_pat_index(obj_to_i915(obj), lvl);
}
struct drm_i915_gem_object *i915_gem_object_alloc(void)
{
struct drm_i915_gem_object *obj;
@ -124,7 +151,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
obj->cache_level = cache_level;
obj->pat_index = i915_gem_get_pat_index(i915, cache_level);
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
@ -139,6 +166,37 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
!IS_DGFX(i915);
}
/**
* i915_gem_object_set_pat_index - set PAT index to be used in PTE encode
* @obj: #drm_i915_gem_object
* @pat_index: PAT index
*
* This is a clone of i915_gem_object_set_cache_coherency taking pat index
* instead of cache_level as its second argument.
*/
void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
unsigned int pat_index)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
if (obj->pat_index == pat_index)
return;
obj->pat_index = pat_index;
if (pat_index != i915_gem_get_pat_index(i915, I915_CACHE_NONE))
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
I915_BO_CACHE_COHERENT_FOR_WRITE);
else if (HAS_LLC(i915))
obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
else
obj->cache_coherent = 0;
obj->cache_dirty =
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
!IS_DGFX(i915);
}
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);

View File

@ -20,6 +20,8 @@
enum intel_region_id;
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
static inline bool i915_gem_object_size_2big(u64 size)
{
struct drm_i915_gem_object *obj;
@ -30,6 +32,10 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
}
unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
enum i915_cache_level level);
bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
enum i915_cache_level lvl);
void i915_gem_init__objects(struct drm_i915_private *i915);
void i915_objects_module_exit(void);
@ -80,7 +86,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
/**
* i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
* @filp: DRM file private date
* @file: DRM file private date
* @handle: userspace handle
*
* Returns:
@ -760,6 +766,8 @@ bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj);
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
unsigned int pat_index);
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);

View File

@ -194,6 +194,13 @@ enum i915_cache_level {
* engine.
*/
I915_CACHE_WT,
/**
* @I915_MAX_CACHE_LEVEL:
*
* Mark the last entry in the enum. Used for defining cachelevel_to_pat
* array for cache_level to pat translation table.
*/
I915_MAX_CACHE_LEVEL,
};
enum i915_map_type {
@ -328,6 +335,12 @@ struct drm_i915_gem_object {
*/
#define I915_BO_ALLOC_GPU_ONLY BIT(6)
#define I915_BO_ALLOC_CCS_AUX BIT(7)
/*
* Object is allowed to retain its initial data and will not be cleared on first
* access if used along with I915_BO_ALLOC_USER. This is mainly to keep
* preallocated framebuffer data intact while transitioning it to i915drmfb.
*/
#define I915_BO_PREALLOC BIT(8)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
I915_BO_ALLOC_VOLATILE | \
I915_BO_ALLOC_CPU_CLEAR | \
@ -335,10 +348,11 @@ struct drm_i915_gem_object {
I915_BO_ALLOC_PM_VOLATILE | \
I915_BO_ALLOC_PM_EARLY | \
I915_BO_ALLOC_GPU_ONLY | \
I915_BO_ALLOC_CCS_AUX)
#define I915_BO_READONLY BIT(8)
#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(10)
I915_BO_ALLOC_CCS_AUX | \
I915_BO_PREALLOC)
#define I915_BO_READONLY BIT(9)
#define I915_TILING_QUIRK_BIT 10 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(11)
/**
* @mem_flags - Mutable placement-related flags
*
@ -350,15 +364,43 @@ struct drm_i915_gem_object {
#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */
/**
* @cache_level: The desired GTT caching level.
* @pat_index: The desired PAT index.
*
* See enum i915_cache_level for possible values, along with what
* each does.
* See hardware specification for valid PAT indices for each platform.
* This field replaces the @cache_level that contains a value of enum
* i915_cache_level since PAT indices are being used by both userspace
* and kernel mode driver for caching policy control after GEN12.
* In the meantime platform specific tables are created to translate
* i915_cache_level into pat index, for more details check the macros
* defined i915/i915_pci.c, e.g. PVC_CACHELEVEL.
* For backward compatibility, this field contains values exactly match
* the entries of enum i915_cache_level for pre-GEN12 platforms (See
* LEGACY_CACHELEVEL), so that the PTE encode functions for these
* legacy platforms can stay the same.
*/
unsigned int cache_level:3;
unsigned int pat_index:6;
/**
* @pat_set_by_user: Indicate whether pat_index is set by user space
*
* This field is set to false by default, only set to true if the
* pat_index is set by user space. By design, user space is capable of
* managing caching behavior by setting pat_index, in which case this
* kernel mode driver should never touch the pat_index.
*/
unsigned int pat_set_by_user:1;
/**
* @cache_coherent:
*
* Note: with the change above which replaced @cache_level with pat_index,
* the use of @cache_coherent is limited to the objects created by kernel
* or by userspace without pat index specified.
* Check for @pat_set_by_user to find out if an object has pat index set
* by userspace. The ioctl's to change cache settings have also been
* disabled for the objects with pat index set by userspace. Please don't
* assume @cache_coherent having the flags set as describe here. A helper
* function i915_gem_object_has_cache_level() provides one way to bypass
* the use of this field.
*
* Track whether the pages are coherent with the GPU if reading or
* writing through the CPU caches. The largely depends on the
* @cache_level setting.
@ -432,6 +474,16 @@ struct drm_i915_gem_object {
/**
* @cache_dirty:
*
* Note: with the change above which replaced cache_level with pat_index,
* the use of @cache_dirty is limited to the objects created by kernel
* or by userspace without pat index specified.
* Check for @pat_set_by_user to find out if an object has pat index set
* by userspace. The ioctl's to change cache settings have also been
* disabled for the objects with pat_index set by userspace. Please don't
* assume @cache_dirty is set as describe here. Also see helper function
* i915_gem_object_has_cache_level() for possible ways to bypass the use
* of this field.
*
* Track if we are we dirty with writes through the CPU cache for this
* object. As a result reading directly from main memory might yield
* stale data.

View File

@ -469,7 +469,10 @@ enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
bool always_coherent)
{
if (i915_gem_object_is_lmem(obj))
/*
* Wa_22016122933: always return I915_MAP_WC for MTL
*/
if (i915_gem_object_is_lmem(obj) || IS_METEORLAKE(i915))
return I915_MAP_WC;
if (HAS_LLC(i915) || always_coherent)
return I915_MAP_WB;

View File

@ -22,9 +22,7 @@ struct i915_gem_apply_to_region;
*/
struct i915_gem_apply_to_region_ops {
/**
* process_obj - Process the current object
* @apply: Embed this for private data.
* @obj: The current object.
* @process_obj: Process the current object
*
* Note that if this function is part of a ww transaction, and
* if returns -EDEADLK for one of the objects, it may be

View File

@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
if (HAS_LLC(i915))
/*
* MTL doesn't snoop CPU cache by default for GPU access (namely
* 1-way coherency). However some UMD's are currently depending on
* that. Make 1-way coherent the default setting for MTL. A follow
* up patch will extend the GEM_CREATE uAPI to allow UMD's specify
* caching mode at BO creation time
*/
if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than

View File

@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
fs_reclaim_release(GFP_KERNEL);
}
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
/**
* i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
* default all object types that support shrinking(see IS_SHRINKABLE), will also

View File

@ -535,6 +535,14 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
/* Basic memrange allocator for stolen space. */
drm_mm_init(&i915->mm.stolen, 0, i915->dsm.usable_size);
/*
* Access to stolen lmem beyond certain size for MTL A0 stepping
* would crash the machine. Disable stolen lmem for userspace access
* by setting usable_size to zero.
*/
if (IS_METEORLAKE(i915) && INTEL_REVID(i915) == 0x0)
i915->dsm.usable_size = 0;
return 0;
}
@ -557,7 +565,9 @@ static void dbg_poison(struct i915_ggtt *ggtt,
ggtt->vm.insert_page(&ggtt->vm, addr,
ggtt->error_capture.start,
I915_CACHE_NONE, 0);
i915_gem_get_pat_index(ggtt->vm.i915,
I915_CACHE_NONE),
0);
mb();
s = io_mapping_map_wc(&ggtt->iomap,

View File

@ -42,8 +42,9 @@ static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
/**
* i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
* struct drm_i915_gem_object.
* @bo: Pointer to the ttm buffer object
*
* Return: Pointer to the embedding struct ttm_buffer_object.
* Return: Pointer to the embedding struct drm_i915_gem_object.
*/
static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)

View File

@ -214,7 +214,8 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
dst_st->sgl, dst_level,
dst_st->sgl,
i915_gem_get_pat_index(i915, dst_level),
i915_ttm_gtt_binds_lmem(dst_mem),
0, &rq);
} else {
@ -228,9 +229,10 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
deps, src_rsgt->table.sgl,
src_level,
i915_gem_get_pat_index(i915, src_level),
i915_ttm_gtt_binds_lmem(bo->resource),
dst_st->sgl, dst_level,
dst_st->sgl,
i915_gem_get_pat_index(i915, dst_level),
i915_ttm_gtt_binds_lmem(dst_mem),
&rq);
@ -576,7 +578,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
struct dma_fence *migration_fence = NULL;
struct ttm_tt *ttm = bo->ttm;
struct i915_refct_sgt *dst_rsgt;
bool clear;
bool clear, prealloc_bo;
int ret;
if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
@ -632,7 +634,8 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
return PTR_ERR(dst_rsgt);
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
prealloc_bo = obj->flags & I915_BO_PREALLOC;
if (!(clear && ttm && !((ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) && !prealloc_bo))) {
struct i915_deps deps;
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

View File

@ -354,7 +354,7 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_NONE;
obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);
return obj;
}
@ -695,8 +695,7 @@ out_put:
return err;
}
static void close_object_list(struct list_head *objects,
struct i915_ppgtt *ppgtt)
static void close_object_list(struct list_head *objects)
{
struct drm_i915_gem_object *obj, *on;
@ -710,17 +709,36 @@ static void close_object_list(struct list_head *objects,
}
}
static int igt_mock_ppgtt_huge_fill(void *arg)
static int igt_ppgtt_huge_fill(void *arg)
{
struct i915_ppgtt *ppgtt = arg;
struct drm_i915_private *i915 = ppgtt->vm.i915;
unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT;
struct drm_i915_private *i915 = arg;
unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50);
struct i915_address_space *vm;
struct i915_gem_context *ctx;
unsigned long max_pages;
unsigned long page_num;
struct file *file;
bool single = false;
LIST_HEAD(objects);
IGT_TIMEOUT(end_time);
int err = -ENODEV;
if (supported == I915_GTT_PAGE_SIZE_4K)
return 0;
file = mock_file(i915);
if (IS_ERR(file))
return PTR_ERR(file);
ctx = hugepage_ctx(i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
}
vm = i915_gem_context_get_eb_vm(ctx);
max_pages = vm->total >> PAGE_SHIFT;
for_each_prime_number_from(page_num, 1, max_pages) {
struct drm_i915_gem_object *obj;
u64 size = page_num << PAGE_SHIFT;
@ -750,13 +768,14 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
list_add(&obj->st_link, &objects);
vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
break;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
/* vma start must be aligned to BIT(21) to allow 2M PTEs */
err = i915_vma_pin(vma, 0, BIT(21), PIN_USER);
if (err)
break;
@ -784,12 +803,13 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
GEM_BUG_ON(!expected_gtt);
GEM_BUG_ON(size);
if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
if (!has_pte64 && (obj->base.size < I915_GTT_PAGE_SIZE_2M ||
expected_gtt & I915_GTT_PAGE_SIZE_2M))
expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;
i915_vma_unpin(vma);
if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
if (!has_pte64 && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
if (!IS_ALIGNED(vma->node.start,
I915_GTT_PAGE_SIZE_2M)) {
pr_err("node.start(%llx) not aligned to 2M\n",
@ -808,7 +828,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
}
if (vma->resource->page_sizes_gtt != expected_gtt) {
pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
pr_err("gtt=%#x, expected=%#x, size=0x%zx, single=%s\n",
vma->resource->page_sizes_gtt, expected_gtt,
obj->base.size, str_yes_no(!!single));
err = -EINVAL;
@ -823,19 +843,25 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
single = !single;
}
close_object_list(&objects, ppgtt);
close_object_list(&objects);
if (err == -ENOMEM || err == -ENOSPC)
err = 0;
i915_vm_put(vm);
out:
fput(file);
return err;
}
static int igt_mock_ppgtt_64K(void *arg)
static int igt_ppgtt_64K(void *arg)
{
struct i915_ppgtt *ppgtt = arg;
struct drm_i915_private *i915 = ppgtt->vm.i915;
struct drm_i915_private *i915 = arg;
bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50);
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
struct i915_gem_context *ctx;
struct file *file;
const struct object_info {
unsigned int size;
unsigned int gtt;
@ -907,16 +933,41 @@ static int igt_mock_ppgtt_64K(void *arg)
if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
return 0;
file = mock_file(i915);
if (IS_ERR(file))
return PTR_ERR(file);
ctx = hugepage_ctx(i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
}
vm = i915_gem_context_get_eb_vm(ctx);
for (i = 0; i < ARRAY_SIZE(objects); ++i) {
unsigned int size = objects[i].size;
unsigned int expected_gtt = objects[i].gtt;
unsigned int offset = objects[i].offset;
unsigned int flags = PIN_USER;
/*
* For modern GTT models, the requirements for marking a page-table
* as 64K have been relaxed. Account for this.
*/
if (has_pte64) {
expected_gtt = 0;
if (size >= SZ_64K)
expected_gtt |= I915_GTT_PAGE_SIZE_64K;
if (size & (SZ_64K - 1))
expected_gtt |= I915_GTT_PAGE_SIZE_4K;
}
for (single = 0; single <= 1; single++) {
obj = fake_huge_pages_object(i915, size, !!single);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto out_vm;
}
err = i915_gem_object_pin_pages_unlocked(obj);
if (err)
@ -928,7 +979,7 @@ static int igt_mock_ppgtt_64K(void *arg)
*/
obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto out_object_unpin;
@ -945,7 +996,8 @@ static int igt_mock_ppgtt_64K(void *arg)
if (err)
goto out_vma_unpin;
if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
if (!has_pte64 && !offset &&
vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
if (!IS_ALIGNED(vma->node.start,
I915_GTT_PAGE_SIZE_2M)) {
pr_err("node.start(%llx) not aligned to 2M\n",
@ -964,9 +1016,10 @@ static int igt_mock_ppgtt_64K(void *arg)
}
if (vma->resource->page_sizes_gtt != expected_gtt) {
pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
pr_err("gtt=%#x, expected=%#x, i=%d, single=%s offset=%#x size=%#x\n",
vma->resource->page_sizes_gtt,
expected_gtt, i, str_yes_no(!!single));
expected_gtt, i, str_yes_no(!!single),
offset, size);
err = -EINVAL;
goto out_vma_unpin;
}
@ -982,7 +1035,7 @@ static int igt_mock_ppgtt_64K(void *arg)
}
}
return 0;
goto out_vm;
out_vma_unpin:
i915_vma_unpin(vma);
@ -992,7 +1045,10 @@ out_object_unpin:
i915_gem_object_unlock(obj);
out_object_put:
i915_gem_object_put(obj);
out_vm:
i915_vm_put(vm);
out:
fput(file);
return err;
}
@ -1910,8 +1966,6 @@ int i915_gem_huge_page_mock_selftests(void)
SUBTEST(igt_mock_exhaust_device_supported_pages),
SUBTEST(igt_mock_memory_region_huge_pages),
SUBTEST(igt_mock_ppgtt_misaligned_dma),
SUBTEST(igt_mock_ppgtt_huge_fill),
SUBTEST(igt_mock_ppgtt_64K),
};
struct drm_i915_private *dev_priv;
struct i915_ppgtt *ppgtt;
@ -1962,6 +2016,8 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_sanity_check),
SUBTEST(igt_ppgtt_compact),
SUBTEST(igt_ppgtt_mixed),
SUBTEST(igt_ppgtt_huge_fill),
SUBTEST(igt_ppgtt_64K),
};
if (!HAS_PPGTT(i915)) {

View File

@ -66,7 +66,7 @@ static int live_nop_switch(void *arg)
ctx[n] = live_context(i915, file);
if (IS_ERR(ctx[n])) {
err = PTR_ERR(ctx[n]);
goto out_file;
goto out_ctx;
}
}
@ -82,7 +82,7 @@ static int live_nop_switch(void *arg)
this = igt_request_alloc(ctx[n], engine);
if (IS_ERR(this)) {
err = PTR_ERR(this);
goto out_file;
goto out_ctx;
}
if (rq) {
i915_request_await_dma_fence(this, &rq->fence);
@ -93,10 +93,10 @@ static int live_nop_switch(void *arg)
}
if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
pr_err("Failed to populated %d contexts\n", nctx);
intel_gt_set_wedged(to_gt(i915));
intel_gt_set_wedged(engine->gt);
i915_request_put(rq);
err = -EIO;
goto out_file;
goto out_ctx;
}
i915_request_put(rq);
@ -107,7 +107,7 @@ static int live_nop_switch(void *arg)
err = igt_live_test_begin(&t, i915, __func__, engine->name);
if (err)
goto out_file;
goto out_ctx;
end_time = jiffies + i915_selftest.timeout_jiffies;
for_each_prime_number_from(prime, 2, 8192) {
@ -120,7 +120,7 @@ static int live_nop_switch(void *arg)
this = igt_request_alloc(ctx[n % nctx], engine);
if (IS_ERR(this)) {
err = PTR_ERR(this);
goto out_file;
goto out_ctx;
}
if (rq) { /* Force submission order */
@ -149,7 +149,7 @@ static int live_nop_switch(void *arg)
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Switching between %ld contexts timed out\n",
prime);
intel_gt_set_wedged(to_gt(i915));
intel_gt_set_wedged(engine->gt);
i915_request_put(rq);
break;
}
@ -165,7 +165,7 @@ static int live_nop_switch(void *arg)
err = igt_live_test_end(&t);
if (err)
goto out_file;
goto out_ctx;
pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
engine->name,
@ -173,6 +173,8 @@ static int live_nop_switch(void *arg)
prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
}
out_ctx:
kfree(ctx);
out_file:
fput(file);
return err;

View File

@ -219,7 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt,
continue;
err = intel_migrate_clear(&gt->migrate, &ww, deps,
obj->mm.pages->sgl, obj->cache_level,
obj->mm.pages->sgl, obj->pat_index,
i915_gem_object_is_lmem(obj),
0xdeadbeaf, &rq);
if (rq) {

View File

@ -1222,7 +1222,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
}
err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
obj->mm.pages->sgl, obj->cache_level,
obj->mm.pages->sgl, obj->pat_index,
i915_gem_object_is_lmem(obj),
expand32(POISON_INUSE), &rq);
i915_gem_object_unpin_pages(obj);

View File

@ -109,7 +109,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
@ -117,7 +117,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE;
unsigned int act_pt = first_entry / GEN6_PTES;
unsigned int act_pte = first_entry % GEN6_PTES;
const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
const u32 pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_dma iter = sgt_dma(vma_res);
gen6_pte_t *vaddr;
@ -227,7 +227,9 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
vm->scratch[0]->encode =
vm->pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, PTE_READ_ONLY);
i915_gem_get_pat_index(vm->i915,
I915_CACHE_NONE),
PTE_READ_ONLY);
vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(vm->scratch[1])) {
@ -278,7 +280,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
static void pd_vma_bind(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

View File

@ -29,7 +29,7 @@ static u64 gen8_pde_encode(const dma_addr_t addr,
}
static u64 gen8_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
@ -40,7 +40,12 @@ static u64 gen8_pte_encode(dma_addr_t addr,
if (flags & PTE_LM)
pte |= GEN12_PPGTT_PTE_LM;
switch (level) {
/*
* For pre-gen12 platforms pat_index is the same as enum
* i915_cache_level, so the switch-case here is still valid.
* See translation table defined by LEGACY_CACHELEVEL.
*/
switch (pat_index) {
case I915_CACHE_NONE:
pte |= PPAT_UNCACHED;
break;
@ -55,6 +60,33 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
}
static u64 gen12_pte_encode(dma_addr_t addr,
unsigned int pat_index,
u32 flags)
{
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
if (unlikely(flags & PTE_READ_ONLY))
pte &= ~GEN8_PAGE_RW;
if (flags & PTE_LM)
pte |= GEN12_PPGTT_PTE_LM;
if (pat_index & BIT(0))
pte |= GEN12_PPGTT_PTE_PAT0;
if (pat_index & BIT(1))
pte |= GEN12_PPGTT_PTE_PAT1;
if (pat_index & BIT(2))
pte |= GEN12_PPGTT_PTE_PAT2;
if (pat_index & BIT(3))
pte |= MTL_PPGTT_PTE_PAT3;
return pte;
}
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
struct drm_i915_private *i915 = ppgtt->vm.i915;
@ -423,11 +455,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
struct i915_page_directory *pdp,
struct sgt_dma *iter,
u64 idx,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
struct i915_page_directory *pd;
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
gen8_pte_t *vaddr;
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@ -470,10 +502,10 @@ static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
struct sgt_dma *iter,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
u64 end = start + vma_res->vma_size;
@ -570,6 +602,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
}
} while (rem >= page_size && index < max);
drm_clflush_virt_range(vaddr, PAGE_SIZE);
vma_res->page_sizes_gtt |= page_size;
} while (iter->sg && sg_dma_len(iter->sg));
}
@ -577,10 +610,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
struct sgt_dma *iter,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
@ -700,17 +733,17 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
static void gen8_ppgtt_insert(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
struct sgt_dma iter = sgt_dma(vma_res);
if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
if (HAS_64K_PAGES(vm->i915))
xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50))
xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
else
gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
} else {
u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
@ -719,7 +752,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
gen8_pdp_for_page_index(vm, idx);
idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
cache_level, flags);
pat_index, flags);
} while (idx);
vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
@ -729,7 +762,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
u64 idx = offset >> GEN8_PTE_SHIFT;
@ -743,14 +776,14 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
vaddr = px_vaddr(pt);
vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}
static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
u64 idx = offset >> GEN8_PTE_SHIFT;
@ -773,20 +806,20 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
}
vaddr = px_vaddr(pt);
vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}
static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
if (flags & PTE_LM)
return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
level, flags);
pat_index, flags);
return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}
static int gen8_init_scratch(struct i915_address_space *vm)
@ -820,8 +853,10 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
vm->scratch[0]->encode =
gen8_pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, pte_flags);
vm->pte_encode(px_dma(vm->scratch[0]),
i915_gem_get_pat_index(vm->i915,
I915_CACHE_NONE),
pte_flags);
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@ -963,6 +998,9 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
*/
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
if (GRAPHICS_VER(gt->i915) >= 12)
ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;

View File

@ -10,13 +10,12 @@
struct i915_address_space;
struct intel_gt;
enum i915_cache_level;
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
unsigned long lmem_pt_obj_flags);
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags);
#endif

View File

@ -578,10 +578,13 @@ void intel_context_bind_parent_child(struct intel_context *parent,
child->parallel.parent = parent;
}
u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
u64 total, active;
if (ce->ops->update_stats)
ce->ops->update_stats(ce);
total = ce->stats.runtime.total;
if (ce->ops->flags & COPS_RUNTIME_CYCLES)
total *= ce->engine->gt->clock_period_ns;

View File

@ -97,7 +97,7 @@ void intel_context_bind_parent_child(struct intel_context *parent,
/**
* intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
* @ce - the context
* @ce: the context
*
* Acquire a lock on the pinned status of the HW context, such that the context
* can neither be bound to the GPU or unbound whilst the lock is held, i.e.
@ -111,7 +111,7 @@ static inline int intel_context_lock_pinned(struct intel_context *ce)
/**
* intel_context_is_pinned - Reports the 'pinned' status
* @ce - the context
* @ce: the context
*
* While in use by the GPU, the context, along with its ring and page
* tables is pinned into memory and the GTT.
@ -133,7 +133,7 @@ static inline void intel_context_cancel_request(struct intel_context *ce,
/**
* intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status
* @ce - the context
* @ce: the context
*
* Releases the lock earlier acquired by intel_context_unlock_pinned().
*/
@ -375,7 +375,7 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}
u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
u64 intel_context_get_total_runtime_ns(struct intel_context *ce);
u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
static inline u64 intel_context_clock(void)

View File

@ -58,6 +58,8 @@ struct intel_context_ops {
void (*sched_disable)(struct intel_context *ce);
void (*update_stats)(struct intel_context *ce);
void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref);

View File

@ -1515,7 +1515,7 @@ int intel_engines_init(struct intel_gt *gt)
}
/**
* intel_engines_cleanup_common - cleans up the engine state created by
* intel_engine_cleanup_common - cleans up the engine state created by
* the common initiailizers.
* @engine: Engine to cleanup.
*

View File

@ -289,6 +289,7 @@ struct intel_engine_execlists {
*/
u8 csb_head;
/* private: selftest */
I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

View File

@ -117,7 +117,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
disabled |= (I915_SCHEDULER_CAP_ENABLED |
I915_SCHEDULER_CAP_PRIORITY);
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
if (intel_uc_uses_guc_submission(&engine->gt->uc))
enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP;
for (i = 0; i < ARRAY_SIZE(map); i++) {

View File

@ -220,8 +220,28 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
}
}
static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
unsigned int pat_index,
u32 flags)
{
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
if (flags & PTE_LM)
pte |= GEN12_GGTT_PTE_LM;
if (pat_index & BIT(0))
pte |= MTL_GGTT_PTE_PAT0;
if (pat_index & BIT(1))
pte |= MTL_GGTT_PTE_PAT1;
return pte;
}
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
@ -240,25 +260,25 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
gen8_pte_t __iomem *pte =
(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));
ggtt->invalidate(ggtt);
}
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
gen8_pte_t __iomem *gte;
gen8_pte_t __iomem *end;
struct sgt_iter iter;
@ -315,14 +335,14 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
gen6_pte_t __iomem *pte =
(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
iowrite32(vm->pte_encode(addr, level, flags), pte);
iowrite32(vm->pte_encode(addr, pat_index, flags), pte);
ggtt->invalidate(ggtt);
}
@ -335,7 +355,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
*/
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@ -352,7 +372,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
iowrite32(vm->scratch[0]->encode, gte++);
end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
iowrite32(vm->pte_encode(addr, level, flags), gte++);
iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
GEM_BUG_ON(gte > end);
/* Fill the allocated but "unused" space beyond the end of the buffer */
@ -387,14 +407,15 @@ struct insert_page {
struct i915_address_space *vm;
dma_addr_t addr;
u64 offset;
enum i915_cache_level level;
unsigned int pat_index;
};
static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
struct insert_page *arg = _arg;
gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
arg->pat_index, 0);
bxt_vtd_ggtt_wa(arg->vm);
return 0;
@ -403,10 +424,10 @@ static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
unsigned int pat_index,
u32 unused)
{
struct insert_page arg = { vm, addr, offset, level };
struct insert_page arg = { vm, addr, offset, pat_index };
stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}
@ -414,7 +435,7 @@ static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
struct insert_entries {
struct i915_address_space *vm;
struct i915_vma_resource *vma_res;
enum i915_cache_level level;
unsigned int pat_index;
u32 flags;
};
@ -422,7 +443,8 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
struct insert_entries *arg = _arg;
gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
arg->pat_index, arg->flags);
bxt_vtd_ggtt_wa(arg->vm);
return 0;
@ -430,10 +452,10 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags)
{
struct insert_entries arg = { vm, vma_res, level, flags };
struct insert_entries arg = { vm, vma_res, pat_index, flags };
stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}
@ -462,7 +484,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
void intel_ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
u32 pte_flags;
@ -479,7 +501,7 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm,
if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
vm->insert_entries(vm, vma_res, cache_level, pte_flags);
vm->insert_entries(vm, vma_res, pat_index, pte_flags);
vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}
@ -628,7 +650,7 @@ err:
static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
u32 pte_flags;
@ -640,10 +662,10 @@ static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
if (flags & I915_VMA_LOCAL_BIND)
ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
stash, vma_res, cache_level, flags);
stash, vma_res, pat_index, flags);
if (flags & I915_VMA_GLOBAL_BIND)
vm->insert_entries(vm, vma_res, cache_level, pte_flags);
vm->insert_entries(vm, vma_res, pat_index, pte_flags);
vma_res->bound_flags |= flags;
}
@ -900,7 +922,9 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
ggtt->vm.scratch[0]->encode =
ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
I915_CACHE_NONE, pte_flags);
i915_gem_get_pat_index(i915,
I915_CACHE_NONE),
pte_flags);
return 0;
}
@ -981,11 +1005,19 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
else
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
return ggtt_probe_common(ggtt, size);
}
/*
* For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
* so these PTE encode functions are left with using cache_level.
* See translation table LEGACY_CACHELEVEL.
*/
static u64 snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags)
@ -1266,7 +1298,9 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
*/
vma->resource->bound_flags = 0;
vma->ops->bind_vma(vm, NULL, vma->resource,
obj ? obj->cache_level : 0,
obj ? obj->pat_index :
i915_gem_get_pat_index(vm->i915,
I915_CACHE_NONE),
was_bound);
if (obj) { /* only used during resume => exclusive access */

View File

@ -15,6 +15,7 @@
#include "intel_uncore.h"
#include "intel_rps.h"
#include "pxp/intel_pxp_irq.h"
#include "uc/intel_gsc_proxy.h"
static void guc_irq_handler(struct intel_guc *guc, u16 iir)
{
@ -81,6 +82,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
if (instance == OTHER_GSC_INSTANCE)
return intel_gsc_irq_handler(gt, iir);
if (instance == OTHER_GSC_HECI_2_INSTANCE)
return intel_gsc_proxy_irq_handler(&gt->uc.gsc, iir);
WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
instance, iir);
}
@ -100,7 +104,10 @@ static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance)
case VIDEO_ENHANCEMENT_CLASS:
return media_gt;
case OTHER_CLASS:
if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, GSC0))
if (instance == OTHER_GSC_HECI_2_INSTANCE)
return media_gt;
if ((instance == OTHER_GSC_INSTANCE || instance == OTHER_KCR_INSTANCE) &&
HAS_ENGINE(media_gt, GSC0))
return media_gt;
fallthrough;
default:
@ -256,6 +263,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
u32 irqs = GT_RENDER_USER_INTERRUPT;
u32 guc_mask = intel_uc_wants_guc(&gt->uc) ? GUC_INTR_GUC2HOST : 0;
u32 gsc_mask = 0;
u32 heci_mask = 0;
u32 dmask;
u32 smask;
@ -267,10 +275,16 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
dmask = irqs << 16 | irqs;
smask = irqs << 16;
if (HAS_ENGINE(gt, GSC0))
if (HAS_ENGINE(gt, GSC0)) {
/*
* the heci2 interrupt is enabled via the same register as the
* GSC interrupt, but it has its own mask register.
*/
gsc_mask = irqs;
else if (HAS_HECI_GSC(gt->i915))
heci_mask = GSC_IRQ_INTF(1); /* HECI2 IRQ for SW Proxy*/
} else if (HAS_HECI_GSC(gt->i915)) {
gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1);
}
BUILD_BUG_ON(irqs & 0xffff0000);
@ -280,7 +294,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
if (CCS_MASK(gt))
intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask);
if (gsc_mask)
intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, gsc_mask);
intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, gsc_mask | heci_mask);
/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask);
@ -308,6 +322,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask);
if (gsc_mask)
intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask);
if (heci_mask)
intel_uncore_write(uncore, GEN12_HECI2_RSVD_INTR_MASK,
~REG_FIELD_PREP(ENGINE1_MASK, heci_mask));
if (guc_mask) {
/* the enable bit is common for both GTs but the masks are separate */

View File

@ -87,7 +87,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
intel_rc6_unpark(&gt->rc6);
intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
i915_pmu_gt_unparked(gt);
intel_guc_busyness_unpark(gt);
intel_gt_unpark_requests(gt);
@ -109,7 +109,7 @@ static int __gt_park(struct intel_wakeref *wf)
intel_guc_busyness_park(gt);
i915_vma_parked(gt);
i915_pmu_gt_parked(i915);
i915_pmu_gt_parked(gt);
intel_rps_park(&gt->rps);
intel_rc6_park(&gt->rc6);

View File

@ -539,6 +539,9 @@ static bool rps_eval(void *data)
{
struct intel_gt *gt = data;
if (intel_guc_slpc_is_used(&gt->uc.guc))
return false;
else
return HAS_RPS(gt->i915);
}

View File

@ -356,7 +356,11 @@
#define GEN7_TLB_RD_ADDR _MMIO(0x4700)
#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4)
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
0x4800, 0x4804, \
0x4848, 0x484c)
#define XEHP_PAT_INDEX(index) MCR_REG(_PAT_INDEX(index))
#define XELPMP_PAT_INDEX(index) _MMIO(_PAT_INDEX(index))
#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900)
#define XEHP_TILE_LMEM_RANGE_SHIFT 8
@ -525,6 +529,11 @@
#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
#define GEN12_SQCNT1 _MMIO(0x8718)
#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
#define GEN12_SQCNT1_OABPC REG_BIT(29)
#define GEN12_STRICT_RAR_ENABLE REG_BIT(23)
#define XEHP_SQCM MCR_REG(0x8724)
#define EN_32B_ACCESS REG_BIT(30)
@ -1587,6 +1596,7 @@
#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4))
#define GEN11_CSME (31)
#define GEN12_HECI_2 (30)
#define GEN11_GUNIT (28)
#define GEN11_GUC (25)
#define MTL_MGUC (24)
@ -1628,6 +1638,7 @@
/* irq instances for OTHER_CLASS */
#define OTHER_GUC_INSTANCE 0
#define OTHER_GTPM_INSTANCE 1
#define OTHER_GSC_HECI_2_INSTANCE 3
#define OTHER_KCR_INSTANCE 4
#define OTHER_GSC_INSTANCE 6
#define OTHER_MEDIA_GUC_INSTANCE 16
@ -1643,6 +1654,7 @@
#define GEN12_VCS6_VCS7_INTR_MASK _MMIO(0x1900b4)
#define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0)
#define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4)
#define GEN12_HECI2_RSVD_INTR_MASK _MMIO(0x1900e4)
#define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8)
#define MTL_GUC_MGUC_INTR_MASK _MMIO(0x1900e8) /* MTL+ */
#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec)

View File

@ -451,6 +451,33 @@ static ssize_t punit_req_freq_mhz_show(struct kobject *kobj,
return sysfs_emit(buff, "%u\n", preq);
}
static ssize_t slpc_ignore_eff_freq_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
return sysfs_emit(buff, "%u\n", slpc->ignore_eff_freq);
}
static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buff, size_t count)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
int err;
u32 val;
err = kstrtou32(buff, 0, &val);
if (err)
return err;
err = intel_guc_slpc_set_ignore_eff_freq(slpc, val);
return err ?: count;
}
struct intel_gt_bool_throttle_attr {
struct attribute attr;
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
@ -663,6 +690,8 @@ static struct kobj_attribute attr_media_freq_factor_scale =
INTEL_GT_ATTR_RO(media_RP0_freq_mhz);
INTEL_GT_ATTR_RO(media_RPn_freq_mhz);
INTEL_GT_ATTR_RW(slpc_ignore_eff_freq);
static const struct attribute *media_perf_power_attrs[] = {
&attr_media_freq_factor.attr,
&attr_media_freq_factor_scale.attr,
@ -744,6 +773,12 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
if (ret)
gt_warn(gt, "failed to create punit_req_freq_mhz sysfs (%pe)", ERR_PTR(ret));
if (intel_uc_uses_guc_slpc(&gt->uc)) {
ret = sysfs_create_file(kobj, &attr_slpc_ignore_eff_freq.attr);
if (ret)
gt_warn(gt, "failed to create ignore_eff_freq sysfs (%pe)", ERR_PTR(ret));
}
if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
ret = sysfs_create_files(kobj, throttle_reason_attrs);
if (ret)

View File

@ -468,6 +468,44 @@ void gtt_write_workarounds(struct intel_gt *gt)
}
}
static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
{
intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
MTL_PPAT_L4_0_WB);
intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
MTL_PPAT_L4_1_WT);
intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
MTL_PPAT_L4_3_UC);
intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
/*
* Remaining PAT entries are left at the hardware-default
* fully-cached setting
*/
}
static void xelpg_setup_private_ppat(struct intel_gt *gt)
{
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
MTL_PPAT_L4_0_WB);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
MTL_PPAT_L4_1_WT);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
MTL_PPAT_L4_3_UC);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
/*
* Remaining PAT entries are left at the hardware-default
* fully-cached setting
*/
}
static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
/* TGL doesn't support LLC or AGE settings */
@ -603,7 +641,14 @@ void setup_private_pat(struct intel_gt *gt)
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
if (gt->type == GT_MEDIA) {
xelpmp_setup_private_ppat(gt->uncore);
return;
}
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
xelpg_setup_private_ppat(gt);
else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
xehp_setup_private_ppat(gt);
else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);

View File

@ -88,9 +88,17 @@ typedef u64 gen8_pte_t;
#define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2)
#define BYT_PTE_WRITEABLE REG_BIT(1)
#define MTL_PPGTT_PTE_PAT3 BIT_ULL(62)
#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
#define GEN12_PPGTT_PTE_PAT2 BIT_ULL(7)
#define GEN12_PPGTT_PTE_PAT1 BIT_ULL(4)
#define GEN12_PPGTT_PTE_PAT0 BIT_ULL(3)
#define GEN12_GGTT_PTE_LM BIT_ULL(1)
#define MTL_GGTT_PTE_PAT0 BIT_ULL(52)
#define MTL_GGTT_PTE_PAT1 BIT_ULL(53)
#define GEN12_GGTT_PTE_ADDR_MASK GENMASK_ULL(45, 12)
#define MTL_GGTT_PTE_PAT_MASK GENMASK_ULL(53, 52)
#define GEN12_PDE_64K BIT(6)
#define GEN12_PTE_PS64 BIT(8)
@ -147,7 +155,13 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
enum i915_cache_level;
#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2)
#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0)
#define MTL_PPAT_L4_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
#define MTL_PPAT_L4_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
#define MTL_PPAT_L4_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
struct drm_i915_gem_object;
struct i915_fence_reg;
@ -216,7 +230,7 @@ struct i915_vma_ops {
void (*bind_vma)(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags);
/*
* Unmap an object from an address space. This usually consists of
@ -288,7 +302,7 @@ struct i915_address_space {
(*alloc_scratch_dma)(struct i915_address_space *vm, int sz);
u64 (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
unsigned int pat_index,
u32 flags); /* Create a valid PTE */
#define PTE_READ_ONLY BIT(0)
#define PTE_LM BIT(1)
@ -303,20 +317,20 @@ struct i915_address_space {
void (*insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags);
void (*insert_entries)(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags);
void (*raw_insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags);
void (*raw_insert_entries)(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags);
void (*cleanup)(struct i915_address_space *vm);
@ -493,7 +507,7 @@ static inline void i915_vm_put(struct i915_address_space *vm)
/**
* i915_vm_resv_put - Release a reference on the vm's reservation lock
* @resv: Pointer to a reservation lock obtained from i915_vm_resv_get()
* @vm: The vm whose reservation lock reference we want to release
*/
static inline void i915_vm_resv_put(struct i915_address_space *vm)
{
@ -563,7 +577,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
void intel_ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags);
void intel_ggtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res);
@ -641,7 +655,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags);
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res);

View File

@ -1370,7 +1370,9 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
cs, GEN12_GFX_CCS_AUX_NV);
/* Wa_16014892111 */
if (IS_DG2(ce->engine->i915))
if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) ||
IS_DG2(ce->engine->i915))
cs = dg2_emit_draw_watermark_setting(cs);
return cs;

View File

@ -45,7 +45,9 @@ static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
* Insert a dummy PTE into every PT that will map to LMEM to ensure
* we have a correctly setup PDE structure for later use.
*/
vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM);
vm->insert_page(vm, 0, d->offset,
i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
PTE_LM);
GEM_BUG_ON(!pt->is_compact);
d->offset += SZ_2M;
}
@ -63,7 +65,9 @@ static void xehpsdv_insert_pte(struct i915_address_space *vm,
* alignment is 64K underneath for the pt, and we are careful
* not to access the space in the void.
*/
vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM);
vm->insert_page(vm, px_dma(pt), d->offset,
i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
PTE_LM);
d->offset += SZ_64K;
}
@ -73,7 +77,8 @@ static void insert_pte(struct i915_address_space *vm,
{
struct insert_pte_data *d = data;
vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
vm->insert_page(vm, px_dma(pt), d->offset,
i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
d->offset += PAGE_SIZE;
}
@ -356,13 +361,13 @@ static int max_pte_pkt_size(struct i915_request *rq, int pkt)
static int emit_pte(struct i915_request *rq,
struct sgt_dma *it,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool is_lmem,
u64 offset,
int length)
{
bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915);
const u64 encode = rq->context->vm->pte_encode(0, cache_level,
const u64 encode = rq->context->vm->pte_encode(0, pat_index,
is_lmem ? PTE_LM : 0);
struct intel_ring *ring = rq->ring;
int pkt, dword_length;
@ -673,17 +678,17 @@ int
intel_context_migrate_copy(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out)
{
struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
struct drm_i915_private *i915 = ce->engine->i915;
u64 ccs_bytes_to_cpy = 0, bytes_to_cpy;
enum i915_cache_level ccs_cache_level;
unsigned int ccs_pat_index;
u32 src_offset, dst_offset;
u8 src_access, dst_access;
struct i915_request *rq;
@ -707,12 +712,12 @@ intel_context_migrate_copy(struct intel_context *ce,
dst_sz = scatter_list_length(dst);
if (src_is_lmem) {
it_ccs = it_dst;
ccs_cache_level = dst_cache_level;
ccs_pat_index = dst_pat_index;
ccs_is_src = false;
} else if (dst_is_lmem) {
bytes_to_cpy = dst_sz;
it_ccs = it_src;
ccs_cache_level = src_cache_level;
ccs_pat_index = src_pat_index;
ccs_is_src = true;
}
@ -773,7 +778,7 @@ intel_context_migrate_copy(struct intel_context *ce,
src_sz = calculate_chunk_sz(i915, src_is_lmem,
bytes_to_cpy, ccs_bytes_to_cpy);
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
len = emit_pte(rq, &it_src, src_pat_index, src_is_lmem,
src_offset, src_sz);
if (!len) {
err = -EINVAL;
@ -784,7 +789,7 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_rq;
}
err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
err = emit_pte(rq, &it_dst, dst_pat_index, dst_is_lmem,
dst_offset, len);
if (err < 0)
goto out_rq;
@ -811,7 +816,7 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_rq;
ccs_sz = GET_CCS_BYTES(i915, len);
err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
err = emit_pte(rq, &it_ccs, ccs_pat_index, false,
ccs_is_src ? src_offset : dst_offset,
ccs_sz);
if (err < 0)
@ -920,7 +925,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size,
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
if (HAS_FLAT_CCS(i915) && ver >= 12)
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
ring_sz = XY_FAST_COLOR_BLT_DW;
else if (ver >= 8)
ring_sz = 8;
@ -931,7 +936,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size,
if (IS_ERR(cs))
return PTR_ERR(cs);
if (HAS_FLAT_CCS(i915) && ver >= 12) {
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
(XY_FAST_COLOR_BLT_DW - 2);
*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
@ -979,7 +984,7 @@ int
intel_context_migrate_clear(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out)
@ -1027,7 +1032,7 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ);
len = emit_pte(rq, &it, pat_index, is_lmem, offset, CHUNK_SZ);
if (len <= 0) {
err = len;
goto out_rq;
@ -1074,10 +1079,10 @@ int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out)
{
@ -1098,8 +1103,8 @@ int intel_migrate_copy(struct intel_migrate *m,
goto out;
err = intel_context_migrate_copy(ce, deps,
src, src_cache_level, src_is_lmem,
dst, dst_cache_level, dst_is_lmem,
src, src_pat_index, src_is_lmem,
dst, dst_pat_index, dst_is_lmem,
out);
intel_context_unpin(ce);
@ -1113,7 +1118,7 @@ intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out)
@ -1134,7 +1139,7 @@ intel_migrate_clear(struct intel_migrate *m,
if (err)
goto out;
err = intel_context_migrate_clear(ce, deps, sg, cache_level,
err = intel_context_migrate_clear(ce, deps, sg, pat_index,
is_lmem, value, out);
intel_context_unpin(ce);

View File

@ -16,7 +16,6 @@ struct i915_request;
struct i915_gem_ww_ctx;
struct intel_gt;
struct scatterlist;
enum i915_cache_level;
int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt);
@ -26,20 +25,20 @@ int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out);
int intel_context_migrate_copy(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
unsigned int dst_pat_index,
bool dst_is_lmem,
struct i915_request **out);
@ -48,7 +47,7 @@ intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out);
@ -56,7 +55,7 @@ int
intel_context_migrate_clear(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool is_lmem,
u32 value,
struct i915_request **out);

View File

@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
#define LE_COS(value) ((value) << 15)
#define LE_SSE(value) ((value) << 17)
/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
#define _L4_CACHEABILITY(value) ((value) << 2)
#define IG_PAT(value) ((value) << 8)
/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
#define L3_ESC(value) ((value) << 0)
#define L3_SCC(value) ((value) << 1)
@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
/* Helper defines */
#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
#define PVC_NUM_MOCS_ENTRIES 3
#define MTL_NUM_MOCS_ENTRIES 16
/* (e)LLC caching options */
/*
@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
#define L3_2_RESERVED _L3_CACHEABILITY(2)
#define L3_3_WB _L3_CACHEABILITY(3)
/* L4 caching options */
#define L4_0_WB _L4_CACHEABILITY(0)
#define L4_1_WT _L4_CACHEABILITY(1)
#define L4_2_RESERVED _L4_CACHEABILITY(2)
#define L4_3_UC _L4_CACHEABILITY(3)
#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
[__idx] = { \
.control_value = __control_value, \
@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
MOCS_ENTRY(2, 0, L3_3_WB),
};
static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
/* Error - Reserved for Non-Use */
MOCS_ENTRY(0,
IG_PAT(0),
L3_LKUP(1) | L3_3_WB),
/* Cached - L3 + L4 */
MOCS_ENTRY(1,
IG_PAT(1),
L3_LKUP(1) | L3_3_WB),
/* L4 - GO:L3 */
MOCS_ENTRY(2,
IG_PAT(1),
L3_LKUP(1) | L3_1_UC),
/* Uncached - GO:L3 */
MOCS_ENTRY(3,
IG_PAT(1) | L4_3_UC,
L3_LKUP(1) | L3_1_UC),
/* L4 - GO:Mem */
MOCS_ENTRY(4,
IG_PAT(1),
L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
/* Uncached - GO:Mem */
MOCS_ENTRY(5,
IG_PAT(1) | L4_3_UC,
L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
/* L4 - L3:NoLKUP; GO:L3 */
MOCS_ENTRY(6,
IG_PAT(1),
L3_1_UC),
/* Uncached - L3:NoLKUP; GO:L3 */
MOCS_ENTRY(7,
IG_PAT(1) | L4_3_UC,
L3_1_UC),
/* L4 - L3:NoLKUP; GO:Mem */
MOCS_ENTRY(8,
IG_PAT(1),
L3_GLBGO(1) | L3_1_UC),
/* Uncached - L3:NoLKUP; GO:Mem */
MOCS_ENTRY(9,
IG_PAT(1) | L4_3_UC,
L3_GLBGO(1) | L3_1_UC),
/* Display - L3; L4:WT */
MOCS_ENTRY(14,
IG_PAT(1) | L4_1_WT,
L3_LKUP(1) | L3_3_WB),
/* CCS - Non-Displayable */
MOCS_ENTRY(15,
IG_PAT(1),
L3_GLBGO(1) | L3_1_UC),
};
enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
memset(table, 0, sizeof(struct drm_i915_mocs_table));
table->unused_entries_index = I915_MOCS_PTE;
if (IS_PONTEVECCHIO(i915)) {
if (IS_METEORLAKE(i915)) {
table->size = ARRAY_SIZE(mtl_mocs_table);
table->table = mtl_mocs_table;
table->n_entries = MTL_NUM_MOCS_ENTRIES;
table->uc_index = 9;
table->unused_entries_index = 1;
} else if (IS_PONTEVECCHIO(i915)) {
table->size = ARRAY_SIZE(pvc_mocs_table);
table->table = pvc_mocs_table;
table->n_entries = PVC_NUM_MOCS_ENTRIES;

View File

@ -181,7 +181,7 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
unsigned int pat_index,
u32 flags)
{
u32 pte_flags;
@ -199,7 +199,7 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
if (vma_res->bi.lmem)
pte_flags |= PTE_LM;
vm->insert_entries(vm, vma_res, cache_level, pte_flags);
vm->insert_entries(vm, vma_res, pat_index, pte_flags);
wmb();
}

View File

@ -53,11 +53,6 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
return rc6_to_gt(rc)->i915;
}
static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
intel_uncore_write_fw(uncore, reg, val);
}
static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
struct intel_gt *gt = rc6_to_gt(rc6);
@ -72,19 +67,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
*/
if (!intel_uc_uses_guc_rc(&gt->uc)) {
/* 2b: Program RC6 thresholds.*/
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);
set(uncore, GEN6_RC_SLEEP, 0);
intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
}
/*
@ -105,8 +100,8 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* Broadwell+, To be conservative, we want to factor in a context
* switch on top (due to ksoftirqd).
*/
set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
/* 3a: Enable RC6
*
@ -122,8 +117,14 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
GEN6_RC_CTL_EI_MODE(1);
/* Wa_16011777198 - Render powergating must remain disabled */
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
/*
* Wa_16011777198 and BSpec 52698 - Render powergating must be off.
* FIXME BSpec is outdated, disabling powergating for MTL is just
* temporary wa and should be removed after fixing real cause
* of forcewake timeouts.
*/
if (IS_METEORLAKE(gt->i915) ||
IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
pg_enable =
GEN9_MEDIA_PG_ENABLE |
@ -141,7 +142,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
VDN_MFX_POWERGATE_ENABLE(i));
}
set(uncore, GEN9_PG_ENABLE, pg_enable);
intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
}
static void gen9_rc6_enable(struct intel_rc6 *rc6)
@ -152,26 +153,26 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
/* 2b: Program RC6 thresholds.*/
if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) {
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
} else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
/*
* WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
* when CPG is enabled
*/
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
} else {
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
}
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);
set(uncore, GEN6_RC_SLEEP, 0);
intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
/*
* 2c: Program Coarse Power Gating Policies.
@ -194,11 +195,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
* conservative, we have to factor in a context switch on top (due
* to ksoftirqd).
*/
set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
/* 3a: Enable RC6 */
set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
@ -210,7 +211,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
* - Render/Media PG need to be disabled with RC6.
*/
if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
set(uncore, GEN9_PG_ENABLE,
intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}
@ -221,13 +222,13 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6)
enum intel_engine_id id;
/* 2b: Program RC6 thresholds.*/
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC_SLEEP, 0);
set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
/* 3: Enable RC6 */
rc6->ctl_enable =
@ -245,20 +246,20 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
u32 rc6vids, rc6_mask;
int ret;
set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
intel_uncore_write_fw(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
intel_uncore_write_fw(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC_SLEEP, 0);
set(uncore, GEN6_RC1e_THRESHOLD, 1000);
set(uncore, GEN6_RC6_THRESHOLD, 50000);
set(uncore, GEN6_RC6p_THRESHOLD, 150000);
set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
intel_uncore_write_fw(uncore, GEN6_RC1e_THRESHOLD, 1000);
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000);
intel_uncore_write_fw(uncore, GEN6_RC6p_THRESHOLD, 150000);
intel_uncore_write_fw(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
/* We don't use those on Haswell */
rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
@ -372,19 +373,19 @@ static void chv_rc6_enable(struct intel_rc6 *rc6)
enum intel_engine_id id;
/* 2a: Program RC6 thresholds.*/
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC_SLEEP, 0);
intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
/* TO threshold set to 500 us (0x186 * 1.28 us) */
set(uncore, GEN6_RC6_THRESHOLD, 0x186);
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x186);
/* Allows RC6 residency counter to work */
set(uncore, VLV_COUNTER_CONTROL,
intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
VLV_MEDIA_RC6_COUNT_EN |
VLV_RENDER_RC6_COUNT_EN));
@ -399,17 +400,17 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6)
struct intel_engine_cs *engine;
enum intel_engine_id id;
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
for_each_engine(engine, rc6_to_gt(rc6), id)
set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
set(uncore, GEN6_RC6_THRESHOLD, 0x557);
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x557);
/* Allows RC6 residency counter to work */
set(uncore, VLV_COUNTER_CONTROL,
intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
VLV_MEDIA_RC0_COUNT_EN |
VLV_RENDER_RC0_COUNT_EN |
@ -575,9 +576,9 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
if (GRAPHICS_VER(i915) >= 9)
set(uncore, GEN9_PG_ENABLE, 0);
set(uncore, GEN6_RC_CONTROL, 0);
set(uncore, GEN6_RC_STATE, 0);
intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}
@ -684,7 +685,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
return;
/* Restore HW timers for automatic RC6 entry while busy */
set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
}
void intel_rc6_park(struct intel_rc6 *rc6)
@ -704,7 +705,7 @@ void intel_rc6_park(struct intel_rc6 *rc6)
return;
/* Turn off the HW timers and go directly to rc6 */
set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
if (HAS_RC6pp(rc6_to_i915(rc6)))
target = 0x6; /* deepest rc6 */
@ -712,7 +713,7 @@ void intel_rc6_park(struct intel_rc6 *rc6)
target = 0x5; /* deep rc6 */
else
target = 0x4; /* normal rc6 */
set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
intel_uncore_write_fw(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}
void intel_rc6_disable(struct intel_rc6 *rc6)
@ -735,7 +736,7 @@ void intel_rc6_fini(struct intel_rc6 *rc6)
/* We want the BIOS C6 state preserved across loads for MTL */
if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured)
set(uncore, GEN6_RC_STATE, rc6->bios_rc_state);
intel_uncore_write_fw(uncore, GEN6_RC_STATE, rc6->bios_rc_state);
pctx = fetch_and_zero(&rc6->pctx);
if (pctx)
@ -766,17 +767,17 @@ static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
* before we have set the default VLV_COUNTER_CONTROL value. So always
* set the high bit to be safe.
*/
set(uncore, VLV_COUNTER_CONTROL,
intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
upper = intel_uncore_read_fw(uncore, reg);
do {
tmp = upper;
set(uncore, VLV_COUNTER_CONTROL,
intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
_MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
lower = intel_uncore_read_fw(uncore, reg);
set(uncore, VLV_COUNTER_CONTROL,
intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
upper = intel_uncore_read_fw(uncore, reg);
} while (upper != tmp && --loop);

View File

@ -812,11 +812,25 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
}
static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
dg2_ctx_gt_tuning_init(engine, wal);
if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}
static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
mtl_ctx_gt_tuning_init(engine, wal);
if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
/* Wa_14014947963 */
@ -1695,14 +1709,20 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Wa_14018778641 / Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
/* Wa_14014830051 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
/* Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_14015795083 */
wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
}
/*
@ -1715,8 +1735,8 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0)) {
/*
* Wa_14018778641
* Wa_18018781329
*
* Note that although these registers are MCR on the primary
@ -1725,7 +1745,6 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
}
debug_dump_steering(gt);
}
@ -1743,6 +1762,13 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
{
if (IS_METEORLAKE(gt->i915)) {
if (gt->type != GT_MEDIA)
wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
}
if (IS_PONTEVECCHIO(gt->i915)) {
wa_mcr_write(wal, XEHPC_L3SCRUB,
SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
@ -2939,7 +2965,7 @@ static void
add_render_compute_tuning_settings(struct drm_i915_private *i915,
struct i915_wa_list *wal)
{
if (IS_DG2(i915))
if (IS_METEORLAKE(i915) || IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
/*

View File

@ -5,6 +5,7 @@
#include <linux/sort.h>
#include "gt/intel_gt_print.h"
#include "i915_selftest.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
@ -402,7 +403,7 @@ static int live_engine_pm(void *arg)
/* gt wakeref is async (deferred to workqueue) */
if (intel_gt_pm_wait_for_idle(gt)) {
pr_err("GT failed to idle\n");
gt_err(gt, "GT failed to idle\n");
return -EINVAL;
}
}

View File

@ -137,7 +137,7 @@ err_free_src:
static int intel_context_copy_ccs(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool write_to_ccs,
struct i915_request **out)
{
@ -185,7 +185,7 @@ static int intel_context_copy_ccs(struct intel_context *ce,
if (err)
goto out_rq;
len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
if (len <= 0) {
err = len;
goto out_rq;
@ -223,7 +223,7 @@ intel_migrate_ccs_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool write_to_ccs,
struct i915_request **out)
{
@ -243,7 +243,7 @@ intel_migrate_ccs_copy(struct intel_migrate *m,
if (err)
goto out;
err = intel_context_copy_ccs(ce, deps, sg, cache_level,
err = intel_context_copy_ccs(ce, deps, sg, pat_index,
write_to_ccs, out);
intel_context_unpin(ce);
@ -300,7 +300,7 @@ static int clear(struct intel_migrate *migrate,
/* Write the obj data into ccs surface */
err = intel_migrate_ccs_copy(migrate, &ww, NULL,
obj->mm.pages->sgl,
obj->cache_level,
obj->pat_index,
true, &rq);
if (rq && !err) {
if (i915_request_wait(rq, 0, HZ) < 0) {
@ -351,7 +351,7 @@ static int clear(struct intel_migrate *migrate,
err = intel_migrate_ccs_copy(migrate, &ww, NULL,
obj->mm.pages->sgl,
obj->cache_level,
obj->pat_index,
false, &rq);
if (rq && !err) {
if (i915_request_wait(rq, 0, HZ) < 0) {
@ -414,9 +414,9 @@ static int __migrate_copy(struct intel_migrate *migrate,
struct i915_request **out)
{
return intel_migrate_copy(migrate, ww, NULL,
src->mm.pages->sgl, src->cache_level,
src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl, dst->cache_level,
dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
@ -428,9 +428,9 @@ static int __global_copy(struct intel_migrate *migrate,
struct i915_request **out)
{
return intel_context_migrate_copy(migrate->context, NULL,
src->mm.pages->sgl, src->cache_level,
src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl, dst->cache_level,
dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
@ -455,7 +455,7 @@ static int __migrate_clear(struct intel_migrate *migrate,
{
return intel_migrate_clear(migrate, ww, NULL,
obj->mm.pages->sgl,
obj->cache_level,
obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
@ -468,7 +468,7 @@ static int __global_clear(struct intel_migrate *migrate,
{
return intel_context_migrate_clear(migrate->context, NULL,
obj->mm.pages->sgl,
obj->cache_level,
obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
@ -648,7 +648,7 @@ static int live_emit_pte_full_ring(void *arg)
*/
pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
it = sg_sgt(obj->mm.pages->sgl);
len = emit_pte(rq, &it, obj->cache_level, false, 0, CHUNK_SZ);
len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
if (!len) {
err = -EINVAL;
goto out_rq;
@ -844,7 +844,7 @@ static int wrap_ktime_compare(const void *A, const void *B)
static int __perf_clear_blt(struct intel_context *ce,
struct scatterlist *sg,
enum i915_cache_level cache_level,
unsigned int pat_index,
bool is_lmem,
size_t sz)
{
@ -858,7 +858,7 @@ static int __perf_clear_blt(struct intel_context *ce,
t0 = ktime_get();
err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
is_lmem, 0, &rq);
if (rq) {
if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
@ -904,7 +904,8 @@ static int perf_clear_blt(void *arg)
err = __perf_clear_blt(gt->migrate.context,
dst->mm.pages->sgl,
I915_CACHE_NONE,
i915_gem_get_pat_index(gt->i915,
I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
sizes[i]);
@ -919,10 +920,10 @@ static int perf_clear_blt(void *arg)
static int __perf_copy_blt(struct intel_context *ce,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
unsigned int dst_pat_index,
bool dst_is_lmem,
size_t sz)
{
@ -937,9 +938,9 @@ static int __perf_copy_blt(struct intel_context *ce,
t0 = ktime_get();
err = intel_context_migrate_copy(ce, NULL,
src, src_cache_level,
src, src_pat_index,
src_is_lmem,
dst, dst_cache_level,
dst, dst_pat_index,
dst_is_lmem,
&rq);
if (rq) {
@ -994,10 +995,12 @@ static int perf_copy_blt(void *arg)
err = __perf_copy_blt(gt->migrate.context,
src->mm.pages->sgl,
I915_CACHE_NONE,
i915_gem_get_pat_index(gt->i915,
I915_CACHE_NONE),
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl,
I915_CACHE_NONE,
i915_gem_get_pat_index(gt->i915,
I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
sz);

View File

@ -131,13 +131,14 @@ static int read_mocs_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table,
u32 *offset)
{
struct intel_gt *gt = rq->engine->gt;
u32 addr;
if (!table)
return 0;
if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
addr = global_mocs_offset();
addr = global_mocs_offset() + gt->uncore->gsi_offset;
else
addr = mocs_offset(rq->engine);

Some files were not shown because too many files have changed in this diff Show More