mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
drm/i915/perf: More documentation hooked to i915.rst
This adds a 'Perf' section to i915.rst with the following sub sections: - Overview - Comparison with Core Perf - i915 Driver Entry Points - i915 Perf Stream - i915 Perf Observation Architecture Stream - All i915 Perf Internals v2: section headers in i915.rst (Daniel Vetter) missing symbol docs + other fixups (Matthew Auld) Signed-off-by: Robert Bragg <robert@sixbynine.org> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/20161207214033.3581-1-robert@sixbynine.org
This commit is contained in:
parent
b3b8e99984
commit
16d98b31f8
@ -356,4 +356,95 @@ switch_mm
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
|
||||
:doc: switch_mm tracepoint
|
||||
|
||||
Perf
|
||||
====
|
||||
|
||||
Overview
|
||||
--------
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:doc: i915 Perf Overview
|
||||
|
||||
Comparison with Core Perf
|
||||
-------------------------
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:doc: i915 Perf History and Comparison with Core Perf
|
||||
|
||||
i915 Driver Entry Points
|
||||
------------------------
|
||||
|
||||
This section covers the entrypoints exported outside of i915_perf.c to
|
||||
integrate with drm/i915 and to handle the `DRM_I915_PERF_OPEN` ioctl.
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_init
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_fini
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_register
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_unregister
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_open_ioctl
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_release
|
||||
|
||||
i915 Perf Stream
|
||||
----------------
|
||||
|
||||
This section covers the stream-semantics-agnostic structures and functions
|
||||
for representing an i915 perf stream FD and associated file operations.
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h
|
||||
:functions: i915_perf_stream
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h
|
||||
:functions: i915_perf_stream_ops
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: read_properties_unlocked
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_open_ioctl_locked
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_destroy_locked
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_read
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_ioctl
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_enable_locked
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_disable_locked
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_poll
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_perf_poll_locked
|
||||
|
||||
i915 Perf Observation Architecture Stream
|
||||
-----------------------------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_drv.h
|
||||
:functions: i915_oa_ops
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_oa_stream_init
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_oa_read
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_oa_stream_enable
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_oa_stream_disable
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_oa_wait_unlocked
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:functions: i915_oa_poll_wait
|
||||
|
||||
All i915 Perf Internals
|
||||
-----------------------
|
||||
|
||||
This section simply includes all currently documented i915 perf internals, in
|
||||
no particular order, but may include some more minor utilities or platform
|
||||
specific details than found in the more high-level sections.
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c
|
||||
:internal:
|
||||
|
||||
.. WARNING: DOCPROC directive not supported: !Cdrivers/gpu/drm/i915/i915_irq.c
|
||||
|
@ -1930,89 +1930,186 @@ struct i915_oa_reg {
|
||||
|
||||
struct i915_perf_stream;
|
||||
|
||||
/**
|
||||
* struct i915_perf_stream_ops - the OPs to support a specific stream type
|
||||
*/
|
||||
struct i915_perf_stream_ops {
|
||||
/* Enables the collection of HW samples, either in response to
|
||||
* I915_PERF_IOCTL_ENABLE or implicitly called when stream is
|
||||
* opened without I915_PERF_FLAG_DISABLED.
|
||||
/**
|
||||
* @enable: Enables the collection of HW samples, either in response to
|
||||
* `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
|
||||
* without `I915_PERF_FLAG_DISABLED`.
|
||||
*/
|
||||
void (*enable)(struct i915_perf_stream *stream);
|
||||
|
||||
/* Disables the collection of HW samples, either in response to
|
||||
* I915_PERF_IOCTL_DISABLE or implicitly called before
|
||||
* destroying the stream.
|
||||
/**
|
||||
* @disable: Disables the collection of HW samples, either in response
|
||||
* to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
|
||||
* the stream.
|
||||
*/
|
||||
void (*disable)(struct i915_perf_stream *stream);
|
||||
|
||||
/* Call poll_wait, passing a wait queue that will be woken
|
||||
/**
|
||||
* @poll_wait: Call poll_wait, passing a wait queue that will be woken
|
||||
* once there is something ready to read() for the stream
|
||||
*/
|
||||
void (*poll_wait)(struct i915_perf_stream *stream,
|
||||
struct file *file,
|
||||
poll_table *wait);
|
||||
|
||||
/* For handling a blocking read, wait until there is something
|
||||
* to ready to read() for the stream. E.g. wait on the same
|
||||
/**
|
||||
* @wait_unlocked: For handling a blocking read, wait until there is
|
||||
* something to ready to read() for the stream. E.g. wait on the same
|
||||
* wait queue that would be passed to poll_wait().
|
||||
*/
|
||||
int (*wait_unlocked)(struct i915_perf_stream *stream);
|
||||
|
||||
/* read - Copy buffered metrics as records to userspace
|
||||
* @buf: the userspace, destination buffer
|
||||
* @count: the number of bytes to copy, requested by userspace
|
||||
* @offset: zero at the start of the read, updated as the read
|
||||
* proceeds, it represents how many bytes have been
|
||||
* copied so far and the buffer offset for copying the
|
||||
* next record.
|
||||
/**
|
||||
* @read: Copy buffered metrics as records to userspace
|
||||
* **buf**: the userspace, destination buffer
|
||||
* **count**: the number of bytes to copy, requested by userspace
|
||||
* **offset**: zero at the start of the read, updated as the read
|
||||
* proceeds, it represents how many bytes have been copied so far and
|
||||
* the buffer offset for copying the next record.
|
||||
*
|
||||
* Copy as many buffered i915 perf samples and records for
|
||||
* this stream to userspace as will fit in the given buffer.
|
||||
* Copy as many buffered i915 perf samples and records for this stream
|
||||
* to userspace as will fit in the given buffer.
|
||||
*
|
||||
* Only write complete records; returning -ENOSPC if there
|
||||
* isn't room for a complete record.
|
||||
* Only write complete records; returning -%ENOSPC if there isn't room
|
||||
* for a complete record.
|
||||
*
|
||||
* Return any error condition that results in a short read
|
||||
* such as -ENOSPC or -EFAULT, even though these may be
|
||||
* squashed before returning to userspace.
|
||||
* Return any error condition that results in a short read such as
|
||||
* -%ENOSPC or -%EFAULT, even though these may be squashed before
|
||||
* returning to userspace.
|
||||
*/
|
||||
int (*read)(struct i915_perf_stream *stream,
|
||||
char __user *buf,
|
||||
size_t count,
|
||||
size_t *offset);
|
||||
|
||||
/* Cleanup any stream specific resources.
|
||||
/**
|
||||
* @destroy: Cleanup any stream specific resources.
|
||||
*
|
||||
* The stream will always be disabled before this is called.
|
||||
*/
|
||||
void (*destroy)(struct i915_perf_stream *stream);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct i915_perf_stream - state for a single open stream FD
|
||||
*/
|
||||
struct i915_perf_stream {
|
||||
/**
|
||||
* @dev_priv: i915 drm device
|
||||
*/
|
||||
struct drm_i915_private *dev_priv;
|
||||
|
||||
/**
|
||||
* @link: Links the stream into ``&drm_i915_private->streams``
|
||||
*/
|
||||
struct list_head link;
|
||||
|
||||
/**
|
||||
* @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
|
||||
* properties given when opening a stream, representing the contents
|
||||
* of a single sample as read() by userspace.
|
||||
*/
|
||||
u32 sample_flags;
|
||||
|
||||
/**
|
||||
* @sample_size: Considering the configured contents of a sample
|
||||
* combined with the required header size, this is the total size
|
||||
* of a single sample record.
|
||||
*/
|
||||
int sample_size;
|
||||
|
||||
/**
|
||||
* @ctx: %NULL if measuring system-wide across all contexts or a
|
||||
* specific context that is being monitored.
|
||||
*/
|
||||
struct i915_gem_context *ctx;
|
||||
|
||||
/**
|
||||
* @enabled: Whether the stream is currently enabled, considering
|
||||
* whether the stream was opened in a disabled state and based
|
||||
* on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
|
||||
*/
|
||||
bool enabled;
|
||||
|
||||
/**
|
||||
* @ops: The callbacks providing the implementation of this specific
|
||||
* type of configured stream.
|
||||
*/
|
||||
const struct i915_perf_stream_ops *ops;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct i915_oa_ops - Gen specific implementation of an OA unit stream
|
||||
*/
|
||||
struct i915_oa_ops {
|
||||
/**
|
||||
* @init_oa_buffer: Resets the head and tail pointers of the
|
||||
* circular buffer for periodic OA reports.
|
||||
*
|
||||
* Called when first opening a stream for OA metrics, but also may be
|
||||
* called in response to an OA buffer overflow or other error
|
||||
* condition.
|
||||
*
|
||||
* Note it may be necessary to clear the full OA buffer here as part of
|
||||
* maintaining the invariable that new reports must be written to
|
||||
* zeroed memory for us to be able to reliable detect if an expected
|
||||
* report has not yet landed in memory. (At least on Haswell the OA
|
||||
* buffer tail pointer is not synchronized with reports being visible
|
||||
* to the CPU)
|
||||
*/
|
||||
void (*init_oa_buffer)(struct drm_i915_private *dev_priv);
|
||||
|
||||
/**
|
||||
* @enable_metric_set: Applies any MUX configuration to set up the
|
||||
* Boolean and Custom (B/C) counters that are part of the counter
|
||||
* reports being sampled. May apply system constraints such as
|
||||
* disabling EU clock gating as required.
|
||||
*/
|
||||
int (*enable_metric_set)(struct drm_i915_private *dev_priv);
|
||||
|
||||
/**
|
||||
* @disable_metric_set: Remove system constraints associated with using
|
||||
* the OA unit.
|
||||
*/
|
||||
void (*disable_metric_set)(struct drm_i915_private *dev_priv);
|
||||
|
||||
/**
|
||||
* @oa_enable: Enable periodic sampling
|
||||
*/
|
||||
void (*oa_enable)(struct drm_i915_private *dev_priv);
|
||||
|
||||
/**
|
||||
* @oa_disable: Disable periodic sampling
|
||||
*/
|
||||
void (*oa_disable)(struct drm_i915_private *dev_priv);
|
||||
void (*update_oacontrol)(struct drm_i915_private *dev_priv);
|
||||
void (*update_hw_ctx_id_locked)(struct drm_i915_private *dev_priv,
|
||||
u32 ctx_id);
|
||||
|
||||
/**
|
||||
* @read: Copy data from the circular OA buffer into a given userspace
|
||||
* buffer.
|
||||
*/
|
||||
int (*read)(struct i915_perf_stream *stream,
|
||||
char __user *buf,
|
||||
size_t count,
|
||||
size_t *offset);
|
||||
|
||||
/**
|
||||
* @oa_buffer_is_empty: Check if OA buffer empty (false positives OK)
|
||||
*
|
||||
* This is either called via fops or the poll check hrtimer (atomic
|
||||
* ctx) without any locks taken.
|
||||
*
|
||||
* It's safe to read OA config state here unlocked, assuming that this
|
||||
* is only called while the stream is enabled, while the global OA
|
||||
* configuration can't be modified.
|
||||
*
|
||||
* Efficiency is more important than avoiding some false positives
|
||||
* here, which will be handled gracefully - likely resulting in an
|
||||
* %EAGAIN error for userspace.
|
||||
*/
|
||||
bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
|
||||
};
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
|
||||
|
||||
/**
|
||||
* DOC: i915 Perf, streaming API for GPU metrics
|
||||
* DOC: i915 Perf Overview
|
||||
*
|
||||
* Gen graphics supports a large number of performance counters that can help
|
||||
* driver and application developers understand and optimize their use of the
|
||||
@ -45,6 +45,10 @@
|
||||
* privileges by default, unless changed via the dev.i915.perf_event_paranoid
|
||||
* sysctl option.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* DOC: i915 Perf History and Comparison with Core Perf
|
||||
*
|
||||
* The interface was initially inspired by the core Perf infrastructure but
|
||||
* some notable differences are:
|
||||
@ -75,8 +79,8 @@
|
||||
* gets copied from the GPU mapped buffers to userspace buffers.
|
||||
*
|
||||
*
|
||||
* Some notes regarding Linux Perf:
|
||||
* --------------------------------
|
||||
* Issues hit with first prototype based on Core Perf
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* The first prototype of this driver was based on the core perf
|
||||
* infrastructure, and while we did make that mostly work, with some changes to
|
||||
@ -135,7 +139,7 @@
|
||||
* for combining with the side-band raw reports it captures using
|
||||
* MI_REPORT_PERF_COUNT commands.
|
||||
*
|
||||
* _ As a side note on perf's grouping feature; there was also some concern
|
||||
* - As a side note on perf's grouping feature; there was also some concern
|
||||
* that using PERF_FORMAT_GROUP as a way to pack together counter values
|
||||
* would quite drastically inflate our sample sizes, which would likely
|
||||
* lower the effective sampling resolutions we could use when the available
|
||||
@ -277,6 +281,20 @@ static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
|
||||
|
||||
#define SAMPLE_OA_REPORT (1<<0)
|
||||
|
||||
/**
|
||||
* struct perf_open_properties - for validated properties given to open a stream
|
||||
* @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
|
||||
* @single_context: Whether a single or all gpu contexts should be monitored
|
||||
* @ctx_handle: A gem ctx handle for use with @single_context
|
||||
* @metrics_set: An ID for an OA unit metric set advertised via sysfs
|
||||
* @oa_format: An OA unit HW report format
|
||||
* @oa_periodic: Whether to enable periodic OA unit sampling
|
||||
* @oa_period_exponent: The OA unit sampling period is derived from this
|
||||
*
|
||||
* As read_properties_unlocked() enumerates and validates the properties given
|
||||
* to open a stream of metrics the configuration is built up in the structure
|
||||
* which starts out zero initialized.
|
||||
*/
|
||||
struct perf_open_properties {
|
||||
u32 sample_flags;
|
||||
|
||||
@ -314,7 +332,19 @@ static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_pr
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends a status record to a userspace read() buffer.
|
||||
* append_oa_status - Appends a status record to a userspace read() buffer.
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
* @buf: destination buffer given by userspace
|
||||
* @count: the number of bytes userspace wants to read
|
||||
* @offset: (inout): the current position for writing into @buf
|
||||
* @type: The kind of status to report to userspace
|
||||
*
|
||||
* Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
|
||||
* into the userspace read() buffer.
|
||||
*
|
||||
* The @buf @offset will only be updated on success.
|
||||
*
|
||||
* Returns: 0 on success, negative error code on failure.
|
||||
*/
|
||||
static int append_oa_status(struct i915_perf_stream *stream,
|
||||
char __user *buf,
|
||||
@ -336,7 +366,21 @@ static int append_oa_status(struct i915_perf_stream *stream,
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies single OA report into userspace read() buffer.
|
||||
* append_oa_sample - Copies single OA report into userspace read() buffer.
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
* @buf: destination buffer given by userspace
|
||||
* @count: the number of bytes userspace wants to read
|
||||
* @offset: (inout): the current position for writing into @buf
|
||||
* @report: A single OA report to (optionally) include as part of the sample
|
||||
*
|
||||
* The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
|
||||
* properties when opening a stream, tracked as `stream->sample_flags`. This
|
||||
* function copies the requested components of a single sample to the given
|
||||
* read() @buf.
|
||||
*
|
||||
* The @buf @offset will only be updated on success.
|
||||
*
|
||||
* Returns: 0 on success, negative error code on failure.
|
||||
*/
|
||||
static int append_oa_sample(struct i915_perf_stream *stream,
|
||||
char __user *buf,
|
||||
@ -380,10 +424,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
|
||||
* @head_ptr: (inout): the current oa buffer cpu read position
|
||||
* @tail: the current oa buffer gpu write position
|
||||
*
|
||||
* Returns 0 on success, negative error code on failure.
|
||||
*
|
||||
* Notably any error condition resulting in a short read (-ENOSPC or
|
||||
* -EFAULT) will be returned even though one or more records may
|
||||
* Notably any error condition resulting in a short read (-%ENOSPC or
|
||||
* -%EFAULT) will be returned even though one or more records may
|
||||
* have been successfully copied. In this case it's up to the caller
|
||||
* to decide if the error should be squashed before returning to
|
||||
* userspace.
|
||||
@ -392,6 +434,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
|
||||
* tail, so the head chases the tail?... If you think that's mad
|
||||
* and back-to-front you're not alone, but this follows the
|
||||
* Gen PRM naming convention.
|
||||
*
|
||||
* Returns: 0 on success, negative error code on failure.
|
||||
*/
|
||||
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
|
||||
char __user *buf,
|
||||
@ -496,6 +540,22 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gen7_oa_read - copy status records then buffered OA reports
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
* @buf: destination buffer given by userspace
|
||||
* @count: the number of bytes userspace wants to read
|
||||
* @offset: (inout): the current position for writing into @buf
|
||||
*
|
||||
* Checks Gen 7 specific OA unit status registers and if necessary appends
|
||||
* corresponding status records for userspace (such as for a buffer full
|
||||
* condition) and then initiate appending any buffered OA reports.
|
||||
*
|
||||
* Updates @offset according to the number of bytes successfully copied into
|
||||
* the userspace buffer.
|
||||
*
|
||||
* Returns: zero on success or a negative error code
|
||||
*/
|
||||
static int gen7_oa_read(struct i915_perf_stream *stream,
|
||||
char __user *buf,
|
||||
size_t count,
|
||||
@ -597,6 +657,20 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_oa_wait_unlocked - handles blocking IO until OA data available
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
*
|
||||
* Called when userspace tries to read() from a blocking stream FD opened
|
||||
* for OA metrics. It waits until the hrtimer callback finds a non-empty
|
||||
* OA buffer and wakes us.
|
||||
*
|
||||
* Note: it's acceptable to have this return with some false positives
|
||||
* since any subsequent read handling will return -EAGAIN if there isn't
|
||||
* really data ready for userspace yet.
|
||||
*
|
||||
* Returns: zero on success or a negative error code
|
||||
*/
|
||||
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = stream->dev_priv;
|
||||
@ -615,6 +689,16 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
|
||||
!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_oa_poll_wait - call poll_wait() for an OA stream poll()
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
* @file: An i915 perf stream file
|
||||
* @wait: poll() state table
|
||||
*
|
||||
* For handling userspace polling on an i915 perf stream opened for OA metrics,
|
||||
* this starts a poll_wait with the wait queue that our hrtimer callback wakes
|
||||
* when it sees data ready to read in the circular OA buffer.
|
||||
*/
|
||||
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
|
||||
struct file *file,
|
||||
poll_table *wait)
|
||||
@ -624,6 +708,18 @@ static void i915_oa_poll_wait(struct i915_perf_stream *stream,
|
||||
poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_oa_read - just calls through to &i915_oa_ops->read
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
* @buf: destination buffer given by userspace
|
||||
* @count: the number of bytes userspace wants to read
|
||||
* @offset: (inout): the current position for writing into @buf
|
||||
*
|
||||
* Updates @offset according to the number of bytes successfully copied into
|
||||
* the userspace buffer.
|
||||
*
|
||||
* Returns: zero on success or a negative error code
|
||||
*/
|
||||
static int i915_oa_read(struct i915_perf_stream *stream,
|
||||
char __user *buf,
|
||||
size_t count,
|
||||
@ -634,9 +730,15 @@ static int i915_oa_read(struct i915_perf_stream *stream,
|
||||
return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
|
||||
}
|
||||
|
||||
/* Determine the render context hw id, and ensure it remains fixed for the
|
||||
/**
|
||||
* oa_get_render_ctx_id - determine and hold ctx hw id
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
*
|
||||
* Determine the render context hw id, and ensure it remains fixed for the
|
||||
* lifetime of the stream. This ensures that we don't have to worry about
|
||||
* updating the context ID in OACONTROL on the fly.
|
||||
*
|
||||
* Returns: zero on success or a negative error code
|
||||
*/
|
||||
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
|
||||
{
|
||||
@ -673,6 +775,13 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
|
||||
* @stream: An i915-perf stream opened for OA metrics
|
||||
*
|
||||
* In case anything needed doing to ensure the context HW ID would remain valid
|
||||
* for the lifetime of the stream, then that can be undone here.
|
||||
*/
|
||||
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = stream->dev_priv;
|
||||
@ -945,6 +1054,15 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv)
|
||||
spin_unlock_irqrestore(&dev_priv->perf.hook_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
|
||||
* @stream: An i915 perf stream opened for OA metrics
|
||||
*
|
||||
* [Re]enables hardware periodic sampling according to the period configured
|
||||
* when opening the stream. This also starts a hrtimer that will periodically
|
||||
* check for data in the circular OA buffer for notifying userspace (e.g.
|
||||
* during a read() or poll()).
|
||||
*/
|
||||
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = stream->dev_priv;
|
||||
@ -962,6 +1080,14 @@ static void gen7_oa_disable(struct drm_i915_private *dev_priv)
|
||||
I915_WRITE(GEN7_OACONTROL, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
|
||||
* @stream: An i915 perf stream opened for OA metrics
|
||||
*
|
||||
* Stops the OA unit from periodically writing counter reports into the
|
||||
* circular OA buffer. This also stops the hrtimer that periodically checks for
|
||||
* data in the circular OA buffer, for notifying userspace.
|
||||
*/
|
||||
static void i915_oa_stream_disable(struct i915_perf_stream *stream)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = stream->dev_priv;
|
||||
@ -987,6 +1113,24 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {
|
||||
.read = i915_oa_read,
|
||||
};
|
||||
|
||||
/**
|
||||
* i915_oa_stream_init - validate combined props for OA stream and init
|
||||
* @stream: An i915 perf stream
|
||||
* @param: The open parameters passed to `DRM_I915_PERF_OPEN`
|
||||
* @props: The property state that configures stream (individually validated)
|
||||
*
|
||||
* While read_properties_unlocked() validates properties in isolation it
|
||||
* doesn't ensure that the combination necessarily makes sense.
|
||||
*
|
||||
* At this point it has been determined that userspace wants a stream of
|
||||
* OA metrics, but still we need to further validate the combined
|
||||
* properties are OK.
|
||||
*
|
||||
* If the configuration makes sense then we can allocate memory for
|
||||
* a circular OA buffer and apply the requested metric set configuration.
|
||||
*
|
||||
* Returns: zero on success or a negative error code.
|
||||
*/
|
||||
static int i915_oa_stream_init(struct i915_perf_stream *stream,
|
||||
struct drm_i915_perf_open_param *param,
|
||||
struct perf_open_properties *props)
|
||||
@ -1111,6 +1255,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
|
||||
* @stream: An i915 perf stream
|
||||
* @file: An i915 perf stream file
|
||||
* @buf: destination buffer given by userspace
|
||||
* @count: the number of bytes userspace wants to read
|
||||
* @ppos: (inout) file seek position (unused)
|
||||
*
|
||||
* Besides wrapping &i915_perf_stream_ops->read this provides a common place to
|
||||
* ensure that if we've successfully copied any data then reporting that takes
|
||||
* precedence over any internal error status, so the data isn't lost.
|
||||
*
|
||||
* For example ret will be -ENOSPC whenever there is more buffered data than
|
||||
* can be copied to userspace, but that's only interesting if we weren't able
|
||||
* to copy some data because it implies the userspace buffer is too small to
|
||||
* receive a single record (and we never split records).
|
||||
*
|
||||
* Another case with ret == -EFAULT is more of a grey area since it would seem
|
||||
* like bad form for userspace to ask us to overrun its buffer, but the user
|
||||
* knows best:
|
||||
*
|
||||
* http://yarchive.net/comp/linux/partial_reads_writes.html
|
||||
*
|
||||
* Returns: The number of bytes copied or a negative error code on failure.
|
||||
*/
|
||||
static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
|
||||
struct file *file,
|
||||
char __user *buf,
|
||||
@ -1126,25 +1295,27 @@ static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
|
||||
size_t offset = 0;
|
||||
int ret = stream->ops->read(stream, buf, count, &offset);
|
||||
|
||||
/* If we've successfully copied any data then reporting that
|
||||
* takes precedence over any internal error status, so the
|
||||
* data isn't lost.
|
||||
*
|
||||
* For example ret will be -ENOSPC whenever there is more
|
||||
* buffered data than can be copied to userspace, but that's
|
||||
* only interesting if we weren't able to copy some data
|
||||
* because it implies the userspace buffer is too small to
|
||||
* receive a single record (and we never split records).
|
||||
*
|
||||
* Another case with ret == -EFAULT is more of a grey area
|
||||
* since it would seem like bad form for userspace to ask us
|
||||
* to overrun its buffer, but the user knows best:
|
||||
*
|
||||
* http://yarchive.net/comp/linux/partial_reads_writes.html
|
||||
*/
|
||||
return offset ?: (ret ?: -EAGAIN);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_read - handles read() FOP for i915 perf stream FDs
|
||||
* @file: An i915 perf stream file
|
||||
* @buf: destination buffer given by userspace
|
||||
* @count: the number of bytes userspace wants to read
|
||||
* @ppos: (inout) file seek position (unused)
|
||||
*
|
||||
* The entry point for handling a read() on a stream file descriptor from
|
||||
* userspace. Most of the work is left to the i915_perf_read_locked() and
|
||||
* &i915_perf_stream_ops->read but to save having stream implementations (of
|
||||
* which we might have multiple later) we handle blocking read here.
|
||||
*
|
||||
* We can also consistently treat trying to read from a disabled stream
|
||||
* as an IO error so implementations can assume the stream is enabled
|
||||
* while reading.
|
||||
*
|
||||
* Returns: The number of bytes copied or a negative error code on failure.
|
||||
*/
|
||||
static ssize_t i915_perf_read(struct file *file,
|
||||
char __user *buf,
|
||||
size_t count,
|
||||
@ -1211,6 +1382,22 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
|
||||
* @dev_priv: i915 device instance
|
||||
* @stream: An i915 perf stream
|
||||
* @file: An i915 perf stream file
|
||||
* @wait: poll() state table
|
||||
*
|
||||
* For handling userspace polling on an i915 perf stream, this calls through to
|
||||
* &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
|
||||
* will be woken for new stream data.
|
||||
*
|
||||
* Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
|
||||
* with any non-file-operation driver hooks.
|
||||
*
|
||||
* Returns: any poll events that are ready without sleeping
|
||||
*/
|
||||
static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
|
||||
struct i915_perf_stream *stream,
|
||||
struct file *file,
|
||||
@ -1232,6 +1419,19 @@ static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
|
||||
return events;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_poll - call poll_wait() with a suitable wait queue for stream
|
||||
* @file: An i915 perf stream file
|
||||
* @wait: poll() state table
|
||||
*
|
||||
* For handling userspace polling on an i915 perf stream, this ensures
|
||||
* poll_wait() gets called with a wait queue that will be woken for new stream
|
||||
* data.
|
||||
*
|
||||
* Note: Implementation deferred to i915_perf_poll_locked()
|
||||
*
|
||||
* Returns: any poll events that are ready without sleeping
|
||||
*/
|
||||
static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
|
||||
{
|
||||
struct i915_perf_stream *stream = file->private_data;
|
||||
@ -1245,6 +1445,16 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
|
||||
* @stream: A disabled i915 perf stream
|
||||
*
|
||||
* [Re]enables the associated capture of data for this stream.
|
||||
*
|
||||
* If a stream was previously enabled then there's currently no intention
|
||||
* to provide userspace any guarantee about the preservation of previously
|
||||
* buffered data.
|
||||
*/
|
||||
static void i915_perf_enable_locked(struct i915_perf_stream *stream)
|
||||
{
|
||||
if (stream->enabled)
|
||||
@ -1257,6 +1467,20 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
|
||||
stream->ops->enable(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
|
||||
* @stream: An enabled i915 perf stream
|
||||
*
|
||||
* Disables the associated capture of data for this stream.
|
||||
*
|
||||
* The intention is that disabling an re-enabling a stream will ideally be
|
||||
* cheaper than destroying and re-opening a stream with the same configuration,
|
||||
* though there are no formal guarantees about what state or buffered data
|
||||
* must be retained between disabling and re-enabling a stream.
|
||||
*
|
||||
* Note: while a stream is disabled it's considered an error for userspace
|
||||
* to attempt to read from the stream (-EIO).
|
||||
*/
|
||||
static void i915_perf_disable_locked(struct i915_perf_stream *stream)
|
||||
{
|
||||
if (!stream->enabled)
|
||||
@ -1269,6 +1493,18 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
|
||||
stream->ops->disable(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
|
||||
* @stream: An i915 perf stream
|
||||
* @cmd: the ioctl request
|
||||
* @arg: the ioctl data
|
||||
*
|
||||
* Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
|
||||
* with any non-file-operation driver hooks.
|
||||
*
|
||||
* Returns: zero on success or a negative error code. Returns -EINVAL for
|
||||
* an unknown ioctl request.
|
||||
*/
|
||||
static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
|
||||
unsigned int cmd,
|
||||
unsigned long arg)
|
||||
@ -1285,6 +1521,17 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
|
||||
* @file: An i915 perf stream file
|
||||
* @cmd: the ioctl request
|
||||
* @arg: the ioctl data
|
||||
*
|
||||
* Implementation deferred to i915_perf_ioctl_locked().
|
||||
*
|
||||
* Returns: zero on success or a negative error code. Returns -EINVAL for
|
||||
* an unknown ioctl request.
|
||||
*/
|
||||
static long i915_perf_ioctl(struct file *file,
|
||||
unsigned int cmd,
|
||||
unsigned long arg)
|
||||
@ -1300,6 +1547,16 @@ static long i915_perf_ioctl(struct file *file,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_destroy_locked - destroy an i915 perf stream
|
||||
* @stream: An i915 perf stream
|
||||
*
|
||||
* Frees all resources associated with the given i915 perf @stream, disabling
|
||||
* any associated data capture in the process.
|
||||
*
|
||||
* Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
|
||||
* with any non-file-operation driver hooks.
|
||||
*/
|
||||
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = stream->dev_priv;
|
||||
@ -1321,6 +1578,17 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
|
||||
kfree(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_release - handles userspace close() of a stream file
|
||||
* @inode: anonymous inode associated with file
|
||||
* @file: An i915 perf stream file
|
||||
*
|
||||
* Cleans up any resources associated with an open i915 perf stream file.
|
||||
*
|
||||
* NB: close() can't really fail from the userspace point of view.
|
||||
*
|
||||
* Returns: zero on success or a negative error code.
|
||||
*/
|
||||
static int i915_perf_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct i915_perf_stream *stream = file->private_data;
|
||||
@ -1365,6 +1633,30 @@ lookup_context(struct drm_i915_private *dev_priv,
|
||||
return ctx;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
|
||||
* @dev_priv: i915 device instance
|
||||
* @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
|
||||
* @props: individually validated u64 property value pairs
|
||||
* @file: drm file
|
||||
*
|
||||
* See i915_perf_ioctl_open() for interface details.
|
||||
*
|
||||
* Implements further stream config validation and stream initialization on
|
||||
* behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex
|
||||
* taken to serialize with any non-file-operation driver hooks.
|
||||
*
|
||||
* Note: at this point the @props have only been validated in isolation and
|
||||
* it's still necessary to validate that the combination of properties makes
|
||||
* sense.
|
||||
*
|
||||
* In the case where userspace is interested in OA unit metrics then further
|
||||
* config validation and stream initialization details will be handled by
|
||||
* i915_oa_stream_init(). The code here should only validate config state that
|
||||
* will be relevant to all stream types / backends.
|
||||
*
|
||||
* Returns: zero on success or a negative error code.
|
||||
*/
|
||||
static int
|
||||
i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
|
||||
struct drm_i915_perf_open_param *param,
|
||||
@ -1459,12 +1751,20 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Note we copy the properties from userspace outside of the i915 perf
|
||||
* mutex to avoid an awkward lockdep with mmap_sem.
|
||||
/**
|
||||
* read_properties_unlocked - validate + copy userspace stream open properties
|
||||
* @dev_priv: i915 device instance
|
||||
* @uprops: The array of u64 key value pairs given by userspace
|
||||
* @n_props: The number of key value pairs expected in @uprops
|
||||
* @props: The stream configuration built up while validating properties
|
||||
*
|
||||
* Note this function only validates properties in isolation it doesn't
|
||||
* validate that the combination of properties makes sense or that all
|
||||
* properties necessary for a particular kind of stream have been set.
|
||||
*
|
||||
* Note that there currently aren't any ordering requirements for properties so
|
||||
* we shouldn't validate or assume anything about ordering here. This doesn't
|
||||
* rule out defining new properties with ordering requirements in the future.
|
||||
*/
|
||||
static int read_properties_unlocked(struct drm_i915_private *dev_priv,
|
||||
u64 __user *uprops,
|
||||
@ -1586,6 +1886,30 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
|
||||
* @dev: drm device
|
||||
* @data: ioctl data copied from userspace (unvalidated)
|
||||
* @file: drm file
|
||||
*
|
||||
* Validates the stream open parameters given by userspace including flags
|
||||
* and an array of u64 key, value pair properties.
|
||||
*
|
||||
* Very little is assumed up front about the nature of the stream being
|
||||
* opened (for instance we don't assume it's for periodic OA unit metrics). An
|
||||
* i915-perf stream is expected to be a suitable interface for other forms of
|
||||
* buffered data written by the GPU besides periodic OA metrics.
|
||||
*
|
||||
* Note we copy the properties from userspace outside of the i915 perf
|
||||
* mutex to avoid an awkward lockdep with mmap_sem.
|
||||
*
|
||||
* Most of the implementation details are handled by
|
||||
* i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock
|
||||
* mutex for serializing with any non-file-operation driver hooks.
|
||||
*
|
||||
* Return: A newly opened i915 Perf stream file descriptor or negative
|
||||
* error code on failure.
|
||||
*/
|
||||
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file)
|
||||
{
|
||||
@ -1622,6 +1946,14 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_register - exposes i915-perf to userspace
|
||||
* @dev_priv: i915 device instance
|
||||
*
|
||||
* In particular OA metric sets are advertised under a sysfs metrics/
|
||||
* directory allowing userspace to enumerate valid IDs that can be
|
||||
* used to open an i915-perf stream.
|
||||
*/
|
||||
void i915_perf_register(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
if (!IS_HASWELL(dev_priv))
|
||||
@ -1651,6 +1983,15 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
|
||||
mutex_unlock(&dev_priv->perf.lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_unregister - hide i915-perf from userspace
|
||||
* @dev_priv: i915 device instance
|
||||
*
|
||||
* i915-perf state cleanup is split up into an 'unregister' and
|
||||
* 'deinit' phase where the interface is first hidden from
|
||||
* userspace by i915_perf_unregister() before cleaning up
|
||||
* remaining state in i915_perf_fini().
|
||||
*/
|
||||
void i915_perf_unregister(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
if (!IS_HASWELL(dev_priv))
|
||||
@ -1707,6 +2048,15 @@ static struct ctl_table dev_root[] = {
|
||||
{}
|
||||
};
|
||||
|
||||
/**
|
||||
* i915_perf_init - initialize i915-perf state on module load
|
||||
* @dev_priv: i915 device instance
|
||||
*
|
||||
* Initializes i915-perf state without exposing anything to userspace.
|
||||
*
|
||||
* Note: i915-perf initialization is split into an 'init' and 'register'
|
||||
* phase with the i915_perf_register() exposing state to userspace.
|
||||
*/
|
||||
void i915_perf_init(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
if (!IS_HASWELL(dev_priv))
|
||||
@ -1742,6 +2092,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
|
||||
dev_priv->perf.initialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_perf_fini - Counter part to i915_perf_init()
|
||||
* @dev_priv: i915 device instance
|
||||
*/
|
||||
void i915_perf_fini(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
if (!dev_priv->perf.initialized)
|
||||
|
Loading…
Reference in New Issue
Block a user