mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-16 21:35:07 +00:00
habanalabs: add support for notification via eventfd
The driver will be able to send notification events towards a user process, using user's registered event file descriptor. The driver uses the notification mechanism to inform the user about an occurred event. A user thread can wait until a notification is received from the driver. The driver stores the occurred event until the user reads it, using HL_INFO_GET_EVENTS - new ioctl opcode in the INFO ioctl. Gaudi specific implementation includes sending a notification on a TPC assertion event that is received from f/w. Signed-off-by: Tal Cohen <talcohen@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
f2daa2d97e
commit
422ef17103
@ -285,6 +285,14 @@ static void hpriv_release(struct kref *ref)
|
||||
|
||||
hdev->compute_ctx_in_release = 0;
|
||||
|
||||
/* release the eventfd */
|
||||
if (hpriv->notifier_event.eventfd) {
|
||||
eventfd_ctx_put(hpriv->notifier_event.eventfd);
|
||||
hpriv->notifier_event.eventfd = 0;
|
||||
}
|
||||
|
||||
mutex_destroy(&hpriv->notifier_event.lock);
|
||||
|
||||
kfree(hpriv);
|
||||
}
|
||||
|
||||
@ -355,6 +363,13 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
|
||||
list_del(&hpriv->dev_node);
|
||||
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
|
||||
out:
|
||||
/* release the eventfd */
|
||||
if (hpriv->notifier_event.eventfd) {
|
||||
eventfd_ctx_put(hpriv->notifier_event.eventfd);
|
||||
hpriv->notifier_event.eventfd = 0;
|
||||
}
|
||||
|
||||
mutex_destroy(&hpriv->notifier_event.lock);
|
||||
put_pid(hpriv->taskpid);
|
||||
|
||||
kfree(hpriv);
|
||||
@ -1506,6 +1521,43 @@ out_err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * hl_notifier_event_send - record an event and signal the registered eventfd
 *
 * @notifier_event: notifier data of a single file-private structure
 * @event: bitmask of the occurred event(s) to record
 *
 * The event bits are OR-ed into events_mask. If an eventfd is currently
 * registered, it is signaled with a count of 1. Both steps are done under
 * the notifier lock.
 */
static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event)
{
	mutex_lock(&notifier_event->lock);

	/* accumulate the event even when no eventfd is registered */
	notifier_event->events_mask |= event;

	if (notifier_event->eventfd)
		eventfd_signal(notifier_event->eventfd, 1);

	mutex_unlock(&notifier_event->lock);
}
|
||||
|
||||
/*
|
||||
* hl_notifier_event_send_all - notify all user processes via eventfd
|
||||
*
|
||||
* @hdev: pointer to habanalabs device structure
|
||||
* @event: the occurred event
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
void hl_notifier_event_send_all(struct hl_device *hdev, u64 event)
|
||||
{
|
||||
struct hl_fpriv *hpriv;
|
||||
|
||||
mutex_lock(&hdev->fpriv_list_lock);
|
||||
|
||||
list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
|
||||
hl_notifier_event_send(&hpriv->notifier_event, event);
|
||||
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
|
||||
/* control device */
|
||||
mutex_lock(&hdev->fpriv_ctrl_list_lock);
|
||||
|
||||
list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
|
||||
hl_notifier_event_send(&hpriv->notifier_event, event);
|
||||
|
||||
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_device_init - main initialization function for habanalabs device
|
||||
*
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <linux/hashtable.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/eventfd.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/genalloc.h>
|
||||
#include <linux/sched/signal.h>
|
||||
@ -1932,6 +1933,18 @@ struct hl_debug_params {
|
||||
bool enable;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_notifier_event - holds the notifier data structure
|
||||
* @eventfd: the event file descriptor to raise the notifications
|
||||
* @lock: mutex lock to protect the notifier data flows
|
||||
* @events_mask: indicates the bitmap events
|
||||
*/
|
||||
struct hl_notifier_event {
|
||||
struct eventfd_ctx *eventfd;
|
||||
struct mutex lock;
|
||||
u64 events_mask;
|
||||
};
|
||||
|
||||
/*
|
||||
* FILE PRIVATE STRUCTURE
|
||||
*/
|
||||
@ -1943,24 +1956,25 @@ struct hl_debug_params {
|
||||
* @taskpid: current process ID.
|
||||
* @ctx: current executing context. TODO: remove for multiple ctx per process
|
||||
* @ctx_mgr: context manager to handle multiple context for this FD.
|
||||
* @cb_mgr: command buffer manager to handle multiple buffers for this FD.
|
||||
* @mem_mgr: manager descriptor for memory exportable via mmap
|
||||
* @notifier_event: notifier eventfd towards user process
|
||||
* @debugfs_list: list of relevant ASIC debugfs.
|
||||
* @dev_node: node in the device list of file private data
|
||||
* @refcount: number of related contexts.
|
||||
* @restore_phase_mutex: lock for context switch and restore phase.
|
||||
*/
|
||||
struct hl_fpriv {
|
||||
struct hl_device *hdev;
|
||||
struct file *filp;
|
||||
struct pid *taskpid;
|
||||
struct hl_ctx *ctx;
|
||||
struct hl_ctx_mgr ctx_mgr;
|
||||
struct hl_mem_mgr mem_mgr;
|
||||
struct list_head debugfs_list;
|
||||
struct list_head dev_node;
|
||||
struct kref refcount;
|
||||
struct mutex restore_phase_mutex;
|
||||
struct hl_device *hdev;
|
||||
struct file *filp;
|
||||
struct pid *taskpid;
|
||||
struct hl_ctx *ctx;
|
||||
struct hl_ctx_mgr ctx_mgr;
|
||||
struct hl_mem_mgr mem_mgr;
|
||||
struct hl_notifier_event notifier_event;
|
||||
struct list_head debugfs_list;
|
||||
struct list_head dev_node;
|
||||
struct kref refcount;
|
||||
struct mutex restore_phase_mutex;
|
||||
};
|
||||
|
||||
|
||||
@ -2676,8 +2690,8 @@ struct hl_reset_info {
|
||||
* @state_dump_specs: constants and dictionaries needed to dump system state.
|
||||
* @multi_cs_completion: array of multi-CS completion.
|
||||
* @clk_throttling: holds information about current/previous clock throttling events
|
||||
* @reset_info: holds current device reset information.
|
||||
* @last_error: holds information about last session in which CS timeout or razwi error occurred.
|
||||
* @reset_info: holds current device reset information.
|
||||
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
|
||||
* @fw_major_version: major version of current loaded preboot
|
||||
* @dram_used_mem: current DRAM memory consumption.
|
||||
@ -3071,6 +3085,8 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization);
|
||||
int hl_build_hwmon_channel_info(struct hl_device *hdev,
|
||||
struct cpucp_sensor *sensors_arr);
|
||||
|
||||
void hl_notifier_event_send_all(struct hl_device *hdev, u64 event);
|
||||
|
||||
int hl_sysfs_init(struct hl_device *hdev);
|
||||
void hl_sysfs_fini(struct hl_device *hdev);
|
||||
|
||||
|
@ -134,6 +134,10 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
hpriv->hdev = hdev;
|
||||
filp->private_data = hpriv;
|
||||
hpriv->filp = filp;
|
||||
hpriv->notifier_event.events_mask = 0;
|
||||
hpriv->notifier_event.eventfd = 0;
|
||||
|
||||
mutex_init(&hpriv->notifier_event.lock);
|
||||
mutex_init(&hpriv->restore_phase_mutex);
|
||||
kref_init(&hpriv->refcount);
|
||||
nonseekable_open(inode, filp);
|
||||
@ -208,6 +212,7 @@ out_err:
|
||||
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
|
||||
filp->private_data = NULL;
|
||||
mutex_destroy(&hpriv->restore_phase_mutex);
|
||||
mutex_destroy(&hpriv->notifier_event.lock);
|
||||
put_pid(hpriv->taskpid);
|
||||
|
||||
kfree(hpriv);
|
||||
@ -241,6 +246,10 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
|
||||
hpriv->hdev = hdev;
|
||||
filp->private_data = hpriv;
|
||||
hpriv->filp = filp;
|
||||
hpriv->notifier_event.events_mask = 0;
|
||||
hpriv->notifier_event.eventfd = 0;
|
||||
|
||||
mutex_init(&hpriv->notifier_event.lock);
|
||||
nonseekable_open(inode, filp);
|
||||
|
||||
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
|
||||
|
@ -116,6 +116,25 @@ static int hw_events_info(struct hl_device *hdev, bool aggregate,
|
||||
return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int events_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
int rc;
|
||||
u32 max_size = args->return_size;
|
||||
u64 events_mask;
|
||||
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||
|
||||
if ((max_size < sizeof(u64)) || (!out))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&hpriv->notifier_event.lock);
|
||||
events_mask = hpriv->notifier_event.events_mask;
|
||||
hpriv->notifier_event.events_mask = 0;
|
||||
mutex_unlock(&hpriv->notifier_event.lock);
|
||||
|
||||
rc = copy_to_user(out, &events_mask, sizeof(u64));
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
@ -614,6 +633,43 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_
|
||||
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* check if there is already a registered on that process */
|
||||
mutex_lock(&hpriv->notifier_event.lock);
|
||||
if (hpriv->notifier_event.eventfd) {
|
||||
mutex_unlock(&hpriv->notifier_event.lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hpriv->notifier_event.eventfd = eventfd_ctx_fdget(args->eventfd);
|
||||
if (IS_ERR(hpriv->notifier_event.eventfd)) {
|
||||
rc = PTR_ERR(hpriv->notifier_event.eventfd);
|
||||
hpriv->notifier_event.eventfd = 0;
|
||||
mutex_unlock(&hpriv->notifier_event.lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
mutex_unlock(&hpriv->notifier_event.lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
mutex_lock(&hpriv->notifier_event.lock);
|
||||
if (!hpriv->notifier_event.eventfd) {
|
||||
mutex_unlock(&hpriv->notifier_event.lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
eventfd_ctx_put(hpriv->notifier_event.eventfd);
|
||||
hpriv->notifier_event.eventfd = 0;
|
||||
mutex_unlock(&hpriv->notifier_event.lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
|
||||
struct device *dev)
|
||||
{
|
||||
@ -667,6 +723,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
|
||||
case HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES:
|
||||
return dev_mem_alloc_page_sizes_info(hpriv, args);
|
||||
|
||||
case HL_INFO_GET_EVENTS:
|
||||
return events_info(hpriv, args);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -717,6 +776,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
|
||||
case HL_INFO_DRAM_PENDING_ROWS:
|
||||
return dram_pending_rows_info(hpriv, args);
|
||||
|
||||
case HL_INFO_REGISTER_EVENTFD:
|
||||
return eventfd_register(hpriv, args);
|
||||
|
||||
case HL_INFO_UNREGISTER_EVENTFD:
|
||||
return eventfd_unregister(hpriv, args);
|
||||
|
||||
default:
|
||||
dev_err(dev, "Invalid request %d\n", args->op);
|
||||
rc = -EINVAL;
|
||||
|
@ -7879,7 +7879,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||
case GAUDI_EVENT_MMU_PAGE_FAULT:
|
||||
case GAUDI_EVENT_MMU_WR_PERM:
|
||||
case GAUDI_EVENT_RAZWI_OR_ADC:
|
||||
case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
|
||||
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
|
||||
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
|
||||
fallthrough;
|
||||
@ -7899,6 +7898,19 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||
hl_fw_unmask_irq(hdev, event_type);
|
||||
break;
|
||||
|
||||
case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
|
||||
gaudi_print_irq_info(hdev, event_type, true);
|
||||
gaudi_handle_qman_err(hdev, event_type);
|
||||
hl_fw_unmask_irq(hdev, event_type);
|
||||
|
||||
/* In TPC QM event, notify on TPC assertion. While there isn't
|
||||
* a specific event for assertion yet, the FW generates QM event.
|
||||
* The SW upper layer will inspect an internal mapped area to indicate
|
||||
* if the event is a tpc assertion or tpc QM.
|
||||
*/
|
||||
hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT);
|
||||
break;
|
||||
|
||||
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
|
||||
gaudi_print_irq_info(hdev, event_type, true);
|
||||
goto reset_device;
|
||||
|
@ -349,6 +349,9 @@ enum hl_server_type {
|
||||
* Razwi initiator.
|
||||
* Razwi cause, was it a page fault or MMU access error.
|
||||
* HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation
|
||||
* HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications.
|
||||
* HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
|
||||
* HL_INFO_GET_EVENTS - Retrieve the last occurred events
|
||||
*/
|
||||
#define HL_INFO_HW_IP_INFO 0
|
||||
#define HL_INFO_HW_EVENTS 1
|
||||
@ -374,6 +377,9 @@ enum hl_server_type {
|
||||
#define HL_INFO_CS_TIMEOUT_EVENT 24
|
||||
#define HL_INFO_RAZWI_EVENT 25
|
||||
#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26
|
||||
#define HL_INFO_REGISTER_EVENTFD 28
|
||||
#define HL_INFO_UNREGISTER_EVENTFD 29
|
||||
#define HL_INFO_GET_EVENTS 30
|
||||
|
||||
#define HL_INFO_VERSION_MAX_LEN 128
|
||||
#define HL_INFO_CARD_NAME_MAX_LEN 16
|
||||
@ -679,6 +685,7 @@ enum gaudi_dcores {
|
||||
* @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms
|
||||
* resolution. Currently not in use.
|
||||
* @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
|
||||
* @eventfd: event file descriptor for event notifications.
|
||||
* @pad: Padding to 64 bit.
|
||||
*/
|
||||
struct hl_info_args {
|
||||
@ -691,6 +698,7 @@ struct hl_info_args {
|
||||
__u32 ctx_id;
|
||||
__u32 period_ms;
|
||||
__u32 pll_index;
|
||||
__u32 eventfd;
|
||||
};
|
||||
|
||||
__u32 pad;
|
||||
@ -1390,6 +1398,13 @@ struct hl_debug_args {
|
||||
__u32 ctx_id;
|
||||
};
|
||||
|
||||
/*
|
||||
* Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command
|
||||
*
|
||||
* HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event
|
||||
*/
|
||||
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1 << 0)
|
||||
|
||||
/*
|
||||
* Various information operations such as:
|
||||
* - H/W IP information
|
||||
|
Loading…
x
Reference in New Issue
Block a user