mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 02:36:21 +00:00
habanalabs: Timestamps buffers registration
Timestamp registration API allows the user to register a timestamp record event which will make the driver set timestamp when CQ counter reaches the target value and write it to a specific location specified by the user. This is a non blocking API, unlike the wait_for_interrupt which is a blocking one. Signed-off-by: farah kassabri <fkassabri@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
b32cd10480
commit
9158bf69e7
@ -14,6 +14,8 @@
|
||||
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
|
||||
HL_CS_FLAGS_COLLECTIVE_WAIT)
|
||||
|
||||
#define MAX_TS_ITER_NUM 10
|
||||
|
||||
/**
|
||||
* enum hl_cs_wait_status - cs wait status
|
||||
* @CS_WAIT_STATUS_BUSY: cs was not completed yet
|
||||
@ -924,7 +926,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
|
||||
int i;
|
||||
struct hl_cs *cs, *tmp;
|
||||
|
||||
flush_workqueue(hdev->sob_reset_wq);
|
||||
flush_workqueue(hdev->ts_free_obj_wq);
|
||||
|
||||
/* flush all completions before iterating over the CS mirror list in
|
||||
* order to avoid a race with the release functions
|
||||
@ -948,13 +950,19 @@ void hl_cs_rollback_all(struct hl_device *hdev)
|
||||
static void
|
||||
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend;
|
||||
struct hl_user_pending_interrupt *pend, *temp;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
|
||||
pend->fence.error = -EIO;
|
||||
complete_all(&pend->fence.completion);
|
||||
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
|
||||
if (pend->ts_reg_info.ts_buff) {
|
||||
list_del(&pend->wait_list_node);
|
||||
hl_ts_put(pend->ts_reg_info.ts_buff);
|
||||
hl_cb_put(pend->ts_reg_info.cq_cb);
|
||||
} else {
|
||||
pend->fence.error = -EIO;
|
||||
complete_all(&pend->fence.completion);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
}
|
||||
@ -2857,57 +2865,153 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
|
||||
struct hl_cb *cq_cb,
|
||||
u64 ts_offset, u64 cq_offset, u64 target_value,
|
||||
spinlock_t *wait_list_lock,
|
||||
struct hl_user_pending_interrupt **pend)
|
||||
{
|
||||
struct hl_user_pending_interrupt *requested_offset_record =
|
||||
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
|
||||
ts_offset;
|
||||
struct hl_user_pending_interrupt *cb_last =
|
||||
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
|
||||
(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
|
||||
unsigned long flags, iter_counter = 0;
|
||||
u64 current_cq_counter;
|
||||
|
||||
/* Validate ts_offset not exceeding last max */
|
||||
if (requested_offset_record > cb_last) {
|
||||
dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
|
||||
(u64)(uintptr_t)cb_last);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
start_over:
|
||||
spin_lock_irqsave(wait_list_lock, flags);
|
||||
|
||||
/* Unregister only if we didn't reach the target value
|
||||
* since in this case there will be no handling in irq context
|
||||
* and then it's safe to delete the node out of the interrupt list
|
||||
* then re-use it on other interrupt
|
||||
*/
|
||||
if (requested_offset_record->ts_reg_info.in_use) {
|
||||
current_cq_counter = *requested_offset_record->cq_kernel_addr;
|
||||
if (current_cq_counter < requested_offset_record->cq_target_value) {
|
||||
list_del(&requested_offset_record->wait_list_node);
|
||||
spin_unlock_irqrestore(wait_list_lock, flags);
|
||||
|
||||
hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
|
||||
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
|
||||
|
||||
dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
|
||||
} else {
|
||||
dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");
|
||||
|
||||
/* irq handling in the middle give it time to finish */
|
||||
spin_unlock_irqrestore(wait_list_lock, flags);
|
||||
usleep_range(1, 10);
|
||||
if (++iter_counter == MAX_TS_ITER_NUM) {
|
||||
dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
goto start_over;
|
||||
}
|
||||
} else {
|
||||
spin_unlock_irqrestore(wait_list_lock, flags);
|
||||
}
|
||||
|
||||
/* Fill up the new registration node info */
|
||||
requested_offset_record->ts_reg_info.in_use = 1;
|
||||
requested_offset_record->ts_reg_info.ts_buff = ts_buff;
|
||||
requested_offset_record->ts_reg_info.cq_cb = cq_cb;
|
||||
requested_offset_record->ts_reg_info.timestamp_kernel_addr =
|
||||
(u64 *) ts_buff->user_buff_address + ts_offset;
|
||||
requested_offset_record->cq_kernel_addr =
|
||||
(u64 *) cq_cb->kernel_address + cq_offset;
|
||||
requested_offset_record->cq_target_value = target_value;
|
||||
|
||||
*pend = requested_offset_record;
|
||||
|
||||
dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
|
||||
(u64)(uintptr_t)requested_offset_record);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
struct hl_cb_mgr *cb_mgr, u64 timeout_us,
|
||||
u64 cq_counters_handle, u64 cq_counters_offset,
|
||||
struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
|
||||
u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
|
||||
u64 target_value, struct hl_user_interrupt *interrupt,
|
||||
bool register_ts_record, u64 ts_handle, u64 ts_offset,
|
||||
u32 *status, u64 *timestamp)
|
||||
{
|
||||
u32 cq_patched_handle, ts_patched_handle;
|
||||
struct hl_user_pending_interrupt *pend;
|
||||
struct hl_ts_buff *ts_buff;
|
||||
struct hl_cb *cq_cb;
|
||||
unsigned long timeout, flags;
|
||||
long completion_rc;
|
||||
struct hl_cb *cb;
|
||||
int rc = 0;
|
||||
u32 handle;
|
||||
|
||||
timeout = hl_usecs64_to_jiffies(timeout_us);
|
||||
|
||||
hl_ctx_get(hdev, ctx);
|
||||
|
||||
cq_counters_handle >>= PAGE_SHIFT;
|
||||
handle = (u32) cq_counters_handle;
|
||||
|
||||
cb = hl_cb_get(hdev, cb_mgr, handle);
|
||||
if (!cb) {
|
||||
hl_ctx_put(ctx);
|
||||
return -EINVAL;
|
||||
cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
|
||||
cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
|
||||
if (!cq_cb) {
|
||||
rc = -EINVAL;
|
||||
goto put_ctx;
|
||||
}
|
||||
|
||||
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
|
||||
if (!pend) {
|
||||
hl_cb_put(cb);
|
||||
hl_ctx_put(ctx);
|
||||
return -ENOMEM;
|
||||
if (register_ts_record) {
|
||||
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
|
||||
interrupt->interrupt_id, ts_offset, cq_counters_offset);
|
||||
|
||||
ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
|
||||
ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
|
||||
if (!ts_buff) {
|
||||
rc = -EINVAL;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
|
||||
/* Find first available record */
|
||||
rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
|
||||
cq_counters_offset, target_value,
|
||||
&interrupt->wait_list_lock, &pend);
|
||||
if (rc)
|
||||
goto put_ts_buff;
|
||||
} else {
|
||||
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
|
||||
if (!pend) {
|
||||
rc = -ENOMEM;
|
||||
goto put_cq_cb;
|
||||
}
|
||||
hl_fence_init(&pend->fence, ULONG_MAX);
|
||||
pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
|
||||
pend->cq_target_value = target_value;
|
||||
}
|
||||
|
||||
hl_fence_init(&pend->fence, ULONG_MAX);
|
||||
|
||||
pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
|
||||
pend->cq_target_value = target_value;
|
||||
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
|
||||
/* We check for completion value as interrupt could have been received
|
||||
* before we added the node to the wait list
|
||||
*/
|
||||
if (*pend->cq_kernel_addr >= target_value) {
|
||||
if (register_ts_record)
|
||||
pend->ts_reg_info.in_use = 0;
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
*status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
/* There was no interrupt, we assume the completion is now. */
|
||||
pend->fence.timestamp = ktime_get();
|
||||
goto set_timestamp;
|
||||
|
||||
if (register_ts_record) {
|
||||
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
|
||||
goto put_ts_buff;
|
||||
} else {
|
||||
pend->fence.timestamp = ktime_get();
|
||||
goto set_timestamp;
|
||||
}
|
||||
} else if (!timeout_us) {
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
@ -2916,11 +3020,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
}
|
||||
|
||||
/* Add pending user interrupt to relevant list for the interrupt
|
||||
* handler to monitor
|
||||
* handler to monitor.
|
||||
* Note that we cannot have sorted list by target value,
|
||||
* in order to shorten the list pass loop, since
|
||||
* same list could have nodes for different cq counter handle.
|
||||
*/
|
||||
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
if (register_ts_record) {
|
||||
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
|
||||
goto ts_registration_exit;
|
||||
}
|
||||
|
||||
/* Wait for interrupt handler to signal completion */
|
||||
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
|
||||
timeout);
|
||||
@ -2952,15 +3064,30 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We keep removing the node from list here, and not at the irq handler
|
||||
* for completion timeout case. and if it's a registration
|
||||
* for ts record, the node will be deleted in the irq handler after
|
||||
* we reach the target value.
|
||||
*/
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
list_del(&pend->wait_list_node);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
set_timestamp:
|
||||
*timestamp = ktime_to_ns(pend->fence.timestamp);
|
||||
|
||||
kfree(pend);
|
||||
hl_cb_put(cb);
|
||||
hl_cb_put(cq_cb);
|
||||
ts_registration_exit:
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
return rc;
|
||||
|
||||
put_ts_buff:
|
||||
hl_ts_put(ts_buff);
|
||||
put_cq_cb:
|
||||
hl_cb_put(cq_cb);
|
||||
put_ctx:
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
return rc;
|
||||
@ -3119,11 +3246,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
|
||||
|
||||
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
|
||||
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
|
||||
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
|
||||
args->in.interrupt_timeout_us, args->in.cq_counters_handle,
|
||||
args->in.cq_counters_offset,
|
||||
args->in.target, interrupt, &status,
|
||||
&timestamp);
|
||||
args->in.target, interrupt,
|
||||
!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
|
||||
args->in.timestamp_handle, args->in.timestamp_offset,
|
||||
&status, &timestamp);
|
||||
else
|
||||
rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
|
||||
args->in.interrupt_timeout_us, args->in.addr,
|
||||
|
@ -145,6 +145,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
|
||||
hl_release_pending_user_interrupts(hpriv->hdev);
|
||||
|
||||
hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
|
||||
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
|
||||
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
|
||||
|
||||
if (!hl_hpriv_put(hpriv))
|
||||
@ -209,6 +210,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
|
||||
case HL_MMAP_TYPE_BLOCK:
|
||||
return hl_hw_block_mmap(hpriv, vma);
|
||||
|
||||
case HL_MMAP_TYPE_TS_BUFF:
|
||||
return hl_ts_mmap(hpriv, vma);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
@ -410,10 +414,10 @@ static int device_early_init(struct hl_device *hdev)
|
||||
goto free_cq_wq;
|
||||
}
|
||||
|
||||
hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
|
||||
if (!hdev->sob_reset_wq) {
|
||||
hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
|
||||
if (!hdev->ts_free_obj_wq) {
|
||||
dev_err(hdev->dev,
|
||||
"Failed to allocate SOB reset workqueue\n");
|
||||
"Failed to allocate Timestamp registration free workqueue\n");
|
||||
rc = -ENOMEM;
|
||||
goto free_eq_wq;
|
||||
}
|
||||
@ -422,7 +426,7 @@ static int device_early_init(struct hl_device *hdev)
|
||||
GFP_KERNEL);
|
||||
if (!hdev->hl_chip_info) {
|
||||
rc = -ENOMEM;
|
||||
goto free_sob_reset_wq;
|
||||
goto free_ts_free_wq;
|
||||
}
|
||||
|
||||
rc = hl_mmu_if_set_funcs(hdev);
|
||||
@ -461,8 +465,8 @@ free_cb_mgr:
|
||||
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
|
||||
free_chip_info:
|
||||
kfree(hdev->hl_chip_info);
|
||||
free_sob_reset_wq:
|
||||
destroy_workqueue(hdev->sob_reset_wq);
|
||||
free_ts_free_wq:
|
||||
destroy_workqueue(hdev->ts_free_obj_wq);
|
||||
free_eq_wq:
|
||||
destroy_workqueue(hdev->eq_wq);
|
||||
free_cq_wq:
|
||||
@ -501,7 +505,7 @@ static void device_early_fini(struct hl_device *hdev)
|
||||
|
||||
kfree(hdev->hl_chip_info);
|
||||
|
||||
destroy_workqueue(hdev->sob_reset_wq);
|
||||
destroy_workqueue(hdev->ts_free_obj_wq);
|
||||
destroy_workqueue(hdev->eq_wq);
|
||||
destroy_workqueue(hdev->device_reset_work.wq);
|
||||
|
||||
|
@ -31,14 +31,15 @@
|
||||
#define HL_NAME "habanalabs"
|
||||
|
||||
/* Use upper bits of mmap offset to store habana driver specific information.
|
||||
* bits[63:61] - Encode mmap type
|
||||
* bits[63:59] - Encode mmap type
|
||||
* bits[45:0] - mmap offset value
|
||||
*
|
||||
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
|
||||
* defines are w.r.t to PAGE_SIZE
|
||||
*/
|
||||
#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT)
|
||||
#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT)
|
||||
#define HL_MMAP_TYPE_SHIFT (59 - PAGE_SHIFT)
|
||||
#define HL_MMAP_TYPE_MASK (0x1full << HL_MMAP_TYPE_SHIFT)
|
||||
#define HL_MMAP_TYPE_TS_BUFF (0x10ull << HL_MMAP_TYPE_SHIFT)
|
||||
#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT)
|
||||
#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)
|
||||
|
||||
@ -709,6 +710,40 @@ struct hl_cb_mgr {
|
||||
struct idr cb_handles; /* protected by cb_lock */
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_ts_mgr - describes the timestamp registration memory manager.
|
||||
* @ts_lock: protects ts_handles.
|
||||
* @ts_handles: an idr to hold all ts bufferes handles.
|
||||
*/
|
||||
struct hl_ts_mgr {
|
||||
spinlock_t ts_lock;
|
||||
struct idr ts_handles;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_ts_buff - describes a timestamp buffer.
|
||||
* @refcount: reference counter for usage of the buffer.
|
||||
* @hdev: pointer to device this buffer belongs to.
|
||||
* @mmap: true if the buff is currently mapped to user.
|
||||
* @kernel_buff_address: Holds the internal buffer's kernel virtual address.
|
||||
* @user_buff_address: Holds the user buffer's kernel virtual address.
|
||||
* @id: the buffer ID.
|
||||
* @mmap_size: Holds the buffer size that was mmaped.
|
||||
* @kernel_buff_size: Holds the internal kernel buffer size.
|
||||
* @user_buff_size: Holds the user buffer size.
|
||||
*/
|
||||
struct hl_ts_buff {
|
||||
struct kref refcount;
|
||||
struct hl_device *hdev;
|
||||
atomic_t mmap;
|
||||
void *kernel_buff_address;
|
||||
void *user_buff_address;
|
||||
u32 id;
|
||||
u32 mmap_size;
|
||||
u32 kernel_buff_size;
|
||||
u32 user_buff_size;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_cb - describes a Command Buffer.
|
||||
* @refcount: reference counter for usage of the CB.
|
||||
@ -886,9 +921,54 @@ struct hl_user_interrupt {
|
||||
u32 interrupt_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct timestamp_reg_free_node - holds the timestamp registration free objects node
|
||||
* @free_objects_node: node in the list free_obj_jobs
|
||||
* @cq_cb: pointer to cq command buffer to be freed
|
||||
* @ts_buff: pointer to timestamp buffer to be freed
|
||||
*/
|
||||
struct timestamp_reg_free_node {
|
||||
struct list_head free_objects_node;
|
||||
struct hl_cb *cq_cb;
|
||||
struct hl_ts_buff *ts_buff;
|
||||
};
|
||||
|
||||
/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
|
||||
* the job will be to pass over the free_obj_jobs list and put refcount to objects
|
||||
* in each node of the list
|
||||
* @free_obj: workqueue object to free timestamp registration node objects
|
||||
* @hdev: pointer to the device structure
|
||||
* @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
|
||||
*/
|
||||
struct timestamp_reg_work_obj {
|
||||
struct work_struct free_obj;
|
||||
struct hl_device *hdev;
|
||||
struct list_head *free_obj_head;
|
||||
};
|
||||
|
||||
/* struct timestamp_reg_info - holds the timestamp registration related data.
|
||||
* @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
|
||||
* relevant only when doing timestamps records registration.
|
||||
* @cq_cb: pointer to CQ counter CB.
|
||||
* @timestamp_kernel_addr: timestamp handle address, where to set timestamp
|
||||
* relevant only when doing timestamps records
|
||||
* registration.
|
||||
* @in_use: indicates if the node already in use. relevant only when doing
|
||||
* timestamps records registration, since in this case the driver
|
||||
* will have it's own buffer which serve as a records pool instead of
|
||||
* allocating records dynamically.
|
||||
*/
|
||||
struct timestamp_reg_info {
|
||||
struct hl_ts_buff *ts_buff;
|
||||
struct hl_cb *cq_cb;
|
||||
u64 *timestamp_kernel_addr;
|
||||
u8 in_use;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_user_pending_interrupt - holds a context to a user thread
|
||||
* pending on an interrupt
|
||||
* @ts_reg_info: holds the timestamps registration nodes info
|
||||
* @wait_list_node: node in the list of user threads pending on an interrupt
|
||||
* @fence: hl fence object for interrupt completion
|
||||
* @cq_target_value: CQ target value
|
||||
@ -896,10 +976,11 @@ struct hl_user_interrupt {
|
||||
* handler for target value comparison
|
||||
*/
|
||||
struct hl_user_pending_interrupt {
|
||||
struct list_head wait_list_node;
|
||||
struct hl_fence fence;
|
||||
u64 cq_target_value;
|
||||
u64 *cq_kernel_addr;
|
||||
struct timestamp_reg_info ts_reg_info;
|
||||
struct list_head wait_list_node;
|
||||
struct hl_fence fence;
|
||||
u64 cq_target_value;
|
||||
u64 *cq_kernel_addr;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1833,6 +1914,7 @@ struct hl_debug_params {
|
||||
* @ctx: current executing context. TODO: remove for multiple ctx per process
|
||||
* @ctx_mgr: context manager to handle multiple context for this FD.
|
||||
* @cb_mgr: command buffer manager to handle multiple buffers for this FD.
|
||||
* @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
|
||||
* @debugfs_list: list of relevant ASIC debugfs.
|
||||
* @dev_node: node in the device list of file private data
|
||||
* @refcount: number of related contexts.
|
||||
@ -1845,6 +1927,7 @@ struct hl_fpriv {
|
||||
struct hl_ctx *ctx;
|
||||
struct hl_ctx_mgr ctx_mgr;
|
||||
struct hl_cb_mgr cb_mgr;
|
||||
struct hl_ts_mgr ts_mem_mgr;
|
||||
struct list_head debugfs_list;
|
||||
struct list_head dev_node;
|
||||
struct kref refcount;
|
||||
@ -2517,7 +2600,7 @@ struct hl_reset_info {
|
||||
* @cq_wq: work queues of completion queues for executing work in process
|
||||
* context.
|
||||
* @eq_wq: work queue of event queue for executing work in process context.
|
||||
* @sob_reset_wq: work queue for sob reset executions.
|
||||
* @ts_free_obj_wq: work queue for timestamp registration objects release.
|
||||
* @kernel_ctx: Kernel driver context structure.
|
||||
* @kernel_queues: array of hl_hw_queue.
|
||||
* @cs_mirror_list: CS mirror list for TDR.
|
||||
@ -2645,7 +2728,7 @@ struct hl_device {
|
||||
struct hl_user_interrupt common_user_interrupt;
|
||||
struct workqueue_struct **cq_wq;
|
||||
struct workqueue_struct *eq_wq;
|
||||
struct workqueue_struct *sob_reset_wq;
|
||||
struct workqueue_struct *ts_free_obj_wq;
|
||||
struct hl_ctx *kernel_ctx;
|
||||
struct hl_hw_queue *kernel_queues;
|
||||
struct list_head cs_mirror_list;
|
||||
@ -3128,6 +3211,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
|
||||
const char *format, ...);
|
||||
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
|
||||
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
|
||||
void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
|
||||
void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
|
||||
int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
|
||||
struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
|
||||
void hl_ts_put(struct hl_ts_buff *buff);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
|
@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
|
||||
hl_cb_mgr_init(&hpriv->cb_mgr);
|
||||
hl_ctx_mgr_init(&hpriv->ctx_mgr);
|
||||
hl_ts_mgr_init(&hpriv->ts_mem_mgr);
|
||||
|
||||
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
|
||||
|
||||
@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
out_err:
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
|
||||
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
|
||||
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
|
||||
filp->private_data = NULL;
|
||||
mutex_destroy(&hpriv->restore_phase_mutex);
|
||||
|
@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_ts_free_objects - handler of the free objects workqueue.
|
||||
* This function should put refcount to objects that the registration node
|
||||
* took refcount to them.
|
||||
* @work: workqueue object pointer
|
||||
*/
|
||||
static void hl_ts_free_objects(struct work_struct *work)
|
||||
{
|
||||
struct timestamp_reg_work_obj *job =
|
||||
container_of(work, struct timestamp_reg_work_obj, free_obj);
|
||||
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
|
||||
struct list_head *free_list_head = job->free_obj_head;
|
||||
struct hl_device *hdev = job->hdev;
|
||||
|
||||
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
|
||||
dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
|
||||
free_obj->ts_buff,
|
||||
free_obj->cq_cb);
|
||||
|
||||
hl_ts_put(free_obj->ts_buff);
|
||||
hl_cb_put(free_obj->cq_cb);
|
||||
kfree(free_obj);
|
||||
}
|
||||
|
||||
kfree(free_list_head);
|
||||
kfree(job);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function called with spin_lock of wait_list_lock taken
|
||||
* This function will set timestamp and delete the registration node from the
|
||||
* wait_list_lock.
|
||||
* and since we're protected with spin_lock here, so we cannot just put the refcount
|
||||
* for the objects here, since the release function may be called and it's also a long
|
||||
* logic (which might sleep also) that cannot be handled in irq context.
|
||||
* so here we'll be filling a list with nodes of "put" jobs and then will send this
|
||||
* list to a dedicated workqueue to do the actual put.
|
||||
*/
|
||||
int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
|
||||
struct list_head **free_list)
|
||||
{
|
||||
struct timestamp_reg_free_node *free_node;
|
||||
u64 timestamp;
|
||||
|
||||
if (!(*free_list)) {
|
||||
/* Alloc/Init the timestamp registration free objects list */
|
||||
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
|
||||
if (!(*free_list))
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(*free_list);
|
||||
}
|
||||
|
||||
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
|
||||
if (!free_node)
|
||||
return -ENOMEM;
|
||||
|
||||
timestamp = ktime_get_ns();
|
||||
|
||||
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
|
||||
|
||||
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
|
||||
pend->ts_reg_info.timestamp_kernel_addr,
|
||||
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
|
||||
|
||||
list_del(&pend->wait_list_node);
|
||||
|
||||
/* Mark kernel CB node as free */
|
||||
pend->ts_reg_info.in_use = 0;
|
||||
|
||||
/* Putting the refcount for ts_buff and cq_cb objects will be handled
|
||||
* in workqueue context, just add job to free_list.
|
||||
*/
|
||||
free_node->ts_buff = pend->ts_reg_info.ts_buff;
|
||||
free_node->cq_cb = pend->ts_reg_info.cq_cb;
|
||||
list_add(&free_node->free_objects_node, *free_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void handle_user_cq(struct hl_device *hdev,
|
||||
struct hl_user_interrupt *user_cq)
|
||||
{
|
||||
struct hl_user_pending_interrupt *pend;
|
||||
struct hl_user_pending_interrupt *pend, *temp_pend;
|
||||
struct list_head *ts_reg_free_list_head = NULL;
|
||||
struct timestamp_reg_work_obj *job;
|
||||
bool reg_node_handle_fail = false;
|
||||
ktime_t now = ktime_get();
|
||||
int rc;
|
||||
|
||||
/* For registration nodes:
|
||||
* As part of handling the registration nodes, we should put refcount to
|
||||
* some objects. the problem is that we cannot do that under spinlock
|
||||
* or in irq handler context at all (since release functions are long and
|
||||
* might sleep), so we will need to handle that part in workqueue context.
|
||||
* To avoid handling kmalloc failure which compels us rolling back actions
|
||||
* and move nodes hanged on the free list back to the interrupt wait list
|
||||
* we always alloc the job of the WQ at the beginning.
|
||||
*/
|
||||
job = kmalloc(sizeof(*job), GFP_ATOMIC);
|
||||
if (!job)
|
||||
return;
|
||||
|
||||
spin_lock(&user_cq->wait_list_lock);
|
||||
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
|
||||
if ((pend->cq_kernel_addr &&
|
||||
*(pend->cq_kernel_addr) >= pend->cq_target_value) ||
|
||||
list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
|
||||
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
|
||||
!pend->cq_kernel_addr) {
|
||||
pend->fence.timestamp = now;
|
||||
complete_all(&pend->fence.completion);
|
||||
if (pend->ts_reg_info.ts_buff) {
|
||||
if (!reg_node_handle_fail) {
|
||||
rc = handle_registration_node(hdev, pend,
|
||||
&ts_reg_free_list_head);
|
||||
if (rc)
|
||||
reg_node_handle_fail = true;
|
||||
}
|
||||
} else {
|
||||
/* Handle wait target value node */
|
||||
pend->fence.timestamp = now;
|
||||
complete_all(&pend->fence.completion);
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&user_cq->wait_list_lock);
|
||||
|
||||
if (ts_reg_free_list_head) {
|
||||
INIT_WORK(&job->free_obj, hl_ts_free_objects);
|
||||
job->free_obj_head = ts_reg_free_list_head;
|
||||
job->hdev = hdev;
|
||||
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
|
||||
} else {
|
||||
kfree(job);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -20,6 +20,9 @@ MODULE_IMPORT_NS(DMA_BUF);
|
||||
/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
|
||||
#define DRAM_POOL_PAGE_SIZE SZ_8M
|
||||
|
||||
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
|
||||
struct hl_mem_in *args, u64 *handle);
|
||||
|
||||
/*
|
||||
* The va ranges in context object contain a list with the available chunks of
|
||||
* device virtual memory.
|
||||
@ -2021,6 +2024,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
|
||||
rc = -EPERM;
|
||||
break;
|
||||
|
||||
case HL_MEM_OP_TS_ALLOC:
|
||||
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
|
||||
rc = -EINVAL;
|
||||
@ -2031,6 +2037,258 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void ts_buff_release(struct kref *ref)
|
||||
{
|
||||
struct hl_ts_buff *buff;
|
||||
|
||||
buff = container_of(ref, struct hl_ts_buff, refcount);
|
||||
|
||||
vfree(buff->kernel_buff_address);
|
||||
vfree(buff->user_buff_address);
|
||||
kfree(buff);
|
||||
}
|
||||
|
||||
struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
|
||||
u32 handle)
|
||||
{
|
||||
struct hl_ts_buff *buff;
|
||||
|
||||
spin_lock(&mgr->ts_lock);
|
||||
buff = idr_find(&mgr->ts_handles, handle);
|
||||
if (!buff) {
|
||||
spin_unlock(&mgr->ts_lock);
|
||||
dev_warn(hdev->dev,
|
||||
"TS buff get failed, no match to handle 0x%x\n", handle);
|
||||
return NULL;
|
||||
}
|
||||
kref_get(&buff->refcount);
|
||||
spin_unlock(&mgr->ts_lock);
|
||||
|
||||
return buff;
|
||||
}
|
||||
|
||||
void hl_ts_put(struct hl_ts_buff *buff)
|
||||
{
|
||||
kref_put(&buff->refcount, ts_buff_release);
|
||||
}
|
||||
|
||||
static void buff_vm_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
|
||||
long new_mmap_size;
|
||||
|
||||
new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
|
||||
|
||||
if (new_mmap_size > 0) {
|
||||
buff->mmap_size = new_mmap_size;
|
||||
return;
|
||||
}
|
||||
|
||||
atomic_set(&buff->mmap, 0);
|
||||
hl_ts_put(buff);
|
||||
vma->vm_private_data = NULL;
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct ts_buff_vm_ops = {
|
||||
.close = buff_vm_close
|
||||
};
|
||||
|
||||
int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_ts_buff *buff;
|
||||
u32 handle, user_buff_size;
|
||||
int rc;
|
||||
|
||||
/* We use the page offset to hold the idr and thus we need to clear
|
||||
* it before doing the mmap itself
|
||||
*/
|
||||
handle = vma->vm_pgoff;
|
||||
vma->vm_pgoff = 0;
|
||||
|
||||
buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
|
||||
if (!buff) {
|
||||
dev_err(hdev->dev,
|
||||
"TS buff mmap failed, no match to handle 0x%x\n", handle);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Validation check */
|
||||
user_buff_size = vma->vm_end - vma->vm_start;
|
||||
if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
|
||||
dev_err(hdev->dev,
|
||||
"TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
|
||||
user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
|
||||
rc = -EINVAL;
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
|
||||
if (!access_ok(VERIFY_WRITE,
|
||||
(void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
|
||||
#else
|
||||
if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
|
||||
user_buff_size)) {
|
||||
#endif
|
||||
dev_err(hdev->dev,
|
||||
"user pointer is invalid - 0x%lx\n",
|
||||
vma->vm_start);
|
||||
|
||||
rc = -EINVAL;
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
|
||||
dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
|
||||
rc = -EINVAL;
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
vma->vm_ops = &ts_buff_vm_ops;
|
||||
vma->vm_private_data = buff;
|
||||
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
|
||||
rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
|
||||
if (rc) {
|
||||
atomic_set(&buff->mmap, 0);
|
||||
goto put_buff;
|
||||
}
|
||||
|
||||
buff->mmap_size = buff->user_buff_size;
|
||||
vma->vm_pgoff = handle;
|
||||
|
||||
return 0;
|
||||
|
||||
put_buff:
|
||||
hl_ts_put(buff);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
|
||||
{
|
||||
spin_lock_init(&mgr->ts_lock);
|
||||
idr_init(&mgr->ts_handles);
|
||||
}
|
||||
|
||||
void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
|
||||
{
|
||||
struct hl_ts_buff *buff;
|
||||
struct idr *idp;
|
||||
u32 id;
|
||||
|
||||
idp = &mgr->ts_handles;
|
||||
|
||||
idr_for_each_entry(idp, buff, id) {
|
||||
if (kref_put(&buff->refcount, ts_buff_release) != 1)
|
||||
dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
|
||||
id);
|
||||
}
|
||||
|
||||
idr_destroy(&mgr->ts_handles);
|
||||
}
|
||||
|
||||
/**
 * hl_ts_alloc_buff() - allocate a timestamps buffer object
 * @hdev: habanalabs device (not used by the allocation itself)
 * @num_elements: number of timestamp elements the buffers must hold
 *
 * Allocates the object together with its two backing buffers: a user
 * buffer of one u64 per element (vmalloc_user), and a kernel buffer of
 * one struct hl_user_pending_interrupt per element (vmalloc).
 *
 * Return: the new object, or NULL if any of the allocations failed (in
 * which case nothing stays allocated).
 */
static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
{
	struct hl_ts_buff *buff;
	void *user_mem, *kernel_mem;
	u32 user_size, kernel_size;

	buff = kzalloc(sizeof(*buff), GFP_KERNEL);
	if (!buff)
		return NULL;

	/* Buffer that is handed to the user: one u64 slot per element */
	user_size = num_elements * sizeof(u64);
	user_mem = vmalloc_user(user_size);
	if (!user_mem) {
		kfree(buff);
		return NULL;
	}

	/* Driver-internal buffer: one pending-interrupt node per element */
	kernel_size = num_elements * sizeof(struct hl_user_pending_interrupt);
	kernel_mem = vmalloc(kernel_size);
	if (!kernel_mem) {
		vfree(user_mem);
		kfree(buff);
		return NULL;
	}

	buff->user_buff_address = user_mem;
	buff->user_buff_size = user_size;
	buff->kernel_buff_address = kernel_mem;
	buff->kernel_buff_size = kernel_size;

	return buff;
}
|
||||
|
||||
/**
|
||||
* allocate_timestamps_buffers() - allocate timestamps buffers
|
||||
* This function will allocate ts buffer that will later on be mapped to the user
|
||||
* in order to be able to read the timestamp.
|
||||
* in additon it'll allocate an extra buffer for registration management.
|
||||
* since we cannot fail during registration for out-of-memory situation, so
|
||||
* we'll prepare a pool which will be used as user interrupt nodes and instead
|
||||
* of dynamically allocating nodes while registration we'll pick the node from
|
||||
* this pool. in addtion it'll add node to the mapping hash which will be used
|
||||
* to map user ts buffer to the internal kernel ts buffer.
|
||||
* @hpriv: pointer to the private data of the fd
|
||||
* @args: ioctl input
|
||||
* @handle: user timestamp buffer handle as an output
|
||||
*/
|
||||
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
|
||||
{
|
||||
struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_ts_buff *ts_buff;
|
||||
int rc = 0;
|
||||
|
||||
if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
|
||||
dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
|
||||
args->num_of_elements, TS_MAX_ELEMENTS_NUM);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Allocate ts buffer object
|
||||
* This object will contain two buffers one that will be mapped to the user
|
||||
* and another internal buffer for the driver use only, which won't be mapped
|
||||
* to the user.
|
||||
*/
|
||||
ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
|
||||
if (!ts_buff) {
|
||||
rc = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
spin_lock(&ts_mgr->ts_lock);
|
||||
rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
|
||||
spin_unlock(&ts_mgr->ts_lock);
|
||||
if (rc < 0) {
|
||||
dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
|
||||
goto release_ts_buff;
|
||||
}
|
||||
|
||||
ts_buff->id = rc;
|
||||
ts_buff->hdev = hdev;
|
||||
|
||||
kref_init(&ts_buff->refcount);
|
||||
|
||||
/* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
|
||||
*handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
|
||||
*handle <<= PAGE_SHIFT;
|
||||
|
||||
dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
|
||||
|
||||
return 0;
|
||||
|
||||
release_ts_buff:
|
||||
kref_put(&ts_buff->refcount, ts_buff_release);
|
||||
out_err:
|
||||
*handle = 0;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
{
|
||||
enum hl_device_status status;
|
||||
@ -2146,6 +2404,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
args->out.fd = dmabuf_fd;
|
||||
break;
|
||||
|
||||
case HL_MEM_OP_TS_ALLOC:
|
||||
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
|
||||
rc = -EINVAL;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
*
|
||||
* Copyright 2016-2020 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2021 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
*/
|
||||
@ -30,6 +30,9 @@
|
||||
*/
|
||||
#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72
|
||||
|
||||
/* Max number of elements in timestamps registration buffers */
|
||||
#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1M elements */
|
||||
|
||||
/*
|
||||
* Goya queue Numbering
|
||||
*
|
||||
@ -695,10 +698,12 @@ struct hl_cb_in {
|
||||
__u64 cb_handle;
|
||||
/* HL_CB_OP_* */
|
||||
__u32 op;
|
||||
|
||||
/* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
|
||||
* will be allocated, regardless of this parameter's value, is PAGE_SIZE
|
||||
*/
|
||||
__u32 cb_size;
|
||||
|
||||
/* Context ID - Currently not in use */
|
||||
__u32 ctx_id;
|
||||
/* HL_CB_FLAGS_* */
|
||||
@ -964,6 +969,7 @@ union hl_cs_args {
|
||||
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
|
||||
#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
|
||||
#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10
|
||||
#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20
|
||||
|
||||
#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
|
||||
|
||||
@ -1036,6 +1042,20 @@ struct hl_wait_cs_in {
|
||||
* relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
|
||||
*/
|
||||
__u64 cq_counters_offset;
|
||||
|
||||
/*
|
||||
* timestamp_handle is the handle of the timestamps buffer.
|
||||
* relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set
|
||||
*/
|
||||
__u64 timestamp_handle;
|
||||
|
||||
/*
|
||||
* timestamp_offset is the offset inside the timestamps buffer pointed to by timestamp_handle above.
|
||||
* upon interrupt, if the cq reached the target value then driver will write
|
||||
* timestamp to this offset.
|
||||
* relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set
|
||||
*/
|
||||
__u64 timestamp_offset;
|
||||
};
|
||||
|
||||
#define HL_WAIT_CS_STATUS_COMPLETED 0
|
||||
@ -1082,6 +1102,14 @@ union hl_wait_cs_args {
|
||||
*/
|
||||
#define HL_MEM_OP_EXPORT_DMABUF_FD 5
|
||||
|
||||
/* Opcode to create timestamps pool for user interrupts registration support
|
||||
* The memory will be allocated by the kernel driver: a timestamps buffer, for which
|
||||
* the user will get a handle for mmap, and another internal buffer used by the
|
||||
* driver for registration management
|
||||
* The memory will be freed when the user closes the file descriptor(ctx close)
|
||||
*/
|
||||
#define HL_MEM_OP_TS_ALLOC 6
|
||||
|
||||
/* Memory flags */
|
||||
#define HL_MEM_CONTIGUOUS 0x1
|
||||
#define HL_MEM_SHARED 0x2
|
||||
@ -1173,9 +1201,14 @@ struct hl_mem_in {
|
||||
* DMA-BUF file/FD flags.
|
||||
*/
|
||||
__u32 flags;
|
||||
|
||||
/* Context ID - Currently not in use */
|
||||
__u32 ctx_id;
|
||||
__u32 pad;
|
||||
|
||||
/* number of timestamp elements
|
||||
* used only when HL_MEM_OP_TS_ALLOC opcode
|
||||
*/
|
||||
__u32 num_of_elements;
|
||||
};
|
||||
|
||||
struct hl_mem_out {
|
||||
|
Loading…
x
Reference in New Issue
Block a user