habanalabs: add status of reset after device release

The user might want to know that the device is in reset after device
release, which, unlike a regular reset, is not an erroneous event.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Oded Gabbay
2022-07-07 11:42:15 +03:00
parent bd4a338886
commit e3b20f3ee4
3 changed files with 20 additions and 8 deletions
+11 -6
View File
@@ -271,16 +271,20 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
if (hdev->reset_info.in_reset)
status = HL_DEVICE_STATUS_IN_RESET;
else if (hdev->reset_info.needs_reset)
if (hdev->reset_info.in_reset) {
if (hdev->reset_info.is_in_soft_reset)
status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
else
status = HL_DEVICE_STATUS_IN_RESET;
} else if (hdev->reset_info.needs_reset) {
status = HL_DEVICE_STATUS_NEEDS_RESET;
else if (hdev->disabled)
} else if (hdev->disabled) {
status = HL_DEVICE_STATUS_MALFUNCTION;
else if (!hdev->init_done)
} else if (!hdev->init_done) {
status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
else
} else {
status = HL_DEVICE_STATUS_OPERATIONAL;
}
return status;
}
@@ -296,6 +300,7 @@ bool hl_device_operational(struct hl_device *hdev,
switch (current_status) {
case HL_DEVICE_STATUS_IN_RESET:
case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
case HL_DEVICE_STATUS_MALFUNCTION:
case HL_DEVICE_STATUS_NEEDS_RESET:
return false;
@@ -165,7 +165,8 @@ int hl_device_open(struct inode *inode, struct file *filp)
"Can't open %s because it is %s\n",
dev_name(hdev->dev), hdev->status[status]);
if (status == HL_DEVICE_STATUS_IN_RESET)
if (status == HL_DEVICE_STATUS_IN_RESET ||
status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
rc = -EAGAIN;
else
rc = -EPERM;
@@ -395,6 +396,9 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
"in reset after device release", HL_STR_MAX);
/* First, we must find out which ASIC are we handling. This is needed
* to configure the behavior of the driver (kernel parameters)