Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

habanalabs: add "in device creation" status

On init, the disabled state is cleared right before hw_init, which
causes the device to report the "Operational" state before the device
initialization is finished. Although the char device is not yet exposed
to the user at this stage, the sysfs entries are exposed.

This can cause errors in monitoring applications that use the sysfs
entries.

In order to avoid this, a new state "in device creation" is introduced
to be reported when the device is not disabled but is still in the init
flow.

Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>

authored by

Omer Shpigelman and committed by
Oded Gabbay
71731090 e1b61f8e

+20 -17
+3
drivers/misc/habanalabs/common/device.c
··· 23 23 status = HL_DEVICE_STATUS_NEEDS_RESET; 24 24 else if (hdev->disabled) 25 25 status = HL_DEVICE_STATUS_MALFUNCTION; 26 + else if (!hdev->init_done) 27 + status = HL_DEVICE_STATUS_IN_DEVICE_CREATION; 26 28 else 27 29 status = HL_DEVICE_STATUS_OPERATIONAL; 28 30 ··· 46 44 case HL_DEVICE_STATUS_NEEDS_RESET: 47 45 return false; 48 46 case HL_DEVICE_STATUS_OPERATIONAL: 47 + case HL_DEVICE_STATUS_IN_DEVICE_CREATION: 49 48 default: 50 49 return true; 51 50 }
+1 -1
drivers/misc/habanalabs/common/habanalabs.h
··· 1990 1990 1991 1991 #define HL_STR_MAX 32 1992 1992 1993 - #define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1) 1993 + #define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1) 1994 1994 1995 1995 /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe 1996 1996 * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
+6 -2
drivers/misc/habanalabs/common/habanalabs_drv.c
··· 317 317 hdev->asic_prop.fw_security_enabled = false; 318 318 319 319 /* Assign status description string */ 320 - strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], 321 - "disabled", HL_STR_MAX); 320 + strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], 321 + "operational", HL_STR_MAX); 322 322 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], 323 323 "in reset", HL_STR_MAX); 324 + strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], 325 + "disabled", HL_STR_MAX); 324 326 strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], 325 327 "needs reset", HL_STR_MAX); 328 + strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], 329 + "in device creation", HL_STR_MAX); 326 330 327 331 hdev->major = hl_major; 328 332 hdev->reset_on_lockup = reset_on_lockup;
+7 -13
drivers/misc/habanalabs/common/sysfs.c
··· 9 9 10 10 #include <linux/pci.h> 11 11 12 - long hl_get_frequency(struct hl_device *hdev, u32 pll_index, 13 - bool curr) 12 + long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) 14 13 { 15 14 struct cpucp_packet pkt; 16 15 u32 used_pll_idx; ··· 43 44 return (long) result; 44 45 } 45 46 46 - void hl_set_frequency(struct hl_device *hdev, u32 pll_index, 47 - u64 freq) 47 + void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) 48 48 { 49 49 struct cpucp_packet pkt; 50 50 u32 used_pll_idx; ··· 283 285 char *buf) 284 286 { 285 287 struct hl_device *hdev = dev_get_drvdata(dev); 286 - char *str; 288 + char str[HL_STR_MAX]; 287 289 288 - if (atomic_read(&hdev->in_reset)) 289 - str = "In reset"; 290 - else if (hdev->disabled) 291 - str = "Malfunction"; 292 - else if (hdev->needs_reset) 293 - str = "Needs Reset"; 294 - else 295 - str = "Operational"; 290 + strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX); 291 + 292 + /* use uppercase for backward compatibility */ 293 + str[0] = 'A' + (str[0] - 'a'); 296 294 297 295 return sprintf(buf, "%s\n", str); 298 296 }
+3 -1
include/uapi/misc/habanalabs.h
··· 276 276 HL_DEVICE_STATUS_OPERATIONAL, 277 277 HL_DEVICE_STATUS_IN_RESET, 278 278 HL_DEVICE_STATUS_MALFUNCTION, 279 - HL_DEVICE_STATUS_NEEDS_RESET 279 + HL_DEVICE_STATUS_NEEDS_RESET, 280 + HL_DEVICE_STATUS_IN_DEVICE_CREATION, 281 + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION 280 282 }; 281 283 282 284 enum hl_server_type {