Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Hwmon client for disk and solid state drives with temperature sensors
4 * Copyright (C) 2019 Zodiac Inflight Innovations
5 *
6 * With input from:
7 * Hwmon client for S.M.A.R.T. hard disk drives with temperature sensors.
8 * (C) 2018 Linus Walleij
9 *
10 * hwmon: Driver for SCSI/ATA temperature sensors
11 * by Constantin Baranov <const@mimas.ru>, submitted September 2009
12 *
 * This driver supports reporting the temperature of SATA drives. It can be
 * easily extended to report the temperature of SCSI drives.
15 *
16 * The primary means to read drive temperatures and temperature limits
17 * for ATA drives is the SCT Command Transport feature set as specified in
18 * ATA8-ACS.
19 * It can be used to read the current drive temperature, temperature limits,
20 * and historic minimum and maximum temperatures. The SCT Command Transport
21 * feature set is documented in "AT Attachment 8 - ATA/ATAPI Command Set
22 * (ATA8-ACS)".
23 *
24 * If the SCT Command Transport feature set is not available, drive temperatures
25 * may be readable through SMART attributes. Since SMART attributes are not well
26 * defined, this method is only used as fallback mechanism.
27 *
28 * There are three SMART attributes which may report drive temperatures.
29 * Those are defined as follows (from
30 * http://www.cropel.com/library/smart-attribute-list.aspx).
31 *
 * 190	Temperature	Temperature, monitored by a sensor somewhere inside
 *			the drive. Raw value typically holds the actual
 *			temperature (hexadecimal) in its rightmost two digits.
 *
 * 194	Temperature	Temperature, monitored by a sensor somewhere inside
 *			the drive. Raw value typically holds the actual
 *			temperature (hexadecimal) in its rightmost two digits.
 *
 * 231	Temperature	Temperature, monitored by a sensor somewhere inside
 *			the drive. Raw value typically holds the actual
 *			temperature (hexadecimal) in its rightmost two digits.
43 *
44 * Wikipedia defines attributes a bit differently.
45 *
 * 190	Temperature	Value is equal to (100-temp. °C), allowing manufacturer
 *	Difference or	to set a minimum threshold which corresponds to a
 *	Airflow		maximum temperature. This also follows the convention of
 *	Temperature	100 being a best-case value and lower values being
 *			undesirable. However, some older drives may instead
 *			report raw Temperature (identical to 0xC2) or
 *			Temperature minus 50 here.
 * 194	Temperature or	Indicates the device temperature, if the appropriate
 *	Temperature	sensor is fitted. Lowest byte of the raw value contains
 *	Celsius		the exact temperature value (Celsius degrees).
 * 231	Life Left	Indicates the approximate SSD life left, in terms of
 *	(SSDs) or	program/erase cycles or available reserved blocks.
 *	Temperature	A normalized value of 100 represents a new drive, with
 *			a threshold value at 10 indicating a need for
 *			replacement. A value of 0 may mean that the drive is
 *			operating in read-only mode to allow data recovery.
 *			Previously (pre-2010) occasionally used for Drive
 *			Temperature (more typically reported at 0xC2).
64 *
65 * Common denominator is that the first raw byte reports the temperature
66 * in degrees C on almost all drives. Some drives may report a fractional
67 * temperature in the second raw byte.
68 *
69 * Known exceptions (from libatasmart):
 * - SAMSUNG SV0412H and SAMSUNG SV1204H report the temperature in 10th
 *   degrees C in the first two raw bytes.
72 * - A few Maxtor drives report an unknown or bad value in attribute 194.
73 * - Certain Apple SSD drives report an unknown value in attribute 190.
74 * Only certain firmware versions are affected.
75 *
76 * Those exceptions affect older ATA drives and are currently ignored.
77 * Also, the second raw byte (possibly reporting the fractional temperature)
78 * is currently ignored.
79 *
80 * Many drives also report temperature limits in additional SMART data raw
81 * bytes. The format of those is not well defined and varies widely.
82 * The driver does not currently attempt to report those limits.
83 *
84 * According to data in smartmontools, attribute 231 is rarely used to report
85 * drive temperatures. At the same time, several drives report SSD life left
86 * in attribute 231, but do not support temperature sensors. For this reason,
87 * attribute 231 is currently ignored.
88 *
 * Following above definitions, temperatures are reported as follows.
 * - If SCT Command Transport is supported, it is used to read the
 *   temperature and, if available, temperature limits.
 * - Otherwise, if SMART attribute 194 is supported, it is used to read
 *   the temperature.
 * - Otherwise, if SMART attribute 190 is supported, it is used to read
 *   the temperature.
96 */
97
98#include <linux/ata.h>
99#include <linux/bits.h>
100#include <linux/device.h>
101#include <linux/hwmon.h>
102#include <linux/kernel.h>
103#include <linux/list.h>
104#include <linux/module.h>
105#include <scsi/scsi_cmnd.h>
106#include <scsi/scsi_device.h>
107#include <scsi/scsi_driver.h>
108#include <scsi/scsi_proto.h>
109
110struct drivetemp_data {
111 struct list_head list; /* list of instantiated devices */
112 struct scsi_device *sdev; /* SCSI device */
113 struct device *dev; /* instantiating device */
114 struct device *hwdev; /* hardware monitoring device */
115 u8 smartdata[ATA_SECT_SIZE]; /* local buffer */
116 int (*get_temp)(struct drivetemp_data *st, u32 attr, long *val);
117 bool have_temp_lowest; /* lowest temp in SCT status */
118 bool have_temp_highest; /* highest temp in SCT status */
119 bool have_temp_min; /* have min temp */
120 bool have_temp_max; /* have max temp */
121 bool have_temp_lcrit; /* have lower critical limit */
122 bool have_temp_crit; /* have critical limit */
123 int temp_min; /* min temp */
124 int temp_max; /* max temp */
125 int temp_lcrit; /* lower critical limit */
126 int temp_crit; /* critical limit */
127};
128
/* All drivetemp_data instances currently registered with hwmon */
static LIST_HEAD(drivetemp_devlist);
130
/* SMART attribute table: number of entries and the temperature attribute IDs */
#define ATA_MAX_SMART_ATTRS	30
#define SMART_TEMP_PROP_190	190
#define SMART_TEMP_PROP_194	194

/* SMART feature codes used for log access */
#define SMART_READ_LOG		0xd5
#define SMART_WRITE_LOG		0xd6

/* SCT log addresses (passed in the LBA low field) */
#define SCT_STATUS_REQ_ADDR	0xe0
#define SCT_READ_LOG_ADDR	0xe1

/* Byte offsets into the SCT status log */
#define SCT_STATUS_VERSION_LOW	0
#define SCT_STATUS_VERSION_HIGH	1
#define SCT_STATUS_TEMP		200
#define SCT_STATUS_TEMP_LOWEST	201
#define SCT_STATUS_TEMP_HIGHEST	202

/* Raw SCT temperature byte that marks a value as not available */
#define INVALID_TEMP		0x80

#define temp_is_valid(temp)	((temp) != INVALID_TEMP)
/* Reinterpret the raw byte as signed and scale it by 1000 */
#define temp_from_sct(temp)	(((s8)(temp)) * 1000)
149
150static inline bool ata_id_smart_supported(u16 *id)
151{
152 return id[ATA_ID_COMMAND_SET_1] & BIT(0);
153}
154
155static inline bool ata_id_smart_enabled(u16 *id)
156{
157 return id[ATA_ID_CFS_ENABLE_1] & BIT(0);
158}
159
160static int drivetemp_scsi_command(struct drivetemp_data *st,
161 u8 ata_command, u8 feature,
162 u8 lba_low, u8 lba_mid, u8 lba_high)
163{
164 u8 scsi_cmd[MAX_COMMAND_SIZE];
165 enum req_op op;
166 int err;
167
168 memset(scsi_cmd, 0, sizeof(scsi_cmd));
169 scsi_cmd[0] = ATA_16;
170 if (ata_command == ATA_CMD_SMART && feature == SMART_WRITE_LOG) {
171 scsi_cmd[1] = (5 << 1); /* PIO Data-out */
172 /*
173 * No off.line or cc, write to dev, block count in sector count
174 * field.
175 */
176 scsi_cmd[2] = 0x06;
177 op = REQ_OP_DRV_OUT;
178 } else {
179 scsi_cmd[1] = (4 << 1); /* PIO Data-in */
180 /*
181 * No off.line or cc, read from dev, block count in sector count
182 * field.
183 */
184 scsi_cmd[2] = 0x0e;
185 op = REQ_OP_DRV_IN;
186 }
187 scsi_cmd[4] = feature;
188 scsi_cmd[6] = 1; /* 1 sector */
189 scsi_cmd[8] = lba_low;
190 scsi_cmd[10] = lba_mid;
191 scsi_cmd[12] = lba_high;
192 scsi_cmd[14] = ata_command;
193
194 err = scsi_execute_cmd(st->sdev, scsi_cmd, op, st->smartdata,
195 ATA_SECT_SIZE, 10 * HZ, 5, NULL);
196 if (err > 0)
197 err = -EIO;
198 return err;
199}
200
201static int drivetemp_ata_command(struct drivetemp_data *st, u8 feature,
202 u8 select)
203{
204 return drivetemp_scsi_command(st, ATA_CMD_SMART, feature, select,
205 ATA_SMART_LBAM_PASS, ATA_SMART_LBAH_PASS);
206}
207
208static int drivetemp_get_smarttemp(struct drivetemp_data *st, u32 attr,
209 long *temp)
210{
211 u8 *buf = st->smartdata;
212 bool have_temp = false;
213 u8 temp_raw;
214 u8 csum;
215 int err;
216 int i;
217
218 err = drivetemp_ata_command(st, ATA_SMART_READ_VALUES, 0);
219 if (err)
220 return err;
221
222 /* Checksum the read value table */
223 csum = 0;
224 for (i = 0; i < ATA_SECT_SIZE; i++)
225 csum += buf[i];
226 if (csum) {
227 dev_dbg(&st->sdev->sdev_gendev,
228 "checksum error reading SMART values\n");
229 return -EIO;
230 }
231
232 for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) {
233 u8 *attr = buf + i * 12;
234 int id = attr[2];
235
236 if (!id)
237 continue;
238
239 if (id == SMART_TEMP_PROP_190) {
240 temp_raw = attr[7];
241 have_temp = true;
242 }
243 if (id == SMART_TEMP_PROP_194) {
244 temp_raw = attr[7];
245 have_temp = true;
246 break;
247 }
248 }
249
250 if (have_temp) {
251 *temp = temp_raw * 1000;
252 return 0;
253 }
254
255 return -ENXIO;
256}
257
258static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val)
259{
260 u8 *buf = st->smartdata;
261 int err;
262
263 err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
264 if (err)
265 return err;
266 switch (attr) {
267 case hwmon_temp_input:
268 if (!temp_is_valid(buf[SCT_STATUS_TEMP]))
269 return -ENODATA;
270 *val = temp_from_sct(buf[SCT_STATUS_TEMP]);
271 break;
272 case hwmon_temp_lowest:
273 if (!temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]))
274 return -ENODATA;
275 *val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]);
276 break;
277 case hwmon_temp_highest:
278 if (!temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]))
279 return -ENODATA;
280 *val = temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]);
281 break;
282 default:
283 err = -EINVAL;
284 break;
285 }
286 return err;
287}
288
/*
 * Drive models for which SCT must not be used. Entries are prefix-matched
 * against the SCSI model string.
 *
 * TOSHIBA DT01ACA: these drives will have WRITE FPDMA QUEUED command timeouts
 * and sometimes just freeze until power-cycled under heavy write loads when
 * their temperature is getting polled in SCT mode. The SMART mode seems to be
 * fine, though.
 *
 * While only the 3 TB model (DT01ACA3) was actually caught exhibiting the
 * problem let's play safe here to avoid data corruption and ban the whole
 * DT01ACAx family.
 */
static const char * const sct_avoid_models[] = {
	"TOSHIBA DT01ACA",
};
303
304static bool drivetemp_sct_avoid(struct drivetemp_data *st)
305{
306 struct scsi_device *sdev = st->sdev;
307 unsigned int ctr;
308
309 if (!sdev->model)
310 return false;
311
312 /*
313 * The "model" field contains just the raw SCSI INQUIRY response
314 * "product identification" field, which has a width of 16 bytes.
315 * This field is space-filled, but is NOT NULL-terminated.
316 */
317 for (ctr = 0; ctr < ARRAY_SIZE(sct_avoid_models); ctr++)
318 if (!strncmp(sdev->model, sct_avoid_models[ctr],
319 strlen(sct_avoid_models[ctr])))
320 return true;
321
322 return false;
323}
324
325static int drivetemp_identify_sata(struct drivetemp_data *st)
326{
327 struct scsi_device *sdev = st->sdev;
328 u8 *buf = st->smartdata;
329 struct scsi_vpd *vpd;
330 bool is_ata, is_sata;
331 bool have_sct_data_table;
332 bool have_sct_temp;
333 bool have_smart;
334 bool have_sct;
335 u16 *ata_id;
336 u16 version;
337 long temp;
338 int err;
339
340 /* SCSI-ATA Translation present? */
341 rcu_read_lock();
342 vpd = rcu_dereference(sdev->vpd_pg89);
343
344 /*
345 * Verify that ATA IDENTIFY DEVICE data is included in ATA Information
346 * VPD and that the drive implements the SATA protocol.
347 */
348 if (!vpd || vpd->len < 572 || vpd->data[56] != ATA_CMD_ID_ATA ||
349 vpd->data[36] != 0x34) {
350 rcu_read_unlock();
351 return -ENODEV;
352 }
353 ata_id = (u16 *)&vpd->data[60];
354 is_ata = ata_id_is_ata(ata_id);
355 is_sata = ata_id_is_sata(ata_id);
356 have_sct = ata_id_sct_supported(ata_id);
357 have_sct_data_table = ata_id_sct_data_tables(ata_id);
358 have_smart = ata_id_smart_supported(ata_id) &&
359 ata_id_smart_enabled(ata_id);
360
361 rcu_read_unlock();
362
363 /* bail out if this is not a SATA device */
364 if (!is_ata || !is_sata)
365 return -ENODEV;
366
367 if (have_sct && drivetemp_sct_avoid(st)) {
368 dev_notice(&sdev->sdev_gendev,
369 "will avoid using SCT for temperature monitoring\n");
370 have_sct = false;
371 }
372
373 if (!have_sct)
374 goto skip_sct;
375
376 err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
377 if (err)
378 goto skip_sct;
379
380 version = (buf[SCT_STATUS_VERSION_HIGH] << 8) |
381 buf[SCT_STATUS_VERSION_LOW];
382 if (version != 2 && version != 3)
383 goto skip_sct;
384
385 have_sct_temp = temp_is_valid(buf[SCT_STATUS_TEMP]);
386 if (!have_sct_temp)
387 goto skip_sct;
388
389 st->have_temp_lowest = temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]);
390 st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]);
391
392 if (!have_sct_data_table)
393 goto skip_sct_data;
394
395 /* Request and read temperature history table */
396 memset(buf, '\0', sizeof(st->smartdata));
397 buf[0] = 5; /* data table command */
398 buf[2] = 1; /* read table */
399 buf[4] = 2; /* temperature history table */
400
401 err = drivetemp_ata_command(st, SMART_WRITE_LOG, SCT_STATUS_REQ_ADDR);
402 if (err)
403 goto skip_sct_data;
404
405 err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_READ_LOG_ADDR);
406 if (err)
407 goto skip_sct_data;
408
409 /*
410 * Temperature limits per AT Attachment 8 -
411 * ATA/ATAPI Command Set (ATA8-ACS)
412 */
413 st->have_temp_max = temp_is_valid(buf[6]);
414 st->have_temp_crit = temp_is_valid(buf[7]);
415 st->have_temp_min = temp_is_valid(buf[8]);
416 st->have_temp_lcrit = temp_is_valid(buf[9]);
417
418 st->temp_max = temp_from_sct(buf[6]);
419 st->temp_crit = temp_from_sct(buf[7]);
420 st->temp_min = temp_from_sct(buf[8]);
421 st->temp_lcrit = temp_from_sct(buf[9]);
422
423skip_sct_data:
424 if (have_sct_temp) {
425 st->get_temp = drivetemp_get_scttemp;
426 return 0;
427 }
428skip_sct:
429 if (!have_smart)
430 return -ENODEV;
431 st->get_temp = drivetemp_get_smarttemp;
432 return drivetemp_get_smarttemp(st, hwmon_temp_input, &temp);
433}
434
435static int drivetemp_identify(struct drivetemp_data *st)
436{
437 struct scsi_device *sdev = st->sdev;
438
439 /* Bail out immediately if there is no inquiry data */
440 if (!sdev->inquiry || sdev->inquiry_len < 16)
441 return -ENODEV;
442
443 /* Disk device? */
444 if (sdev->type != TYPE_DISK && sdev->type != TYPE_ZBC)
445 return -ENODEV;
446
447 return drivetemp_identify_sata(st);
448}
449
450static int drivetemp_read(struct device *dev, enum hwmon_sensor_types type,
451 u32 attr, int channel, long *val)
452{
453 struct drivetemp_data *st = dev_get_drvdata(dev);
454 int err = 0;
455
456 if (type != hwmon_temp)
457 return -EINVAL;
458
459 switch (attr) {
460 case hwmon_temp_input:
461 case hwmon_temp_lowest:
462 case hwmon_temp_highest:
463 err = st->get_temp(st, attr, val);
464 break;
465 case hwmon_temp_lcrit:
466 *val = st->temp_lcrit;
467 break;
468 case hwmon_temp_min:
469 *val = st->temp_min;
470 break;
471 case hwmon_temp_max:
472 *val = st->temp_max;
473 break;
474 case hwmon_temp_crit:
475 *val = st->temp_crit;
476 break;
477 default:
478 err = -EINVAL;
479 break;
480 }
481 return err;
482}
483
484static umode_t drivetemp_is_visible(const void *data,
485 enum hwmon_sensor_types type,
486 u32 attr, int channel)
487{
488 const struct drivetemp_data *st = data;
489
490 switch (type) {
491 case hwmon_temp:
492 switch (attr) {
493 case hwmon_temp_input:
494 return 0444;
495 case hwmon_temp_lowest:
496 if (st->have_temp_lowest)
497 return 0444;
498 break;
499 case hwmon_temp_highest:
500 if (st->have_temp_highest)
501 return 0444;
502 break;
503 case hwmon_temp_min:
504 if (st->have_temp_min)
505 return 0444;
506 break;
507 case hwmon_temp_max:
508 if (st->have_temp_max)
509 return 0444;
510 break;
511 case hwmon_temp_lcrit:
512 if (st->have_temp_lcrit)
513 return 0444;
514 break;
515 case hwmon_temp_crit:
516 if (st->have_temp_crit)
517 return 0444;
518 break;
519 default:
520 break;
521 }
522 break;
523 default:
524 break;
525 }
526 return 0;
527}
528
529static const struct hwmon_channel_info * const drivetemp_info[] = {
530 HWMON_CHANNEL_INFO(chip,
531 HWMON_C_REGISTER_TZ),
532 HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT |
533 HWMON_T_LOWEST | HWMON_T_HIGHEST |
534 HWMON_T_MIN | HWMON_T_MAX |
535 HWMON_T_LCRIT | HWMON_T_CRIT),
536 NULL
537};
538
539static const struct hwmon_ops drivetemp_ops = {
540 .is_visible = drivetemp_is_visible,
541 .read = drivetemp_read,
542};
543
544static const struct hwmon_chip_info drivetemp_chip_info = {
545 .ops = &drivetemp_ops,
546 .info = drivetemp_info,
547};
548
549/*
550 * The device argument points to sdev->sdev_dev. Its parent is
551 * sdev->sdev_gendev, which we can use to get the scsi_device pointer.
552 */
553static int drivetemp_add(struct device *dev)
554{
555 struct scsi_device *sdev = to_scsi_device(dev->parent);
556 struct drivetemp_data *st;
557 int err;
558
559 st = kzalloc(sizeof(*st), GFP_KERNEL);
560 if (!st)
561 return -ENOMEM;
562
563 st->sdev = sdev;
564 st->dev = dev;
565
566 if (drivetemp_identify(st)) {
567 err = -ENODEV;
568 goto abort;
569 }
570
571 st->hwdev = hwmon_device_register_with_info(dev->parent, "drivetemp",
572 st, &drivetemp_chip_info,
573 NULL);
574 if (IS_ERR(st->hwdev)) {
575 err = PTR_ERR(st->hwdev);
576 goto abort;
577 }
578
579 list_add(&st->list, &drivetemp_devlist);
580 return 0;
581
582abort:
583 kfree(st);
584 return err;
585}
586
587static void drivetemp_remove(struct device *dev)
588{
589 struct drivetemp_data *st, *tmp;
590
591 list_for_each_entry_safe(st, tmp, &drivetemp_devlist, list) {
592 if (st->dev == dev) {
593 list_del(&st->list);
594 hwmon_device_unregister(st->hwdev);
595 kfree(st);
596 break;
597 }
598 }
599}
600
601static struct class_interface drivetemp_interface = {
602 .add_dev = drivetemp_add,
603 .remove_dev = drivetemp_remove,
604};
605
606static int __init drivetemp_init(void)
607{
608 return scsi_register_interface(&drivetemp_interface);
609}
610
611static void __exit drivetemp_exit(void)
612{
613 scsi_unregister_interface(&drivetemp_interface);
614}
615
616module_init(drivetemp_init);
617module_exit(drivetemp_exit);
618
619MODULE_AUTHOR("Guenter Roeck <linus@roeck-us.net>");
620MODULE_DESCRIPTION("Hard drive temperature monitor");
621MODULE_LICENSE("GPL");
622MODULE_ALIAS("platform:drivetemp");