Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/papr_scm: Fetch nvdimm performance stats from PHYP

Update papr_scm.c to query dimm performance statistics from PHYP via
H_SCM_PERFORMANCE_STATS hcall and export them to user-space as PAPR
specific NVDIMM attribute 'perf_stats' in sysfs. The patch also
provides sysfs ABI documentation for the stats being reported and
their meanings.

During NVDIMM probe time in papr_scm_nvdimm_init() a special variant
of H_SCM_PERFORMANCE_STATS hcall is issued to check if collection of
performance statistics is supported or not. If successful, PHYP
returns the maximum possible buffer length needed to read all
performance stats. This returned value is stored in a per-nvdimm
attribute 'stat_buffer_len'.

The layout of the request buffer for reading NVDIMM performance stats
from PHYP is defined in 'struct papr_scm_perf_stats' and 'struct
papr_scm_perf_stat'. These structs are used in the newly introduced
drc_pmem_query_stats() that issues the H_SCM_PERFORMANCE_STATS hcall.

The sysfs access function perf_stats_show() uses the value of
'stat_buffer_len' to allocate a buffer large enough to hold all
possible NVDIMM performance stats and passes it to
drc_pmem_query_stats() to populate. Finally, the statistics reported in
the buffer are formatted into the sysfs access function's output buffer.

Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200731064153.182203-2-vaibhav@linux.ibm.com

authored by

Vaibhav Jain and committed by
Michael Ellerman
2d02bf83 d947fb4c

+177
+27
Documentation/ABI/testing/sysfs-bus-papr-pmem
··· 25 25 NVDIMM have been scrubbed. 26 26 * "locked" : Indicating that NVDIMM contents cant 27 27 be modified until next power cycle. 28 + 29 + What: /sys/bus/nd/devices/nmemX/papr/perf_stats 30 + Date: May, 2020 31 + KernelVersion: v5.9 32 + Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org, 33 + Description: 34 + (RO) Report various performance stats related to papr-scm NVDIMM 35 + device. Each stat is reported on a new line with each line 36 + composed of a stat-identifier followed by it value. Below are 37 + currently known dimm performance stats which are reported: 38 + 39 + * "CtlResCt" : Controller Reset Count 40 + * "CtlResTm" : Controller Reset Elapsed Time 41 + * "PonSecs " : Power-on Seconds 42 + * "MemLife " : Life Remaining 43 + * "CritRscU" : Critical Resource Utilization 44 + * "HostLCnt" : Host Load Count 45 + * "HostSCnt" : Host Store Count 46 + * "HostSDur" : Host Store Duration 47 + * "HostLDur" : Host Load Duration 48 + * "MedRCnt " : Media Read Count 49 + * "MedWCnt " : Media Write Count 50 + * "MedRDur " : Media Read Duration 51 + * "MedWDur " : Media Write Duration 52 + * "CchRHCnt" : Cache Read Hit Count 53 + * "CchWHCnt" : Cache Write Hit Count 54 + * "FastWCnt" : Fast Write Count
+150
arch/powerpc/platforms/pseries/papr_scm.c
··· 64 64 PAPR_PMEM_HEALTH_FATAL | \ 65 65 PAPR_PMEM_HEALTH_UNHEALTHY) 66 66 67 + #define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) 68 + #define PAPR_SCM_PERF_STATS_VERSION 0x1 69 + 70 + /* Struct holding a single performance metric */ 71 + struct papr_scm_perf_stat { 72 + u8 stat_id[8]; 73 + __be64 stat_val; 74 + } __packed; 75 + 76 + /* Struct exchanged between kernel and PHYP for fetching drc perf stats */ 77 + struct papr_scm_perf_stats { 78 + u8 eye_catcher[8]; 79 + /* Should be PAPR_SCM_PERF_STATS_VERSION */ 80 + __be32 stats_version; 81 + /* Number of stats following */ 82 + __be32 num_statistics; 83 + /* zero or more performance matrics */ 84 + struct papr_scm_perf_stat scm_statistic[]; 85 + } __packed; 86 + 67 87 /* private struct associated with each region */ 68 88 struct papr_scm_priv { 69 89 struct platform_device *pdev; ··· 112 92 113 93 /* Health information for the dimm */ 114 94 u64 health_bitmap; 95 + 96 + /* length of the stat buffer as expected by phyp */ 97 + size_t stat_buffer_len; 115 98 }; 116 99 117 100 static LIST_HEAD(papr_nd_regions); ··· 221 198 "Failed to query, trying an unbind followed by bind"); 222 199 drc_pmem_unbind(p); 223 200 return drc_pmem_bind(p); 201 + } 202 + 203 + /* 204 + * Query the Dimm performance stats from PHYP and copy them (if returned) to 205 + * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast 206 + * (num_stats + header) bytes. 207 + * - If buff_stats == NULL the return value is the size in byes of the buffer 208 + * needed to hold all supported performance-statistics. 209 + * - If buff_stats != NULL and num_stats == 0 then we copy all known 210 + * performance-statistics to 'buff_stat' and expect to be large enough to 211 + * hold them. 212 + * - if buff_stats != NULL and num_stats > 0 then copy the requested 213 + * performance-statistics to buff_stats. 
214 + */ 215 + static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, 216 + struct papr_scm_perf_stats *buff_stats, 217 + unsigned int num_stats) 218 + { 219 + unsigned long ret[PLPAR_HCALL_BUFSIZE]; 220 + size_t size; 221 + s64 rc; 222 + 223 + /* Setup the out buffer */ 224 + if (buff_stats) { 225 + memcpy(buff_stats->eye_catcher, 226 + PAPR_SCM_PERF_STATS_EYECATCHER, 8); 227 + buff_stats->stats_version = 228 + cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); 229 + buff_stats->num_statistics = 230 + cpu_to_be32(num_stats); 231 + 232 + /* 233 + * Calculate the buffer size based on num-stats provided 234 + * or use the prefetched max buffer length 235 + */ 236 + if (num_stats) 237 + /* Calculate size from the num_stats */ 238 + size = sizeof(struct papr_scm_perf_stats) + 239 + num_stats * sizeof(struct papr_scm_perf_stat); 240 + else 241 + size = p->stat_buffer_len; 242 + } else { 243 + /* In case of no out buffer ignore the size */ 244 + size = 0; 245 + } 246 + 247 + /* Do the HCALL asking PHYP for info */ 248 + rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, 249 + buff_stats ? 
virt_to_phys(buff_stats) : 0, 250 + size); 251 + 252 + /* Check if the error was due to an unknown stat-id */ 253 + if (rc == H_PARTIAL) { 254 + dev_err(&p->pdev->dev, 255 + "Unknown performance stats, Err:0x%016lX\n", ret[0]); 256 + return -ENOENT; 257 + } else if (rc != H_SUCCESS) { 258 + dev_err(&p->pdev->dev, 259 + "Failed to query performance stats, Err:%lld\n", rc); 260 + return -EIO; 261 + 262 + } else if (!size) { 263 + /* Handle case where stat buffer size was requested */ 264 + dev_dbg(&p->pdev->dev, 265 + "Performance stats size %ld\n", ret[0]); 266 + return ret[0]; 267 + } 268 + 269 + /* Successfully fetched the requested stats from phyp */ 270 + dev_dbg(&p->pdev->dev, 271 + "Performance stats returned %d stats\n", 272 + be32_to_cpu(buff_stats->num_statistics)); 273 + return 0; 224 274 } 225 275 226 276 /* ··· 733 637 return 0; 734 638 } 735 639 640 + static ssize_t perf_stats_show(struct device *dev, 641 + struct device_attribute *attr, char *buf) 642 + { 643 + int index, rc; 644 + struct seq_buf s; 645 + struct papr_scm_perf_stat *stat; 646 + struct papr_scm_perf_stats *stats; 647 + struct nvdimm *dimm = to_nvdimm(dev); 648 + struct papr_scm_priv *p = nvdimm_provider_data(dimm); 649 + 650 + if (!p->stat_buffer_len) 651 + return -ENOENT; 652 + 653 + /* Allocate the buffer for phyp where stats are written */ 654 + stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 655 + if (!stats) 656 + return -ENOMEM; 657 + 658 + /* Ask phyp to return all dimm perf stats */ 659 + rc = drc_pmem_query_stats(p, stats, 0); 660 + if (rc) 661 + goto free_stats; 662 + /* 663 + * Go through the returned output buffer and print stats and 664 + * values. Since stat_id is essentially a char string of 665 + * 8 bytes, simply use the string format specifier to print it. 
666 + */ 667 + seq_buf_init(&s, buf, PAGE_SIZE); 668 + for (index = 0, stat = stats->scm_statistic; 669 + index < be32_to_cpu(stats->num_statistics); 670 + ++index, ++stat) { 671 + seq_buf_printf(&s, "%.8s = 0x%016llX\n", 672 + stat->stat_id, 673 + be64_to_cpu(stat->stat_val)); 674 + } 675 + 676 + free_stats: 677 + kfree(stats); 678 + return rc ? rc : seq_buf_used(&s); 679 + } 680 + DEVICE_ATTR_RO(perf_stats); 681 + 736 682 static ssize_t flags_show(struct device *dev, 737 683 struct device_attribute *attr, char *buf) 738 684 { ··· 820 682 /* papr_scm specific dimm attributes */ 821 683 static struct attribute *papr_nd_attributes[] = { 822 684 &dev_attr_flags.attr, 685 + &dev_attr_perf_stats.attr, 823 686 NULL, 824 687 }; 825 688 ··· 841 702 struct nd_region_desc ndr_desc; 842 703 unsigned long dimm_flags; 843 704 int target_nid, online_nid; 705 + ssize_t stat_size; 844 706 845 707 p->bus_desc.ndctl = papr_scm_ndctl; 846 708 p->bus_desc.module = THIS_MODULE; ··· 908 768 mutex_lock(&papr_ndr_lock); 909 769 list_add_tail(&p->region_list, &papr_nd_regions); 910 770 mutex_unlock(&papr_ndr_lock); 771 + 772 + /* Try retriving the stat buffer and see if its supported */ 773 + stat_size = drc_pmem_query_stats(p, NULL, 0); 774 + if (stat_size > 0) { 775 + p->stat_buffer_len = stat_size; 776 + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", 777 + p->stat_buffer_len); 778 + } else { 779 + dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); 780 + } 911 781 912 782 return 0; 913 783