Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

spi: spi.c: Convert statistics to per-cpu u64_stats_t

This change gives a dramatic performance improvement in the hot path,
since many costly spin_lock_irqsave() calls can be avoided.

On an i.MX8MM system with a MCP2518FD CAN controller connected via SPI,
the time the driver takes to handle interrupts, or in other words the time
the IRQ line of the CAN controller stays low is mainly dominated by the
time it takes to do 3 relatively short sync SPI transfers. The effect of
this patch is a reduction of this time from 136us down to only 98us.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David Jander <david@protonic.nl>
Link: https://lore.kernel.org/r/20220524091808.2269898-1-david@protonic.nl
Signed-off-by: Mark Brown <broonie@kernel.org>

authored by

David Jander and committed by
Mark Brown
6598b91b b658be56

+127 -68
+98 -45
drivers/spi/spi.c
··· 33 33 #include <linux/idr.h> 34 34 #include <linux/platform_data/x86/apple.h> 35 35 #include <linux/ptp_clock_kernel.h> 36 + #include <linux/percpu.h> 36 37 37 38 #define CREATE_TRACE_POINTS 38 39 #include <trace/events/spi.h> ··· 50 49 51 50 spi_controller_put(spi->controller); 52 51 kfree(spi->driver_override); 52 + free_percpu(spi->pcpu_statistics); 53 53 kfree(spi); 54 54 } 55 55 ··· 95 93 } 96 94 static DEVICE_ATTR_RW(driver_override); 97 95 96 + static struct spi_statistics *spi_alloc_pcpu_stats(struct device *dev) 97 + { 98 + struct spi_statistics __percpu *pcpu_stats; 99 + 100 + if (dev) 101 + pcpu_stats = devm_alloc_percpu(dev, struct spi_statistics); 102 + else 103 + pcpu_stats = alloc_percpu_gfp(struct spi_statistics, GFP_KERNEL); 104 + 105 + if (pcpu_stats) { 106 + int cpu; 107 + 108 + for_each_possible_cpu(cpu) { 109 + struct spi_statistics *stat; 110 + 111 + stat = per_cpu_ptr(pcpu_stats, cpu); 112 + u64_stats_init(&stat->syncp); 113 + } 114 + } 115 + return pcpu_stats; 116 + } 117 + 118 + #define spi_pcpu_stats_totalize(ret, in, field) \ 119 + do { \ 120 + int i; \ 121 + ret = 0; \ 122 + for_each_possible_cpu(i) { \ 123 + const struct spi_statistics *pcpu_stats; \ 124 + u64 inc; \ 125 + unsigned int start; \ 126 + pcpu_stats = per_cpu_ptr(in, i); \ 127 + do { \ 128 + start = u64_stats_fetch_begin_irq( \ 129 + &pcpu_stats->syncp); \ 130 + inc = u64_stats_read(&pcpu_stats->field); \ 131 + } while (u64_stats_fetch_retry_irq( \ 132 + &pcpu_stats->syncp, start)); \ 133 + ret += inc; \ 134 + } \ 135 + } while (0) 136 + 98 137 #define SPI_STATISTICS_ATTRS(field, file) \ 99 138 static ssize_t spi_controller_##field##_show(struct device *dev, \ 100 139 struct device_attribute *attr, \ ··· 143 100 { \ 144 101 struct spi_controller *ctlr = container_of(dev, \ 145 102 struct spi_controller, dev); \ 146 - return spi_statistics_##field##_show(&ctlr->statistics, buf); \ 103 + return spi_statistics_##field##_show(ctlr->pcpu_statistics, buf); \ 147 104 } \ 148 105 static struct device_attribute dev_attr_spi_controller_##field = { \ 149 106 .attr = { .name = file, .mode = 0444 }, \ ··· 154 111 char *buf) \ 155 112 { \ 156 113 struct spi_device *spi = to_spi_device(dev); \ 157 - return spi_statistics_##field##_show(&spi->statistics, buf); \ 114 + return spi_statistics_##field##_show(spi->pcpu_statistics, buf); \ 158 115 } \ 159 116 static struct device_attribute dev_attr_spi_device_##field = { \ 160 117 .attr = { .name = file, .mode = 0444 }, \ 161 118 .show = spi_device_##field##_show, \ 162 119 } 163 120 164 - #define SPI_STATISTICS_SHOW_NAME(name, file, field, format_string) \ 121 + #define SPI_STATISTICS_SHOW_NAME(name, file, field) \ 165 122 static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \ 166 123 char *buf) \ 167 124 { \ 168 - unsigned long flags; \ 169 125 ssize_t len; \ 170 - spin_lock_irqsave(&stat->lock, flags); \ 171 - len = sysfs_emit(buf, format_string "\n", stat->field); \ 172 - spin_unlock_irqrestore(&stat->lock, flags); \ 126 + u64 val; \ 127 + spi_pcpu_stats_totalize(val, stat, field); \ 128 + len = sysfs_emit(buf, "%llu\n", val); \ 173 129 return len; \ 174 130 } \ 175 131 SPI_STATISTICS_ATTRS(name, file) 176 132 177 - #define SPI_STATISTICS_SHOW(field, format_string) \ 133 + #define SPI_STATISTICS_SHOW(field) \ 178 134 SPI_STATISTICS_SHOW_NAME(field, __stringify(field), \ 179 - field, format_string) 135 + field) 180 136 181 - SPI_STATISTICS_SHOW(messages, "%lu"); 182 - SPI_STATISTICS_SHOW(transfers, "%lu"); 183 - SPI_STATISTICS_SHOW(errors, "%lu"); 184 - SPI_STATISTICS_SHOW(timedout, "%lu"); 137 + SPI_STATISTICS_SHOW(messages); 138 + SPI_STATISTICS_SHOW(transfers); 139 + SPI_STATISTICS_SHOW(errors); 140 + SPI_STATISTICS_SHOW(timedout); 185 141 186 - SPI_STATISTICS_SHOW(spi_sync, "%lu"); 187 - SPI_STATISTICS_SHOW(spi_sync_immediate, "%lu"); 188 - SPI_STATISTICS_SHOW(spi_async, "%lu"); 142 + SPI_STATISTICS_SHOW(spi_sync); 143 + SPI_STATISTICS_SHOW(spi_sync_immediate); 144 + SPI_STATISTICS_SHOW(spi_async); 189 145 190 - SPI_STATISTICS_SHOW(bytes, "%llu"); 191 - SPI_STATISTICS_SHOW(bytes_rx, "%llu"); 192 - SPI_STATISTICS_SHOW(bytes_tx, "%llu"); 146 + SPI_STATISTICS_SHOW(bytes); 147 + SPI_STATISTICS_SHOW(bytes_rx); 148 + SPI_STATISTICS_SHOW(bytes_tx); 193 149 194 150 #define SPI_STATISTICS_TRANSFER_BYTES_HISTO(index, number) \ 195 151 SPI_STATISTICS_SHOW_NAME(transfer_bytes_histo##index, \ 196 152 "transfer_bytes_histo_" number, \ 197 - transfer_bytes_histo[index], "%lu") 153 + transfer_bytes_histo[index]) 198 154 SPI_STATISTICS_TRANSFER_BYTES_HISTO(0, "0-1"); 199 155 SPI_STATISTICS_TRANSFER_BYTES_HISTO(1, "2-3"); 200 156 SPI_STATISTICS_TRANSFER_BYTES_HISTO(2, "4-7"); ··· 212 170 SPI_STATISTICS_TRANSFER_BYTES_HISTO(15, "32768-65535"); 213 171 SPI_STATISTICS_TRANSFER_BYTES_HISTO(16, "65536+"); 214 172 215 - SPI_STATISTICS_SHOW(transfers_split_maxsize, "%lu"); 173 + SPI_STATISTICS_SHOW(transfers_split_maxsize); 216 174 217 175 static struct attribute *spi_dev_attrs[] = { 218 176 &dev_attr_modalias.attr, ··· 309 267 NULL, 310 268 }; 311 269 312 - static void spi_statistics_add_transfer_stats(struct spi_statistics *stats, 270 + static void spi_statistics_add_transfer_stats(struct spi_statistics *pcpu_stats, 313 271 struct spi_transfer *xfer, 314 272 struct spi_controller *ctlr) 315 273 { 316 - unsigned long flags; 317 274 int l2len = min(fls(xfer->len), SPI_STATISTICS_HISTO_SIZE) - 1; 275 + struct spi_statistics *stats = this_cpu_ptr(pcpu_stats); 318 276 319 277 if (l2len < 0) 320 278 l2len = 0; 321 279 322 - spin_lock_irqsave(&stats->lock, flags); 280 + u64_stats_update_begin(&stats->syncp); 323 281 324 - stats->transfers++; 325 - stats->transfer_bytes_histo[l2len]++; 282 + u64_stats_inc(&stats->transfers); 283 + u64_stats_inc(&stats->transfer_bytes_histo[l2len]); 326 284 327 - stats->bytes += xfer->len; 285 + u64_stats_add(&stats->bytes, xfer->len); 328 286 if ((xfer->tx_buf) && 329 287 (xfer->tx_buf != ctlr->dummy_tx)) 330 - stats->bytes_tx += xfer->len; 288 + u64_stats_add(&stats->bytes_tx, xfer->len); 331 289 if ((xfer->rx_buf) && 332 290 (xfer->rx_buf != ctlr->dummy_rx)) 333 - stats->bytes_rx += xfer->len; 291 + u64_stats_add(&stats->bytes_rx, xfer->len); 334 292 335 - spin_unlock_irqrestore(&stats->lock, flags); 293 + u64_stats_update_end(&stats->syncp); 336 294 } 337 295 338 296 /* ··· 561 519 return NULL; 562 520 } 563 521 522 + spi->pcpu_statistics = spi_alloc_pcpu_stats(NULL); 523 + if (!spi->pcpu_statistics) { 524 + kfree(spi); 525 + spi_controller_put(ctlr); 526 + return NULL; 527 + } 528 + 564 529 spi->master = spi->controller = ctlr; 565 530 spi->dev.parent = &ctlr->dev; 566 531 spi->dev.bus = &spi_bus_type; 567 532 spi->dev.release = spidev_release; 568 533 spi->mode = ctlr->buswidth_override_bits; 569 - 570 - spin_lock_init(&spi->statistics.lock); 571 534 572 535 device_initialize(&spi->dev); 573 536 return spi; ··· 1272 1225 struct spi_message *msg, 1273 1226 struct spi_transfer *xfer) 1274 1227 { 1275 - struct spi_statistics *statm = &ctlr->statistics; 1276 - struct spi_statistics *stats = &msg->spi->statistics; 1228 + struct spi_statistics *statm = ctlr->pcpu_statistics; 1229 + struct spi_statistics *stats = msg->spi->pcpu_statistics; 1277 1230 u32 speed_hz = xfer->speed_hz; 1278 1231 unsigned long long ms; 1279 1232 ··· 1429 1382 struct spi_transfer *xfer; 1430 1383 bool keep_cs = false; 1431 1384 int ret = 0; 1432 - struct spi_statistics *statm = &ctlr->statistics; 1433 - struct spi_statistics *stats = &msg->spi->statistics; 1385 + struct spi_statistics *statm = ctlr->pcpu_statistics; 1386 + struct spi_statistics *stats = msg->spi->pcpu_statistics; 1434 1387 1435 1388 spi_set_cs(msg->spi, true, false); 1436 1389 ··· 3076 3029 } 3077 3030 } 3078 3031 /* add statistics */ 3079 - spin_lock_init(&ctlr->statistics.lock); 3032 + ctlr->pcpu_statistics = spi_alloc_pcpu_stats(dev); 3033 + if (!ctlr->pcpu_statistics) { 3034 + dev_err(dev, "Error allocating per-cpu statistics\n"); 3035 + goto destroy_queue; 3036 + } 3080 3037 3081 3038 mutex_lock(&board_lock); 3082 3039 list_add_tail(&ctlr->list, &spi_controller_list); ··· 3093 3042 acpi_register_spi_devices(ctlr); 3094 3043 return status; 3095 3044 3045 + destroy_queue: 3046 + spi_destroy_queue(ctlr); 3096 3047 free_bus_id: 3097 3048 mutex_lock(&board_lock); 3098 3049 idr_remove(&spi_master_idr, ctlr->bus_num); ··· 3420 3367 *xferp = &xfers[count - 1]; 3421 3368 3422 3369 /* increment statistics counters */ 3423 - SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, 3370 + SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, 3424 3371 transfers_split_maxsize); 3425 - SPI_STATISTICS_INCREMENT_FIELD(&msg->spi->statistics, 3372 + SPI_STATISTICS_INCREMENT_FIELD(msg->spi->pcpu_statistics, 3426 3373 transfers_split_maxsize); 3427 3374 3428 3375 return 0; ··· 3813 3760 3814 3761 message->spi = spi; 3815 3762 3816 - SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_async); 3817 - SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_async); 3763 + SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_async); 3764 + SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_async); 3818 3765 3819 3766 trace_spi_message_submit(message); 3820 3767 ··· 3961 3908 message->context = &done; 3962 3909 message->spi = spi; 3963 3910 3964 - SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_sync); 3965 - SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_sync); 3911 + SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync); 3912 + SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync); 3966 3913 3967 3914 /* 3968 3915 * If we're not using the legacy transfer method then we will ··· 3985 3932 if (status == 0) { 3986 3933 /* Push out the messages in the calling context if we can */ 3987 3934 if (ctlr->transfer == spi_queued_transfer) { 3988 - SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, 3935 + SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, 3989 3936 spi_sync_immediate); 3990 - SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, 3937 + SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, 3991 3938 spi_sync_immediate); 3992 3939 __spi_pump_messages(ctlr, false); 3993 3940 }
+29 -23
include/linux/spi/spi.h
··· 17 17 18 18 #include <uapi/linux/spi/spi.h> 19 19 #include <linux/acpi.h> 20 + #include <linux/u64_stats_sync.h> 20 21 21 22 struct dma_chan; 22 23 struct software_node; ··· 60 59 * maxsize limit 61 60 */ 62 61 struct spi_statistics { 63 - spinlock_t lock; /* lock for the whole structure */ 62 + struct u64_stats_sync syncp; 64 63 65 - unsigned long messages; 66 - unsigned long transfers; 67 - unsigned long errors; 68 - unsigned long timedout; 64 + u64_stats_t messages; 65 + u64_stats_t transfers; 66 + u64_stats_t errors; 67 + u64_stats_t timedout; 69 68 70 - unsigned long spi_sync; 71 - unsigned long spi_sync_immediate; 72 - unsigned long spi_async; 69 + u64_stats_t spi_sync; 70 + u64_stats_t spi_sync_immediate; 71 + u64_stats_t spi_async; 73 72 74 - unsigned long long bytes; 75 - unsigned long long bytes_rx; 76 - unsigned long long bytes_tx; 73 + u64_stats_t bytes; 74 + u64_stats_t bytes_rx; 75 + u64_stats_t bytes_tx; 77 76 78 77 #define SPI_STATISTICS_HISTO_SIZE 17 79 - unsigned long transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; 78 + u64_stats_t transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; 80 79 81 - unsigned long transfers_split_maxsize; 80 + u64_stats_t transfers_split_maxsize; 82 81 }; 83 82 84 - #define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count) \ 85 - do { \ 86 - unsigned long flags; \ 87 - spin_lock_irqsave(&(stats)->lock, flags); \ 88 - (stats)->field += count; \ 89 - spin_unlock_irqrestore(&(stats)->lock, flags); \ 83 + #define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count) \ 84 + do { \ 85 + struct spi_statistics *__lstats = this_cpu_ptr(pcpu_stats); \ 86 + u64_stats_update_begin(&__lstats->syncp); \ 87 + u64_stats_add(&__lstats->field, count); \ 88 + u64_stats_update_end(&__lstats->syncp); \ 90 89 } while (0) 91 90 92 - #define SPI_STATISTICS_INCREMENT_FIELD(stats, field) \ 93 - SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1) 91 + #define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field) \ 92 + do { \ 93 + struct spi_statistics *__lstats = this_cpu_ptr(pcpu_stats); \ 94 + u64_stats_update_begin(&__lstats->syncp); \ 95 + u64_stats_inc(&__lstats->field); \ 96 + u64_stats_update_end(&__lstats->syncp); \ 97 + } while (0) 94 98 95 99 /** 96 100 * struct spi_delay - SPI delay information ··· 200 194 struct spi_delay cs_inactive; 201 195 202 196 /* the statistics */ 203 - struct spi_statistics statistics; 197 + struct spi_statistics __percpu *pcpu_statistics; 204 198 205 199 /* 206 200 * likely need more hooks for more protocol options affecting how ··· 653 647 s8 max_native_cs; 654 648 655 649 /* statistics */ 656 - struct spi_statistics statistics; 650 + struct spi_statistics __percpu *pcpu_statistics; 657 651 658 652 /* DMA channels for use with core dmaengine helpers */ 659 653 struct dma_chan *dma_tx;