Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: fcoe: Use per-CPU API to update per-CPU statistics

The per-CPU statistics (struct fc_stats) are updated by getting a stable
per-CPU pointer via get_cpu() + per_cpu_ptr() and then performing the
increment. This can be optimized by using this_cpu_*() which will do
whatever is needed on the architecture to perform the update safely and
efficiently. The read out of the individual value (fc_get_host_stats())
should be done by using READ_ONCE() instead of a plain-C access. The
difference is that READ_ONCE() will always perform a single access while
the plain-C access can be split by the compiler into two loads if it
appears beneficial. The usage of u64 has the side-effect that it is also
64bit wide on 32bit architectures and the read is always split into two
loads. This can lead to strange values if the read happens during an update
which alters both 32bit parts of the 64bit value. This can be circumvented
by either using 32bit variables on 32bit architectures or extending the
statistics with a sequence counter.

Use this_cpu_*() API to update the statistics and READ_ONCE() to read it.

Link: https://lore.kernel.org/r/20220506105758.283887-3-bigeasy@linutronix.de
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Sebastian Andrzej Siewior and committed by
Martin K. Petersen
a912460e 848b8977

+62 -107
+6 -16
drivers/scsi/bnx2fc/bnx2fc_fcoe.c
··· 273 273 struct fcoe_port *port; 274 274 struct fcoe_hdr *hp; 275 275 struct bnx2fc_rport *tgt; 276 - struct fc_stats *stats; 277 276 u8 sof, eof; 278 277 u32 crc; 279 278 unsigned int hlen, tlen, elen; ··· 398 399 } 399 400 400 401 /*update tx stats */ 401 - stats = per_cpu_ptr(lport->stats, get_cpu()); 402 - stats->TxFrames++; 403 - stats->TxWords += wlen; 404 - put_cpu(); 402 + this_cpu_inc(lport->stats->TxFrames); 403 + this_cpu_add(lport->stats->TxWords, wlen); 405 404 406 405 /* send down to lld */ 407 406 fr_dev(fp) = lport; ··· 509 512 u32 fr_len, fr_crc; 510 513 struct fc_lport *lport; 511 514 struct fcoe_rcv_info *fr; 512 - struct fc_stats *stats; 513 515 struct fc_frame_header *fh; 514 516 struct fcoe_crc_eof crc_eof; 515 517 struct fc_frame *fp; ··· 539 543 skb_pull(skb, sizeof(struct fcoe_hdr)); 540 544 fr_len = skb->len - sizeof(struct fcoe_crc_eof); 541 545 542 - stats = per_cpu_ptr(lport->stats, get_cpu()); 543 - stats->RxFrames++; 544 - stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; 545 - put_cpu(); 546 + this_cpu_inc(lport->stats->RxFrames); 547 + this_cpu_add(lport->stats->RxWords, fr_len / FCOE_WORD_TO_BYTE); 546 548 547 549 fp = (struct fc_frame *)skb; 548 550 fc_frame_init(fp); ··· 627 633 fr_crc = le32_to_cpu(fr_crc(fp)); 628 634 629 635 if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) { 630 - stats = per_cpu_ptr(lport->stats, get_cpu()); 631 - crc_err = (stats->InvalidCRCCount++); 632 - put_cpu(); 636 + crc_err = this_cpu_inc_return(lport->stats->InvalidCRCCount); 633 637 if (crc_err < 5) 634 638 printk(KERN_WARNING PFX "dropping frame with " 635 639 "CRC error\n"); ··· 956 964 mutex_unlock(&lport->lp_mutex); 957 965 fc_host_port_type(lport->host) = 958 966 FC_PORTTYPE_UNKNOWN; 959 - per_cpu_ptr(lport->stats, 960 - get_cpu())->LinkFailureCount++; 961 - put_cpu(); 967 + this_cpu_inc(lport->stats->LinkFailureCount); 962 968 fcoe_clean_pending_queue(lport); 963 969 wait_for_upload = 1; 964 970 }
+5 -8
drivers/scsi/bnx2fc/bnx2fc_io.c
··· 2032 2032 struct bnx2fc_interface *interface = port->priv; 2033 2033 struct bnx2fc_hba *hba = interface->hba; 2034 2034 struct fc_lport *lport = port->lport; 2035 - struct fc_stats *stats; 2036 2035 int task_idx, index; 2037 2036 u16 xid; 2038 2037 ··· 2044 2045 io_req->data_xfer_len = scsi_bufflen(sc_cmd); 2045 2046 bnx2fc_priv(sc_cmd)->io_req = io_req; 2046 2047 2047 - stats = per_cpu_ptr(lport->stats, get_cpu()); 2048 2048 if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { 2049 2049 io_req->io_req_flags = BNX2FC_READ; 2050 - stats->InputRequests++; 2051 - stats->InputBytes += io_req->data_xfer_len; 2050 + this_cpu_inc(lport->stats->InputRequests); 2051 + this_cpu_add(lport->stats->InputBytes, io_req->data_xfer_len); 2052 2052 } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { 2053 2053 io_req->io_req_flags = BNX2FC_WRITE; 2054 - stats->OutputRequests++; 2055 - stats->OutputBytes += io_req->data_xfer_len; 2054 + this_cpu_inc(lport->stats->OutputRequests); 2055 + this_cpu_add(lport->stats->OutputBytes, io_req->data_xfer_len); 2056 2056 } else { 2057 2057 io_req->io_req_flags = 0; 2058 - stats->ControlRequests++; 2058 + this_cpu_inc(lport->stats->ControlRequests); 2059 2059 } 2060 - put_cpu(); 2061 2060 2062 2061 xid = io_req->xid; 2063 2062
+12 -24
drivers/scsi/fcoe/fcoe.c
··· 1434 1434 1435 1435 return NET_RX_SUCCESS; 1436 1436 err: 1437 - per_cpu_ptr(lport->stats, get_cpu())->ErrorFrames++; 1438 - put_cpu(); 1437 + this_cpu_inc(lport->stats->ErrorFrames); 1439 1438 err2: 1440 1439 kfree_skb(skb); 1441 1440 return NET_RX_DROP; ··· 1474 1475 struct ethhdr *eh; 1475 1476 struct fcoe_crc_eof *cp; 1476 1477 struct sk_buff *skb; 1477 - struct fc_stats *stats; 1478 1478 struct fc_frame_header *fh; 1479 1479 unsigned int hlen; /* header length implies the version */ 1480 1480 unsigned int tlen; /* trailer length */ ··· 1584 1586 skb_shinfo(skb)->gso_size = 0; 1585 1587 } 1586 1588 /* update tx stats: regardless if LLD fails */ 1587 - stats = per_cpu_ptr(lport->stats, get_cpu()); 1588 - stats->TxFrames++; 1589 - stats->TxWords += wlen; 1590 - put_cpu(); 1589 + this_cpu_inc(lport->stats->TxFrames); 1590 + this_cpu_add(lport->stats->TxWords, wlen); 1591 1591 1592 1592 /* send down to lld */ 1593 1593 fr_dev(fp) = lport; ··· 1607 1611 struct fcoe_interface *fcoe; 1608 1612 struct fc_frame_header *fh; 1609 1613 struct sk_buff *skb = (struct sk_buff *)fp; 1610 - struct fc_stats *stats; 1611 1614 1612 1615 /* 1613 1616 * We only check CRC if no offload is available and if it is ··· 1636 1641 return 0; 1637 1642 } 1638 1643 1639 - stats = per_cpu_ptr(lport->stats, get_cpu()); 1640 - stats->InvalidCRCCount++; 1641 - if (stats->InvalidCRCCount < 5) 1644 + if (this_cpu_inc_return(lport->stats->InvalidCRCCount) < 5) 1642 1645 printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); 1643 - put_cpu(); 1644 1646 return -EINVAL; 1645 1647 } 1646 1648 ··· 1650 1658 u32 fr_len; 1651 1659 struct fc_lport *lport; 1652 1660 struct fcoe_rcv_info *fr; 1653 - struct fc_stats *stats; 1654 1661 struct fcoe_crc_eof crc_eof; 1655 1662 struct fc_frame *fp; 1656 1663 struct fcoe_hdr *hp; ··· 1677 1686 */ 1678 1687 hp = (struct fcoe_hdr *) skb_network_header(skb); 1679 1688 1680 - stats = per_cpu_ptr(lport->stats, get_cpu()); 1681 1689 if 
(unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { 1682 - if (stats->ErrorFrames < 5) 1690 + struct fc_stats *stats; 1691 + 1692 + stats = per_cpu_ptr(lport->stats, raw_smp_processor_id()); 1693 + if (READ_ONCE(stats->ErrorFrames) < 5) 1683 1694 printk(KERN_WARNING "fcoe: FCoE version " 1684 1695 "mismatch: The frame has " 1685 1696 "version %x, but the " ··· 1694 1701 skb_pull(skb, sizeof(struct fcoe_hdr)); 1695 1702 fr_len = skb->len - sizeof(struct fcoe_crc_eof); 1696 1703 1697 - stats->RxFrames++; 1698 - stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; 1704 + this_cpu_inc(lport->stats->RxFrames); 1705 + this_cpu_add(lport->stats->RxWords, fr_len / FCOE_WORD_TO_BYTE); 1699 1706 1700 1707 fp = (struct fc_frame *)skb; 1701 1708 fc_frame_init(fp); ··· 1711 1718 goto drop; 1712 1719 1713 1720 if (!fcoe_filter_frames(lport, fp)) { 1714 - put_cpu(); 1715 1721 fc_exch_recv(lport, fp); 1716 1722 return; 1717 1723 } 1718 1724 drop: 1719 - stats->ErrorFrames++; 1720 - put_cpu(); 1725 + this_cpu_inc(lport->stats->ErrorFrames); 1721 1726 kfree_skb(skb); 1722 1727 } 1723 1728 ··· 1839 1848 struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 1840 1849 struct fcoe_ctlr *ctlr; 1841 1850 struct fcoe_interface *fcoe; 1842 - struct fc_stats *stats; 1843 1851 u32 link_possible = 1; 1844 1852 u32 mfs; 1845 1853 int rc = NOTIFY_OK; ··· 1912 1922 break; 1913 1923 case FCOE_CTLR_ENABLED: 1914 1924 case FCOE_CTLR_UNUSED: 1915 - stats = per_cpu_ptr(lport->stats, get_cpu()); 1916 - stats->LinkFailureCount++; 1917 - put_cpu(); 1925 + this_cpu_inc(lport->stats->LinkFailureCount); 1918 1926 fcoe_clean_pending_queue(lport); 1919 1927 } 1920 1928 }
+9 -17
drivers/scsi/fcoe/fcoe_ctlr.c
··· 824 824 unsigned long deadline; 825 825 unsigned long sel_time = 0; 826 826 struct list_head del_list; 827 - struct fc_stats *stats; 828 827 829 828 INIT_LIST_HEAD(&del_list); 830 - 831 - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); 832 829 833 830 list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { 834 831 deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; 835 832 if (fip->sel_fcf == fcf) { 836 833 if (time_after(jiffies, deadline)) { 837 - stats->MissDiscAdvCount++; 834 + u64 miss_cnt; 835 + 836 + miss_cnt = this_cpu_inc_return(fip->lp->stats->MissDiscAdvCount); 838 837 printk(KERN_INFO "libfcoe: host%d: " 839 838 "Missing Discovery Advertisement " 840 839 "for fab %16.16llx count %lld\n", 841 840 fip->lp->host->host_no, fcf->fabric_name, 842 - stats->MissDiscAdvCount); 841 + miss_cnt); 843 842 } else if (time_after(next_timer, deadline)) 844 843 next_timer = deadline; 845 844 } ··· 854 855 */ 855 856 list_del(&fcf->list); 856 857 list_add(&fcf->list, &del_list); 857 - stats->VLinkFailureCount++; 858 + this_cpu_inc(fip->lp->stats->VLinkFailureCount); 858 859 } else { 859 860 if (time_after(next_timer, deadline)) 860 861 next_timer = deadline; ··· 863 864 sel_time = fcf->time; 864 865 } 865 866 } 866 - put_cpu(); 867 867 868 868 list_for_each_entry_safe(fcf, next, &del_list, list) { 869 869 /* Removes fcf from current list */ ··· 1140 1142 struct fip_desc *desc; 1141 1143 struct fip_encaps *els; 1142 1144 struct fcoe_fcf *sel; 1143 - struct fc_stats *stats; 1144 1145 enum fip_desc_type els_dtype = 0; 1145 1146 u8 els_op; 1146 1147 u8 sub; ··· 1283 1286 fr_dev(fp) = lport; 1284 1287 fr_encaps(fp) = els_dtype; 1285 1288 1286 - stats = per_cpu_ptr(lport->stats, get_cpu()); 1287 - stats->RxFrames++; 1288 - stats->RxWords += skb->len / FIP_BPW; 1289 - put_cpu(); 1289 + this_cpu_inc(lport->stats->RxFrames); 1290 + this_cpu_add(lport->stats->RxWords, skb->len / FIP_BPW); 1290 1291 1291 1292 fc_exch_recv(lport, fp); 1292 1293 return; ··· 1422 1427 
ntoh24(vp->fd_fc_id)); 1423 1428 if (vn_port && (vn_port == lport)) { 1424 1429 mutex_lock(&fip->ctlr_mutex); 1425 - per_cpu_ptr(lport->stats, 1426 - get_cpu())->VLinkFailureCount++; 1427 - put_cpu(); 1430 + this_cpu_inc(lport->stats->VLinkFailureCount); 1428 1431 fcoe_ctlr_reset(fip); 1429 1432 mutex_unlock(&fip->ctlr_mutex); 1430 1433 } ··· 1450 1457 * followed by physical port 1451 1458 */ 1452 1459 mutex_lock(&fip->ctlr_mutex); 1453 - per_cpu_ptr(lport->stats, get_cpu())->VLinkFailureCount++; 1454 - put_cpu(); 1460 + this_cpu_inc(lport->stats->VLinkFailureCount); 1455 1461 fcoe_ctlr_reset(fip); 1456 1462 mutex_unlock(&fip->ctlr_mutex); 1457 1463
+3 -3
drivers/scsi/fcoe/fcoe_transport.c
··· 183 183 memset(lesb, 0, sizeof(*lesb)); 184 184 for_each_possible_cpu(cpu) { 185 185 stats = per_cpu_ptr(lport->stats, cpu); 186 - lfc += stats->LinkFailureCount; 187 - vlfc += stats->VLinkFailureCount; 188 - mdac += stats->MissDiscAdvCount; 186 + lfc += READ_ONCE(stats->LinkFailureCount); 187 + vlfc += READ_ONCE(stats->VLinkFailureCount); 188 + mdac += READ_ONCE(stats->MissDiscAdvCount); 189 189 } 190 190 lesb->lesb_link_fail = htonl(lfc); 191 191 lesb->lesb_vlink_fail = htonl(vlfc);
+10 -19
drivers/scsi/libfc/fc_fcp.c
··· 143 143 INIT_LIST_HEAD(&fsp->list); 144 144 spin_lock_init(&fsp->scsi_pkt_lock); 145 145 } else { 146 - per_cpu_ptr(lport->stats, get_cpu())->FcpPktAllocFails++; 147 - put_cpu(); 146 + this_cpu_inc(lport->stats->FcpPktAllocFails); 148 147 } 149 148 return fsp; 150 149 } ··· 265 266 if (!fsp->seq_ptr) 266 267 return -EINVAL; 267 268 268 - per_cpu_ptr(fsp->lp->stats, get_cpu())->FcpPktAborts++; 269 - put_cpu(); 269 + this_cpu_inc(fsp->lp->stats->FcpPktAborts); 270 270 271 271 fsp->state |= FC_SRB_ABORT_PENDING; 272 272 rc = fc_seq_exch_abort(fsp->seq_ptr, 0); ··· 434 436 if (likely(fp)) 435 437 return fp; 436 438 437 - per_cpu_ptr(lport->stats, get_cpu())->FcpFrameAllocFails++; 438 - put_cpu(); 439 + this_cpu_inc(lport->stats->FcpFrameAllocFails); 439 440 /* error case */ 440 441 fc_fcp_can_queue_ramp_down(lport); 441 442 shost_printk(KERN_ERR, lport->host, ··· 468 471 { 469 472 struct scsi_cmnd *sc = fsp->cmd; 470 473 struct fc_lport *lport = fsp->lp; 471 - struct fc_stats *stats; 472 474 struct fc_frame_header *fh; 473 475 size_t start_offset; 474 476 size_t offset; ··· 529 533 530 534 if (~crc != le32_to_cpu(fr_crc(fp))) { 531 535 crc_err: 532 - stats = per_cpu_ptr(lport->stats, get_cpu()); 533 - stats->ErrorFrames++; 536 + this_cpu_inc(lport->stats->ErrorFrames); 534 537 /* per cpu count, not total count, but OK for limit */ 535 - if (stats->InvalidCRCCount++ < FC_MAX_ERROR_CNT) 538 + if (this_cpu_inc_return(lport->stats->InvalidCRCCount) < FC_MAX_ERROR_CNT) 536 539 printk(KERN_WARNING "libfc: CRC error on data " 537 540 "frame for port (%6.6x)\n", 538 541 lport->port_id); 539 - put_cpu(); 540 542 /* 541 543 * Assume the frame is total garbage. 
542 544 * We may have copied it over the good part ··· 1855 1861 struct fc_fcp_pkt *fsp; 1856 1862 int rval; 1857 1863 int rc = 0; 1858 - struct fc_stats *stats; 1859 1864 1860 1865 rval = fc_remote_port_chkready(rport); 1861 1866 if (rval) { ··· 1906 1913 /* 1907 1914 * setup the data direction 1908 1915 */ 1909 - stats = per_cpu_ptr(lport->stats, get_cpu()); 1910 1916 if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { 1911 1917 fsp->req_flags = FC_SRB_READ; 1912 - stats->InputRequests++; 1913 - stats->InputBytes += fsp->data_len; 1918 + this_cpu_inc(lport->stats->InputRequests); 1919 + this_cpu_add(lport->stats->InputBytes, fsp->data_len); 1914 1920 } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { 1915 1921 fsp->req_flags = FC_SRB_WRITE; 1916 - stats->OutputRequests++; 1917 - stats->OutputBytes += fsp->data_len; 1922 + this_cpu_inc(lport->stats->OutputRequests); 1923 + this_cpu_add(lport->stats->OutputBytes, fsp->data_len); 1918 1924 } else { 1919 1925 fsp->req_flags = 0; 1920 - stats->ControlRequests++; 1926 + this_cpu_inc(lport->stats->ControlRequests); 1921 1927 } 1922 - put_cpu(); 1923 1928 1924 1929 /* 1925 1930 * send it to the lower layer
+15 -15
drivers/scsi/libfc/fc_lport.c
··· 308 308 309 309 stats = per_cpu_ptr(lport->stats, cpu); 310 310 311 - fc_stats->tx_frames += stats->TxFrames; 312 - fc_stats->tx_words += stats->TxWords; 313 - fc_stats->rx_frames += stats->RxFrames; 314 - fc_stats->rx_words += stats->RxWords; 315 - fc_stats->error_frames += stats->ErrorFrames; 316 - fc_stats->invalid_crc_count += stats->InvalidCRCCount; 317 - fc_stats->fcp_input_requests += stats->InputRequests; 318 - fc_stats->fcp_output_requests += stats->OutputRequests; 319 - fc_stats->fcp_control_requests += stats->ControlRequests; 320 - fcp_in_bytes += stats->InputBytes; 321 - fcp_out_bytes += stats->OutputBytes; 322 - fc_stats->fcp_packet_alloc_failures += stats->FcpPktAllocFails; 323 - fc_stats->fcp_packet_aborts += stats->FcpPktAborts; 324 - fc_stats->fcp_frame_alloc_failures += stats->FcpFrameAllocFails; 325 - fc_stats->link_failure_count += stats->LinkFailureCount; 311 + fc_stats->tx_frames += READ_ONCE(stats->TxFrames); 312 + fc_stats->tx_words += READ_ONCE(stats->TxWords); 313 + fc_stats->rx_frames += READ_ONCE(stats->RxFrames); 314 + fc_stats->rx_words += READ_ONCE(stats->RxWords); 315 + fc_stats->error_frames += READ_ONCE(stats->ErrorFrames); 316 + fc_stats->invalid_crc_count += READ_ONCE(stats->InvalidCRCCount); 317 + fc_stats->fcp_input_requests += READ_ONCE(stats->InputRequests); 318 + fc_stats->fcp_output_requests += READ_ONCE(stats->OutputRequests); 319 + fc_stats->fcp_control_requests += READ_ONCE(stats->ControlRequests); 320 + fcp_in_bytes += READ_ONCE(stats->InputBytes); 321 + fcp_out_bytes += READ_ONCE(stats->OutputBytes); 322 + fc_stats->fcp_packet_alloc_failures += READ_ONCE(stats->FcpPktAllocFails); 323 + fc_stats->fcp_packet_aborts += READ_ONCE(stats->FcpPktAborts); 324 + fc_stats->fcp_frame_alloc_failures += READ_ONCE(stats->FcpFrameAllocFails); 325 + fc_stats->link_failure_count += READ_ONCE(stats->LinkFailureCount); 326 326 } 327 327 fc_stats->fcp_input_megabytes = div_u64(fcp_in_bytes, 1000000); 328 328 
fc_stats->fcp_output_megabytes = div_u64(fcp_out_bytes, 1000000);
+2 -5
drivers/scsi/qedf/qedf_main.c
··· 1067 1067 u32 crc; 1068 1068 unsigned int hlen, tlen, elen; 1069 1069 int wlen; 1070 - struct fc_stats *stats; 1071 1070 struct fc_lport *tmp_lport; 1072 1071 struct fc_lport *vn_port = NULL; 1073 1072 struct qedf_rport *fcport; ··· 1214 1215 hp->fcoe_sof = sof; 1215 1216 1216 1217 /*update tx stats */ 1217 - stats = per_cpu_ptr(lport->stats, get_cpu()); 1218 - stats->TxFrames++; 1219 - stats->TxWords += wlen; 1220 - put_cpu(); 1218 + this_cpu_inc(lport->stats->TxFrames); 1219 + this_cpu_add(lport->stats->TxWords, wlen); 1221 1220 1222 1221 /* Get VLAN ID from skb for printing purposes */ 1223 1222 __vlan_hwaccel_get_tag(skb, &vlan_tci);