Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: mpt3sas: Affinity high iops queues IRQs to local node

High iops queues are mapped to non-managed irqs. Set affinity of
non-managed irqs to local numa node. Low latency queues are mapped to
managed irqs.

The driver reserves some reply queues for high iops (through the
pci_alloc_irq_vectors_affinity and .pre_vectors interface). The rest of
the queues are for low latency.

Based on the io workload in the io submission path, the driver will decide
which group of reply queues (either high iops queues or low latency queues)
to use. High iops queues will be mapped to the local numa node of the
controller and low latency queues will be mapped to cpus across numa nodes.
In general, high iops and low latency queues should fit into 128 reply
queues, which is the max number of reply queues supported by Aero/Sea.

Signed-off-by: Suganath Prabu S <suganath-prabu.subramani@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Suganath Prabu S and committed by
Martin K. Petersen
728bbc6c 998c3001

+62 -11
+62 -11
drivers/scsi/mpt3sas/mpt3sas_base.c
··· 2793 2793 2794 2794 list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) { 2795 2795 list_del(&reply_q->list); 2796 + if (smp_affinity_enable) 2797 + irq_set_affinity_hint(pci_irq_vector(ioc->pdev, 2798 + reply_q->msix_index), NULL); 2796 2799 free_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index), 2797 2800 reply_q); 2798 2801 kfree(reply_q); ··· 2860 2857 { 2861 2858 unsigned int cpu, nr_cpus, nr_msix, index = 0; 2862 2859 struct adapter_reply_queue *reply_q; 2860 + int local_numa_node; 2863 2861 2864 2862 if (!_base_is_controller_msix_enabled(ioc)) 2865 2863 return; ··· 2879 2875 return; 2880 2876 2881 2877 if (smp_affinity_enable) { 2878 + 2879 + /* 2880 + * set irq affinity to local numa node for those irqs 2881 + * corresponding to high iops queues. 2882 + */ 2883 + if (ioc->high_iops_queues) { 2884 + local_numa_node = dev_to_node(&ioc->pdev->dev); 2885 + for (index = 0; index < ioc->high_iops_queues; 2886 + index++) { 2887 + irq_set_affinity_hint(pci_irq_vector(ioc->pdev, 2888 + index), cpumask_of_node(local_numa_node)); 2889 + } 2890 + } 2891 + 2882 2892 list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { 2883 - const cpumask_t *mask = pci_irq_get_affinity(ioc->pdev, 2884 - reply_q->msix_index); 2893 + const cpumask_t *mask; 2894 + 2895 + if (reply_q->msix_index < ioc->high_iops_queues) 2896 + continue; 2897 + 2898 + mask = pci_irq_get_affinity(ioc->pdev, 2899 + reply_q->msix_index); 2885 2900 if (!mask) { 2886 2901 ioc_warn(ioc, "no affinity for msi %x\n", 2887 2902 reply_q->msix_index); 2888 - continue; 2903 + goto fall_back; 2889 2904 } 2890 2905 2891 2906 for_each_cpu_and(cpu, mask, cpu_online_mask) { ··· 2915 2892 } 2916 2893 return; 2917 2894 } 2895 + 2896 + fall_back: 2918 2897 cpu = cpumask_first(cpu_online_mask); 2898 + nr_msix -= ioc->high_iops_queues; 2899 + index = 0; 2919 2900 2920 2901 list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { 2921 - 2922 2902 unsigned int i, group = nr_cpus / nr_msix; 2903 + 
2904 + if (reply_q->msix_index < ioc->high_iops_queues) 2905 + continue; 2923 2906 2924 2907 if (cpu >= nr_cpus) 2925 2908 break; ··· 2979 2950 { 2980 2951 if (!ioc->msix_enable) 2981 2952 return; 2982 - pci_disable_msix(ioc->pdev); 2953 + pci_free_irq_vectors(ioc->pdev); 2983 2954 ioc->msix_enable = 0; 2955 + } 2956 + 2957 + /** 2958 + * _base_alloc_irq_vectors - allocate msix vectors 2959 + * @ioc: per adapter object 2960 + * 2961 + */ 2962 + static int 2963 + _base_alloc_irq_vectors(struct MPT3SAS_ADAPTER *ioc) 2964 + { 2965 + int i, irq_flags = PCI_IRQ_MSIX; 2966 + struct irq_affinity desc = { .pre_vectors = ioc->high_iops_queues }; 2967 + struct irq_affinity *descp = &desc; 2968 + 2969 + if (smp_affinity_enable) 2970 + irq_flags |= PCI_IRQ_AFFINITY; 2971 + else 2972 + descp = NULL; 2973 + 2974 + ioc_info(ioc, " %d %d\n", ioc->high_iops_queues, 2975 + ioc->msix_vector_count); 2976 + 2977 + i = pci_alloc_irq_vectors_affinity(ioc->pdev, 2978 + ioc->high_iops_queues, 2979 + ioc->msix_vector_count, irq_flags, descp); 2980 + 2981 + return i; 2984 2982 } 2985 2983 2986 2984 /** ··· 3021 2965 int r; 3022 2966 int i, local_max_msix_vectors; 3023 2967 u8 try_msix = 0; 3024 - unsigned int irq_flags = PCI_IRQ_MSIX; 3025 2968 3026 2969 if (msix_disable == -1 || msix_disable == 0) 3027 2970 try_msix = 1; ··· 3054 2999 if (ioc->msix_vector_count < ioc->cpu_count) 3055 3000 smp_affinity_enable = 0; 3056 3001 3057 - if (smp_affinity_enable) 3058 - irq_flags |= PCI_IRQ_AFFINITY; 3059 - 3060 - r = pci_alloc_irq_vectors(ioc->pdev, 1, ioc->reply_queue_count, 3061 - irq_flags); 3002 + r = _base_alloc_irq_vectors(ioc); 3062 3003 if (r < 0) { 3063 3004 dfailprintk(ioc, 3064 3005 ioc_info(ioc, "pci_alloc_irq_vectors failed (r=%d) !!!\n",