Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bus: arm-ccn: cpumask attribute

This patch adds a "cpumask" attribute to CCN's event_source class sysfs
directory. The perf user tool uses it to restrict events to the
processor(s) enumerated in this mask.

This patch provides a single CPU mask, making it possible to run a "-a"
perf session (previously it would request the same CCN event, for
example the cycle counter, on each available core and most likely fail).
Initially the mask is set to the CPU that happened to probe the driver,
but it will be changed when that CPU is hot-unplugged (active events are
migrated to another CPU at that point).

Example:

Performance counter stats for 'system wide':

CPU0 2968148 cycles
CPU1 2236736 cycles
CPU2 1797968 cycles
CPU3 1831715 cycles
CPU1 1201850868 ccn/cycles/

1.001241383 seconds time elapsed

Signed-off-by: Pawel Moll <pawel.moll@arm.com>

+126 -17
+9 -6
Documentation/arm/CCN.txt
··· 33 33 Cycle counter is described by a "type" value 0xff and does 34 34 not require any other settings. 35 35 36 + The driver also provides a "cpumask" sysfs attribute, which contains 37 + a single CPU ID, of the processor which will be used to handle all 38 + the CCN PMU events. It is recommended that the user space tools 39 + request the events on this processor (if not, the perf_event->cpu value 40 + will be overwritten anyway). In case of this processor being offlined, 41 + the events are migrated to another one and the attribute is updated. 42 + 36 43 Example of perf tool use: 37 44 38 45 / # perf list | grep ccn ··· 48 41 ccn/xp_valid_flit/ [Kernel PMU event] 49 42 <...> 50 43 51 - / # perf stat -C 0 -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \ 44 + / # perf stat -a -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \ 52 45 sleep 1 53 46 54 47 The driver does not support sampling, therefore "perf record" will 55 - not work. Also notice that only single cpu is being selected 56 - ("-C 0") - this is because perf framework does not support 57 - "non-CPU related" counters (yet?) so system-wide session ("-a") 58 - would try (and in most cases fail) to set up the same event 59 - per each CPU. 48 + not work. Per-task (without "-a") perf sessions are not supported.
+117 -11
drivers/bus/arm-ccn.c
··· 166 166 167 167 struct hrtimer hrtimer; 168 168 169 + cpumask_t cpu; 170 + struct notifier_block cpu_nb; 171 + 169 172 struct pmu pmu; 170 173 }; 171 174 172 175 struct arm_ccn { 173 176 struct device *dev; 174 177 void __iomem *base; 175 - unsigned irq_used:1; 178 + unsigned int irq; 179 + 176 180 unsigned sbas_present:1; 177 181 unsigned sbsx_present:1; 178 182 ··· 525 521 .attrs = arm_ccn_pmu_cmp_mask_attrs, 526 522 }; 527 523 524 + static ssize_t arm_ccn_pmu_cpumask_show(struct device *dev, 525 + struct device_attribute *attr, char *buf) 526 + { 527 + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); 528 + 529 + return cpumap_print_to_pagebuf(true, buf, &ccn->dt.cpu); 530 + } 531 + 532 + static struct device_attribute arm_ccn_pmu_cpumask_attr = 533 + __ATTR(cpumask, S_IRUGO, arm_ccn_pmu_cpumask_show, NULL); 534 + 535 + static struct attribute *arm_ccn_pmu_cpumask_attrs[] = { 536 + &arm_ccn_pmu_cpumask_attr.attr, 537 + NULL, 538 + }; 539 + 540 + static struct attribute_group arm_ccn_pmu_cpumask_attr_group = { 541 + .attrs = arm_ccn_pmu_cpumask_attrs, 542 + }; 528 543 529 544 /* 530 545 * Default poll period is 10ms, which is way over the top anyway, ··· 565 542 &arm_ccn_pmu_events_attr_group, 566 543 &arm_ccn_pmu_format_attr_group, 567 544 &arm_ccn_pmu_cmp_mask_attr_group, 545 + &arm_ccn_pmu_cpumask_attr_group, 568 546 NULL 569 547 }; 570 548 ··· 666 642 dev_warn(ccn->dev, "Can't provide per-task data!\n"); 667 643 return -EOPNOTSUPP; 668 644 } 645 + /* 646 + * Many perf core operations (eg. events rotation) operate on a 647 + * single CPU context. This is obvious for CPU PMUs, where one 648 + * expects the same sets of events being observed on all CPUs, 649 + * but can lead to issues for off-core PMUs, like CCN, where each 650 + * event could be theoretically assigned to a different CPU. To 651 + * mitigate this, we enforce CPU assignment to one, selected 652 + * processor (the one described in the "cpumask" attribute). 
653 + */ 654 + event->cpu = cpumask_first(&ccn->dt.cpu); 669 655 670 656 node_xp = CCN_CONFIG_NODE(event->attr.config); 671 657 type = CCN_CONFIG_TYPE(event->attr.config); ··· 869 835 arm_ccn_pmu_read_counter(ccn, hw->idx)); 870 836 hw->state = 0; 871 837 872 - if (!ccn->irq_used) 873 - hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(), 874 - HRTIMER_MODE_REL); 838 + /* 839 + * Pin the timer, so that the overflows are handled by the chosen 840 + * event->cpu (this is the same one as presented in "cpumask" 841 + * attribute). 842 + */ 843 + if (!ccn->irq) 844 + __hrtimer_start_range_ns(&ccn->dt.hrtimer, 845 + arm_ccn_pmu_timer_period(), 0, 846 + HRTIMER_MODE_REL_PINNED, 0); 875 847 876 848 /* Set the DT bus input, engaging the counter */ 877 849 arm_ccn_pmu_xp_dt_config(event, 1); ··· 892 852 /* Disable counting, setting the DT bus to pass-through mode */ 893 853 arm_ccn_pmu_xp_dt_config(event, 0); 894 854 895 - if (!ccn->irq_used) 855 + if (!ccn->irq) 896 856 hrtimer_cancel(&ccn->dt.hrtimer); 897 857 898 858 /* Let the DT bus drain */ ··· 1119 1079 } 1120 1080 1121 1081 1082 + static int arm_ccn_pmu_cpu_notifier(struct notifier_block *nb, 1083 + unsigned long action, void *hcpu) 1084 + { 1085 + struct arm_ccn_dt *dt = container_of(nb, struct arm_ccn_dt, cpu_nb); 1086 + struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt); 1087 + unsigned int cpu = (long)hcpu; /* for (long) see kernel/cpu.c */ 1088 + unsigned int target; 1089 + 1090 + switch (action & ~CPU_TASKS_FROZEN) { 1091 + case CPU_DOWN_PREPARE: 1092 + if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu)) 1093 + break; 1094 + target = cpumask_any_but(cpu_online_mask, cpu); 1095 + if (target < 0) 1096 + break; 1097 + perf_pmu_migrate_context(&dt->pmu, cpu, target); 1098 + cpumask_set_cpu(target, &dt->cpu); 1099 + WARN_ON(irq_set_affinity(ccn->irq, &dt->cpu) != 0); 1100 + default: 1101 + break; 1102 + } 1103 + 1104 + return NOTIFY_OK; 1105 + } 1106 + 1107 + 1122 1108 static DEFINE_IDA(arm_ccn_pmu_ida); 
1123 1109 1124 1110 static int arm_ccn_pmu_init(struct arm_ccn *ccn) 1125 1111 { 1126 1112 int i; 1127 1113 char *name; 1114 + int err; 1128 1115 1129 1116 /* Initialize DT subsystem */ 1130 1117 ccn->dt.base = ccn->base + CCN_REGION_SIZE; ··· 1203 1136 }; 1204 1137 1205 1138 /* No overflow interrupt? Have to use a timer instead. */ 1206 - if (!ccn->irq_used) { 1139 + if (!ccn->irq) { 1207 1140 dev_info(ccn->dev, "No access to interrupts, using timer.\n"); 1208 1141 hrtimer_init(&ccn->dt.hrtimer, CLOCK_MONOTONIC, 1209 1142 HRTIMER_MODE_REL); 1210 1143 ccn->dt.hrtimer.function = arm_ccn_pmu_timer_handler; 1211 1144 } 1212 1145 1213 - return perf_pmu_register(&ccn->dt.pmu, name, -1); 1146 + /* Pick one CPU which we will use to collect data from CCN... */ 1147 + cpumask_set_cpu(smp_processor_id(), &ccn->dt.cpu); 1148 + 1149 + /* 1150 + * ... and change the selection when it goes offline. Priority is 1151 + * picked to have a chance to migrate events before perf is notified. 1152 + */ 1153 + ccn->dt.cpu_nb.notifier_call = arm_ccn_pmu_cpu_notifier; 1154 + ccn->dt.cpu_nb.priority = CPU_PRI_PERF + 1, 1155 + err = register_cpu_notifier(&ccn->dt.cpu_nb); 1156 + if (err) 1157 + goto error_cpu_notifier; 1158 + 1159 + /* Also make sure that the overflow interrupt is handled by this CPU */ 1160 + if (ccn->irq) { 1161 + err = irq_set_affinity(ccn->irq, &ccn->dt.cpu); 1162 + if (err) { 1163 + dev_err(ccn->dev, "Failed to set interrupt affinity!\n"); 1164 + goto error_set_affinity; 1165 + } 1166 + } 1167 + 1168 + err = perf_pmu_register(&ccn->dt.pmu, name, -1); 1169 + if (err) 1170 + goto error_pmu_register; 1171 + 1172 + return 0; 1173 + 1174 + error_pmu_register: 1175 + error_set_affinity: 1176 + unregister_cpu_notifier(&ccn->dt.cpu_nb); 1177 + error_cpu_notifier: 1178 + ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); 1179 + for (i = 0; i < ccn->num_xps; i++) 1180 + writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); 1181 + writel(0, ccn->dt.base + CCN_DT_PMCR); 1182 + return err; 
1214 1183 } 1215 1184 1216 1185 static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn) 1217 1186 { 1218 1187 int i; 1219 1188 1189 + irq_set_affinity(ccn->irq, cpu_possible_mask); 1190 + unregister_cpu_notifier(&ccn->dt.cpu_nb); 1220 1191 for (i = 0; i < ccn->num_xps; i++) 1221 1192 writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); 1222 1193 writel(0, ccn->dt.base + CCN_DT_PMCR); ··· 1390 1285 { 1391 1286 struct arm_ccn *ccn; 1392 1287 struct resource *res; 1288 + unsigned int irq; 1393 1289 int err; 1394 1290 1395 1291 ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL); ··· 1415 1309 res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); 1416 1310 if (!res) 1417 1311 return -EINVAL; 1312 + irq = res->start; 1418 1313 1419 1314 /* Check if we can use the interrupt */ 1420 1315 writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE, ··· 1425 1318 /* Can set 'disable' bits, so can acknowledge interrupts */ 1426 1319 writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE, 1427 1320 ccn->base + CCN_MN_ERRINT_STATUS); 1428 - err = devm_request_irq(ccn->dev, res->start, 1429 - arm_ccn_irq_handler, 0, dev_name(ccn->dev), 1430 - ccn); 1321 + err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0, 1322 + dev_name(ccn->dev), ccn); 1431 1323 if (err) 1432 1324 return err; 1433 1325 1434 - ccn->irq_used = 1; 1326 + ccn->irq = irq; 1435 1327 } 1436 1328 1437 1329