Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/resctrl: Add interface to write mbm_total_bytes_config

The event configuration for mbm_total_bytes can be changed by the user by
writing to the file /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config.

The event configuration settings are domain specific and affect all the
CPUs in the domain.

Following are the types of events supported:

==== ===========================================================
Bits Description
==== ===========================================================
6 Dirty Victims from the QOS domain to all types of memory
5 Reads to slow memory in the non-local NUMA domain
4 Reads to slow memory in the local NUMA domain
3 Non-temporal writes to non-local NUMA domain
2 Non-temporal writes to local NUMA domain
1 Reads to memory in the non-local NUMA domain
0 Reads to memory in the local NUMA domain
==== ===========================================================

For example:

To change the mbm_total_bytes to count only reads on domain 0, the bits
0, 1, 4 and 5 need to be set, which is 110011b (in hex 0x33).
Run the command:

$echo 0=0x33 > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config

To change the mbm_total_bytes to count all the slow memory reads on domain 1,
the bits 4 and 5 need to be set, which is 110000b (in hex 0x30).
Run the command:

$echo 1=0x30 > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config

Signed-off-by: Babu Moger <babu.moger@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lore.kernel.org/r/20230113152039.770054-12-babu.moger@amd.com

authored by

Babu Moger and committed by
Borislav Petkov (AMD)
92bd5a13 73afb2d3

+151 -1
+17
arch/x86/kernel/cpu/resctrl/monitor.c
··· 204 204 } 205 205 } 206 206 207 + /* 208 + * Assumes that hardware counters are also reset and thus that there is 209 + * no need to record initial non-zero counts. 210 + */ 211 + void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) 212 + { 213 + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); 214 + 215 + if (is_mbm_total_enabled()) 216 + memset(hw_dom->arch_mbm_total, 0, 217 + sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); 218 + 219 + if (is_mbm_local_enabled()) 220 + memset(hw_dom->arch_mbm_local, 0, 221 + sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); 222 + } 223 + 207 224 static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) 208 225 { 209 226 u64 shift = 64 - width, chunks;
+123 -1
arch/x86/kernel/cpu/resctrl/rdtgroup.c
··· 1517 1517 return 0; 1518 1518 } 1519 1519 1520 + static void mon_event_config_write(void *info) 1521 + { 1522 + struct mon_config_info *mon_info = info; 1523 + unsigned int index; 1524 + 1525 + index = mon_event_config_index_get(mon_info->evtid); 1526 + if (index == INVALID_CONFIG_INDEX) { 1527 + pr_warn_once("Invalid event id %d\n", mon_info->evtid); 1528 + return; 1529 + } 1530 + wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); 1531 + } 1532 + 1533 + static int mbm_config_write_domain(struct rdt_resource *r, 1534 + struct rdt_domain *d, u32 evtid, u32 val) 1535 + { 1536 + struct mon_config_info mon_info = {0}; 1537 + int ret = 0; 1538 + 1539 + /* mon_config cannot be more than the supported set of events */ 1540 + if (val > MAX_EVT_CONFIG_BITS) { 1541 + rdt_last_cmd_puts("Invalid event configuration\n"); 1542 + return -EINVAL; 1543 + } 1544 + 1545 + /* 1546 + * Read the current config value first. If both are the same then 1547 + * no need to write it again. 1548 + */ 1549 + mon_info.evtid = evtid; 1550 + mondata_config_read(d, &mon_info); 1551 + if (mon_info.mon_config == val) 1552 + goto out; 1553 + 1554 + mon_info.mon_config = val; 1555 + 1556 + /* 1557 + * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the 1558 + * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE 1559 + * are scoped at the domain level. Writing any of these MSRs 1560 + * on one CPU is observed by all the CPUs in the domain. 1561 + */ 1562 + smp_call_function_any(&d->cpu_mask, mon_event_config_write, 1563 + &mon_info, 1); 1564 + 1565 + /* 1566 + * When an Event Configuration is changed, the bandwidth counters 1567 + * for all RMIDs and Events will be cleared by the hardware. The 1568 + * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for 1569 + * every RMID on the next read to any event for every RMID. 1570 + * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) 1571 + * cleared while it is tracked by the hardware. 
Clear the 1572 + * mbm_local and mbm_total counts for all the RMIDs. 1573 + */ 1574 + resctrl_arch_reset_rmid_all(r, d); 1575 + 1576 + out: 1577 + return ret; 1578 + } 1579 + 1580 + static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) 1581 + { 1582 + char *dom_str = NULL, *id_str; 1583 + unsigned long dom_id, val; 1584 + struct rdt_domain *d; 1585 + int ret = 0; 1586 + 1587 + next: 1588 + if (!tok || tok[0] == '\0') 1589 + return 0; 1590 + 1591 + /* Start processing the strings for each domain */ 1592 + dom_str = strim(strsep(&tok, ";")); 1593 + id_str = strsep(&dom_str, "="); 1594 + 1595 + if (!id_str || kstrtoul(id_str, 10, &dom_id)) { 1596 + rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); 1597 + return -EINVAL; 1598 + } 1599 + 1600 + if (!dom_str || kstrtoul(dom_str, 16, &val)) { 1601 + rdt_last_cmd_puts("Non-numeric event configuration value\n"); 1602 + return -EINVAL; 1603 + } 1604 + 1605 + list_for_each_entry(d, &r->domains, list) { 1606 + if (d->id == dom_id) { 1607 + ret = mbm_config_write_domain(r, d, evtid, val); 1608 + if (ret) 1609 + return -EINVAL; 1610 + goto next; 1611 + } 1612 + } 1613 + 1614 + return -EINVAL; 1615 + } 1616 + 1617 + static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, 1618 + char *buf, size_t nbytes, 1619 + loff_t off) 1620 + { 1621 + struct rdt_resource *r = of->kn->parent->priv; 1622 + int ret; 1623 + 1624 + /* Valid input requires a trailing newline */ 1625 + if (nbytes == 0 || buf[nbytes - 1] != '\n') 1626 + return -EINVAL; 1627 + 1628 + mutex_lock(&rdtgroup_mutex); 1629 + 1630 + rdt_last_cmd_clear(); 1631 + 1632 + buf[nbytes - 1] = '\0'; 1633 + 1634 + ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); 1635 + 1636 + mutex_unlock(&rdtgroup_mutex); 1637 + 1638 + return ret ?: nbytes; 1639 + } 1640 + 1520 1641 /* rdtgroup information files for one cache resource. 
*/ 1521 1642 static struct rftype res_common_files[] = { 1522 1643 { ··· 1738 1617 }, 1739 1618 { 1740 1619 .name = "mbm_total_bytes_config", 1741 - .mode = 0444, 1620 + .mode = 0644, 1742 1621 .kf_ops = &rdtgroup_kf_single_ops, 1743 1622 .seq_show = mbm_total_bytes_config_show, 1623 + .write = mbm_total_bytes_config_write, 1744 1624 }, 1745 1625 { 1746 1626 .name = "mbm_local_bytes_config",
+11
include/linux/resctrl.h
··· 250 250 void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, 251 251 u32 rmid, enum resctrl_event_id eventid); 252 252 253 + /** 254 + * resctrl_arch_reset_rmid_all() - Reset all private state associated with 255 + * all rmids and eventids. 256 + * @r: The resctrl resource. 257 + * @d: The domain for which all architectural counter state will 258 + * be cleared. 259 + * 260 + * This can be called from any CPU. 261 + */ 262 + void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); 263 + 253 264 extern unsigned int resctrl_rmid_realloc_threshold; 254 265 extern unsigned int resctrl_rmid_realloc_limit; 255 266