x86/intel_rdt: Modify the intel_pqr_state for better performance

Currently we have pqr_state and rdt_default_state which store the cached
CLOSID/RMIDs and the user configured cpu default values respectively. We
touch both of these during context switch. Put all of them in one
structure so that we can spare a cache line.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ravi.v.shankar@intel.com
Cc: tony.luck@intel.com
Cc: fenghua.yu@intel.com
Cc: peterz@infradead.org
Cc: eranian@google.com
Cc: sai.praneeth.prakhya@intel.com
Cc: ak@linux.intel.com
Cc: davidcc@google.com
Link: http://lkml.kernel.org/r/1502304395-7166-3-git-send-email-vikas.shivappa@linux.intel.com

Authored by Vikas Shivappa; committed by Thomas Gleixner.
Commit: a9110b55 (parent: eda61c26)

Total: 26 insertions(+), 24 deletions(-)

arch/x86/include/asm/intel_rdt_sched.h (+17, -13):

--- a/arch/x86/include/asm/intel_rdt_sched.h
+++ b/arch/x86/include/asm/intel_rdt_sched.h
@@ -10,8 +10,10 @@
 
 /**
  * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid:		The cached Resource Monitoring ID
- * @closid:		The cached Class Of Service ID
+ * @cur_rmid:		The cached Resource Monitoring ID
+ * @cur_closid:		The cached Class Of Service ID
+ * @default_rmid:	The user assigned Resource Monitoring ID
+ * @default_closid:	The user assigned cached Class Of Service ID
  *
  * The upper 32 bits of IA32_PQR_ASSOC contain closid and the
  * lower 10 bits rmid. The update to IA32_PQR_ASSOC always
@@ -24,12 +22,13 @@
  * not change.
  */
 struct intel_pqr_state {
-	u32			rmid;
-	u32			closid;
+	u32			cur_rmid;
+	u32			cur_closid;
+	u32			default_rmid;
+	u32			default_closid;
 };
 
 DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
-DECLARE_PER_CPU_READ_MOSTLY(struct intel_pqr_state, rdt_cpu_default);
 
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
@@ -52,8 +49,9 @@
  */
 static void __intel_rdt_sched_in(void)
 {
-	struct intel_pqr_state newstate = this_cpu_read(rdt_cpu_default);
-	struct intel_pqr_state *curstate = this_cpu_ptr(&pqr_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+	u32 closid = state->default_closid;
+	u32 rmid = state->default_rmid;
 
 	/*
 	 * If this task has a closid/rmid assigned, use it.
@@ -62,18 +58,18 @@
 	 */
 	if (static_branch_likely(&rdt_alloc_enable_key)) {
 		if (current->closid)
-			newstate.closid = current->closid;
+			closid = current->closid;
 	}
 
 	if (static_branch_likely(&rdt_mon_enable_key)) {
 		if (current->rmid)
-			newstate.rmid = current->rmid;
+			rmid = current->rmid;
 	}
 
-	if (newstate.closid != curstate->closid ||
-	    newstate.rmid != curstate->rmid) {
-		*curstate = newstate;
-		wrmsr(IA32_PQR_ASSOC, newstate.rmid, newstate.closid);
+	if (closid != state->cur_closid || rmid != state->cur_rmid) {
+		state->cur_closid = closid;
+		state->cur_rmid = rmid;
+		wrmsr(IA32_PQR_ASSOC, rmid, closid);
 	}
 }
 
arch/x86/kernel/cpu/intel_rdt.c (+4, -6):

--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -47,8 +47,6 @@
  */
 DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
-DEFINE_PER_CPU_READ_MOSTLY(struct intel_pqr_state, rdt_cpu_default);
-
 /*
  * Used to store the max resource name width and max resource data width
  * to display the schemata in a tabular format
@@ -548,9 +550,9 @@
 {
 	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 
-	per_cpu(rdt_cpu_default.closid, cpu) = 0;
-	per_cpu(rdt_cpu_default.rmid, cpu) = 0;
-	state->closid = 0;
-	state->rmid = 0;
+	state->default_closid = 0;
+	state->default_rmid = 0;
+	state->cur_closid = 0;
+	state->cur_rmid = 0;
 	wrmsr(IA32_PQR_ASSOC, 0, 0);
 }
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c (+5, -5):

--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -202,8 +202,8 @@
 	struct rdtgroup *r = info;
 
 	if (r) {
-		this_cpu_write(rdt_cpu_default.closid, r->closid);
-		this_cpu_write(rdt_cpu_default.rmid, r->mon.rmid);
+		this_cpu_write(pqr_state.default_closid, r->closid);
+		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 	}
 
 	/*
@@ -1733,7 +1733,7 @@
 
 	/* Update per cpu rmid of the moved CPUs first */
 	for_each_cpu(cpu, &rdtgrp->cpu_mask)
-		per_cpu(rdt_cpu_default.rmid, cpu) = prdtgrp->mon.rmid;
+		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
 	/*
 	 * Update the MSR on moved CPUs and CPUs which have moved
 	 * task running on them.
@@ -1774,8 +1774,8 @@
 
 	/* Update per cpu closid and rmid of the moved CPUs first */
 	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
-		per_cpu(rdt_cpu_default.closid, cpu) = rdtgroup_default.closid;
-		per_cpu(rdt_cpu_default.rmid, cpu) = rdtgroup_default.mon.rmid;
+		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
+		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
 	}
 
 	/*