Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ia64: Reduce stack usage by iterating over nodemask

GCC complains about sn2_global_tlb_purge() because of the large stack
frame required by the function:

arch/ia64/sn/kernel/sn2/sn2_smp.c: In function 'sn2_global_tlb_purge':
arch/ia64/sn/kernel/sn2/sn2_smp.c:319:1: warning: the frame size of 2176 bytes is larger than 2048 bytes [-Wframe-larger-than=]

2048 bytes of the stack are consumed by the node ID array 'nasids[]'.
But we don't actually need to put the ID array on the stack; we can
iterate over the nodemask directly instead.

Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Tony Luck <tony.luck@intel.com>

authored by

Matt Fleming and committed by
Tony Luck
0b184a30 1bba3ff9

+23 -12
+23 -12
arch/ia64/sn/kernel/sn2/sn2_smp.c
··· 54 54 volatile unsigned long *, unsigned long, 55 55 volatile unsigned long *, unsigned long); 56 56 void 57 - sn2_ptc_deadlock_recovery(short *, short, short, int, 57 + sn2_ptc_deadlock_recovery(nodemask_t, short, short, int, 58 58 volatile unsigned long *, unsigned long, 59 59 volatile unsigned long *, unsigned long); 60 60 ··· 169 169 int use_cpu_ptcga; 170 170 volatile unsigned long *ptc0, *ptc1; 171 171 unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; 172 - short nasids[MAX_NUMNODES], nix; 172 + short nix; 173 173 nodemask_t nodes_flushed; 174 174 int active, max_active, deadlock, flush_opt = sn2_flush_opt; 175 175 ··· 218 218 } 219 219 220 220 itc = ia64_get_itc(); 221 - nix = 0; 222 - for_each_node_mask(cnode, nodes_flushed) 223 - nasids[nix++] = cnodeid_to_nasid(cnode); 221 + nix = nodes_weight(nodes_flushed); 224 222 225 223 rr_value = (mm->context << 3) | REGION_NUMBER(start); 226 224 ··· 268 270 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); 269 271 deadlock = 0; 270 272 active = 0; 271 - for (ibegin = 0, i = 0; i < nix; i++) { 272 - nasid = nasids[i]; 273 + ibegin = 0; 274 + i = 0; 275 + for_each_node_mask(cnode, nodes_flushed) { 276 + nasid = cnodeid_to_nasid(cnode); 273 277 if (use_cpu_ptcga && unlikely(nasid == mynasid)) { 274 278 ia64_ptcga(start, nbits << 2); 275 279 ia64_srlz_i(); ··· 286 286 if ((deadlock = wait_piowc())) { 287 287 if (flush_opt == 1) 288 288 goto done; 289 - sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); 289 + sn2_ptc_deadlock_recovery(nodes_flushed, ibegin, i, mynasid, ptc0, data0, ptc1, data1); 290 290 if (reset_max_active_on_deadlock()) 291 291 max_active = 1; 292 292 } 293 293 active = 0; 294 294 ibegin = i + 1; 295 295 } 296 + i++; 296 297 } 297 298 start += (1UL << nbits); 298 299 } while (start < end); ··· 328 327 */ 329 328 330 329 void 331 - sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, 330 + 
sn2_ptc_deadlock_recovery(nodemask_t nodes, short ib, short ie, int mynasid, 332 331 volatile unsigned long *ptc0, unsigned long data0, 333 332 volatile unsigned long *ptc1, unsigned long data1) 334 333 { 335 334 short nasid, i; 335 + int cnode; 336 336 unsigned long *piows, zeroval, n; 337 337 338 338 __this_cpu_inc(ptcstats.deadlocks); ··· 341 339 piows = (unsigned long *) pda->pio_write_status_addr; 342 340 zeroval = pda->pio_write_status_val; 343 341 342 + i = 0; 343 + for_each_node_mask(cnode, nodes) { 344 + if (i < ib) 345 + goto next; 344 346 345 - for (i=ib; i <= ie; i++) { 346 - nasid = nasids[i]; 347 + if (i > ie) 348 + break; 349 + 350 + nasid = cnodeid_to_nasid(cnode); 347 351 if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid) 348 - continue; 352 + goto next; 353 + 349 354 ptc0 = CHANGE_NASID(nasid, ptc0); 350 355 if (ptc1) 351 356 ptc1 = CHANGE_NASID(nasid, ptc1); 352 357 353 358 n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); 354 359 __this_cpu_add(ptcstats.deadlocks2, n); 360 + next: 361 + i++; 355 362 } 356 363 357 364 }