Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[IA64] __per_cpu_idtrs[] is a memory hog

__per_cpu_idtrs is statically allocated; on CONFIG_NR_CPUS=4096
systems it hogs 16MB of memory. This is far too much for a quite
probably unused facility (only KVM uses dynamic TR registers).

Change to an array of pointers, and allocate entries as needed on
a per cpu basis. Change the name too as the __per_cpu_ prefix is
confusing (this isn't a classic <linux/percpu.h> type object).

Signed-off-by: Tony Luck <tony.luck@intel.com>

Tony Luck 6c57a332 410dc0aa

+24 -15
+1 -1
arch/ia64/include/asm/tlb.h
··· 74 74 extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); 75 75 extern void ia64_ptr_entry(u64 target_mask, int slot); 76 76 77 - extern struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; 77 + extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; 78 78 79 79 /* 80 80 region register macros
+4 -1
arch/ia64/kernel/mca.c
··· 1225 1225 unsigned long psr; 1226 1226 int cpu = smp_processor_id(); 1227 1227 1228 + if (!ia64_idtrs[cpu]) 1229 + return; 1230 + 1228 1231 psr = ia64_clear_ic(); 1229 1232 for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { 1230 - p = &__per_cpu_idtrs[cpu][iord-1][i]; 1233 + p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX; 1231 1234 if (p->pte & 0x1) { 1232 1235 old_rr = ia64_get_rr(p->ifa); 1233 1236 if (old_rr != p->rr) {
+19 -13
arch/ia64/mm/tlb.c
··· 48 48 DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ 49 49 DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ 50 50 51 - struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; 51 + struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; 52 52 53 53 /* 54 54 * Initializes the ia64_ctx.bitmap array based on max_ctx+1. ··· 429 429 struct ia64_tr_entry *p; 430 430 int cpu = smp_processor_id(); 431 431 432 + if (!ia64_idtrs[cpu]) { 433 + ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX * 434 + sizeof (struct ia64_tr_entry), GFP_KERNEL); 435 + if (!ia64_idtrs[cpu]) 436 + return -ENOMEM; 437 + } 432 438 r = -EINVAL; 433 439 /*Check overlap with existing TR entries*/ 434 440 if (target_mask & 0x1) { 435 - p = &__per_cpu_idtrs[cpu][0][0]; 441 + p = ia64_idtrs[cpu]; 436 442 for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); 437 443 i++, p++) { 438 444 if (p->pte & 0x1) ··· 450 444 } 451 445 } 452 446 if (target_mask & 0x2) { 453 - p = &__per_cpu_idtrs[cpu][1][0]; 447 + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX; 454 448 for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); 455 449 i++, p++) { 456 450 if (p->pte & 0x1) ··· 465 459 for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { 466 460 switch (target_mask & 0x3) { 467 461 case 1: 468 - if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1)) 462 + if (!((ia64_idtrs[cpu] + i)->pte & 0x1)) 469 463 goto found; 470 464 continue; 471 465 case 2: 472 - if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) 466 + if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) 473 467 goto found; 474 468 continue; 475 469 case 3: 476 - if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) && 477 - !(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) 470 + if (!((ia64_idtrs[cpu] + i)->pte & 0x1) && 471 + !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) 478 472 goto found; 479 473 continue; 480 474 default: ··· 494 488 if (target_mask & 0x1) { 495 489 ia64_itr(0x1, i, va, pte, log_size); 
496 490 ia64_srlz_i(); 497 - p = &__per_cpu_idtrs[cpu][0][i]; 491 + p = ia64_idtrs[cpu] + i; 498 492 p->ifa = va; 499 493 p->pte = pte; 500 494 p->itir = log_size << 2; ··· 503 497 if (target_mask & 0x2) { 504 498 ia64_itr(0x2, i, va, pte, log_size); 505 499 ia64_srlz_i(); 506 - p = &__per_cpu_idtrs[cpu][1][i]; 500 + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i; 507 501 p->ifa = va; 508 502 p->pte = pte; 509 503 p->itir = log_size << 2; ··· 534 528 return; 535 529 536 530 if (target_mask & 0x1) { 537 - p = &__per_cpu_idtrs[cpu][0][slot]; 531 + p = ia64_idtrs[cpu] + slot; 538 532 if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { 539 533 p->pte = 0; 540 534 ia64_ptr(0x1, p->ifa, p->itir>>2); ··· 543 537 } 544 538 545 539 if (target_mask & 0x2) { 546 - p = &__per_cpu_idtrs[cpu][1][slot]; 540 + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot; 547 541 if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { 548 542 p->pte = 0; 549 543 ia64_ptr(0x2, p->ifa, p->itir>>2); ··· 552 546 } 553 547 554 548 for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { 555 - if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) || 556 - (__per_cpu_idtrs[cpu][1][i].pte & 0x1)) 549 + if (((ia64_idtrs[cpu] + i)->pte & 0x1) || 550 + ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) 557 551 break; 558 552 } 559 553 per_cpu(ia64_tr_used, cpu) = i;