[SPARC64]: Fix ugly dependency on NR_CPUS being a power-of-2.

The page->flags D-cache dirty state tracking depended upon
NR_CPUS being a power-of-2 via its "NR_CPUS - 1" masking.

Fix that to use a fixed (256 - 1) mask as that is the limit
imposed by thread_info->cpu which is a "u8".

Finally, add a compile-time check that NR_CPUS is not greater
than 256.

Signed-off-by: David S. Miller <davem@davemloft.net>

+17 -6
+17 -6
arch/sparc64/mm/init.c
··· 121 } 122 123 #define PG_dcache_dirty PG_arch_1 124 125 #define dcache_dirty_cpu(page) \ 126 - (((page)->flags >> 24) & (NR_CPUS - 1UL)) 127 128 static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) 129 { 130 unsigned long mask = this_cpu; 131 - unsigned long non_cpu_bits = ~((NR_CPUS - 1UL) << 24UL); 132 - mask = (mask << 24) | (1UL << PG_dcache_dirty); 133 __asm__ __volatile__("1:\n\t" 134 "ldx [%2], %%g7\n\t" 135 "and %%g7, %1, %%g1\n\t" ··· 160 __asm__ __volatile__("! test_and_clear_dcache_dirty\n" 161 "1:\n\t" 162 "ldx [%2], %%g7\n\t" 163 - "srlx %%g7, 24, %%g1\n\t" 164 "and %%g1, %3, %%g1\n\t" 165 "cmp %%g1, %0\n\t" 166 "bne,pn %%icc, 2f\n\t" ··· 173 "2:" 174 : /* no outputs */ 175 : "r" (cpu), "r" (mask), "r" (&page->flags), 176 - "i" (NR_CPUS - 1UL) 177 : "g1", "g7"); 178 } 179 ··· 190 if (pfn_valid(pfn) && 191 (page = pfn_to_page(pfn), page_mapping(page)) && 192 ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { 193 - int cpu = ((pg_flags >> 24) & (NR_CPUS - 1UL)); 194 int this_cpu = get_cpu(); 195 196 /* This is just to optimize away some function calls
··· 121 } 122 123 #define PG_dcache_dirty PG_arch_1 124 + #define PG_dcache_cpu_shift 24 125 + #define PG_dcache_cpu_mask (256 - 1) 126 + 127 + #if NR_CPUS > 256 128 + #error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus 129 + #endif 130 131 #define dcache_dirty_cpu(page) \ 132 + (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) 133 134 static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) 135 { 136 unsigned long mask = this_cpu; 137 + unsigned long non_cpu_bits; 138 + 139 + non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift); 140 + mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty); 141 + 142 __asm__ __volatile__("1:\n\t" 143 "ldx [%2], %%g7\n\t" 144 "and %%g7, %1, %%g1\n\t" ··· 151 __asm__ __volatile__("! test_and_clear_dcache_dirty\n" 152 "1:\n\t" 153 "ldx [%2], %%g7\n\t" 154 + "srlx %%g7, %4, %%g1\n\t" 155 "and %%g1, %3, %%g1\n\t" 156 "cmp %%g1, %0\n\t" 157 "bne,pn %%icc, 2f\n\t" ··· 164 "2:" 165 : /* no outputs */ 166 : "r" (cpu), "r" (mask), "r" (&page->flags), 167 + "i" (PG_dcache_cpu_mask), 168 + "i" (PG_dcache_cpu_shift) 169 : "g1", "g7"); 170 } 171 ··· 180 if (pfn_valid(pfn) && 181 (page = pfn_to_page(pfn), page_mapping(page)) && 182 ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { 183 + int cpu = ((pg_flags >> PG_dcache_cpu_shift) & 184 + PG_dcache_cpu_mask); 185 int this_cpu = get_cpu(); 186 187 /* This is just to optimize away some function calls