x86, UV: Improve BAU performance and error recovery

- increase performance of the interrupt handler

- release timed-out software acknowledge resources

- recover from continuous-busy status due to a hardware issue

- add a 'throttle' to keep a uvhub from sending more than a
specified number of broadcasts concurrently, to work around the
hardware issue (see the sketch after this list)

- provide a 'nobau' boot command line option

- rename 'pnode' and 'node' to 'uvhub' (the 'node' terminology
is ambiguous)

- add some new statistics about the scope of broadcasts, retries, the
hardware issue and the 'throttle'

- split off a new function, uv_bau_process_retry_msg(), from
uv_bau_process_message(), per community coding style feedback.

- simplify the argument list to uv_bau_process_message(), per
community coding style feedback.
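
For reference, a minimal sketch of the 'throttle' described above, built from
the atomic_inc_unless_ge() helper this patch adds; throttle_wait() is a
hypothetical wrapper name, the real loop is open-coded at the top of
uv_flush_send_and_wait():

/*
 * Sketch of the per-uvhub 'throttle': conditionally add 1 to *v
 * unless it is already >= u.  The spinlock makes the test-and-add
 * atomic and, unlike atomic_add_unless(), allows 'u' (max_concurrent)
 * to be lowered below the current count at any time.
 */
static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
{
	spin_lock(lock);
	if (atomic_read(v) >= u) {
		spin_unlock(lock);
		return 0;
	}
	atomic_inc(v);
	spin_unlock(lock);
	return 1;
}

/*
 * throttle_wait() is a hypothetical wrapper; in the patch this loop is
 * open-coded at the top of uv_flush_send_and_wait().
 */
static void throttle_wait(struct bau_control *hmaster, struct ptc_stats *stat)
{
	if (atomic_inc_unless_ge(&hmaster->uvhub_lock,
				 &hmaster->active_descriptor_count,
				 hmaster->max_concurrent))
		return;
	stat->s_throttles++;
	do {
		cpu_relax();
	} while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
				       &hmaster->active_descriptor_count,
				       hmaster->max_concurrent));
}

When destination timeouts keep occurring, the patch drops max_concurrent to 1
and raises it again only after several consecutive completions; the
spinlock-based helper is what makes that dynamic lowering safe.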

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: linux-mm@kvack.org
Cc: Jack Steiner <steiner@sgi.com>
Cc: Russ Anderson <rja@sgi.com>
Cc: Mike Travis <travis@sgi.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <E1O25Z4-0004Ur-PB@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Cliff Wickman and committed by Ingo Molnar (b8f7fb13 2acebe9e)

+1084 -451 (total)

+174 -71   arch/x86/include/asm/uv/uv_bau.h
··· 27 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. 28 * 29 * We will use 31 sets, one for sending BAU messages from each of the 32 30 - * cpu's on the node. 31 * 32 * TLB shootdown will use the first of the 8 descriptors of each set. 33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). 34 */ 35 36 #define UV_ITEMS_PER_DESCRIPTOR 8 37 #define UV_CPUS_PER_ACT_STATUS 32 38 #define UV_ACT_STATUS_MASK 0x3 39 #define UV_ACT_STATUS_SIZE 2 ··· 46 #define UV_PAYLOADQ_PNODE_SHIFT 49 47 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" 48 #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) 49 50 /* 51 * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 ··· 59 #define DESC_STATUS_SOURCE_TIMEOUT 3 60 61 /* 62 - * source side thresholds at which message retries print a warning 63 */ 64 #define SOURCE_TIMEOUT_LIMIT 20 65 #define DESTINATION_TIMEOUT_LIMIT 20 66 67 /* 68 * number of entries in the destination side payload queue 69 */ 70 - #define DEST_Q_SIZE 17 71 /* 72 * number of destination side software ack resources 73 */ ··· 90 /* 91 * completion statuses for sending a TLB flush message 92 */ 93 - #define FLUSH_RETRY 1 94 - #define FLUSH_GIVEUP 2 95 - #define FLUSH_COMPLETE 3 96 97 /* 98 * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) ··· 105 * 'base_dest_nodeid' field of the header corresponds to the 106 * destination nodeID associated with that specified bit. 107 */ 108 - struct bau_target_nodemask { 109 - unsigned long bits[BITS_TO_LONGS(256)]; 110 }; 111 112 /* 113 - * mask of cpu's on a node 114 * (during initialization we need to check that unsigned long has 115 - * enough bits for max. cpu's per node) 116 */ 117 struct bau_local_cpumask { 118 unsigned long bits; ··· 154 struct bau_msg_header { 155 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 156 /* bits 5:0 */ 157 - unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ 158 - /* bits 20:6 */ /* first bit in node_map */ 159 unsigned int command:8; /* message type */ 160 /* bits 28:21 */ 161 /* 0x38: SN3net EndPoint Message */ ··· 165 unsigned int rsvd_2:9; /* must be zero */ 166 /* bits 40:32 */ 167 /* Suppl_A is 56-41 */ 168 - unsigned int payload_2a:8;/* becomes byte 16 of msg */ 169 - /* bits 48:41 */ /* not currently using */ 170 - unsigned int payload_2b:8;/* becomes byte 17 of msg */ 171 - /* bits 56:49 */ /* not currently using */ 172 /* Address field (96:57) is never used as an 173 address (these are address bits 42:3) */ 174 unsigned int rsvd_3:1; /* must be zero */ 175 /* bit 57 */ 176 /* address bits 27:4 are payload */ 177 - /* these 24 bits become bytes 12-14 of msg */ 178 unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ 179 /* bit 58 */ 180 181 - unsigned int payload_1a:5;/* not currently used */ 182 - /* bits 63:59 */ 183 - unsigned int payload_1b:8;/* not currently used */ 184 - /* bits 71:64 */ 185 - unsigned int payload_1c:8;/* not currently used */ 186 - /* bits 79:72 */ 187 - unsigned int payload_1d:2;/* not currently used */ 188 /* bits 81:80 */ 189 190 unsigned int rsvd_4:7; /* must be zero */ ··· 209 /* bits 95:90 */ 210 unsigned int rsvd_6:5; /* must be zero */ 211 /* bits 100:96 */ 212 - unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ 213 /* bit 101*/ 214 unsigned int fairness:3;/* usually zero */ 215 /* bits 104:102 */ ··· 222 /* bits 127:107 */ 223 }; 224 225 /* 226 * The activation descriptor: 227 * The format of the message to send, plus all accompanying control 228 * Should be 64 bytes 
229 */ 230 struct bau_desc { 231 - struct bau_target_nodemask distribution; 232 /* 233 * message template, consisting of header and payload: 234 */ ··· 273 unsigned short acknowledge_count; /* filled in by destination */ 274 /* 16 bits, bytes 10-11 */ 275 276 - unsigned short replied_to:1; /* sent as 0 by the source */ 277 - /* 1 bit */ 278 - unsigned short unused1:7; /* not currently using */ 279 - /* 7 bits: byte 12) */ 280 281 - unsigned char unused2[2]; /* not currently using */ 282 - /* bytes 13-14 */ 283 284 unsigned char sw_ack_vector; /* filled in by the hardware */ 285 /* byte 15 (bits 127:120) */ 286 287 - unsigned char unused4[3]; /* not currently using bytes 17-19 */ 288 - /* bytes 17-19 */ 289 290 int number_of_cpus; /* filled in at destination */ 291 /* 32 bits, bytes 20-23 (aligned) */ ··· 301 }; 302 303 /* 304 - * one for every slot in the destination payload queue 305 - */ 306 - struct bau_msg_status { 307 - struct bau_local_cpumask seen_by; /* map of cpu's */ 308 - }; 309 - 310 - /* 311 - * one for every slot in the destination software ack resources 312 - */ 313 - struct bau_sw_ack_status { 314 - struct bau_payload_queue_entry *msg; /* associated message */ 315 - int watcher; /* cpu monitoring, or -1 */ 316 - }; 317 - 318 - /* 319 - * one on every node and per-cpu; to locate the software tables 320 */ 321 struct bau_control { 322 struct bau_desc *descriptor_base; 323 - struct bau_payload_queue_entry *bau_msg_head; 324 struct bau_payload_queue_entry *va_queue_first; 325 struct bau_payload_queue_entry *va_queue_last; 326 - struct bau_msg_status *msg_statuses; 327 - int *watching; /* pointer to array */ 328 }; 329 330 /* 331 * This structure is allocated per_cpu for UV TLB shootdown statistics. 332 */ 333 struct ptc_stats { 334 - unsigned long ptc_i; /* number of IPI-style flushes */ 335 - unsigned long requestor; /* number of nodes this cpu sent to */ 336 - unsigned long requestee; /* times cpu was remotely requested */ 337 - unsigned long alltlb; /* times all tlb's on this cpu were flushed */ 338 - unsigned long onetlb; /* times just one tlb on this cpu was flushed */ 339 - unsigned long s_retry; /* retries on source side timeouts */ 340 - unsigned long d_retry; /* retries on destination side timeouts */ 341 - unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ 342 - unsigned long dflush; /* cycles spent on destination side */ 343 - unsigned long retriesok; /* successes on retries */ 344 - unsigned long nomsg; /* interrupts with no message */ 345 - unsigned long multmsg; /* interrupts with multiple messages */ 346 - unsigned long ntargeted;/* nodes targeted */ 347 }; 348 349 - static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) 350 { 351 - return constant_test_bit(node, &dstp->bits[0]); 352 } 353 - static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) 354 { 355 - __set_bit(node, &dstp->bits[0]); 356 } 357 - static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) 358 { 359 bitmap_zero(&dstp->bits[0], nbits); 360 } 361 362 static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) ··· 399 400 extern void uv_bau_message_intr1(void); 401 extern void uv_bau_timeout_intr1(void); 402 403 #endif /* _ASM_X86_UV_UV_BAU_H */
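
Note: with the 'node' to 'uvhub' rename, the bau_node_*() helpers above become
the bau_uvhub_*() helpers in the new version of the header that follows. A
minimal sketch of how a sender uses them to turn a cpumask into a uvhub
distribution mask; fill_uvhub_distribution() is a hypothetical helper, the
patch open-codes this in uv_flush_tlb_others():

/*
 * fill_uvhub_distribution() (hypothetical): fill the descriptor's
 * uvhub distribution bitmap from the cpus to be flushed, skipping
 * cpus on the local uvhub, and return how many hubs are targeted
 * (the basis for the new s_ntarguvhub* scope statistics).
 */
static int fill_uvhub_distribution(struct bau_desc *bau_desc,
				   const struct cpumask *flush_mask,
				   int this_uvhub)
{
	int tcpu;
	int uvhub;

	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
	for_each_cpu(tcpu, flush_mask) {
		uvhub = uv_cpu_to_blade_id(tcpu);
		if (uvhub == this_uvhub)
			continue;	/* local hub is flushed directly */
		bau_uvhub_set(uvhub, &bau_desc->distribution);
	}
	return bau_uvhub_weight(&bau_desc->distribution);
}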
··· 27 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. 28 * 29 * We will use 31 sets, one for sending BAU messages from each of the 32 30 + * cpu's on the uvhub. 31 * 32 * TLB shootdown will use the first of the 8 descriptors of each set. 33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). 34 */ 35 36 #define UV_ITEMS_PER_DESCRIPTOR 8 37 + #define MAX_BAU_CONCURRENT 3 38 #define UV_CPUS_PER_ACT_STATUS 32 39 #define UV_ACT_STATUS_MASK 0x3 40 #define UV_ACT_STATUS_SIZE 2 ··· 45 #define UV_PAYLOADQ_PNODE_SHIFT 49 46 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" 47 #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) 48 + #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 49 + #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 50 + #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL 51 52 /* 53 * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 ··· 55 #define DESC_STATUS_SOURCE_TIMEOUT 3 56 57 /* 58 + * source side threshholds at which message retries print a warning 59 */ 60 #define SOURCE_TIMEOUT_LIMIT 20 61 #define DESTINATION_TIMEOUT_LIMIT 20 62 63 /* 64 + * misc. delays, in microseconds 65 + */ 66 + #define THROTTLE_DELAY 10 67 + #define TIMEOUT_DELAY 10 68 + #define BIOS_TO 1000 69 + /* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */ 70 + 71 + /* 72 + * threshholds at which to use IPI to free resources 73 + */ 74 + #define PLUGSB4RESET 100 75 + #define TIMEOUTSB4RESET 100 76 + 77 + /* 78 * number of entries in the destination side payload queue 79 */ 80 + #define DEST_Q_SIZE 20 81 /* 82 * number of destination side software ack resources 83 */ ··· 72 /* 73 * completion statuses for sending a TLB flush message 74 */ 75 + #define FLUSH_RETRY_PLUGGED 1 76 + #define FLUSH_RETRY_TIMEOUT 2 77 + #define FLUSH_GIVEUP 3 78 + #define FLUSH_COMPLETE 4 79 80 /* 81 * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) ··· 86 * 'base_dest_nodeid' field of the header corresponds to the 87 * destination nodeID associated with that specified bit. 88 */ 89 + struct bau_target_uvhubmask { 90 + unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; 91 }; 92 93 /* 94 + * mask of cpu's on a uvhub 95 * (during initialization we need to check that unsigned long has 96 + * enough bits for max. 
cpu's per uvhub) 97 */ 98 struct bau_local_cpumask { 99 unsigned long bits; ··· 135 struct bau_msg_header { 136 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 137 /* bits 5:0 */ 138 + unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ 139 + /* bits 20:6 */ /* first bit in uvhub map */ 140 unsigned int command:8; /* message type */ 141 /* bits 28:21 */ 142 /* 0x38: SN3net EndPoint Message */ ··· 146 unsigned int rsvd_2:9; /* must be zero */ 147 /* bits 40:32 */ 148 /* Suppl_A is 56-41 */ 149 + unsigned int sequence:16;/* message sequence number */ 150 + /* bits 56:41 */ /* becomes bytes 16-17 of msg */ 151 /* Address field (96:57) is never used as an 152 address (these are address bits 42:3) */ 153 + 154 unsigned int rsvd_3:1; /* must be zero */ 155 /* bit 57 */ 156 /* address bits 27:4 are payload */ 157 + /* these next 24 (58-81) bits become bytes 12-14 of msg */ 158 + 159 + /* bits 65:58 land in byte 12 */ 160 unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ 161 /* bit 58 */ 162 + unsigned int msg_type:3; /* software type of the message*/ 163 + /* bits 61:59 */ 164 + unsigned int canceled:1; /* message canceled, resource to be freed*/ 165 + /* bit 62 */ 166 + unsigned int payload_1a:1;/* not currently used */ 167 + /* bit 63 */ 168 + unsigned int payload_1b:2;/* not currently used */ 169 + /* bits 65:64 */ 170 171 + /* bits 73:66 land in byte 13 */ 172 + unsigned int payload_1ca:6;/* not currently used */ 173 + /* bits 71:66 */ 174 + unsigned int payload_1c:2;/* not currently used */ 175 + /* bits 73:72 */ 176 + 177 + /* bits 81:74 land in byte 14 */ 178 + unsigned int payload_1d:6;/* not currently used */ 179 + /* bits 79:74 */ 180 + unsigned int payload_1e:2;/* not currently used */ 181 /* bits 81:80 */ 182 183 unsigned int rsvd_4:7; /* must be zero */ ··· 178 /* bits 95:90 */ 179 unsigned int rsvd_6:5; /* must be zero */ 180 /* bits 100:96 */ 181 + unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ 182 /* bit 101*/ 183 unsigned int fairness:3;/* usually zero */ 184 /* bits 104:102 */ ··· 191 /* bits 127:107 */ 192 }; 193 194 + /* see msg_type: */ 195 + #define MSG_NOOP 0 196 + #define MSG_REGULAR 1 197 + #define MSG_RETRY 2 198 + 199 /* 200 * The activation descriptor: 201 * The format of the message to send, plus all accompanying control 202 * Should be 64 bytes 203 */ 204 struct bau_desc { 205 + struct bau_target_uvhubmask distribution; 206 /* 207 * message template, consisting of header and payload: 208 */ ··· 237 unsigned short acknowledge_count; /* filled in by destination */ 238 /* 16 bits, bytes 10-11 */ 239 240 + /* these next 3 bytes come from bits 58-81 of the message header */ 241 + unsigned short replied_to:1; /* sent as 0 by the source */ 242 + unsigned short msg_type:3; /* software message type */ 243 + unsigned short canceled:1; /* sent as 0 by the source */ 244 + unsigned short unused1:3; /* not currently using */ 245 + /* byte 12 */ 246 247 + unsigned char unused2a; /* not currently using */ 248 + /* byte 13 */ 249 + unsigned char unused2; /* not currently using */ 250 + /* byte 14 */ 251 252 unsigned char sw_ack_vector; /* filled in by the hardware */ 253 /* byte 15 (bits 127:120) */ 254 255 + unsigned short sequence; /* message sequence number */ 256 + /* bytes 16-17 */ 257 + unsigned char unused4[2]; /* not currently using bytes 18-19 */ 258 + /* bytes 18-19 */ 259 260 int number_of_cpus; /* filled in at destination */ 261 /* 32 bits, bytes 20-23 (aligned) */ ··· 259 }; 260 261 /* 262 + * one per-cpu; 
to locate the software tables 263 */ 264 struct bau_control { 265 struct bau_desc *descriptor_base; 266 struct bau_payload_queue_entry *va_queue_first; 267 struct bau_payload_queue_entry *va_queue_last; 268 + struct bau_payload_queue_entry *bau_msg_head; 269 + struct bau_control *uvhub_master; 270 + struct bau_control *socket_master; 271 + unsigned long timeout_interval; 272 + atomic_t active_descriptor_count; 273 + int max_concurrent; 274 + int max_concurrent_constant; 275 + int retry_message_scans; 276 + int plugged_tries; 277 + int timeout_tries; 278 + int ipi_attempts; 279 + int conseccompletes; 280 + short cpu; 281 + short uvhub_cpu; 282 + short uvhub; 283 + short cpus_in_socket; 284 + short cpus_in_uvhub; 285 + unsigned short message_number; 286 + unsigned short uvhub_quiesce; 287 + short socket_acknowledge_count[DEST_Q_SIZE]; 288 + cycles_t send_message; 289 + spinlock_t masks_lock; 290 + spinlock_t uvhub_lock; 291 + spinlock_t queue_lock; 292 }; 293 294 /* 295 * This structure is allocated per_cpu for UV TLB shootdown statistics. 296 */ 297 struct ptc_stats { 298 + /* sender statistics */ 299 + unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ 300 + unsigned long s_requestor; /* number of shootdown requests */ 301 + unsigned long s_stimeout; /* source side timeouts */ 302 + unsigned long s_dtimeout; /* destination side timeouts */ 303 + unsigned long s_time; /* time spent in sending side */ 304 + unsigned long s_retriesok; /* successful retries */ 305 + unsigned long s_ntargcpu; /* number of cpus targeted */ 306 + unsigned long s_ntarguvhub; /* number of uvhubs targeted */ 307 + unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */ 308 + unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */ 309 + unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */ 310 + unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */ 311 + unsigned long s_ntarguvhub1; /* number of times == 1 target hub */ 312 + unsigned long s_resets_plug; /* ipi-style resets from plug state */ 313 + unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ 314 + unsigned long s_busy; /* status stayed busy past s/w timer */ 315 + unsigned long s_throttles; /* waits in throttle */ 316 + unsigned long s_retry_messages; /* retry broadcasts */ 317 + /* destination statistics */ 318 + unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ 319 + unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ 320 + unsigned long d_multmsg; /* interrupts with multiple messages */ 321 + unsigned long d_nomsg; /* interrupts with no message */ 322 + unsigned long d_time; /* time spent on destination side */ 323 + unsigned long d_requestee; /* number of messages processed */ 324 + unsigned long d_retries; /* number of retry messages processed */ 325 + unsigned long d_canceled; /* number of messages canceled by retries */ 326 + unsigned long d_nocanceled; /* retries that found nothing to cancel */ 327 + unsigned long d_resets; /* number of ipi-style requests processed */ 328 + unsigned long d_rcanceled; /* number of messages canceled by resets */ 329 }; 330 331 + static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) 332 { 333 + return constant_test_bit(uvhub, &dstp->bits[0]); 334 } 335 + static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) 336 { 337 + __set_bit(uvhub, &dstp->bits[0]); 338 } 339 + static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, 340 + 
int nbits) 341 { 342 bitmap_zero(&dstp->bits[0], nbits); 343 + } 344 + static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) 345 + { 346 + return bitmap_weight((unsigned long *)&dstp->bits[0], 347 + UV_DISTRIBUTION_SIZE); 348 } 349 350 static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) ··· 327 328 extern void uv_bau_message_intr1(void); 329 extern void uv_bau_timeout_intr1(void); 330 + 331 + struct atomic_short { 332 + short counter; 333 + }; 334 + 335 + /** 336 + * atomic_read_short - read a short atomic variable 337 + * @v: pointer of type atomic_short 338 + * 339 + * Atomically reads the value of @v. 340 + */ 341 + static inline int atomic_read_short(const struct atomic_short *v) 342 + { 343 + return v->counter; 344 + } 345 + 346 + /** 347 + * atomic_add_short_return - add and return a short int 348 + * @i: short value to add 349 + * @v: pointer of type atomic_short 350 + * 351 + * Atomically adds @i to @v and returns @i + @v 352 + */ 353 + static inline int atomic_add_short_return(short i, struct atomic_short *v) 354 + { 355 + short __i = i; 356 + asm volatile(LOCK_PREFIX "xaddw %0, %1" 357 + : "+r" (i), "+m" (v->counter) 358 + : : "memory"); 359 + return i + __i; 360 + } 361 362 #endif /* _ASM_X86_UV_UV_BAU_H */
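
The struct atomic_short / atomic_add_short_return() addition at the end of the
header supports two-level acknowledge counting in the interrupt handler: count
responders per socket first, then fold the socket total into the message, so
the message's count cache line does not ping-pong between sockets. A minimal
sketch under that assumption; uvhub_done() is a hypothetical name for logic
that lives in uv_bau_process_message():

/*
 * uvhub_done() (hypothetical): each responding cpu counts itself on
 * its own socket; the last cpu on a socket folds the socket total into
 * the message's acknowledge_count; only the last cpu on the whole
 * uvhub sees the final total and should reply to the message.
 */
static int uvhub_done(struct bau_payload_queue_entry *msg, int msg_slot,
		      struct bau_control *bcp)
{
	short socket_acks;
	int msg_acks;
	struct bau_control *smaster = bcp->socket_master;

	socket_acks = atomic_add_short_return(1, (struct atomic_short *)
			&smaster->socket_acknowledge_count[msg_slot]);
	if (socket_acks != bcp->cpus_in_socket)
		return 0;		/* other cpus on this socket pending */

	smaster->socket_acknowledge_count[msg_slot] = 0;
	msg_acks = atomic_add_short_return(socket_acks,
			(struct atomic_short *)&msg->acknowledge_count);

	return msg_acks == bcp->cpus_in_uvhub;
}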
+910 -380  arch/x86/kernel/tlb_uv.c
··· 1 /* 2 * SGI UltraViolet TLB flush routines. 3 * 4 - * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI. 5 * 6 * This code is released under the GNU General Public License version 2 or 7 * later. ··· 19 #include <asm/idle.h> 20 #include <asm/tsc.h> 21 #include <asm/irq_vectors.h> 22 23 #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL 24 25 - static struct bau_control **uv_bau_table_bases __read_mostly; 26 - static int uv_bau_retry_limit __read_mostly; 27 28 /* base pnode in this partition */ 29 - static int uv_partition_base_pnode __read_mostly; 30 - 31 - static unsigned long uv_mmask __read_mostly; 32 33 static DEFINE_PER_CPU(struct ptc_stats, ptcstats); 34 static DEFINE_PER_CPU(struct bau_control, bau_control); 35 36 /* 37 - * Determine the first node on a blade. 38 */ 39 - static int __init blade_to_first_node(int blade) 40 { 41 int node, b; 42 43 for_each_online_node(node) { 44 b = uv_node_to_blade_id(node); 45 - if (blade == b) 46 return node; 47 } 48 - return -1; /* shouldn't happen */ 49 } 50 51 /* 52 - * Determine the apicid of the first cpu on a blade. 53 */ 54 - static int __init blade_to_first_apicid(int blade) 55 { 56 int cpu; 57 58 for_each_present_cpu(cpu) 59 - if (blade == uv_cpu_to_blade_id(cpu)) 60 return per_cpu(x86_cpu_to_apicid, cpu); 61 return -1; 62 } ··· 92 * clear of the Timeout bit (as well) will free the resource. No reply will 93 * be sent (the hardware will only do one reply per message). 94 */ 95 - static void uv_reply_to_message(int resource, 96 - struct bau_payload_queue_entry *msg, 97 - struct bau_msg_status *msp) 98 { 99 unsigned long dw; 100 101 - dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); 102 msg->replied_to = 1; 103 msg->sw_ack_vector = 0; 104 - if (msp) 105 - msp->seen_by.bits = 0; 106 - uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); 107 } 108 109 /* 110 * Do all the things a cpu should do for a TLB shootdown message. 111 * Other cpu's may come here at the same time for this message. 112 */ 113 - static void uv_bau_process_message(struct bau_payload_queue_entry *msg, 114 - int msg_slot, int sw_ack_slot) 115 { 116 - unsigned long this_cpu_mask; 117 - struct bau_msg_status *msp; 118 - int cpu; 119 120 - msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; 121 - cpu = uv_blade_processor_id(); 122 - msg->number_of_cpus = 123 - uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); 124 - this_cpu_mask = 1UL << cpu; 125 - if (msp->seen_by.bits & this_cpu_mask) 126 - return; 127 - atomic_or_long(&msp->seen_by.bits, this_cpu_mask); 128 - 129 - if (msg->replied_to == 1) 130 - return; 131 - 132 if (msg->address == TLB_FLUSH_ALL) { 133 local_flush_tlb(); 134 - __get_cpu_var(ptcstats).alltlb++; 135 } else { 136 __flush_tlb_one(msg->address); 137 - __get_cpu_var(ptcstats).onetlb++; 138 } 139 140 - __get_cpu_var(ptcstats).requestee++; 141 142 - atomic_inc_short(&msg->acknowledge_count); 143 - if (msg->number_of_cpus == msg->acknowledge_count) 144 - uv_reply_to_message(sw_ack_slot, msg, msp); 145 - } 146 147 - /* 148 - * Examine the payload queue on one distribution node to see 149 - * which messages have not been seen, and which cpu(s) have not seen them. 150 - * 151 - * Returns the number of cpu's that have not responded. 
152 - */ 153 - static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) 154 - { 155 - struct bau_payload_queue_entry *msg; 156 - struct bau_msg_status *msp; 157 - int count = 0; 158 - int i; 159 - int j; 160 - 161 - for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; 162 - msg++, i++) { 163 - if ((msg->sending_cpu == sender) && (!msg->replied_to)) { 164 - msp = bau_tablesp->msg_statuses + i; 165 - printk(KERN_DEBUG 166 - "blade %d: address:%#lx %d of %d, not cpu(s): ", 167 - i, msg->address, msg->acknowledge_count, 168 - msg->number_of_cpus); 169 - for (j = 0; j < msg->number_of_cpus; j++) { 170 - if (!((1L << j) & msp->seen_by.bits)) { 171 - count++; 172 - printk("%d ", j); 173 - } 174 - } 175 - printk("\n"); 176 } 177 } 178 - return count; 179 } 180 181 /* 182 - * Examine the payload queue on all the distribution nodes to see 183 - * which messages have not been seen, and which cpu(s) have not seen them. 184 - * 185 - * Returns the number of cpu's that have not responded. 186 */ 187 - static int uv_examine_destinations(struct bau_target_nodemask *distribution) 188 { 189 - int sender; 190 - int i; 191 - int count = 0; 192 - 193 - sender = smp_processor_id(); 194 - for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { 195 - if (!bau_node_isset(i, distribution)) 196 - continue; 197 - count += uv_examine_destination(uv_bau_table_bases[i], sender); 198 - } 199 - return count; 200 } 201 202 /* 203 - * wait for completion of a broadcast message 204 * 205 - * return COMPLETE, RETRY or GIVEUP 206 */ 207 static int uv_wait_completion(struct bau_desc *bau_desc, 208 - unsigned long mmr_offset, int right_shift) 209 { 210 - int exams = 0; 211 - long destination_timeouts = 0; 212 - long source_timeouts = 0; 213 unsigned long descriptor_status; 214 215 while ((descriptor_status = (((unsigned long) 216 uv_read_local_mmr(mmr_offset) >> 217 right_shift) & UV_ACT_STATUS_MASK)) != 218 DESC_STATUS_IDLE) { 219 - if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { 220 - source_timeouts++; 221 - if (source_timeouts > SOURCE_TIMEOUT_LIMIT) 222 - source_timeouts = 0; 223 - __get_cpu_var(ptcstats).s_retry++; 224 - return FLUSH_RETRY; 225 - } 226 /* 227 - * spin here looking for progress at the destinations 228 */ 229 - if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { 230 - destination_timeouts++; 231 - if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { 232 - /* 233 - * returns number of cpus not responding 234 - */ 235 - if (uv_examine_destinations 236 - (&bau_desc->distribution) == 0) { 237 - __get_cpu_var(ptcstats).d_retry++; 238 - return FLUSH_RETRY; 239 - } 240 - exams++; 241 - if (exams >= uv_bau_retry_limit) { 242 - printk(KERN_DEBUG 243 - "uv_flush_tlb_others"); 244 - printk("giving up on cpu %d\n", 245 - smp_processor_id()); 246 return FLUSH_GIVEUP; 247 } 248 - /* 249 - * delays can hang the simulator 250 - udelay(1000); 251 - */ 252 - destination_timeouts = 0; 253 } 254 } 255 - cpu_relax(); 256 } 257 return FLUSH_COMPLETE; 258 } 259 260 /** 261 * uv_flush_send_and_wait 262 * 263 - * Send a broadcast and wait for a broadcast message to complete. 264 * 265 - * The flush_mask contains the cpus the broadcast was sent to. 266 * 267 - * Returns NULL if all remote flushing was done. The mask is zeroed. 268 * Returns @flush_mask if some remote flushing remains to be done. The 269 - * mask will have some bits still set. 
270 */ 271 - const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, 272 - struct bau_desc *bau_desc, 273 - struct cpumask *flush_mask) 274 { 275 - int completion_status = 0; 276 int right_shift; 277 - int tries = 0; 278 - int pnode; 279 int bit; 280 unsigned long mmr_offset; 281 unsigned long index; 282 cycles_t time1; 283 cycles_t time2; 284 285 if (cpu < UV_CPUS_PER_ACT_STATUS) { 286 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; ··· 556 } 557 time1 = get_cycles(); 558 do { 559 - tries++; 560 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | 561 - cpu; 562 - uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); 563 - completion_status = uv_wait_completion(bau_desc, mmr_offset, 564 - right_shift); 565 - } while (completion_status == FLUSH_RETRY); 566 - time2 = get_cycles(); 567 - __get_cpu_var(ptcstats).sflush += (time2 - time1); 568 - if (tries > 1) 569 - __get_cpu_var(ptcstats).retriesok++; 570 571 - if (completion_status == FLUSH_GIVEUP) { 572 /* 573 * Cause the caller to do an IPI-style TLB shootdown on 574 - * the cpu's, all of which are still in the mask. 575 */ 576 - __get_cpu_var(ptcstats).ptc_i++; 577 return flush_mask; 578 } 579 ··· 666 * use the IPI method of shootdown on them. 667 */ 668 for_each_cpu(bit, flush_mask) { 669 - pnode = uv_cpu_to_pnode(bit); 670 - if (pnode == this_pnode) 671 continue; 672 cpumask_clear_cpu(bit, flush_mask); 673 } 674 if (!cpumask_empty(flush_mask)) 675 return flush_mask; 676 return NULL; 677 } 678 - 679 - static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); 680 681 /** 682 * uv_flush_tlb_others - globally purge translation cache of a virtual ··· 693 * The caller has derived the cpumask from the mm_struct. This function 694 * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) 695 * 696 - * The cpumask is converted into a nodemask of the nodes containing 697 - * the cpus. 698 * 699 * Note that this function should be called with preemption disabled. 700 * ··· 706 struct mm_struct *mm, 707 unsigned long va, unsigned int cpu) 708 { 709 - struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); 710 - int i; 711 - int bit; 712 - int pnode; 713 - int uv_cpu; 714 - int this_pnode; 715 int locals = 0; 716 struct bau_desc *bau_desc; 717 718 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); 719 720 - uv_cpu = uv_blade_processor_id(); 721 - this_pnode = uv_hub_info->pnode; 722 - bau_desc = __get_cpu_var(bau_control).descriptor_base; 723 - bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; 724 725 - bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 726 - 727 - i = 0; 728 - for_each_cpu(bit, flush_mask) { 729 - pnode = uv_cpu_to_pnode(bit); 730 - BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); 731 - if (pnode == this_pnode) { 732 locals++; 733 continue; 734 } 735 - bau_node_set(pnode - uv_partition_base_pnode, 736 - &bau_desc->distribution); 737 - i++; 738 } 739 - if (i == 0) { 740 /* 741 - * no off_node flushing; return status for local node 742 */ 743 if (locals) 744 - return flush_mask; 745 else 746 return NULL; 747 } 748 - __get_cpu_var(ptcstats).requestor++; 749 - __get_cpu_var(ptcstats).ntargeted += i; 750 751 bau_desc->payload.address = va; 752 bau_desc->payload.sending_cpu = cpu; 753 754 - return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); 755 } 756 757 /* ··· 790 * 791 * We received a broadcast assist message. 792 * 793 - * Interrupts may have been disabled; this interrupt could represent 794 * the receipt of several messages. 
795 * 796 - * All cores/threads on this node get this interrupt. 797 - * The last one to see it does the s/w ack. 798 * (the resource will not be freed until noninterruptable cpus see this 799 - * interrupt; hardware will timeout the s/w ack and reply ERROR) 800 */ 801 void uv_bau_message_interrupt(struct pt_regs *regs) 802 { 803 - struct bau_payload_queue_entry *va_queue_first; 804 - struct bau_payload_queue_entry *va_queue_last; 805 - struct bau_payload_queue_entry *msg; 806 - struct pt_regs *old_regs = set_irq_regs(regs); 807 - cycles_t time1; 808 - cycles_t time2; 809 - int msg_slot; 810 - int sw_ack_slot; 811 - int fw; 812 int count = 0; 813 - unsigned long local_pnode; 814 815 - ack_APIC_irq(); 816 - exit_idle(); 817 - irq_enter(); 818 - 819 - time1 = get_cycles(); 820 - 821 - local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); 822 - 823 - va_queue_first = __get_cpu_var(bau_control).va_queue_first; 824 - va_queue_last = __get_cpu_var(bau_control).va_queue_last; 825 - 826 - msg = __get_cpu_var(bau_control).bau_msg_head; 827 while (msg->sw_ack_vector) { 828 count++; 829 - fw = msg->sw_ack_vector; 830 - msg_slot = msg - va_queue_first; 831 - sw_ack_slot = ffs(fw) - 1; 832 - 833 - uv_bau_process_message(msg, msg_slot, sw_ack_slot); 834 - 835 msg++; 836 - if (msg > va_queue_last) 837 - msg = va_queue_first; 838 - __get_cpu_var(bau_control).bau_msg_head = msg; 839 } 840 if (!count) 841 - __get_cpu_var(ptcstats).nomsg++; 842 else if (count > 1) 843 - __get_cpu_var(ptcstats).multmsg++; 844 - 845 - time2 = get_cycles(); 846 - __get_cpu_var(ptcstats).dflush += (time2 - time1); 847 - 848 - irq_exit(); 849 - set_irq_regs(old_regs); 850 } 851 852 /* 853 * uv_enable_timeouts 854 * 855 - * Each target blade (i.e. blades that have cpu's) needs to have 856 * shootdown message timeouts enabled. The timeout does not cause 857 * an interrupt, but causes an error message to be returned to 858 * the sender. 
859 */ 860 static void uv_enable_timeouts(void) 861 { 862 - int blade; 863 - int nblades; 864 int pnode; 865 unsigned long mmr_image; 866 867 - nblades = uv_num_possible_blades(); 868 869 - for (blade = 0; blade < nblades; blade++) { 870 - if (!uv_blade_nr_possible_cpus(blade)) 871 continue; 872 873 - pnode = uv_blade_to_pnode(blade); 874 mmr_image = 875 uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); 876 /* ··· 906 { 907 } 908 909 /* 910 - * Display the statistics thru /proc 911 - * data points to the cpu number 912 */ 913 static int uv_ptc_seq_show(struct seq_file *file, void *data) 914 { ··· 930 931 if (!cpu) { 932 seq_printf(file, 933 - "# cpu requestor requestee one all sretry dretry ptc_i "); 934 seq_printf(file, 935 - "sw_ack sflush dflush sok dnomsg dmult starget\n"); 936 } 937 if (cpu < num_possible_cpus() && cpu_online(cpu)) { 938 stat = &per_cpu(ptcstats, cpu); 939 - seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", 940 - cpu, stat->requestor, 941 - stat->requestee, stat->onetlb, stat->alltlb, 942 - stat->s_retry, stat->d_retry, stat->ptc_i); 943 - seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", 944 uv_read_global_mmr64(uv_cpu_to_pnode(cpu), 945 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), 946 - stat->sflush, stat->dflush, 947 - stat->retriesok, stat->nomsg, 948 - stat->multmsg, stat->ntargeted); 949 } 950 951 return 0; 952 } 953 954 /* 955 * 0: display meaning of the statistics 956 - * >0: retry limit 957 */ 958 static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, 959 size_t count, loff_t *data) 960 { 961 - long newmode; 962 char optstr[64]; 963 964 if (count == 0 || count > sizeof(optstr)) 965 return -EINVAL; 966 if (copy_from_user(optstr, user, count)) 967 return -EFAULT; 968 optstr[count - 1] = '\0'; 969 - if (strict_strtoul(optstr, 10, &newmode) < 0) { 970 printk(KERN_DEBUG "%s is invalid\n", optstr); 971 return -EINVAL; 972 } 973 974 - if (newmode == 0) { 975 printk(KERN_DEBUG "# cpu: cpu number\n"); 976 printk(KERN_DEBUG 977 - "requestor: times this cpu was the flush requestor\n"); 978 printk(KERN_DEBUG 979 - "requestee: times this cpu was requested to flush its TLBs\n"); 980 printk(KERN_DEBUG 981 - "one: times requested to flush a single address\n"); 982 printk(KERN_DEBUG 983 - "all: times requested to flush all TLB's\n"); 984 printk(KERN_DEBUG 985 - "sretry: number of retries of source-side timeouts\n"); 986 printk(KERN_DEBUG 987 - "dretry: number of retries of destination-side timeouts\n"); 988 printk(KERN_DEBUG 989 - "ptc_i: times UV fell through to IPI-style flushes\n"); 990 printk(KERN_DEBUG 991 - "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); 992 printk(KERN_DEBUG 993 - "sflush_us: cycles spent in uv_flush_tlb_others()\n"); 994 printk(KERN_DEBUG 995 - "dflush_us: cycles spent in handling flush requests\n"); 996 - printk(KERN_DEBUG "sok: successes on retry\n"); 997 - printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); 998 printk(KERN_DEBUG 999 - "dmult: interrupts with multiple messages\n"); 1000 - printk(KERN_DEBUG "starget: nodes targeted\n"); 1001 } else { 1002 - uv_bau_retry_limit = newmode; 1003 - printk(KERN_DEBUG "timeout retry limit:%d\n", 1004 - uv_bau_retry_limit); 1005 } 1006 1007 return count; ··· 1122 } 1123 1124 /* 1125 - * begin the initialization of the per-blade control structures 1126 - */ 1127 - static struct bau_control * __init uv_table_bases_init(int blade, int node) 1128 - { 1129 - int i; 1130 - struct bau_msg_status *msp; 1131 - struct bau_control *bau_tabp; 1132 - 1133 - bau_tabp = 1134 - 
kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); 1135 - BUG_ON(!bau_tabp); 1136 - 1137 - bau_tabp->msg_statuses = 1138 - kmalloc_node(sizeof(struct bau_msg_status) * 1139 - DEST_Q_SIZE, GFP_KERNEL, node); 1140 - BUG_ON(!bau_tabp->msg_statuses); 1141 - 1142 - for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) 1143 - bau_cpubits_clear(&msp->seen_by, (int) 1144 - uv_blade_nr_possible_cpus(blade)); 1145 - 1146 - uv_bau_table_bases[blade] = bau_tabp; 1147 - 1148 - return bau_tabp; 1149 - } 1150 - 1151 - /* 1152 - * finish the initialization of the per-blade control structures 1153 - */ 1154 - static void __init 1155 - uv_table_bases_finish(int blade, 1156 - struct bau_control *bau_tablesp, 1157 - struct bau_desc *adp) 1158 - { 1159 - struct bau_control *bcp; 1160 - int cpu; 1161 - 1162 - for_each_present_cpu(cpu) { 1163 - if (blade != uv_cpu_to_blade_id(cpu)) 1164 - continue; 1165 - 1166 - bcp = (struct bau_control *)&per_cpu(bau_control, cpu); 1167 - bcp->bau_msg_head = bau_tablesp->va_queue_first; 1168 - bcp->va_queue_first = bau_tablesp->va_queue_first; 1169 - bcp->va_queue_last = bau_tablesp->va_queue_last; 1170 - bcp->msg_statuses = bau_tablesp->msg_statuses; 1171 - bcp->descriptor_base = adp; 1172 - } 1173 - } 1174 - 1175 - /* 1176 * initialize the sending side's sending buffers 1177 */ 1178 - static struct bau_desc * __init 1179 uv_activation_descriptor_init(int node, int pnode) 1180 { 1181 int i; 1182 unsigned long pa; 1183 unsigned long m; 1184 unsigned long n; 1185 - struct bau_desc *adp; 1186 - struct bau_desc *ad2; 1187 1188 /* 1189 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) 1190 - * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade 1191 */ 1192 - adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* 1193 UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); 1194 - BUG_ON(!adp); 1195 1196 - pa = uv_gpa(adp); /* need the real nasid*/ 1197 - n = uv_gpa_to_pnode(pa); 1198 m = pa & uv_mmask; 1199 1200 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, ··· 1154 /* 1155 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each 1156 * cpu even though we only use the first one; one descriptor can 1157 - * describe a broadcast to 256 nodes. 1158 */ 1159 - for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); 1160 - i++, ad2++) { 1161 - memset(ad2, 0, sizeof(struct bau_desc)); 1162 - ad2->header.sw_ack_flag = 1; 1163 /* 1164 - * base_dest_nodeid is the first node in the partition, so 1165 - * the bit map will indicate partition-relative node numbers. 1166 - * note that base_dest_nodeid is actually a nasid. 
1167 */ 1168 - ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 1169 - ad2->header.dest_subnodeid = 0x10; /* the LB */ 1170 - ad2->header.command = UV_NET_ENDPOINT_INTD; 1171 - ad2->header.int_both = 1; 1172 /* 1173 * all others need to be set to zero: 1174 * fairness chaining multilevel count replied_to 1175 */ 1176 } 1177 - return adp; 1178 } 1179 1180 /* 1181 * initialize the destination side's receiving buffers 1182 */ 1183 - static struct bau_payload_queue_entry * __init 1184 - uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) 1185 { 1186 - struct bau_payload_queue_entry *pqp; 1187 - unsigned long pa; 1188 int pn; 1189 char *cp; 1190 1191 pqp = (struct bau_payload_queue_entry *) kmalloc_node( 1192 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), 1193 GFP_KERNEL, node); 1194 BUG_ON(!pqp); 1195 1196 cp = (char *)pqp + 31; 1197 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); 1198 - bau_tablesp->va_queue_first = pqp; 1199 /* 1200 * need the pnode of where the memory was really allocated 1201 */ 1202 pa = uv_gpa(pqp); 1203 - pn = uv_gpa_to_pnode(pa); 1204 uv_write_global_mmr64(pnode, 1205 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, 1206 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | 1207 uv_physnodeaddr(pqp)); 1208 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, 1209 uv_physnodeaddr(pqp)); 1210 - bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); 1211 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, 1212 (unsigned long) 1213 - uv_physnodeaddr(bau_tablesp->va_queue_last)); 1214 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); 1215 - 1216 - return pqp; 1217 } 1218 1219 /* 1220 - * Initialization of each UV blade's structures 1221 */ 1222 - static int __init uv_init_blade(int blade) 1223 { 1224 int node; 1225 int pnode; 1226 - unsigned long pa; 1227 unsigned long apicid; 1228 - struct bau_desc *adp; 1229 - struct bau_payload_queue_entry *pqp; 1230 - struct bau_control *bau_tablesp; 1231 1232 - node = blade_to_first_node(blade); 1233 - bau_tablesp = uv_table_bases_init(blade, node); 1234 - pnode = uv_blade_to_pnode(blade); 1235 - adp = uv_activation_descriptor_init(node, pnode); 1236 - pqp = uv_payload_queue_init(node, pnode, bau_tablesp); 1237 - uv_table_bases_finish(blade, bau_tablesp, adp); 1238 /* 1239 * the below initialization can't be in firmware because the 1240 * messaging IRQ will be determined by the OS 1241 */ 1242 - apicid = blade_to_first_apicid(blade); 1243 - pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); 1244 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1245 - ((apicid << 32) | UV_BAU_MESSAGE)); 1246 - return 0; 1247 } 1248 1249 /* ··· 1343 */ 1344 static int __init uv_bau_init(void) 1345 { 1346 - int blade; 1347 - int nblades; 1348 int cur_cpu; 1349 1350 if (!is_uv_system()) 1351 return 0; 1352 1353 for_each_possible_cpu(cur_cpu) 1354 zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), 1355 GFP_KERNEL, cpu_to_node(cur_cpu)); 1356 1357 - uv_bau_retry_limit = 1; 1358 uv_mmask = (1UL << uv_hub_info->m_val) - 1; 1359 - nblades = uv_num_possible_blades(); 1360 1361 - uv_bau_table_bases = (struct bau_control **) 1362 - kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); 1363 - BUG_ON(!uv_bau_table_bases); 1364 1365 uv_partition_base_pnode = 0x7fffffff; 1366 - for (blade = 0; blade < nblades; blade++) 1367 - if (uv_blade_nr_possible_cpus(blade) && 1368 - (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) 1369 - 
uv_partition_base_pnode = uv_blade_to_pnode(blade); 1370 - for (blade = 0; blade < nblades; blade++) 1371 - if (uv_blade_nr_possible_cpus(blade)) 1372 - uv_init_blade(blade); 1373 1374 - alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); 1375 uv_enable_timeouts(); 1376 1377 return 0; 1378 } 1379 - __initcall(uv_bau_init); 1380 - __initcall(uv_ptc_init);
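
In the new tlb_uv.c that follows, timed-out software-ack resources are
released by IPIing one cpu on each target uvhub (uv_reset_with_ipi() calls
uv_do_reset() via smp_call_function_many()). A minimal sketch of the
per-destination reset; reset_sender_msgs() is a hypothetical condensation of
uv_do_reset():

/*
 * reset_sender_msgs() (hypothetical): cancel every un-replied,
 * un-canceled message from the given sender and free its sw_ack
 * resource if the hardware still shows it pending.
 */
static void reset_sender_msgs(struct bau_control *bcp, int sender)
{
	int i;
	unsigned long mmr;
	unsigned long msg_res;
	struct bau_payload_queue_entry *msg;

	for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
		if (msg->replied_to || msg->canceled ||
		    msg->sending_cpu != sender || !msg->sw_ack_vector ||
		    msg->msg_type == MSG_NOOP)
			continue;

		msg->canceled = 1;	/* make every other cpu ignore it */

		/* free the resource only if it is still pending */
		mmr = uv_read_local_mmr(
			UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
		msg_res = (msg->sw_ack_vector << 8) | msg->sw_ack_vector;
		if (mmr & msg_res)
			uv_write_local_mmr(
				UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
				msg_res);
	}
}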
··· 1 /* 2 * SGI UltraViolet TLB flush routines. 3 * 4 + * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. 5 * 6 * This code is released under the GNU General Public License version 2 or 7 * later. ··· 19 #include <asm/idle.h> 20 #include <asm/tsc.h> 21 #include <asm/irq_vectors.h> 22 + #include <asm/timer.h> 23 + 24 + struct msg_desc { 25 + struct bau_payload_queue_entry *msg; 26 + int msg_slot; 27 + int sw_ack_slot; 28 + struct bau_payload_queue_entry *va_queue_first; 29 + struct bau_payload_queue_entry *va_queue_last; 30 + }; 31 32 #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL 33 34 + static int uv_bau_max_concurrent __read_mostly; 35 + 36 + static int nobau; 37 + static int __init setup_nobau(char *arg) 38 + { 39 + nobau = 1; 40 + return 0; 41 + } 42 + early_param("nobau", setup_nobau); 43 44 /* base pnode in this partition */ 45 + static int uv_partition_base_pnode __read_mostly; 46 + /* position of pnode (which is nasid>>1): */ 47 + static int uv_nshift __read_mostly; 48 + static unsigned long uv_mmask __read_mostly; 49 50 static DEFINE_PER_CPU(struct ptc_stats, ptcstats); 51 static DEFINE_PER_CPU(struct bau_control, bau_control); 52 + static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); 53 + 54 + struct reset_args { 55 + int sender; 56 + }; 57 58 /* 59 + * Determine the first node on a uvhub. 'Nodes' are used for kernel 60 + * memory allocation. 61 */ 62 + static int __init uvhub_to_first_node(int uvhub) 63 { 64 int node, b; 65 66 for_each_online_node(node) { 67 b = uv_node_to_blade_id(node); 68 + if (uvhub == b) 69 return node; 70 } 71 + return -1; 72 } 73 74 /* 75 + * Determine the apicid of the first cpu on a uvhub. 76 */ 77 + static int __init uvhub_to_first_apicid(int uvhub) 78 { 79 int cpu; 80 81 for_each_present_cpu(cpu) 82 + if (uvhub == uv_cpu_to_blade_id(cpu)) 83 return per_cpu(x86_cpu_to_apicid, cpu); 84 return -1; 85 } ··· 69 * clear of the Timeout bit (as well) will free the resource. No reply will 70 * be sent (the hardware will only do one reply per message). 
71 */ 72 + static inline void uv_reply_to_message(struct msg_desc *mdp, 73 + struct bau_control *bcp) 74 { 75 unsigned long dw; 76 + struct bau_payload_queue_entry *msg; 77 78 + msg = mdp->msg; 79 + if (!msg->canceled) { 80 + dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | 81 + msg->sw_ack_vector; 82 + uv_write_local_mmr( 83 + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); 84 + } 85 msg->replied_to = 1; 86 msg->sw_ack_vector = 0; 87 + } 88 + 89 + /* 90 + * Process the receipt of a RETRY message 91 + */ 92 + static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, 93 + struct bau_control *bcp) 94 + { 95 + int i; 96 + int cancel_count = 0; 97 + int slot2; 98 + unsigned long msg_res; 99 + unsigned long mmr = 0; 100 + struct bau_payload_queue_entry *msg; 101 + struct bau_payload_queue_entry *msg2; 102 + struct ptc_stats *stat; 103 + 104 + msg = mdp->msg; 105 + stat = &per_cpu(ptcstats, bcp->cpu); 106 + stat->d_retries++; 107 + /* 108 + * cancel any message from msg+1 to the retry itself 109 + */ 110 + for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { 111 + if (msg2 > mdp->va_queue_last) 112 + msg2 = mdp->va_queue_first; 113 + if (msg2 == msg) 114 + break; 115 + 116 + /* same conditions for cancellation as uv_do_reset */ 117 + if ((msg2->replied_to == 0) && (msg2->canceled == 0) && 118 + (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & 119 + msg->sw_ack_vector) == 0) && 120 + (msg2->sending_cpu == msg->sending_cpu) && 121 + (msg2->msg_type != MSG_NOOP)) { 122 + slot2 = msg2 - mdp->va_queue_first; 123 + mmr = uv_read_local_mmr 124 + (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 125 + msg_res = ((msg2->sw_ack_vector << 8) | 126 + msg2->sw_ack_vector); 127 + /* 128 + * This is a message retry; clear the resources held 129 + * by the previous message only if they timed out. 130 + * If it has not timed out we have an unexpected 131 + * situation to report. 132 + */ 133 + if (mmr & (msg_res << 8)) { 134 + /* 135 + * is the resource timed out? 136 + * make everyone ignore the cancelled message. 137 + */ 138 + msg2->canceled = 1; 139 + stat->d_canceled++; 140 + cancel_count++; 141 + uv_write_local_mmr( 142 + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, 143 + (msg_res << 8) | msg_res); 144 + } else 145 + printk(KERN_INFO "note bau retry: no effect\n"); 146 + } 147 + } 148 + if (!cancel_count) 149 + stat->d_nocanceled++; 150 } 151 152 /* 153 * Do all the things a cpu should do for a TLB shootdown message. 154 * Other cpu's may come here at the same time for this message. 155 */ 156 + static void uv_bau_process_message(struct msg_desc *mdp, 157 + struct bau_control *bcp) 158 { 159 + int msg_ack_count; 160 + short socket_ack_count = 0; 161 + struct ptc_stats *stat; 162 + struct bau_payload_queue_entry *msg; 163 + struct bau_control *smaster = bcp->socket_master; 164 165 + /* 166 + * This must be a normal message, or retry of a normal message 167 + */ 168 + msg = mdp->msg; 169 + stat = &per_cpu(ptcstats, bcp->cpu); 170 if (msg->address == TLB_FLUSH_ALL) { 171 local_flush_tlb(); 172 + stat->d_alltlb++; 173 } else { 174 __flush_tlb_one(msg->address); 175 + stat->d_onetlb++; 176 } 177 + stat->d_requestee++; 178 179 + /* 180 + * One cpu on each uvhub has the additional job on a RETRY 181 + * of releasing the resource held by the message that is 182 + * being retried. That message is identified by sending 183 + * cpu number. 
184 + */ 185 + if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) 186 + uv_bau_process_retry_msg(mdp, bcp); 187 188 + /* 189 + * This is a sw_ack message, so we have to reply to it. 190 + * Count each responding cpu on the socket. This avoids 191 + * pinging the count's cache line back and forth between 192 + * the sockets. 193 + */ 194 + socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) 195 + &smaster->socket_acknowledge_count[mdp->msg_slot]); 196 + if (socket_ack_count == bcp->cpus_in_socket) { 197 + /* 198 + * Both sockets dump their completed count total into 199 + * the message's count. 200 + */ 201 + smaster->socket_acknowledge_count[mdp->msg_slot] = 0; 202 + msg_ack_count = atomic_add_short_return(socket_ack_count, 203 + (struct atomic_short *)&msg->acknowledge_count); 204 205 + if (msg_ack_count == bcp->cpus_in_uvhub) { 206 + /* 207 + * All cpus in uvhub saw it; reply 208 + */ 209 + uv_reply_to_message(mdp, bcp); 210 } 211 } 212 + 213 + return; 214 } 215 216 /* 217 + * Determine the first cpu on a uvhub. 218 */ 219 + static int uvhub_to_first_cpu(int uvhub) 220 { 221 + int cpu; 222 + for_each_present_cpu(cpu) 223 + if (uvhub == uv_cpu_to_blade_id(cpu)) 224 + return cpu; 225 + return -1; 226 } 227 228 /* 229 + * Last resort when we get a large number of destination timeouts is 230 + * to clear resources held by a given cpu. 231 + * Do this with IPI so that all messages in the BAU message queue 232 + * can be identified by their nonzero sw_ack_vector field. 233 * 234 + * This is entered for a single cpu on the uvhub. 235 + * The sender want's this uvhub to free a specific message's 236 + * sw_ack resources. 237 + */ 238 + static void 239 + uv_do_reset(void *ptr) 240 + { 241 + int i; 242 + int slot; 243 + int count = 0; 244 + unsigned long mmr; 245 + unsigned long msg_res; 246 + struct bau_control *bcp; 247 + struct reset_args *rap; 248 + struct bau_payload_queue_entry *msg; 249 + struct ptc_stats *stat; 250 + 251 + bcp = &per_cpu(bau_control, smp_processor_id()); 252 + rap = (struct reset_args *)ptr; 253 + stat = &per_cpu(ptcstats, bcp->cpu); 254 + stat->d_resets++; 255 + 256 + /* 257 + * We're looking for the given sender, and 258 + * will free its sw_ack resource. 259 + * If all cpu's finally responded after the timeout, its 260 + * message 'replied_to' was set. 261 + */ 262 + for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { 263 + /* uv_do_reset: same conditions for cancellation as 264 + uv_bau_process_retry_msg() */ 265 + if ((msg->replied_to == 0) && 266 + (msg->canceled == 0) && 267 + (msg->sending_cpu == rap->sender) && 268 + (msg->sw_ack_vector) && 269 + (msg->msg_type != MSG_NOOP)) { 270 + /* 271 + * make everyone else ignore this message 272 + */ 273 + msg->canceled = 1; 274 + slot = msg - bcp->va_queue_first; 275 + count++; 276 + /* 277 + * only reset the resource if it is still pending 278 + */ 279 + mmr = uv_read_local_mmr 280 + (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 281 + msg_res = ((msg->sw_ack_vector << 8) | 282 + msg->sw_ack_vector); 283 + if (mmr & msg_res) { 284 + stat->d_rcanceled++; 285 + uv_write_local_mmr( 286 + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, 287 + msg_res); 288 + } 289 + } 290 + } 291 + return; 292 + } 293 + 294 + /* 295 + * Use IPI to get all target uvhubs to release resources held by 296 + * a given sending cpu number. 
297 + */ 298 + static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, 299 + int sender) 300 + { 301 + int uvhub; 302 + int cpu; 303 + cpumask_t mask; 304 + struct reset_args reset_args; 305 + 306 + reset_args.sender = sender; 307 + 308 + cpus_clear(mask); 309 + /* find a single cpu for each uvhub in this distribution mask */ 310 + for (uvhub = 0; 311 + uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; 312 + uvhub++) { 313 + if (!bau_uvhub_isset(uvhub, distribution)) 314 + continue; 315 + /* find a cpu for this uvhub */ 316 + cpu = uvhub_to_first_cpu(uvhub); 317 + cpu_set(cpu, mask); 318 + } 319 + /* IPI all cpus; Preemption is already disabled */ 320 + smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); 321 + return; 322 + } 323 + 324 + static inline unsigned long 325 + cycles_2_us(unsigned long long cyc) 326 + { 327 + unsigned long long ns; 328 + unsigned long us; 329 + ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) 330 + >> CYC2NS_SCALE_FACTOR; 331 + us = ns / 1000; 332 + return us; 333 + } 334 + 335 + /* 336 + * wait for all cpus on this hub to finish their sends and go quiet 337 + * leaves uvhub_quiesce set so that no new broadcasts are started by 338 + * bau_flush_send_and_wait() 339 + */ 340 + static inline void 341 + quiesce_local_uvhub(struct bau_control *hmaster) 342 + { 343 + atomic_add_short_return(1, (struct atomic_short *) 344 + &hmaster->uvhub_quiesce); 345 + } 346 + 347 + /* 348 + * mark this quiet-requestor as done 349 + */ 350 + static inline void 351 + end_uvhub_quiesce(struct bau_control *hmaster) 352 + { 353 + atomic_add_short_return(-1, (struct atomic_short *) 354 + &hmaster->uvhub_quiesce); 355 + } 356 + 357 + /* 358 + * Wait for completion of a broadcast software ack message 359 + * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP 360 */ 361 static int uv_wait_completion(struct bau_desc *bau_desc, 362 + unsigned long mmr_offset, int right_shift, int this_cpu, 363 + struct bau_control *bcp, struct bau_control *smaster, long try) 364 { 365 + int relaxes = 0; 366 unsigned long descriptor_status; 367 + unsigned long mmr; 368 + unsigned long mask; 369 + cycles_t ttime; 370 + cycles_t timeout_time; 371 + struct ptc_stats *stat = &per_cpu(ptcstats, this_cpu); 372 + struct bau_control *hmaster; 373 374 + hmaster = bcp->uvhub_master; 375 + timeout_time = get_cycles() + bcp->timeout_interval; 376 + 377 + /* spin on the status MMR, waiting for it to go idle */ 378 while ((descriptor_status = (((unsigned long) 379 uv_read_local_mmr(mmr_offset) >> 380 right_shift) & UV_ACT_STATUS_MASK)) != 381 DESC_STATUS_IDLE) { 382 /* 383 + * Our software ack messages may be blocked because there are 384 + * no swack resources available. As long as none of them 385 + * has timed out hardware will NACK our message and its 386 + * state will stay IDLE. 387 */ 388 + if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { 389 + stat->s_stimeout++; 390 + return FLUSH_GIVEUP; 391 + } else if (descriptor_status == 392 + DESC_STATUS_DESTINATION_TIMEOUT) { 393 + stat->s_dtimeout++; 394 + ttime = get_cycles(); 395 + 396 + /* 397 + * Our retries may be blocked by all destination 398 + * swack resources being consumed, and a timeout 399 + * pending. In that case hardware returns the 400 + * ERROR that looks like a destination timeout. 
401 + */ 402 + if (cycles_2_us(ttime - bcp->send_message) < BIOS_TO) { 403 + bcp->conseccompletes = 0; 404 + return FLUSH_RETRY_PLUGGED; 405 + } 406 + 407 + bcp->conseccompletes = 0; 408 + return FLUSH_RETRY_TIMEOUT; 409 + } else { 410 + /* 411 + * descriptor_status is still BUSY 412 + */ 413 + cpu_relax(); 414 + relaxes++; 415 + if (relaxes >= 10000) { 416 + relaxes = 0; 417 + if (get_cycles() > timeout_time) { 418 + quiesce_local_uvhub(hmaster); 419 + 420 + /* single-thread the register change */ 421 + spin_lock(&hmaster->masks_lock); 422 + mmr = uv_read_local_mmr(mmr_offset); 423 + mask = 0UL; 424 + mask |= (3UL < right_shift); 425 + mask = ~mask; 426 + mmr &= mask; 427 + uv_write_local_mmr(mmr_offset, mmr); 428 + spin_unlock(&hmaster->masks_lock); 429 + end_uvhub_quiesce(hmaster); 430 + stat->s_busy++; 431 return FLUSH_GIVEUP; 432 } 433 } 434 } 435 } 436 + bcp->conseccompletes++; 437 return FLUSH_COMPLETE; 438 + } 439 + 440 + static inline cycles_t 441 + sec_2_cycles(unsigned long sec) 442 + { 443 + unsigned long ns; 444 + cycles_t cyc; 445 + 446 + ns = sec * 1000000000; 447 + cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); 448 + return cyc; 449 + } 450 + 451 + /* 452 + * conditionally add 1 to *v, unless *v is >= u 453 + * return 0 if we cannot add 1 to *v because it is >= u 454 + * return 1 if we can add 1 to *v because it is < u 455 + * the add is atomic 456 + * 457 + * This is close to atomic_add_unless(), but this allows the 'u' value 458 + * to be lowered below the current 'v'. atomic_add_unless can only stop 459 + * on equal. 460 + */ 461 + static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) 462 + { 463 + spin_lock(lock); 464 + if (atomic_read(v) >= u) { 465 + spin_unlock(lock); 466 + return 0; 467 + } 468 + atomic_inc(v); 469 + spin_unlock(lock); 470 + return 1; 471 } 472 473 /** 474 * uv_flush_send_and_wait 475 * 476 + * Send a broadcast and wait for it to complete. 477 * 478 + * The flush_mask contains the cpus the broadcast is to be sent to, plus 479 + * cpus that are on the local uvhub. 480 * 481 + * Returns NULL if all flushing represented in the mask was done. The mask 482 + * is zeroed. 483 * Returns @flush_mask if some remote flushing remains to be done. The 484 + * mask will have some bits still set, representing any cpus on the local 485 + * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed. 486 */ 487 + const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, 488 + struct cpumask *flush_mask, 489 + struct bau_control *bcp) 490 { 491 int right_shift; 492 + int uvhub; 493 int bit; 494 + int completion_status = 0; 495 + int seq_number = 0; 496 + long try = 0; 497 + int cpu = bcp->uvhub_cpu; 498 + int this_cpu = bcp->cpu; 499 + int this_uvhub = bcp->uvhub; 500 unsigned long mmr_offset; 501 unsigned long index; 502 cycles_t time1; 503 cycles_t time2; 504 + struct ptc_stats *stat = &per_cpu(ptcstats, bcp->cpu); 505 + struct bau_control *smaster = bcp->socket_master; 506 + struct bau_control *hmaster = bcp->uvhub_master; 507 + 508 + /* 509 + * Spin here while there are hmaster->max_concurrent or more active 510 + * descriptors. This is the per-uvhub 'throttle'. 
511 + */ 512 + if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, 513 + &hmaster->active_descriptor_count, 514 + hmaster->max_concurrent)) { 515 + stat->s_throttles++; 516 + do { 517 + cpu_relax(); 518 + } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, 519 + &hmaster->active_descriptor_count, 520 + hmaster->max_concurrent)); 521 + } 522 + 523 + while (hmaster->uvhub_quiesce) 524 + cpu_relax(); 525 526 if (cpu < UV_CPUS_PER_ACT_STATUS) { 527 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; ··· 269 } 270 time1 = get_cycles(); 271 do { 272 + /* 273 + * Every message from any given cpu gets a unique message 274 + * sequence number. But retries use that same number. 275 + * Our message may have timed out at the destination because 276 + * all sw-ack resources are in use and there is a timeout 277 + * pending there. In that case, our last send never got 278 + * placed into the queue and we need to persist until it 279 + * does. 280 + * 281 + * Make any retry a type MSG_RETRY so that the destination will 282 + * free any resource held by a previous message from this cpu. 283 + */ 284 + if (try == 0) { 285 + /* use message type set by the caller the first time */ 286 + seq_number = bcp->message_number++; 287 + } else { 288 + /* use RETRY type on all the rest; same sequence */ 289 + bau_desc->header.msg_type = MSG_RETRY; 290 + stat->s_retry_messages++; 291 + } 292 + bau_desc->header.sequence = seq_number; 293 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | 294 + bcp->uvhub_cpu; 295 + bcp->send_message = get_cycles(); 296 297 + uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); 298 + 299 + try++; 300 + completion_status = uv_wait_completion(bau_desc, mmr_offset, 301 + right_shift, this_cpu, bcp, smaster, try); 302 + 303 + if (completion_status == FLUSH_RETRY_PLUGGED) { 304 + /* 305 + * Our retries may be blocked by all destination swack 306 + * resources being consumed, and a timeout pending. In 307 + * that case hardware immediately returns the ERROR 308 + * that looks like a destination timeout. 
309 + */ 310 + udelay(TIMEOUT_DELAY); 311 + bcp->plugged_tries++; 312 + if (bcp->plugged_tries >= PLUGSB4RESET) { 313 + bcp->plugged_tries = 0; 314 + quiesce_local_uvhub(hmaster); 315 + spin_lock(&hmaster->queue_lock); 316 + uv_reset_with_ipi(&bau_desc->distribution, 317 + this_cpu); 318 + spin_unlock(&hmaster->queue_lock); 319 + end_uvhub_quiesce(hmaster); 320 + bcp->ipi_attempts++; 321 + stat->s_resets_plug++; 322 + } 323 + } else if (completion_status == FLUSH_RETRY_TIMEOUT) { 324 + hmaster->max_concurrent = 1; 325 + bcp->timeout_tries++; 326 + udelay(TIMEOUT_DELAY); 327 + if (bcp->timeout_tries >= TIMEOUTSB4RESET) { 328 + bcp->timeout_tries = 0; 329 + quiesce_local_uvhub(hmaster); 330 + spin_lock(&hmaster->queue_lock); 331 + uv_reset_with_ipi(&bau_desc->distribution, 332 + this_cpu); 333 + spin_unlock(&hmaster->queue_lock); 334 + end_uvhub_quiesce(hmaster); 335 + bcp->ipi_attempts++; 336 + stat->s_resets_timeout++; 337 + } 338 + } 339 + if (bcp->ipi_attempts >= 3) { 340 + bcp->ipi_attempts = 0; 341 + completion_status = FLUSH_GIVEUP; 342 + break; 343 + } 344 + cpu_relax(); 345 + } while ((completion_status == FLUSH_RETRY_PLUGGED) || 346 + (completion_status == FLUSH_RETRY_TIMEOUT)); 347 + time2 = get_cycles(); 348 + 349 + if ((completion_status == FLUSH_COMPLETE) && (bcp->conseccompletes > 5) 350 + && (hmaster->max_concurrent < hmaster->max_concurrent_constant)) 351 + hmaster->max_concurrent++; 352 + 353 + /* 354 + * hold any cpu not timing out here; no other cpu currently held by 355 + * the 'throttle' should enter the activation code 356 + */ 357 + while (hmaster->uvhub_quiesce) 358 + cpu_relax(); 359 + atomic_dec(&hmaster->active_descriptor_count); 360 + 361 + /* guard against cycles wrap */ 362 + if (time2 > time1) 363 + stat->s_time += (time2 - time1); 364 + else 365 + stat->s_requestor--; /* don't count this one */ 366 + if (completion_status == FLUSH_COMPLETE && try > 1) 367 + stat->s_retriesok++; 368 + else if (completion_status == FLUSH_GIVEUP) { 369 /* 370 * Cause the caller to do an IPI-style TLB shootdown on 371 + * the target cpu's, all of which are still in the mask. 372 */ 373 + stat->s_giveup++; 374 return flush_mask; 375 } 376 ··· 295 * use the IPI method of shootdown on them. 296 */ 297 for_each_cpu(bit, flush_mask) { 298 + uvhub = uv_cpu_to_blade_id(bit); 299 + if (uvhub == this_uvhub) 300 continue; 301 cpumask_clear_cpu(bit, flush_mask); 302 } 303 if (!cpumask_empty(flush_mask)) 304 return flush_mask; 305 + 306 return NULL; 307 } 308 309 /** 310 * uv_flush_tlb_others - globally purge translation cache of a virtual ··· 323 * The caller has derived the cpumask from the mm_struct. This function 324 * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) 325 * 326 + * The cpumask is converted into a uvhubmask of the uvhubs containing 327 + * those cpus. 328 * 329 * Note that this function should be called with preemption disabled. 330 * ··· 336 struct mm_struct *mm, 337 unsigned long va, unsigned int cpu) 338 { 339 + int remotes; 340 + int tcpu; 341 + int uvhub; 342 int locals = 0; 343 struct bau_desc *bau_desc; 344 + struct cpumask *flush_mask; 345 + struct ptc_stats *stat; 346 + struct bau_control *bcp; 347 348 + if (nobau) 349 + return cpumask; 350 + 351 + bcp = &per_cpu(bau_control, cpu); 352 + /* 353 + * Each sending cpu has a per-cpu mask which it fills from the caller's 354 + * cpu mask. Only remote cpus are converted to uvhubs and copied. 
355 + */
356 + flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
357 + /*
358 + * copy cpumask to flush_mask, removing current cpu
359 + * (current cpu should already have been flushed by the caller and
360 + * should never be returned if we return flush_mask)
361 + */
362 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
363 + if (cpu_isset(cpu, *cpumask))
364 + locals++; /* current cpu was targeted */
365
366 + bau_desc = bcp->descriptor_base;
367 + bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
368
369 + bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
370 + remotes = 0;
371 + for_each_cpu(tcpu, flush_mask) {
372 + uvhub = uv_cpu_to_blade_id(tcpu);
373 + if (uvhub == bcp->uvhub) {
374 locals++;
375 continue;
376 }
377 + bau_uvhub_set(uvhub, &bau_desc->distribution);
378 + remotes++;
379 }
380 + if (remotes == 0) {
381 /*
382 + * No off_hub flushing; return status for local hub.
383 + * Return the caller's mask if all were local (the current
384 + * cpu may be in that mask).
385 */
386 if (locals)
387 + return cpumask;
388 else
389 return NULL;
390 }
391 + stat = &per_cpu(ptcstats, cpu);
392 + stat->s_requestor++;
393 + stat->s_ntargcpu += remotes;
394 + remotes = bau_uvhub_weight(&bau_desc->distribution);
395 + stat->s_ntarguvhub += remotes;
396 + if (remotes >= 16)
397 + stat->s_ntarguvhub16++;
398 + else if (remotes >= 8)
399 + stat->s_ntarguvhub8++;
400 + else if (remotes >= 4)
401 + stat->s_ntarguvhub4++;
402 + else if (remotes >= 2)
403 + stat->s_ntarguvhub2++;
404 + else
405 + stat->s_ntarguvhub1++;
406
407 bau_desc->payload.address = va;
408 bau_desc->payload.sending_cpu = cpu;
409
410 + /*
411 + * uv_flush_send_and_wait returns NULL if all cpu's were messaged, or
412 + * the adjusted flush_mask if any cpu's were not messaged.
413 + */
414 + return uv_flush_send_and_wait(bau_desc, flush_mask, bcp);
415 }
416
417 /*
···
390 *
391 * We received a broadcast assist message.
392 *
393 + * Interrupts are disabled; this interrupt could represent
394 * the receipt of several messages.
395 *
396 + * All cores/threads on this hub get this interrupt.
397 + * The last one to see it does the software ack.
398 * (the resource will not be freed until noninterruptable cpus see this
399 + * interrupt; hardware may timeout the s/w ack and reply ERROR)
400 */
401 void uv_bau_message_interrupt(struct pt_regs *regs)
402 {
403 int count = 0;
404 + cycles_t time_start;
405 + struct bau_payload_queue_entry *msg;
406 + struct bau_control *bcp;
407 + struct ptc_stats *stat;
408 + struct msg_desc msgdesc;
409
410 + time_start = get_cycles();
411 + bcp = &per_cpu(bau_control, smp_processor_id());
412 + stat = &per_cpu(ptcstats, smp_processor_id());
413 + msgdesc.va_queue_first = bcp->va_queue_first;
414 + msgdesc.va_queue_last = bcp->va_queue_last;
415 + msg = bcp->bau_msg_head;
416 while (msg->sw_ack_vector) {
417 count++;
418 + msgdesc.msg_slot = msg - msgdesc.va_queue_first;
419 + msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1;
420 + msgdesc.msg = msg;
421 + uv_bau_process_message(&msgdesc, bcp);
422 msg++;
423 + if (msg > msgdesc.va_queue_last)
424 + msg = msgdesc.va_queue_first;
425 + bcp->bau_msg_head = msg;
426 }
427 + stat->d_time += (get_cycles() - time_start);
428 if (!count)
429 + stat->d_nomsg++;
430 else if (count > 1)
431 + stat->d_multmsg++;
432 + ack_APIC_irq();
433 }
434
435 /*
436 * uv_enable_timeouts
437 *
438 + * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
439 shootdown message timeouts enabled.
The timeout does not cause
440 an interrupt, but causes an error message to be returned to
441 the sender.
442 */
443 static void uv_enable_timeouts(void)
444 {
445 + int uvhub;
446 + int nuvhubs;
447 int pnode;
448 unsigned long mmr_image;
449
450 + nuvhubs = uv_num_possible_blades();
451
452 + for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
453 + if (!uv_blade_nr_possible_cpus(uvhub))
454 continue;
455
456 + pnode = uv_blade_to_pnode(uvhub);
457 mmr_image =
458 uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
459 /*
···
523 {
524 }
525
526 + static inline unsigned long long
527 + millisec_2_cycles(unsigned long millisec)
528 + {
529 + unsigned long ns;
530 + unsigned long long cyc;
531 +
532 + ns = millisec * 1000000;
533 + cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
534 + return cyc;
535 + }
536 +
537 /*
538 + * Display the statistics thru /proc.
539 + * 'data' points to the cpu number
540 */
541 static int uv_ptc_seq_show(struct seq_file *file, void *data)
542 {
···
536
537 if (!cpu) {
538 seq_printf(file,
539 + "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 ");
540 seq_printf(file,
541 + "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto ");
542 + seq_printf(file,
543 + "retries rok resetp resett giveup sto bz throt ");
544 + seq_printf(file,
545 + "sw_ack recv rtime all ");
546 + seq_printf(file,
547 + "one mult none retry canc nocan reset rcan\n");
548 }
549 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
550 stat = &per_cpu(ptcstats, cpu);
551 + /* source side statistics */
552 + seq_printf(file,
553 + "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
554 + cpu, stat->s_requestor, cycles_2_us(stat->s_time),
555 + stat->s_ntarguvhub, stat->s_ntarguvhub16,
556 + stat->s_ntarguvhub8, stat->s_ntarguvhub4,
557 + stat->s_ntarguvhub2, stat->s_ntarguvhub1,
558 + stat->s_ntargcpu, stat->s_dtimeout);
559 + seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
560 + stat->s_retry_messages, stat->s_retriesok,
561 + stat->s_resets_plug, stat->s_resets_timeout,
562 + stat->s_giveup, stat->s_stimeout,
563 + stat->s_busy, stat->s_throttles);
564 + /* destination side statistics */
565 + seq_printf(file,
566 + "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
567 uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
568 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
569 + stat->d_requestee, cycles_2_us(stat->d_time),
570 + stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
571 + stat->d_nomsg, stat->d_retries, stat->d_canceled,
572 + stat->d_nocanceled, stat->d_resets,
573 + stat->d_rcanceled);
574 }
575
576 return 0;
577 }
578
579 /*
580 + * -1: reset the statistics
581 * 0: display meaning of the statistics
582 + * >0: maximum concurrent active descriptors per uvhub (throttle)
583 */
584 static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
585 size_t count, loff_t *data)
586 {
587 + int cpu;
588 + long input_arg;
589 char optstr[64];
590 + struct ptc_stats *stat;
591 + struct bau_control *bcp;
592
593 if (count == 0 || count > sizeof(optstr))
594 return -EINVAL;
595 if (copy_from_user(optstr, user, count))
596 return -EFAULT;
597 optstr[count - 1] = '\0';
598 + if (strict_strtol(optstr, 10, &input_arg) < 0) {
599 printk(KERN_DEBUG "%s is invalid\n", optstr);
600 return -EINVAL;
601 }
602
603 + if (input_arg == 0) {
604 printk(KERN_DEBUG "# cpu: cpu number\n");
605 + printk(KERN_DEBUG "Sender statistics:\n");
606 printk(KERN_DEBUG
607 + "sent: number of shootdown messages sent\n");
608 printk(KERN_DEBUG
609 + "stime: time spent sending messages\n");
610
printk(KERN_DEBUG
611 + "numuvhubs: number of hubs targeted with shootdown\n");
612 printk(KERN_DEBUG
613 + "numuvhubs16: number of times 16 or more hubs targeted\n");
614 printk(KERN_DEBUG
615 + "numuvhubs8: number of times 8 or more hubs targeted\n");
616 printk(KERN_DEBUG
617 + "numuvhubs4: number of times 4 or more hubs targeted\n");
618 printk(KERN_DEBUG
619 + "numuvhubs2: number of times 2 or more hubs targeted\n");
620 printk(KERN_DEBUG
621 + "numuvhubs1: number of times 1 hub targeted\n");
622 printk(KERN_DEBUG
623 + "numcpus: number of cpus targeted with shootdown\n");
624 printk(KERN_DEBUG
625 + "dto: number of destination timeouts\n");
626 printk(KERN_DEBUG
627 + "retries: destination timeout retries sent\n");
628 + printk(KERN_DEBUG
629 + "rok: destination timeouts successfully retried\n");
630 + printk(KERN_DEBUG
631 + "resetp: ipi-style resource resets for plugs\n");
632 + printk(KERN_DEBUG
633 + "resett: ipi-style resource resets for timeouts\n");
634 + printk(KERN_DEBUG
635 + "giveup: fall-backs to ipi-style shootdowns\n");
636 + printk(KERN_DEBUG
637 + "sto: number of source timeouts\n");
638 + printk(KERN_DEBUG
639 + "bz: number of stay-busy's\n");
640 + printk(KERN_DEBUG
641 + "throt: number of times spun in throttle\n");
642 + printk(KERN_DEBUG "Destination side statistics:\n");
643 + printk(KERN_DEBUG
644 + "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
645 + printk(KERN_DEBUG
646 + "recv: shootdown messages received\n");
647 + printk(KERN_DEBUG
648 + "rtime: time spent processing messages\n");
649 + printk(KERN_DEBUG
650 + "all: shootdown all-tlb messages\n");
651 + printk(KERN_DEBUG
652 + "one: shootdown one-tlb messages\n");
653 + printk(KERN_DEBUG
654 + "mult: interrupts that found multiple messages\n");
655 + printk(KERN_DEBUG
656 + "none: interrupts that found no messages\n");
657 + printk(KERN_DEBUG
658 + "retry: number of retry messages processed\n");
659 + printk(KERN_DEBUG
660 + "canc: number of messages canceled by retries\n");
661 + printk(KERN_DEBUG
662 + "nocan: number of retries that found nothing to cancel\n");
663 + printk(KERN_DEBUG
664 + "reset: number of ipi-style reset requests processed\n");
665 + printk(KERN_DEBUG
666 + "rcan: number of messages canceled by reset requests\n");
667 + } else if (input_arg == -1) {
668 + for_each_present_cpu(cpu) {
669 + stat = &per_cpu(ptcstats, cpu);
670 + memset(stat, 0, sizeof(struct ptc_stats));
671 + }
672 } else {
673 + uv_bau_max_concurrent = input_arg;
674 + bcp = &per_cpu(bau_control, smp_processor_id());
675 + if (uv_bau_max_concurrent < 1 ||
676 + uv_bau_max_concurrent > bcp->cpus_in_uvhub) {
677 + printk(KERN_DEBUG
678 + "Error: BAU max concurrent %d; %d is invalid\n",
679 + bcp->max_concurrent, uv_bau_max_concurrent);
680 + return -EINVAL;
681 + }
682 + printk(KERN_DEBUG "Set BAU max concurrent:%d\n",
683 + uv_bau_max_concurrent);
684 + for_each_present_cpu(cpu) {
685 + bcp = &per_cpu(bau_control, cpu);
686 + bcp->max_concurrent = uv_bau_max_concurrent;
687 + }
688 }
689
690 return count;
···
651 }
652
653 /*
654 * initialize the sending side's sending buffers
655 */
656 + static void
657 uv_activation_descriptor_init(int node, int pnode)
658 {
659 int i;
660 + int cpu;
661 unsigned long pa;
662 unsigned long m;
663 unsigned long n;
664 + struct bau_desc *bau_desc;
665 + struct bau_desc *bd2;
666 + struct bau_control *bcp;
667
668 /*
669 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
670 + * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
671 */
672 + bau_desc = (struct bau_desc
*)kmalloc_node(sizeof(struct bau_desc)* 673 UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); 674 + BUG_ON(!bau_desc); 675 676 + pa = uv_gpa(bau_desc); /* need the real nasid*/ 677 + n = pa >> uv_nshift; 678 m = pa & uv_mmask; 679 680 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, ··· 732 /* 733 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each 734 * cpu even though we only use the first one; one descriptor can 735 + * describe a broadcast to 256 uv hubs. 736 */ 737 + for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); 738 + i++, bd2++) { 739 + memset(bd2, 0, sizeof(struct bau_desc)); 740 + bd2->header.sw_ack_flag = 1; 741 /* 742 + * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub 743 + * in the partition. The bit map will indicate uvhub numbers, 744 + * which are 0-N in a partition. Pnodes are unique system-wide. 745 */ 746 + bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 747 + bd2->header.dest_subnodeid = 0x10; /* the LB */ 748 + bd2->header.command = UV_NET_ENDPOINT_INTD; 749 + bd2->header.int_both = 1; 750 /* 751 * all others need to be set to zero: 752 * fairness chaining multilevel count replied_to 753 */ 754 } 755 + for_each_present_cpu(cpu) { 756 + if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) 757 + continue; 758 + bcp = &per_cpu(bau_control, cpu); 759 + bcp->descriptor_base = bau_desc; 760 + } 761 } 762 763 /* 764 * initialize the destination side's receiving buffers 765 + * entered for each uvhub in the partition 766 + * - node is first node (kernel memory notion) on the uvhub 767 + * - pnode is the uvhub's physical identifier 768 */ 769 + static void 770 + uv_payload_queue_init(int node, int pnode) 771 { 772 int pn; 773 + int cpu; 774 char *cp; 775 + unsigned long pa; 776 + struct bau_payload_queue_entry *pqp; 777 + struct bau_payload_queue_entry *pqp_malloc; 778 + struct bau_control *bcp; 779 780 pqp = (struct bau_payload_queue_entry *) kmalloc_node( 781 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), 782 GFP_KERNEL, node); 783 BUG_ON(!pqp); 784 + pqp_malloc = pqp; 785 786 cp = (char *)pqp + 31; 787 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); 788 + 789 + for_each_present_cpu(cpu) { 790 + if (pnode != uv_cpu_to_pnode(cpu)) 791 + continue; 792 + /* for every cpu on this pnode: */ 793 + bcp = &per_cpu(bau_control, cpu); 794 + bcp->va_queue_first = pqp; 795 + bcp->bau_msg_head = pqp; 796 + bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); 797 + } 798 /* 799 * need the pnode of where the memory was really allocated 800 */ 801 pa = uv_gpa(pqp); 802 + pn = pa >> uv_nshift; 803 uv_write_global_mmr64(pnode, 804 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, 805 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | 806 uv_physnodeaddr(pqp)); 807 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, 808 uv_physnodeaddr(pqp)); 809 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, 810 (unsigned long) 811 + uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); 812 + /* in effect, all msg_type's are set to MSG_NOOP */ 813 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); 814 } 815 816 /* 817 + * Initialization of each UV hub's structures 818 */ 819 + static void __init uv_init_uvhub(int uvhub, int vector) 820 { 821 int node; 822 int pnode; 823 unsigned long apicid; 824 825 + node = uvhub_to_first_node(uvhub); 826 + pnode = uv_blade_to_pnode(uvhub); 827 + uv_activation_descriptor_init(node, pnode); 828 + uv_payload_queue_init(node, pnode); 829 /* 
830 * the below initialization can't be in firmware because the 831 * messaging IRQ will be determined by the OS 832 */ 833 + apicid = uvhub_to_first_apicid(uvhub); 834 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 835 + ((apicid << 32) | vector)); 836 + } 837 + 838 + /* 839 + * initialize the bau_control structure for each cpu 840 + */ 841 + static void uv_init_per_cpu(int nuvhubs) 842 + { 843 + int i, j, k; 844 + int cpu; 845 + int pnode; 846 + int uvhub; 847 + short socket = 0; 848 + struct bau_control *bcp; 849 + struct uvhub_desc *bdp; 850 + struct socket_desc *sdp; 851 + struct bau_control *hmaster = NULL; 852 + struct bau_control *smaster = NULL; 853 + struct socket_desc { 854 + short num_cpus; 855 + short cpu_number[16]; 856 + }; 857 + struct uvhub_desc { 858 + short num_sockets; 859 + short num_cpus; 860 + short uvhub; 861 + short pnode; 862 + struct socket_desc socket[2]; 863 + }; 864 + struct uvhub_desc *uvhub_descs; 865 + 866 + uvhub_descs = (struct uvhub_desc *) 867 + kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); 868 + memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); 869 + for_each_present_cpu(cpu) { 870 + bcp = &per_cpu(bau_control, cpu); 871 + memset(bcp, 0, sizeof(struct bau_control)); 872 + spin_lock_init(&bcp->masks_lock); 873 + bcp->max_concurrent = uv_bau_max_concurrent; 874 + pnode = uv_cpu_hub_info(cpu)->pnode; 875 + uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; 876 + bdp = &uvhub_descs[uvhub]; 877 + bdp->num_cpus++; 878 + bdp->uvhub = uvhub; 879 + bdp->pnode = pnode; 880 + /* time interval to catch a hardware stay-busy bug */ 881 + bcp->timeout_interval = millisec_2_cycles(3); 882 + /* kludge: assume uv_hub.h is constant */ 883 + socket = (cpu_physical_id(cpu)>>5)&1; 884 + if (socket >= bdp->num_sockets) 885 + bdp->num_sockets = socket+1; 886 + sdp = &bdp->socket[socket]; 887 + sdp->cpu_number[sdp->num_cpus] = cpu; 888 + sdp->num_cpus++; 889 + } 890 + socket = 0; 891 + for_each_possible_blade(uvhub) { 892 + bdp = &uvhub_descs[uvhub]; 893 + for (i = 0; i < bdp->num_sockets; i++) { 894 + sdp = &bdp->socket[i]; 895 + for (j = 0; j < sdp->num_cpus; j++) { 896 + cpu = sdp->cpu_number[j]; 897 + bcp = &per_cpu(bau_control, cpu); 898 + bcp->cpu = cpu; 899 + if (j == 0) { 900 + smaster = bcp; 901 + if (i == 0) 902 + hmaster = bcp; 903 + } 904 + bcp->cpus_in_uvhub = bdp->num_cpus; 905 + bcp->cpus_in_socket = sdp->num_cpus; 906 + bcp->socket_master = smaster; 907 + bcp->uvhub_master = hmaster; 908 + for (k = 0; k < DEST_Q_SIZE; k++) 909 + bcp->socket_acknowledge_count[k] = 0; 910 + bcp->uvhub_cpu = 911 + uv_cpu_hub_info(cpu)->blade_processor_id; 912 + } 913 + socket++; 914 + } 915 + } 916 + kfree(uvhub_descs); 917 } 918 919 /* ··· 829 */ 830 static int __init uv_bau_init(void) 831 { 832 + int uvhub; 833 + int pnode; 834 + int nuvhubs; 835 int cur_cpu; 836 + int vector; 837 + unsigned long mmr; 838 839 if (!is_uv_system()) 840 + return 0; 841 + 842 + if (nobau) 843 return 0; 844 845 for_each_possible_cpu(cur_cpu) 846 zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), 847 GFP_KERNEL, cpu_to_node(cur_cpu)); 848 849 + uv_bau_max_concurrent = MAX_BAU_CONCURRENT; 850 + uv_nshift = uv_hub_info->m_val; 851 uv_mmask = (1UL << uv_hub_info->m_val) - 1; 852 + nuvhubs = uv_num_possible_blades(); 853 854 + uv_init_per_cpu(nuvhubs); 855 856 uv_partition_base_pnode = 0x7fffffff; 857 + for (uvhub = 0; uvhub < nuvhubs; uvhub++) 858 + if (uv_blade_nr_possible_cpus(uvhub) && 859 + (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) 860 + 
uv_partition_base_pnode = uv_blade_to_pnode(uvhub); 861 862 + vector = UV_BAU_MESSAGE; 863 + for_each_possible_blade(uvhub) 864 + if (uv_blade_nr_possible_cpus(uvhub)) 865 + uv_init_uvhub(uvhub, vector); 866 + 867 uv_enable_timeouts(); 868 + alloc_intr_gate(vector, uv_bau_message_intr1); 869 + 870 + for_each_possible_blade(uvhub) { 871 + pnode = uv_blade_to_pnode(uvhub); 872 + /* INIT the bau */ 873 + uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, 874 + ((unsigned long)1 << 63)); 875 + mmr = 1; /* should be 1 to broadcast to both sockets */ 876 + uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, mmr); 877 + } 878 879 return 0; 880 } 881 + core_initcall(uv_bau_init); 882 + core_initcall(uv_ptc_init);
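
For reference, the per-uvhub 'throttle' used above is a counting admission gate: atomic_inc_unless_ge() increments active_descriptor_count only while it is below max_concurrent, uv_flush_send_and_wait() spins until it is admitted, and atomic_dec() releases the slot when the broadcast finishes. A minimal userspace model of that idea (pthread_mutex_t and a plain int stand in for the kernel's spinlock_t and atomic_t; all names below are illustrative, not taken from the patch):

/* userspace model of the per-uvhub 'throttle'; illustrative only */
#include <pthread.h>
#include <sched.h>

struct throttle {
	pthread_mutex_t lock;	/* stands in for uvhub_lock */
	int active;		/* stands in for active_descriptor_count */
	int max_concurrent;	/* stands in for hmaster->max_concurrent */
};

/* increment 'active' only if it is below the limit; 1 = admitted, 0 = full */
static int inc_unless_ge(struct throttle *t)
{
	int admitted = 0;

	pthread_mutex_lock(&t->lock);
	if (t->active < t->max_concurrent) {
		t->active++;
		admitted = 1;
	}
	pthread_mutex_unlock(&t->lock);
	return admitted;
}

/* a sender spins until admitted, as uv_flush_send_and_wait() does */
static void throttle_enter(struct throttle *t)
{
	while (!inc_unless_ge(t))
		sched_yield();	/* stands in for cpu_relax() */
}

static void throttle_exit(struct throttle *t)
{
	pthread_mutex_lock(&t->lock);
	t->active--;		/* models the atomic_dec() on completion */
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct throttle t = { PTHREAD_MUTEX_INITIALIZER, 0, 2 };

	throttle_enter(&t);	/* active 0 -> 1 */
	throttle_enter(&t);	/* active 1 -> 2 */
	/* a third caller would now spin until someone calls throttle_exit() */
	throttle_exit(&t);
	throttle_exit(&t);
	return 0;
}

Because the limit is checked under a lock rather than matched exactly by a compare-and-swap, max_concurrent can be lowered below the current count at any time (the timeout path above drops it to 1) and the gate simply stops admitting new senders until enough in-flight broadcasts drain; that is the property the comment contrasts with atomic_add_unless().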
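
sec_2_cycles() and millisec_2_cycles() both convert an interval to TSC cycles through the per-cpu cyc2ns value, which holds nanoseconds per cycle as a fixed-point number scaled by 2^CYC2NS_SCALE_FACTOR; inverting that relation gives cycles = (ns << CYC2NS_SCALE_FACTOR) / cyc2ns, which is what both helpers compute. A standalone sketch of the arithmetic (the scale factor and the 3 GHz clock below are made-up example values, not from the patch):

/* fixed-point ns -> cycles conversion, standalone demo */
#include <stdio.h>

#define CYC2NS_SCALE 10			/* plays the role of CYC2NS_SCALE_FACTOR */

int main(void)
{
	/* ns per cycle, scaled by 2^CYC2NS_SCALE; about 1/3 ns for a 3 GHz clock */
	unsigned long long cyc2ns = (1ULL << CYC2NS_SCALE) / 3;
	unsigned long long ns = 3ULL * 1000 * 1000;	/* a 3 millisecond interval */
	unsigned long long cycles = (ns << CYC2NS_SCALE) / cyc2ns;

	/* expect roughly 9,000,000 cycles at 3 GHz */
	printf("%llu ns is about %llu cycles\n", ns, cycles);
	return 0;
}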
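
The retry policy in uv_flush_send_and_wait() treats a 'plugged' destination (all of its software-ack resources are in use, so the error comes back inside the BIOS_TO window and is reported as FLUSH_RETRY_PLUGGED) differently from a genuine destination timeout: each kind is counted separately, an IPI-driven reset of the destination's resources is triggered after PLUGSB4RESET or TIMEOUTSB4RESET consecutive occurrences, and after three such resets the broadcast gives up and falls back to IPI-style shootdown. A stripped-down model of just that control flow; the waiting, delays and resets are stubbed out, and the two threshold values here are arbitrary stand-ins for constants defined elsewhere in the patch:

/* control-flow sketch of the plugged/timeout retry escalation */
#include <stdio.h>

enum { FLUSH_RETRY_PLUGGED = 1, FLUSH_RETRY_TIMEOUT, FLUSH_GIVEUP, FLUSH_COMPLETE };

#define PLUGSB4RESET	10	/* assumed value; defined elsewhere in the patch */
#define TIMEOUTSB4RESET	10	/* assumed value; defined elsewhere in the patch */

/* pretend the first send finds the destination plugged, then it completes */
static int fake_wait_completion(int attempt)
{
	return attempt < 2 ? FLUSH_RETRY_PLUGGED : FLUSH_COMPLETE;
}

static int send_and_wait(void)
{
	int plugged_tries = 0, timeout_tries = 0, ipi_attempts = 0;
	int status, try = 0;

	do {
		try++;
		status = fake_wait_completion(try);
		if (status == FLUSH_RETRY_PLUGGED) {
			if (++plugged_tries >= PLUGSB4RESET) {
				plugged_tries = 0;
				ipi_attempts++;	/* reset destination resources via IPI */
			}
		} else if (status == FLUSH_RETRY_TIMEOUT) {
			if (++timeout_tries >= TIMEOUTSB4RESET) {
				timeout_tries = 0;
				ipi_attempts++;
			}
		}
		if (ipi_attempts >= 3)	/* after three resets, stop broadcasting */
			return FLUSH_GIVEUP;
	} while (status == FLUSH_RETRY_PLUGGED || status == FLUSH_RETRY_TIMEOUT);

	return status;
}

int main(void)
{
	printf("result: %s\n",
	       send_and_wait() == FLUSH_COMPLETE ? "complete" : "giveup");
	return 0;
}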
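
On the sending side, uv_flush_tlb_others() copies the caller's mask minus the current cpu into a per-cpu flush_mask and walks it once: cpus on the sending uvhub only bump the 'locals' count and stay in flush_mask for the caller to handle, while each remote cpu sets its hub's bit in the descriptor's distribution map and bumps 'remotes', which feeds the numcpus/numuvhubs statistics. A toy, self-contained version of that conversion (the cpu-to-hub table and the mask are invented test data):

/* toy cpumask -> hub-distribution conversion */
#include <stdio.h>

#define NCPUS 8

int main(void)
{
	int cpu_to_hub[NCPUS] = { 0, 0, 1, 1, 2, 2, 3, 3 };
	int mask[NCPUS]       = { 1, 1, 1, 0, 1, 0, 0, 1 };	/* cpus to be flushed */
	int this_cpu = 1, this_hub = cpu_to_hub[1];
	unsigned long distribution = 0;		/* one bit per target hub */
	int cpu, locals = 0, remotes = 0;

	mask[this_cpu] = 0;			/* the sender flushes itself directly */
	for (cpu = 0; cpu < NCPUS; cpu++) {
		if (!mask[cpu])
			continue;
		if (cpu_to_hub[cpu] == this_hub) {
			locals++;		/* same hub: not part of the broadcast */
			continue;
		}
		distribution |= 1UL << cpu_to_hub[cpu];
		remotes++;
	}
	/* expect locals=1 remotes=3 bitmap=0xe for the data above */
	printf("locals=%d remotes=%d hub bitmap=0x%lx\n", locals, remotes, distribution);
	return 0;
}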
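
On the receiving side, uv_bau_message_interrupt() resumes at bcp->bau_msg_head and keeps consuming payload-queue entries for as long as their sw_ack_vector is non-zero, wrapping from va_queue_last back to va_queue_first, so a single interrupt may drain several messages (counted in d_multmsg) or none at all (d_nomsg). The same circular walk in a self-contained form; the queue length and contents are invented, and clearing sw_ack_vector stands in for uv_bau_process_message() and the software-ack handling:

/* self-contained model of the circular payload-queue walk */
#include <stdio.h>

#define QUEUE_SIZE 17			/* illustrative length only */

struct entry {
	unsigned char sw_ack_vector;	/* non-zero while a message is pending */
};

int main(void)
{
	static struct entry queue[QUEUE_SIZE];
	struct entry *first = queue;
	struct entry *last = queue + QUEUE_SIZE - 1;
	struct entry *head = queue + 15;	/* where the last interrupt stopped */
	int handled = 0;

	/* three pending messages that wrap around the end of the queue */
	queue[15].sw_ack_vector = 0x1;
	queue[16].sw_ack_vector = 0x2;
	queue[0].sw_ack_vector = 0x4;

	while (head->sw_ack_vector) {
		handled++;
		head->sw_ack_vector = 0;	/* "process" the message */
		head++;
		if (head > last)
			head = first;
	}
	/* expect: handled 3 messages, head left at slot 1 */
	printf("handled %d messages, head now at slot %ld\n",
	       handled, (long)(head - first));
	return 0;
}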
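
uv_payload_queue_init() allocates one entry more than it needs and then rounds the base pointer up to a 32-byte boundary with the '+ 31' followed by the '>> 5 << 5' masking before programming the PAYLOAD_QUEUE MMRs. The same round-up in isolation (sizes are arbitrary; this only demonstrates the pointer arithmetic):

/* demo of the +31 / >>5 / <<5 round-up to a 32-byte boundary */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t entry = 32;			/* illustrative entry size */
	size_t n = 17;				/* illustrative queue length */
	char *raw = malloc((n + 1) * entry);	/* one spare entry covers the shift */
	char *cp, *aligned;

	if (!raw)
		return 1;
	cp = raw + 31;
	aligned = (char *)(((unsigned long)cp >> 5) << 5);

	/* 'aligned' is >= raw, < raw + 32, and a multiple of 32 */
	printf("raw=%p aligned=%p remainder=%lu\n",
	       (void *)raw, (void *)aligned, (unsigned long)aligned % 32);
	free(raw);	/* free the original pointer, not the aligned one */
	return 0;
}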
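
uv_init_per_cpu() builds the two-level hierarchy the rest of the code leans on: cpus are grouped by socket within each uvhub, the first cpu enumerated in a socket becomes that socket's socket_master, and the first cpu of the first socket becomes the uvhub_master whose bau_control carries the shared throttle counters and locks. A compressed illustration of just the master selection (one hub, two sockets, invented cpu numbering; the real code also fills in per-socket counts and acknowledge arrays):

/* toy master selection: first cpu per socket, first cpu of socket 0 per hub */
#include <stdio.h>

#define NCPUS 8

int main(void)
{
	int socket_of[NCPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };	/* cpu -> socket */
	int socket_master[2] = { -1, -1 };
	int hub_master = -1;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		int s = socket_of[cpu];

		if (socket_master[s] < 0)
			socket_master[s] = cpu;		/* first cpu seen in this socket */
		if (s == 0 && hub_master < 0)
			hub_master = cpu;		/* first cpu of the first socket */
	}
	for (cpu = 0; cpu < NCPUS; cpu++)
		printf("cpu %d: socket master %d, hub master %d\n",
		       cpu, socket_master[socket_of[cpu]], hub_master);
	return 0;
}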