Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86, UV: Clean up tlb_uv.c

SGI UV's tlb_uv.c driver has become rather hard to read, with overly large
functions, non-standard coding style and (way) too long variable, constant
and function names and non-obvious code flow sequences.

This patch improves the readability and maintainability of the driver
significantly, by doing the following strict code cleanups with no side
effects:

- Split long functions into shorter logical functions.

- Shortened some variable and structure member names.

- Added special functions for reads and writes of MMR regs with
very long names.

- Added the 'tunables' table to shorten tunables_write().

- Added the 'stat_description' table to shorten uv_ptc_proc_write().

- Pass fewer 'stat' arguments where it can be derived from the 'bcp'
argument.

- Made function definitions consistent on one line, and inline in a few (short) cases.

- Moved some small structures and an atomic inline function to the header file.

- Moved some local variables to the blocks where they are used.

- Updated the copyright date.

- Shortened uv_write_global_mmr64() etc. using some aliasing; no
line breaks. Renamed many uv_.. functions that are not exported.

- Aligned structure fields.
[ note that not all structures are aligned the same way though; I'd like
to keep the extensive commenting in some of them. ]

- Shortened some long structure names.

- Standard pass/fail exit from init_per_cpu()

- Vertical alignment for mass initializations.

- More separation between blocks of code.

Tested on a 16-processor Altix UV.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: penberg@kernel.org
Link: http://lkml.kernel.org/r/E1QOw12-0004MN-Lp@eag09.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by

Cliff Wickman and committed by
Ingo Molnar
f073cc8f 2a919596

+1106 -906
+350 -202
arch/x86/include/asm/uv/uv_bau.h
··· 5 5 * 6 6 * SGI UV Broadcast Assist Unit definitions 7 7 * 8 - * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. 8 + * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved. 9 9 */ 10 10 11 11 #ifndef _ASM_X86_UV_UV_BAU_H ··· 35 35 36 36 #define MAX_CPUS_PER_UVHUB 64 37 37 #define MAX_CPUS_PER_SOCKET 32 38 - #define UV_ADP_SIZE 64 /* hardware-provided max. */ 39 - #define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */ 40 - #define UV_ITEMS_PER_DESCRIPTOR 8 38 + #define ADP_SZ 64 /* hardware-provided max. */ 39 + #define UV_CPUS_PER_AS 32 /* hardware-provided max. */ 40 + #define ITEMS_PER_DESC 8 41 41 /* the 'throttle' to prevent the hardware stay-busy bug */ 42 42 #define MAX_BAU_CONCURRENT 3 43 43 #define UV_ACT_STATUS_MASK 0x3 ··· 48 48 #define UV2_NET_ENDPOINT_INTD 0x28 49 49 #define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ 50 50 UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) 51 - #define UV_DESC_BASE_PNODE_SHIFT 49 51 + #define UV_DESC_PSHIFT 49 52 52 #define UV_PAYLOADQ_PNODE_SHIFT 49 53 53 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" 54 54 #define UV_BAU_BASENAME "sgi_uv/bau_tunables" ··· 56 56 #define UV_BAU_TUNABLES_FILE "bau_tunables" 57 57 #define WHITESPACE " \t\n" 58 58 #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) 59 - 59 + #define cpubit_isset(cpu, bau_local_cpumask) \ 60 + test_bit((cpu), (bau_local_cpumask).bits) 60 61 61 62 /* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ 62 63 /* ··· 73 72 UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ 74 73 UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD) 75 74 76 - #define BAU_MISC_CONTROL_MULT_MASK 3 75 + #define BAU_MISC_CONTROL_MULT_MASK 3 77 76 78 - #define UVH_AGING_PRESCALE_SEL 0x000000b000UL 77 + #define UVH_AGING_PRESCALE_SEL 0x000000b000UL 79 78 /* [30:28] URGENCY_7 an index into a table of times */ 80 - #define BAU_URGENCY_7_SHIFT 28 81 - #define BAU_URGENCY_7_MASK 7 79 + #define BAU_URGENCY_7_SHIFT 28 80 + #define BAU_URGENCY_7_MASK 7 
82 81 83 - #define UVH_TRANSACTION_TIMEOUT 0x000000b200UL 82 + #define UVH_TRANSACTION_TIMEOUT 0x000000b200UL 84 83 /* [45:40] BAU - BAU transaction timeout select - a multiplier */ 85 - #define BAU_TRANS_SHIFT 40 86 - #define BAU_TRANS_MASK 0x3f 84 + #define BAU_TRANS_SHIFT 40 85 + #define BAU_TRANS_MASK 0x3f 86 + 87 + /* 88 + * shorten some awkward names 89 + */ 90 + #define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 91 + #define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 92 + #define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 93 + #define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 94 + #define write_gmmr uv_write_global_mmr64 95 + #define write_lmmr uv_write_local_mmr 96 + #define read_lmmr uv_read_local_mmr 97 + #define read_gmmr uv_read_global_mmr64 87 98 88 99 /* 89 100 * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 90 101 */ 91 - #define DESC_STATUS_IDLE 0 92 - #define DESC_STATUS_ACTIVE 1 93 - #define DESC_STATUS_DESTINATION_TIMEOUT 2 94 - #define DESC_STATUS_SOURCE_TIMEOUT 3 102 + #define DS_IDLE 0 103 + #define DS_ACTIVE 1 104 + #define DS_DESTINATION_TIMEOUT 2 105 + #define DS_SOURCE_TIMEOUT 3 95 106 /* 96 107 * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 97 108 * values 1 and 5 will not occur ··· 124 111 * threshholds at which to use IPI to free resources 125 112 */ 126 113 /* after this # consecutive 'plugged' timeouts, use IPI to release resources */ 127 - #define PLUGSB4RESET 100 114 + #define PLUGSB4RESET 100 128 115 /* after this many consecutive timeouts, use IPI to release resources */ 129 - #define TIMEOUTSB4RESET 1 116 + #define TIMEOUTSB4RESET 1 130 117 /* at this number uses of IPI to release resources, giveup the request */ 131 - #define IPI_RESET_LIMIT 1 118 + #define IPI_RESET_LIMIT 1 132 119 /* after this # consecutive successes, bump up the throttle if it was lowered */ 133 - #define COMPLETE_THRESHOLD 5 120 + #define COMPLETE_THRESHOLD 5 
134 121 135 - #define UV_LB_SUBNODEID 0x10 122 + #define UV_LB_SUBNODEID 0x10 136 123 137 124 /* these two are the same for UV1 and UV2: */ 138 125 #define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 139 126 #define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 140 127 /* 4 bits of software ack period */ 141 - #define UV2_ACK_MASK 0x7UL 142 - #define UV2_ACK_UNITS_SHFT 3 128 + #define UV2_ACK_MASK 0x7UL 129 + #define UV2_ACK_UNITS_SHFT 3 143 130 #define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 144 131 #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 145 132 ··· 162 149 /* 163 150 * tuning the action when the numalink network is extremely delayed 164 151 */ 165 - #define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in microseconds */ 166 - #define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */ 167 - #define CONGESTED_PERIOD 30 /* time for the bau to be disabled, in seconds */ 152 + #define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in 153 + microseconds */ 154 + #define CONGESTED_REPS 10 /* long delays averaged over 155 + this many broadcasts */ 156 + #define CONGESTED_PERIOD 30 /* time for the bau to be 157 + disabled, in seconds */ 158 + /* see msg_type: */ 159 + #define MSG_NOOP 0 160 + #define MSG_REGULAR 1 161 + #define MSG_RETRY 2 168 162 169 163 /* 170 164 * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) ··· 183 163 * 'base_dest_nasid' field of the header corresponds to the 184 164 * destination nodeID associated with that specified bit. 185 165 */ 186 - struct bau_target_uvhubmask { 187 - unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; 166 + struct bau_targ_hubmask { 167 + unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; 188 168 }; 189 169 190 170 /* ··· 193 173 * enough bits for max. 
cpu's per uvhub) 194 174 */ 195 175 struct bau_local_cpumask { 196 - unsigned long bits; 176 + unsigned long bits; 197 177 }; 198 178 199 179 /* ··· 214 194 * The payload is software-defined for INTD transactions 215 195 */ 216 196 struct bau_msg_payload { 217 - unsigned long address; /* signifies a page or all TLB's 218 - of the cpu */ 197 + unsigned long address; /* signifies a page or all 198 + TLB's of the cpu */ 219 199 /* 64 bits */ 220 - unsigned short sending_cpu; /* filled in by sender */ 200 + unsigned short sending_cpu; /* filled in by sender */ 221 201 /* 16 bits */ 222 - unsigned short acknowledge_count;/* filled in by destination */ 202 + unsigned short acknowledge_count; /* filled in by destination */ 223 203 /* 16 bits */ 224 - unsigned int reserved1:32; /* not usable */ 204 + unsigned int reserved1:32; /* not usable */ 225 205 }; 226 206 227 207 ··· 230 210 * see table 4.2.3.0.1 in broacast_assist spec. 231 211 */ 232 212 struct bau_msg_header { 233 - unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 213 + unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 234 214 /* bits 5:0 */ 235 - unsigned int base_dest_nasid:15; /* nasid of the */ 236 - /* bits 20:6 */ /* first bit in uvhub map */ 237 - unsigned int command:8; /* message type */ 215 + unsigned int base_dest_nasid:15; /* nasid of the first bit */ 216 + /* bits 20:6 */ /* in uvhub map */ 217 + unsigned int command:8; /* message type */ 238 218 /* bits 28:21 */ 239 - /* 0x38: SN3net EndPoint Message */ 240 - unsigned int rsvd_1:3; /* must be zero */ 219 + /* 0x38: SN3net EndPoint Message */ 220 + unsigned int rsvd_1:3; /* must be zero */ 241 221 /* bits 31:29 */ 242 - /* int will align on 32 bits */ 243 - unsigned int rsvd_2:9; /* must be zero */ 222 + /* int will align on 32 bits */ 223 + unsigned int rsvd_2:9; /* must be zero */ 244 224 /* bits 40:32 */ 245 - /* Suppl_A is 56-41 */ 246 - unsigned int sequence:16;/* message sequence number */ 247 - /* bits 56:41 */ /* 
becomes bytes 16-17 of msg */ 248 - /* Address field (96:57) is never used as an 249 - address (these are address bits 42:3) */ 225 + /* Suppl_A is 56-41 */ 226 + unsigned int sequence:16; /* message sequence number */ 227 + /* bits 56:41 */ /* becomes bytes 16-17 of msg */ 228 + /* Address field (96:57) is 229 + never used as an address 230 + (these are address bits 231 + 42:3) */ 250 232 251 - unsigned int rsvd_3:1; /* must be zero */ 233 + unsigned int rsvd_3:1; /* must be zero */ 252 234 /* bit 57 */ 253 - /* address bits 27:4 are payload */ 235 + /* address bits 27:4 are payload */ 254 236 /* these next 24 (58-81) bits become bytes 12-14 of msg */ 255 - 256 237 /* bits 65:58 land in byte 12 */ 257 - unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ 238 + unsigned int replied_to:1; /* sent as 0 by the source to 239 + byte 12 */ 258 240 /* bit 58 */ 259 - unsigned int msg_type:3; /* software type of the message*/ 241 + unsigned int msg_type:3; /* software type of the 242 + message */ 260 243 /* bits 61:59 */ 261 - unsigned int canceled:1; /* message canceled, resource to be freed*/ 244 + unsigned int canceled:1; /* message canceled, resource 245 + is to be freed*/ 262 246 /* bit 62 */ 263 - unsigned int payload_1a:1;/* not currently used */ 247 + unsigned int payload_1a:1; /* not currently used */ 264 248 /* bit 63 */ 265 - unsigned int payload_1b:2;/* not currently used */ 249 + unsigned int payload_1b:2; /* not currently used */ 266 250 /* bits 65:64 */ 267 251 268 252 /* bits 73:66 land in byte 13 */ 269 - unsigned int payload_1ca:6;/* not currently used */ 253 + unsigned int payload_1ca:6; /* not currently used */ 270 254 /* bits 71:66 */ 271 - unsigned int payload_1c:2;/* not currently used */ 255 + unsigned int payload_1c:2; /* not currently used */ 272 256 /* bits 73:72 */ 273 257 274 258 /* bits 81:74 land in byte 14 */ 275 - unsigned int payload_1d:6;/* not currently used */ 259 + unsigned int payload_1d:6; /* not currently used */ 276 
260 /* bits 79:74 */ 277 - unsigned int payload_1e:2;/* not currently used */ 261 + unsigned int payload_1e:2; /* not currently used */ 278 262 /* bits 81:80 */ 279 263 280 - unsigned int rsvd_4:7; /* must be zero */ 264 + unsigned int rsvd_4:7; /* must be zero */ 281 265 /* bits 88:82 */ 282 - unsigned int sw_ack_flag:1;/* software acknowledge flag */ 266 + unsigned int swack_flag:1; /* software acknowledge flag */ 283 267 /* bit 89 */ 284 - /* INTD trasactions at destination are to 285 - wait for software acknowledge */ 286 - unsigned int rsvd_5:6; /* must be zero */ 268 + /* INTD trasactions at 269 + destination are to wait for 270 + software acknowledge */ 271 + unsigned int rsvd_5:6; /* must be zero */ 287 272 /* bits 95:90 */ 288 - unsigned int rsvd_6:5; /* must be zero */ 273 + unsigned int rsvd_6:5; /* must be zero */ 289 274 /* bits 100:96 */ 290 - unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ 275 + unsigned int int_both:1; /* if 1, interrupt both sockets 276 + on the uvhub */ 291 277 /* bit 101*/ 292 - unsigned int fairness:3;/* usually zero */ 278 + unsigned int fairness:3; /* usually zero */ 293 279 /* bits 104:102 */ 294 - unsigned int multilevel:1; /* multi-level multicast format */ 280 + unsigned int multilevel:1; /* multi-level multicast 281 + format */ 295 282 /* bit 105 */ 296 - /* 0 for TLB: endpoint multi-unicast messages */ 297 - unsigned int chaining:1;/* next descriptor is part of this activation*/ 283 + /* 0 for TLB: endpoint multi-unicast messages */ 284 + unsigned int chaining:1; /* next descriptor is part of 285 + this activation*/ 298 286 /* bit 106 */ 299 - unsigned int rsvd_7:21; /* must be zero */ 287 + unsigned int rsvd_7:21; /* must be zero */ 300 288 /* bits 127:107 */ 301 289 }; 302 - 303 - /* see msg_type: */ 304 - #define MSG_NOOP 0 305 - #define MSG_REGULAR 1 306 - #define MSG_RETRY 2 307 290 308 291 /* 309 292 * The activation descriptor: ··· 314 291 * Should be 64 bytes 315 292 */ 316 293 struct 
bau_desc { 317 - struct bau_target_uvhubmask distribution; 294 + struct bau_targ_hubmask distribution; 318 295 /* 319 296 * message template, consisting of header and payload: 320 297 */ 321 - struct bau_msg_header header; 322 - struct bau_msg_payload payload; 298 + struct bau_msg_header header; 299 + struct bau_msg_payload payload; 323 300 }; 324 301 /* 325 302 * -payload-- ---------header------ ··· 338 315 * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 339 316 * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) 340 317 * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from 341 - * sw_ack_vector and payload_2) 318 + * swack_vec and payload_2) 342 319 * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software 343 320 * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload 344 321 * operation." 345 322 */ 346 - struct bau_payload_queue_entry { 347 - unsigned long address; /* signifies a page or all TLB's 348 - of the cpu */ 323 + struct bau_pq_entry { 324 + unsigned long address; /* signifies a page or all TLB's 325 + of the cpu */ 349 326 /* 64 bits, bytes 0-7 */ 350 - 351 - unsigned short sending_cpu; /* cpu that sent the message */ 327 + unsigned short sending_cpu; /* cpu that sent the message */ 352 328 /* 16 bits, bytes 8-9 */ 353 - 354 - unsigned short acknowledge_count; /* filled in by destination */ 329 + unsigned short acknowledge_count; /* filled in by destination */ 355 330 /* 16 bits, bytes 10-11 */ 356 - 357 331 /* these next 3 bytes come from bits 58-81 of the message header */ 358 - unsigned short replied_to:1; /* sent as 0 by the source */ 359 - unsigned short msg_type:3; /* software message type */ 360 - unsigned short canceled:1; /* sent as 0 by the source */ 361 - unsigned short unused1:3; /* not currently using */ 332 + unsigned short replied_to:1; /* sent as 0 by the source */ 333 + unsigned short msg_type:3; /* software message type */ 334 + unsigned short 
canceled:1; /* sent as 0 by the source */ 335 + unsigned short unused1:3; /* not currently using */ 362 336 /* byte 12 */ 363 - 364 - unsigned char unused2a; /* not currently using */ 337 + unsigned char unused2a; /* not currently using */ 365 338 /* byte 13 */ 366 - unsigned char unused2; /* not currently using */ 339 + unsigned char unused2; /* not currently using */ 367 340 /* byte 14 */ 368 - 369 - unsigned char sw_ack_vector; /* filled in by the hardware */ 341 + unsigned char swack_vec; /* filled in by the hardware */ 370 342 /* byte 15 (bits 127:120) */ 371 - 372 - unsigned short sequence; /* message sequence number */ 343 + unsigned short sequence; /* message sequence number */ 373 344 /* bytes 16-17 */ 374 - unsigned char unused4[2]; /* not currently using bytes 18-19 */ 345 + unsigned char unused4[2]; /* not currently using bytes 18-19 */ 375 346 /* bytes 18-19 */ 376 - 377 - int number_of_cpus; /* filled in at destination */ 347 + int number_of_cpus; /* filled in at destination */ 378 348 /* 32 bits, bytes 20-23 (aligned) */ 379 - 380 - unsigned char unused5[8]; /* not using */ 349 + unsigned char unused5[8]; /* not using */ 381 350 /* bytes 24-31 */ 382 351 }; 383 352 384 353 struct msg_desc { 385 - struct bau_payload_queue_entry *msg; 386 - int msg_slot; 387 - int sw_ack_slot; 388 - struct bau_payload_queue_entry *va_queue_first; 389 - struct bau_payload_queue_entry *va_queue_last; 354 + struct bau_pq_entry *msg; 355 + int msg_slot; 356 + int swack_slot; 357 + struct bau_pq_entry *queue_first; 358 + struct bau_pq_entry *queue_last; 390 359 }; 391 360 392 361 struct reset_args { 393 - int sender; 362 + int sender; 394 363 }; 395 364 396 365 /* ··· 390 375 */ 391 376 struct ptc_stats { 392 377 /* sender statistics */ 393 - unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ 394 - unsigned long s_requestor; /* number of shootdown requests */ 395 - unsigned long s_stimeout; /* source side timeouts */ 396 - unsigned long s_dtimeout; /* 
destination side timeouts */ 397 - unsigned long s_time; /* time spent in sending side */ 398 - unsigned long s_retriesok; /* successful retries */ 399 - unsigned long s_ntargcpu; /* total number of cpu's targeted */ 400 - unsigned long s_ntargself; /* times the sending cpu was targeted */ 401 - unsigned long s_ntarglocals; /* targets of cpus on the local blade */ 402 - unsigned long s_ntargremotes; /* targets of cpus on remote blades */ 403 - unsigned long s_ntarglocaluvhub; /* targets of the local hub */ 404 - unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ 405 - unsigned long s_ntarguvhub; /* total number of uvhubs targeted */ 406 - unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/ 407 - unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */ 408 - unsigned long s_ntarguvhub4; /* number of times target hubs >= 4 */ 409 - unsigned long s_ntarguvhub2; /* number of times target hubs >= 2 */ 410 - unsigned long s_ntarguvhub1; /* number of times target hubs == 1 */ 411 - unsigned long s_resets_plug; /* ipi-style resets from plug state */ 412 - unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ 413 - unsigned long s_busy; /* status stayed busy past s/w timer */ 414 - unsigned long s_throttles; /* waits in throttle */ 415 - unsigned long s_retry_messages; /* retry broadcasts */ 416 - unsigned long s_bau_reenabled; /* for bau enable/disable */ 417 - unsigned long s_bau_disabled; /* for bau enable/disable */ 378 + unsigned long s_giveup; /* number of fall backs to 379 + IPI-style flushes */ 380 + unsigned long s_requestor; /* number of shootdown 381 + requests */ 382 + unsigned long s_stimeout; /* source side timeouts */ 383 + unsigned long s_dtimeout; /* destination side timeouts */ 384 + unsigned long s_time; /* time spent in sending side */ 385 + unsigned long s_retriesok; /* successful retries */ 386 + unsigned long s_ntargcpu; /* total number of cpu's 387 + targeted */ 388 + unsigned long s_ntargself; /* 
times the sending cpu was 389 + targeted */ 390 + unsigned long s_ntarglocals; /* targets of cpus on the local 391 + blade */ 392 + unsigned long s_ntargremotes; /* targets of cpus on remote 393 + blades */ 394 + unsigned long s_ntarglocaluvhub; /* targets of the local hub */ 395 + unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ 396 + unsigned long s_ntarguvhub; /* total number of uvhubs 397 + targeted */ 398 + unsigned long s_ntarguvhub16; /* number of times target 399 + hubs >= 16*/ 400 + unsigned long s_ntarguvhub8; /* number of times target 401 + hubs >= 8 */ 402 + unsigned long s_ntarguvhub4; /* number of times target 403 + hubs >= 4 */ 404 + unsigned long s_ntarguvhub2; /* number of times target 405 + hubs >= 2 */ 406 + unsigned long s_ntarguvhub1; /* number of times target 407 + hubs == 1 */ 408 + unsigned long s_resets_plug; /* ipi-style resets from plug 409 + state */ 410 + unsigned long s_resets_timeout; /* ipi-style resets from 411 + timeouts */ 412 + unsigned long s_busy; /* status stayed busy past 413 + s/w timer */ 414 + unsigned long s_throttles; /* waits in throttle */ 415 + unsigned long s_retry_messages; /* retry broadcasts */ 416 + unsigned long s_bau_reenabled; /* for bau enable/disable */ 417 + unsigned long s_bau_disabled; /* for bau enable/disable */ 418 418 /* destination statistics */ 419 - unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ 420 - unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ 421 - unsigned long d_multmsg; /* interrupts with multiple messages */ 422 - unsigned long d_nomsg; /* interrupts with no message */ 423 - unsigned long d_time; /* time spent on destination side */ 424 - unsigned long d_requestee; /* number of messages processed */ 425 - unsigned long d_retries; /* number of retry messages processed */ 426 - unsigned long d_canceled; /* number of messages canceled by retries */ 427 - unsigned long d_nocanceled; /* retries that found nothing to cancel */ 428 - 
unsigned long d_resets; /* number of ipi-style requests processed */ 429 - unsigned long d_rcanceled; /* number of messages canceled by resets */ 419 + unsigned long d_alltlb; /* times all tlb's on this 420 + cpu were flushed */ 421 + unsigned long d_onetlb; /* times just one tlb on this 422 + cpu was flushed */ 423 + unsigned long d_multmsg; /* interrupts with multiple 424 + messages */ 425 + unsigned long d_nomsg; /* interrupts with no message */ 426 + unsigned long d_time; /* time spent on destination 427 + side */ 428 + unsigned long d_requestee; /* number of messages 429 + processed */ 430 + unsigned long d_retries; /* number of retry messages 431 + processed */ 432 + unsigned long d_canceled; /* number of messages canceled 433 + by retries */ 434 + unsigned long d_nocanceled; /* retries that found nothing 435 + to cancel */ 436 + unsigned long d_resets; /* number of ipi-style requests 437 + processed */ 438 + unsigned long d_rcanceled; /* number of messages canceled 439 + by resets */ 440 + }; 441 + 442 + struct tunables { 443 + int *tunp; 444 + int deflt; 430 445 }; 431 446 432 447 struct hub_and_pnode { 433 - short uvhub; 434 - short pnode; 448 + short uvhub; 449 + short pnode; 435 450 }; 451 + 452 + struct socket_desc { 453 + short num_cpus; 454 + short cpu_number[MAX_CPUS_PER_SOCKET]; 455 + }; 456 + 457 + struct uvhub_desc { 458 + unsigned short socket_mask; 459 + short num_cpus; 460 + short uvhub; 461 + short pnode; 462 + struct socket_desc socket[2]; 463 + }; 464 + 436 465 /* 437 466 * one per-cpu; to locate the software tables 438 467 */ 439 468 struct bau_control { 440 - struct bau_desc *descriptor_base; 441 - struct bau_payload_queue_entry *va_queue_first; 442 - struct bau_payload_queue_entry *va_queue_last; 443 - struct bau_payload_queue_entry *bau_msg_head; 444 - struct bau_control *uvhub_master; 445 - struct bau_control *socket_master; 446 - struct ptc_stats *statp; 447 - unsigned long timeout_interval; 448 - unsigned long set_bau_on_time; 449 - 
atomic_t active_descriptor_count; 450 - int plugged_tries; 451 - int timeout_tries; 452 - int ipi_attempts; 453 - int conseccompletes; 454 - int baudisabled; 455 - int set_bau_off; 456 - short cpu; 457 - short osnode; 458 - short uvhub_cpu; 459 - short uvhub; 460 - short cpus_in_socket; 461 - short cpus_in_uvhub; 462 - short partition_base_pnode; 463 - unsigned short message_number; 464 - unsigned short uvhub_quiesce; 465 - short socket_acknowledge_count[DEST_Q_SIZE]; 466 - cycles_t send_message; 467 - spinlock_t uvhub_lock; 468 - spinlock_t queue_lock; 469 + struct bau_desc *descriptor_base; 470 + struct bau_pq_entry *queue_first; 471 + struct bau_pq_entry *queue_last; 472 + struct bau_pq_entry *bau_msg_head; 473 + struct bau_control *uvhub_master; 474 + struct bau_control *socket_master; 475 + struct ptc_stats *statp; 476 + unsigned long timeout_interval; 477 + unsigned long set_bau_on_time; 478 + atomic_t active_descriptor_count; 479 + int plugged_tries; 480 + int timeout_tries; 481 + int ipi_attempts; 482 + int conseccompletes; 483 + int baudisabled; 484 + int set_bau_off; 485 + short cpu; 486 + short osnode; 487 + short uvhub_cpu; 488 + short uvhub; 489 + short cpus_in_socket; 490 + short cpus_in_uvhub; 491 + short partition_base_pnode; 492 + unsigned short message_number; 493 + unsigned short uvhub_quiesce; 494 + short socket_acknowledge_count[DEST_Q_SIZE]; 495 + cycles_t send_message; 496 + spinlock_t uvhub_lock; 497 + spinlock_t queue_lock; 469 498 /* tunables */ 470 - int max_bau_concurrent; 471 - int max_bau_concurrent_constant; 472 - int plugged_delay; 473 - int plugsb4reset; 474 - int timeoutsb4reset; 475 - int ipi_reset_limit; 476 - int complete_threshold; 477 - int congested_response_us; 478 - int congested_reps; 479 - int congested_period; 480 - cycles_t period_time; 481 - long period_requests; 482 - struct hub_and_pnode *target_hub_and_pnode; 499 + int max_concurr; 500 + int max_concurr_const; 501 + int plugged_delay; 502 + int plugsb4reset; 503 + 
int timeoutsb4reset; 504 + int ipi_reset_limit; 505 + int complete_threshold; 506 + int cong_response_us; 507 + int cong_reps; 508 + int cong_period; 509 + cycles_t period_time; 510 + long period_requests; 511 + struct hub_and_pnode *thp; 483 512 }; 484 513 485 - static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) 514 + static unsigned long read_mmr_uv2_status(void) 515 + { 516 + return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2); 517 + } 518 + 519 + static void write_mmr_data_broadcast(int pnode, unsigned long mmr_image) 520 + { 521 + write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image); 522 + } 523 + 524 + static void write_mmr_descriptor_base(int pnode, unsigned long mmr_image) 525 + { 526 + write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image); 527 + } 528 + 529 + static void write_mmr_activation(unsigned long index) 530 + { 531 + write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); 532 + } 533 + 534 + static void write_gmmr_activation(int pnode, unsigned long mmr_image) 535 + { 536 + write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); 537 + } 538 + 539 + static void write_mmr_payload_first(int pnode, unsigned long mmr_image) 540 + { 541 + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); 542 + } 543 + 544 + static void write_mmr_payload_tail(int pnode, unsigned long mmr_image) 545 + { 546 + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image); 547 + } 548 + 549 + static void write_mmr_payload_last(int pnode, unsigned long mmr_image) 550 + { 551 + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image); 552 + } 553 + 554 + static void write_mmr_misc_control(int pnode, unsigned long mmr_image) 555 + { 556 + write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); 557 + } 558 + 559 + static unsigned long read_mmr_misc_control(int pnode) 560 + { 561 + return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL); 562 + } 563 + 564 + static void write_mmr_sw_ack(unsigned long mr) 565 + { 566 + 
uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); 567 + } 568 + 569 + static unsigned long read_mmr_sw_ack(void) 570 + { 571 + return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 572 + } 573 + 574 + static unsigned long read_gmmr_sw_ack(int pnode) 575 + { 576 + return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 577 + } 578 + 579 + static void write_mmr_data_config(int pnode, unsigned long mr) 580 + { 581 + uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr); 582 + } 583 + 584 + static inline int bau_uvhub_isset(int uvhub, struct bau_targ_hubmask *dstp) 486 585 { 487 586 return constant_test_bit(uvhub, &dstp->bits[0]); 488 587 } 489 - static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) 588 + static inline void bau_uvhub_set(int pnode, struct bau_targ_hubmask *dstp) 490 589 { 491 590 __set_bit(pnode, &dstp->bits[0]); 492 591 } 493 - static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, 592 + static inline void bau_uvhubs_clear(struct bau_targ_hubmask *dstp, 494 593 int nbits) 495 594 { 496 595 bitmap_zero(&dstp->bits[0], nbits); 497 596 } 498 - static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) 597 + static inline int bau_uvhub_weight(struct bau_targ_hubmask *dstp) 499 598 { 500 599 return bitmap_weight((unsigned long *)&dstp->bits[0], 501 600 UV_DISTRIBUTION_SIZE); ··· 620 491 bitmap_zero(&dstp->bits, nbits); 621 492 } 622 493 623 - #define cpubit_isset(cpu, bau_local_cpumask) \ 624 - test_bit((cpu), (bau_local_cpumask).bits) 625 - 626 494 extern void uv_bau_message_intr1(void); 627 495 extern void uv_bau_timeout_intr1(void); 628 496 ··· 627 501 short counter; 628 502 }; 629 503 630 - /** 504 + /* 631 505 * atomic_read_short - read a short atomic variable 632 506 * @v: pointer of type atomic_short 633 507 * ··· 638 512 return v->counter; 639 513 } 640 514 641 - /** 642 - * atomic_add_short_return - add and return a short int 515 + /* 516 + * atom_asr - add and return a 
short int 643 517 * @i: short value to add 644 518 * @v: pointer of type atomic_short 645 519 * 646 520 * Atomically adds @i to @v and returns @i + @v 647 521 */ 648 - static inline int atomic_add_short_return(short i, struct atomic_short *v) 522 + static inline int atom_asr(short i, struct atomic_short *v) 649 523 { 650 524 short __i = i; 651 525 asm volatile(LOCK_PREFIX "xaddw %0, %1" 652 526 : "+r" (i), "+m" (v->counter) 653 527 : : "memory"); 654 528 return i + __i; 529 + } 530 + 531 + /* 532 + * conditionally add 1 to *v, unless *v is >= u 533 + * return 0 if we cannot add 1 to *v because it is >= u 534 + * return 1 if we can add 1 to *v because it is < u 535 + * the add is atomic 536 + * 537 + * This is close to atomic_add_unless(), but this allows the 'u' value 538 + * to be lowered below the current 'v'. atomic_add_unless can only stop 539 + * on equal. 540 + */ 541 + static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) 542 + { 543 + spin_lock(lock); 544 + if (atomic_read(v) >= u) { 545 + spin_unlock(lock); 546 + return 0; 547 + } 548 + atomic_inc(v); 549 + spin_unlock(lock); 550 + return 1; 655 551 } 656 552 657 553 #endif /* _ASM_X86_UV_UV_BAU_H */
+756 -704
arch/x86/platform/uv/tlb_uv.c
··· 1 1 /* 2 2 * SGI UltraViolet TLB flush routines. 3 3 * 4 - * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. 4 + * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. 5 5 * 6 6 * This code is released under the GNU General Public License version 2 or 7 7 * later. ··· 35 35 5242880, 36 36 167772160 37 37 }; 38 + 38 39 static int timeout_us; 39 40 static int nobau; 40 41 static int baudisabled; ··· 43 42 static cycles_t congested_cycles; 44 43 45 44 /* tunables: */ 46 - static int max_bau_concurrent = MAX_BAU_CONCURRENT; 47 - static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; 48 - static int plugged_delay = PLUGGED_DELAY; 49 - static int plugsb4reset = PLUGSB4RESET; 50 - static int timeoutsb4reset = TIMEOUTSB4RESET; 51 - static int ipi_reset_limit = IPI_RESET_LIMIT; 52 - static int complete_threshold = COMPLETE_THRESHOLD; 53 - static int congested_response_us = CONGESTED_RESPONSE_US; 54 - static int congested_reps = CONGESTED_REPS; 55 - static int congested_period = CONGESTED_PERIOD; 45 + static int max_concurr = MAX_BAU_CONCURRENT; 46 + static int max_concurr_const = MAX_BAU_CONCURRENT; 47 + static int plugged_delay = PLUGGED_DELAY; 48 + static int plugsb4reset = PLUGSB4RESET; 49 + static int timeoutsb4reset = TIMEOUTSB4RESET; 50 + static int ipi_reset_limit = IPI_RESET_LIMIT; 51 + static int complete_threshold = COMPLETE_THRESHOLD; 52 + static int congested_respns_us = CONGESTED_RESPONSE_US; 53 + static int congested_reps = CONGESTED_REPS; 54 + static int congested_period = CONGESTED_PERIOD; 55 + 56 + static struct tunables tunables[] = { 57 + {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ 58 + {&plugged_delay, PLUGGED_DELAY}, 59 + {&plugsb4reset, PLUGSB4RESET}, 60 + {&timeoutsb4reset, TIMEOUTSB4RESET}, 61 + {&ipi_reset_limit, IPI_RESET_LIMIT}, 62 + {&complete_threshold, COMPLETE_THRESHOLD}, 63 + {&congested_respns_us, CONGESTED_RESPONSE_US}, 64 + {&congested_reps, CONGESTED_REPS}, 65 + {&congested_period, CONGESTED_PERIOD} 66 + }; 67 + 56 68 
static struct dentry *tunables_dir; 57 69 static struct dentry *tunables_file; 58 70 59 - static int __init setup_nobau(char *arg) 71 + /* these correspond to the statistics printed by ptc_seq_show() */ 72 + static char *stat_description[] = { 73 + "sent: number of shootdown messages sent", 74 + "stime: time spent sending messages", 75 + "numuvhubs: number of hubs targeted with shootdown", 76 + "numuvhubs16: number times 16 or more hubs targeted", 77 + "numuvhubs8: number times 8 or more hubs targeted", 78 + "numuvhubs4: number times 4 or more hubs targeted", 79 + "numuvhubs2: number times 2 or more hubs targeted", 80 + "numuvhubs1: number times 1 hub targeted", 81 + "numcpus: number of cpus targeted with shootdown", 82 + "dto: number of destination timeouts", 83 + "retries: destination timeout retries sent", 84 + "rok: : destination timeouts successfully retried", 85 + "resetp: ipi-style resource resets for plugs", 86 + "resett: ipi-style resource resets for timeouts", 87 + "giveup: fall-backs to ipi-style shootdowns", 88 + "sto: number of source timeouts", 89 + "bz: number of stay-busy's", 90 + "throt: number times spun in throttle", 91 + "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE", 92 + "recv: shootdown messages received", 93 + "rtime: time spent processing messages", 94 + "all: shootdown all-tlb messages", 95 + "one: shootdown one-tlb messages", 96 + "mult: interrupts that found multiple messages", 97 + "none: interrupts that found no messages", 98 + "retry: number of retry messages processed", 99 + "canc: number messages canceled by retries", 100 + "nocan: number retries that found nothing to cancel", 101 + "reset: number of ipi-style reset requests processed", 102 + "rcan: number messages canceled by reset requests", 103 + "disable: number times use of the BAU was disabled", 104 + "enable: number times use of the BAU was re-enabled" 105 + }; 106 + 107 + static int __init 108 + setup_nobau(char *arg) 60 109 { 61 110 nobau = 1; 62 111 return 0; ··· 
114 63 early_param("nobau", setup_nobau); 115 64 116 65 /* base pnode in this partition */ 117 - static int uv_partition_base_pnode __read_mostly; 66 + static int uv_base_pnode __read_mostly; 118 67 /* position of pnode (which is nasid>>1): */ 119 68 static int uv_nshift __read_mostly; 120 69 static unsigned long uv_mmask __read_mostly; ··· 160 109 * clear of the Timeout bit (as well) will free the resource. No reply will 161 110 * be sent (the hardware will only do one reply per message). 162 111 */ 163 - static inline void uv_reply_to_message(struct msg_desc *mdp, 164 - struct bau_control *bcp) 112 + static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp) 165 113 { 166 114 unsigned long dw; 167 - struct bau_payload_queue_entry *msg; 115 + struct bau_pq_entry *msg; 168 116 169 117 msg = mdp->msg; 170 118 if (!msg->canceled) { 171 - dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | 172 - msg->sw_ack_vector; 173 - uv_write_local_mmr( 174 - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); 119 + dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; 120 + write_mmr_sw_ack(dw); 175 121 } 176 122 msg->replied_to = 1; 177 - msg->sw_ack_vector = 0; 123 + msg->swack_vec = 0; 178 124 } 179 125 180 126 /* 181 127 * Process the receipt of a RETRY message 182 128 */ 183 - static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, 184 - struct bau_control *bcp) 129 + static void bau_process_retry_msg(struct msg_desc *mdp, 130 + struct bau_control *bcp) 185 131 { 186 132 int i; 187 133 int cancel_count = 0; 188 - int slot2; 189 134 unsigned long msg_res; 190 135 unsigned long mmr = 0; 191 - struct bau_payload_queue_entry *msg; 192 - struct bau_payload_queue_entry *msg2; 193 - struct ptc_stats *stat; 136 + struct bau_pq_entry *msg = mdp->msg; 137 + struct bau_pq_entry *msg2; 138 + struct ptc_stats *stat = bcp->statp; 194 139 195 - msg = mdp->msg; 196 - stat = bcp->statp; 197 140 stat->d_retries++; 198 141 /* 199 142 * cancel any message from 
msg+1 to the retry itself 200 143 */ 201 144 for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { 202 - if (msg2 > mdp->va_queue_last) 203 - msg2 = mdp->va_queue_first; 145 + if (msg2 > mdp->queue_last) 146 + msg2 = mdp->queue_first; 204 147 if (msg2 == msg) 205 148 break; 206 149 207 - /* same conditions for cancellation as uv_do_reset */ 150 + /* same conditions for cancellation as do_reset */ 208 151 if ((msg2->replied_to == 0) && (msg2->canceled == 0) && 209 - (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & 210 - msg->sw_ack_vector) == 0) && 152 + (msg2->swack_vec) && ((msg2->swack_vec & 153 + msg->swack_vec) == 0) && 211 154 (msg2->sending_cpu == msg->sending_cpu) && 212 155 (msg2->msg_type != MSG_NOOP)) { 213 - slot2 = msg2 - mdp->va_queue_first; 214 - mmr = uv_read_local_mmr 215 - (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 216 - msg_res = msg2->sw_ack_vector; 156 + mmr = read_mmr_sw_ack(); 157 + msg_res = msg2->swack_vec; 217 158 /* 218 159 * This is a message retry; clear the resources held 219 160 * by the previous message only if they timed out. ··· 213 170 * situation to report. 214 171 */ 215 172 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { 173 + unsigned long mr; 216 174 /* 217 175 * is the resource timed out? 218 176 * make everyone ignore the cancelled message. ··· 221 177 msg2->canceled = 1; 222 178 stat->d_canceled++; 223 179 cancel_count++; 224 - uv_write_local_mmr( 225 - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, 226 - (msg_res << UV_SW_ACK_NPENDING) | 227 - msg_res); 180 + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; 181 + write_mmr_sw_ack(mr); 228 182 } 229 183 } 230 184 } ··· 234 192 * Do all the things a cpu should do for a TLB shootdown message. 235 193 * Other cpu's may come here at the same time for this message. 
236 194 */ 237 - static void uv_bau_process_message(struct msg_desc *mdp, 238 - struct bau_control *bcp) 195 + static void bau_process_message(struct msg_desc *mdp, 196 + struct bau_control *bcp) 239 197 { 240 - int msg_ack_count; 241 198 short socket_ack_count = 0; 242 - struct ptc_stats *stat; 243 - struct bau_payload_queue_entry *msg; 199 + short *sp; 200 + struct atomic_short *asp; 201 + struct ptc_stats *stat = bcp->statp; 202 + struct bau_pq_entry *msg = mdp->msg; 244 203 struct bau_control *smaster = bcp->socket_master; 245 204 246 205 /* 247 206 * This must be a normal message, or retry of a normal message 248 207 */ 249 - msg = mdp->msg; 250 - stat = bcp->statp; 251 208 if (msg->address == TLB_FLUSH_ALL) { 252 209 local_flush_tlb(); 253 210 stat->d_alltlb++; ··· 263 222 * cpu number. 264 223 */ 265 224 if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) 266 - uv_bau_process_retry_msg(mdp, bcp); 225 + bau_process_retry_msg(mdp, bcp); 267 226 268 227 /* 269 - * This is a sw_ack message, so we have to reply to it. 228 + * This is a swack message, so we have to reply to it. 270 229 * Count each responding cpu on the socket. This avoids 271 230 * pinging the count's cache line back and forth between 272 231 * the sockets. 273 232 */ 274 - socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) 275 - &smaster->socket_acknowledge_count[mdp->msg_slot]); 233 + sp = &smaster->socket_acknowledge_count[mdp->msg_slot]; 234 + asp = (struct atomic_short *)sp; 235 + socket_ack_count = atom_asr(1, asp); 276 236 if (socket_ack_count == bcp->cpus_in_socket) { 237 + int msg_ack_count; 277 238 /* 278 239 * Both sockets dump their completed count total into 279 240 * the message's count. 
280 241 */ 281 242 smaster->socket_acknowledge_count[mdp->msg_slot] = 0; 282 - msg_ack_count = atomic_add_short_return(socket_ack_count, 283 - (struct atomic_short *)&msg->acknowledge_count); 243 + asp = (struct atomic_short *)&msg->acknowledge_count; 244 + msg_ack_count = atom_asr(socket_ack_count, asp); 284 245 285 246 if (msg_ack_count == bcp->cpus_in_uvhub) { 286 247 /* 287 248 * All cpus in uvhub saw it; reply 288 249 */ 289 - uv_reply_to_message(mdp, bcp); 250 + reply_to_message(mdp, bcp); 290 251 } 291 252 } 292 253 ··· 311 268 * Last resort when we get a large number of destination timeouts is 312 269 * to clear resources held by a given cpu. 313 270 * Do this with IPI so that all messages in the BAU message queue 314 - * can be identified by their nonzero sw_ack_vector field. 271 + * can be identified by their nonzero swack_vec field. 315 272 * 316 273 * This is entered for a single cpu on the uvhub. 317 274 * The sender want's this uvhub to free a specific message's 318 - * sw_ack resources. 275 + * swack resources. 319 276 */ 320 - static void 321 - uv_do_reset(void *ptr) 277 + static void do_reset(void *ptr) 322 278 { 323 279 int i; 324 - int slot; 325 - int count = 0; 326 - unsigned long mmr; 327 - unsigned long msg_res; 328 - struct bau_control *bcp; 329 - struct reset_args *rap; 330 - struct bau_payload_queue_entry *msg; 331 - struct ptc_stats *stat; 280 + struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id()); 281 + struct reset_args *rap = (struct reset_args *)ptr; 282 + struct bau_pq_entry *msg; 283 + struct ptc_stats *stat = bcp->statp; 332 284 333 - bcp = &per_cpu(bau_control, smp_processor_id()); 334 - rap = (struct reset_args *)ptr; 335 - stat = bcp->statp; 336 285 stat->d_resets++; 337 - 338 286 /* 339 287 * We're looking for the given sender, and 340 - * will free its sw_ack resource. 288 + * will free its swack resource. 341 289 * If all cpu's finally responded after the timeout, its 342 290 * message 'replied_to' was set. 
343 291 */ 344 - for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { 345 - /* uv_do_reset: same conditions for cancellation as 346 - uv_bau_process_retry_msg() */ 292 + for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { 293 + unsigned long msg_res; 294 + /* do_reset: same conditions for cancellation as 295 + bau_process_retry_msg() */ 347 296 if ((msg->replied_to == 0) && 348 297 (msg->canceled == 0) && 349 298 (msg->sending_cpu == rap->sender) && 350 - (msg->sw_ack_vector) && 299 + (msg->swack_vec) && 351 300 (msg->msg_type != MSG_NOOP)) { 301 + unsigned long mmr; 302 + unsigned long mr; 352 303 /* 353 304 * make everyone else ignore this message 354 305 */ 355 306 msg->canceled = 1; 356 - slot = msg - bcp->va_queue_first; 357 - count++; 358 307 /* 359 308 * only reset the resource if it is still pending 360 309 */ 361 - mmr = uv_read_local_mmr 362 - (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); 363 - msg_res = msg->sw_ack_vector; 310 + mmr = read_mmr_sw_ack(); 311 + msg_res = msg->swack_vec; 312 + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; 364 313 if (mmr & msg_res) { 365 314 stat->d_rcanceled++; 366 - uv_write_local_mmr( 367 - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, 368 - (msg_res << UV_SW_ACK_NPENDING) | 369 - msg_res); 315 + write_mmr_sw_ack(mr); 370 316 } 371 317 } 372 318 } ··· 366 334 * Use IPI to get all target uvhubs to release resources held by 367 335 * a given sending cpu number. 
368 336 */ 369 - static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, 370 - int sender) 337 + static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender) 371 338 { 372 339 int uvhub; 373 - int cpu; 340 + int maskbits; 374 341 cpumask_t mask; 375 342 struct reset_args reset_args; 376 343 377 344 reset_args.sender = sender; 378 - 379 345 cpus_clear(mask); 380 346 /* find a single cpu for each uvhub in this distribution mask */ 381 - for (uvhub = 0; 382 - uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; 383 - uvhub++) { 347 + maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE; 348 + for (uvhub = 0; uvhub < maskbits; uvhub++) { 349 + int cpu; 384 350 if (!bau_uvhub_isset(uvhub, distribution)) 385 351 continue; 386 352 /* find a cpu for this uvhub */ 387 353 cpu = uvhub_to_first_cpu(uvhub); 388 354 cpu_set(cpu, mask); 389 355 } 390 - /* IPI all cpus; Preemption is already disabled */ 391 - smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); 356 + 357 + /* IPI all cpus; preemption is already disabled */ 358 + smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1); 392 359 return; 393 360 } 394 361 395 - static inline unsigned long 396 - cycles_2_us(unsigned long long cyc) 362 + static inline unsigned long cycles_2_us(unsigned long long cyc) 397 363 { 398 364 unsigned long long ns; 399 365 unsigned long us; 400 - ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) 401 - >> CYC2NS_SCALE_FACTOR; 366 + int cpu = smp_processor_id(); 367 + 368 + ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; 402 369 us = ns / 1000; 403 370 return us; 404 371 } ··· 407 376 * leaves uvhub_quiesce set so that no new broadcasts are started by 408 377 * bau_flush_send_and_wait() 409 378 */ 410 - static inline void 411 - quiesce_local_uvhub(struct bau_control *hmaster) 379 + static inline void quiesce_local_uvhub(struct bau_control *hmaster) 412 380 { 413 - atomic_add_short_return(1, (struct atomic_short *) 414 - 
&hmaster->uvhub_quiesce); 381 + atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce); 415 382 } 416 383 417 384 /* 418 385 * mark this quiet-requestor as done 419 386 */ 420 - static inline void 421 - end_uvhub_quiesce(struct bau_control *hmaster) 387 + static inline void end_uvhub_quiesce(struct bau_control *hmaster) 422 388 { 423 - atomic_add_short_return(-1, (struct atomic_short *) 424 - &hmaster->uvhub_quiesce); 389 + atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce); 390 + } 391 + 392 + static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift) 393 + { 394 + unsigned long descriptor_status; 395 + 396 + descriptor_status = uv_read_local_mmr(mmr_offset); 397 + descriptor_status >>= right_shift; 398 + descriptor_status &= UV_ACT_STATUS_MASK; 399 + return descriptor_status; 425 400 } 426 401 427 402 /* ··· 435 398 * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP 436 399 */ 437 400 static int uv1_wait_completion(struct bau_desc *bau_desc, 438 - unsigned long mmr_offset, int right_shift, int this_cpu, 439 - struct bau_control *bcp, struct bau_control *smaster, long try) 401 + unsigned long mmr_offset, int right_shift, 402 + struct bau_control *bcp, long try) 440 403 { 441 404 unsigned long descriptor_status; 442 - cycles_t ttime; 405 + cycles_t ttm; 443 406 struct ptc_stats *stat = bcp->statp; 444 407 408 + descriptor_status = uv1_read_status(mmr_offset, right_shift); 445 409 /* spin on the status MMR, waiting for it to go idle */ 446 - while ((descriptor_status = (((unsigned long) 447 - uv_read_local_mmr(mmr_offset) >> 448 - right_shift) & UV_ACT_STATUS_MASK)) != 449 - DESC_STATUS_IDLE) { 410 + while ((descriptor_status != DS_IDLE)) { 450 411 /* 451 412 * Our software ack messages may be blocked because 452 413 * there are no swack resources available. As long 453 414 * as none of them has timed out hardware will NACK 454 415 * our message and its state will stay IDLE. 
455 416 */ 456 - if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { 417 + if (descriptor_status == DS_SOURCE_TIMEOUT) { 457 418 stat->s_stimeout++; 458 419 return FLUSH_GIVEUP; 459 - } else if (descriptor_status == 460 - DESC_STATUS_DESTINATION_TIMEOUT) { 420 + } else if (descriptor_status == DS_DESTINATION_TIMEOUT) { 461 421 stat->s_dtimeout++; 462 - ttime = get_cycles(); 422 + ttm = get_cycles(); 463 423 464 424 /* 465 425 * Our retries may be blocked by all destination ··· 464 430 * pending. In that case hardware returns the 465 431 * ERROR that looks like a destination timeout. 466 432 */ 467 - if (cycles_2_us(ttime - bcp->send_message) < 468 - timeout_us) { 433 + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { 469 434 bcp->conseccompletes = 0; 470 435 return FLUSH_RETRY_PLUGGED; 471 436 } ··· 477 444 */ 478 445 cpu_relax(); 479 446 } 447 + descriptor_status = uv1_read_status(mmr_offset, right_shift); 480 448 } 481 449 bcp->conseccompletes++; 482 450 return FLUSH_COMPLETE; 483 451 } 484 452 485 - static int uv2_wait_completion(struct bau_desc *bau_desc, 486 - unsigned long mmr_offset, int right_shift, int this_cpu, 487 - struct bau_control *bcp, struct bau_control *smaster, long try) 453 + /* 454 + * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. 
455 + */ 456 + static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu) 488 457 { 489 458 unsigned long descriptor_status; 490 459 unsigned long descriptor_status2; 491 - int cpu; 492 - cycles_t ttime; 460 + 461 + descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); 462 + descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL; 463 + descriptor_status = (descriptor_status << 1) | descriptor_status2; 464 + return descriptor_status; 465 + } 466 + 467 + static int uv2_wait_completion(struct bau_desc *bau_desc, 468 + unsigned long mmr_offset, int right_shift, 469 + struct bau_control *bcp, long try) 470 + { 471 + unsigned long descriptor_stat; 472 + cycles_t ttm; 473 + int cpu = bcp->uvhub_cpu; 493 474 struct ptc_stats *stat = bcp->statp; 494 475 495 - /* UV2 has an extra bit of status */ 496 - cpu = bcp->uvhub_cpu; 476 + descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); 477 + 497 478 /* spin on the status MMR, waiting for it to go idle */ 498 - descriptor_status = (((unsigned long)(uv_read_local_mmr 499 - (mmr_offset)) >> right_shift) & UV_ACT_STATUS_MASK); 500 - descriptor_status2 = (((unsigned long)uv_read_local_mmr 501 - (UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) & 0x1UL); 502 - descriptor_status = (descriptor_status << 1) | 503 - descriptor_status2; 504 - while (descriptor_status != UV2H_DESC_IDLE) { 479 + while (descriptor_stat != UV2H_DESC_IDLE) { 505 480 /* 506 481 * Our software ack messages may be blocked because 507 482 * there are no swack resources available. As long 508 483 * as none of them has timed out hardware will NACK 509 484 * our message and its state will stay IDLE. 
510 485 */ 511 - if ((descriptor_status == UV2H_DESC_SOURCE_TIMEOUT) || 512 - (descriptor_status == UV2H_DESC_DEST_STRONG_NACK) || 513 - (descriptor_status == UV2H_DESC_DEST_PUT_ERR)) { 486 + if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || 487 + (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) || 488 + (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { 514 489 stat->s_stimeout++; 515 490 return FLUSH_GIVEUP; 516 - } else if (descriptor_status == UV2H_DESC_DEST_TIMEOUT) { 491 + } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { 517 492 stat->s_dtimeout++; 518 - ttime = get_cycles(); 519 - 493 + ttm = get_cycles(); 520 494 /* 521 495 * Our retries may be blocked by all destination 522 496 * swack resources being consumed, and a timeout 523 497 * pending. In that case hardware returns the 524 498 * ERROR that looks like a destination timeout. 525 499 */ 526 - if (cycles_2_us(ttime - bcp->send_message) < 527 - timeout_us) { 500 + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { 528 501 bcp->conseccompletes = 0; 529 502 return FLUSH_RETRY_PLUGGED; 530 503 } 531 - 532 504 bcp->conseccompletes = 0; 533 505 return FLUSH_RETRY_TIMEOUT; 534 506 } else { 535 507 /* 536 - * descriptor_status is still BUSY 508 + * descriptor_stat is still BUSY 537 509 */ 538 510 cpu_relax(); 539 511 } 540 - descriptor_status = (((unsigned long)(uv_read_local_mmr 541 - (mmr_offset)) >> right_shift) & 542 - UV_ACT_STATUS_MASK); 543 - descriptor_status2 = (((unsigned long)uv_read_local_mmr 544 - (UV2H_LB_BAU_SB_ACTIVATION_STATUS_2) >> cpu) & 545 - 0x1UL); 546 - descriptor_status = (descriptor_status << 1) | 547 - descriptor_status2; 512 + descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); 548 513 } 549 514 bcp->conseccompletes++; 550 515 return FLUSH_COMPLETE; 551 516 } 552 517 553 - static int uv_wait_completion(struct bau_desc *bau_desc, 554 - unsigned long mmr_offset, int right_shift, int this_cpu, 555 - struct bau_control *bcp, struct bau_control *smaster, long try) 
518 + /* 519 + * There are 2 status registers; each and array[32] of 2 bits. Set up for 520 + * which register to read and position in that register based on cpu in 521 + * current hub. 522 + */ 523 + static int wait_completion(struct bau_desc *bau_desc, 524 + struct bau_control *bcp, long try) 556 525 { 526 + int right_shift; 527 + unsigned long mmr_offset; 528 + int cpu = bcp->uvhub_cpu; 529 + 530 + if (cpu < UV_CPUS_PER_AS) { 531 + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; 532 + right_shift = cpu * UV_ACT_STATUS_SIZE; 533 + } else { 534 + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; 535 + right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE); 536 + } 537 + 557 538 if (is_uv1_hub()) 558 539 return uv1_wait_completion(bau_desc, mmr_offset, right_shift, 559 - this_cpu, bcp, smaster, try); 540 + bcp, try); 560 541 else 561 542 return uv2_wait_completion(bau_desc, mmr_offset, right_shift, 562 - this_cpu, bcp, smaster, try); 543 + bcp, try); 563 544 } 564 545 565 - static inline cycles_t 566 - sec_2_cycles(unsigned long sec) 546 + static inline cycles_t sec_2_cycles(unsigned long sec) 567 547 { 568 548 unsigned long ns; 569 549 cycles_t cyc; ··· 587 541 } 588 542 589 543 /* 590 - * conditionally add 1 to *v, unless *v is >= u 591 - * return 0 if we cannot add 1 to *v because it is >= u 592 - * return 1 if we can add 1 to *v because it is < u 593 - * the add is atomic 594 - * 595 - * This is close to atomic_add_unless(), but this allows the 'u' value 596 - * to be lowered below the current 'v'. atomic_add_unless can only stop 597 - * on equal. 
598 - */ 599 - static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) 600 - { 601 - spin_lock(lock); 602 - if (atomic_read(v) >= u) { 603 - spin_unlock(lock); 604 - return 0; 605 - } 606 - atomic_inc(v); 607 - spin_unlock(lock); 608 - return 1; 609 - } 610 - 611 - /* 612 - * Our retries are blocked by all destination swack resources being 544 + * Our retries are blocked by all destination sw ack resources being 613 545 * in use, and a timeout is pending. In that case hardware immediately 614 546 * returns the ERROR that looks like a destination timeout. 615 547 */ 616 - static void 617 - destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, 548 + static void destination_plugged(struct bau_desc *bau_desc, 549 + struct bau_control *bcp, 618 550 struct bau_control *hmaster, struct ptc_stats *stat) 619 551 { 620 552 udelay(bcp->plugged_delay); 621 553 bcp->plugged_tries++; 554 + 622 555 if (bcp->plugged_tries >= bcp->plugsb4reset) { 623 556 bcp->plugged_tries = 0; 557 + 624 558 quiesce_local_uvhub(hmaster); 559 + 625 560 spin_lock(&hmaster->queue_lock); 626 - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); 561 + reset_with_ipi(&bau_desc->distribution, bcp->cpu); 627 562 spin_unlock(&hmaster->queue_lock); 563 + 628 564 end_uvhub_quiesce(hmaster); 565 + 629 566 bcp->ipi_attempts++; 630 567 stat->s_resets_plug++; 631 568 } 632 569 } 633 570 634 - static void 635 - destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, 636 - struct bau_control *hmaster, struct ptc_stats *stat) 571 + static void destination_timeout(struct bau_desc *bau_desc, 572 + struct bau_control *bcp, struct bau_control *hmaster, 573 + struct ptc_stats *stat) 637 574 { 638 - hmaster->max_bau_concurrent = 1; 575 + hmaster->max_concurr = 1; 639 576 bcp->timeout_tries++; 640 577 if (bcp->timeout_tries >= bcp->timeoutsb4reset) { 641 578 bcp->timeout_tries = 0; 579 + 642 580 quiesce_local_uvhub(hmaster); 581 + 643 582 
spin_lock(&hmaster->queue_lock); 644 - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); 583 + reset_with_ipi(&bau_desc->distribution, bcp->cpu); 645 584 spin_unlock(&hmaster->queue_lock); 585 + 646 586 end_uvhub_quiesce(hmaster); 587 + 647 588 bcp->ipi_attempts++; 648 589 stat->s_resets_timeout++; 649 590 } ··· 640 607 * Completions are taking a very long time due to a congested numalink 641 608 * network. 642 609 */ 643 - static void 644 - disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) 610 + static void disable_for_congestion(struct bau_control *bcp, 611 + struct ptc_stats *stat) 645 612 { 646 - int tcpu; 647 - struct bau_control *tbcp; 648 - 649 613 /* let only one cpu do this disabling */ 650 614 spin_lock(&disable_lock); 615 + 651 616 if (!baudisabled && bcp->period_requests && 652 617 ((bcp->period_time / bcp->period_requests) > congested_cycles)) { 618 + int tcpu; 619 + struct bau_control *tbcp; 653 620 /* it becomes this cpu's job to turn on the use of the 654 621 BAU again */ 655 622 baudisabled = 1; 656 623 bcp->set_bau_off = 1; 657 - bcp->set_bau_on_time = get_cycles() + 658 - sec_2_cycles(bcp->congested_period); 624 + bcp->set_bau_on_time = get_cycles(); 625 + bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); 659 626 stat->s_bau_disabled++; 660 627 for_each_present_cpu(tcpu) { 661 628 tbcp = &per_cpu(bau_control, tcpu); 662 - tbcp->baudisabled = 1; 629 + tbcp->baudisabled = 1; 663 630 } 664 631 } 632 + 665 633 spin_unlock(&disable_lock); 666 634 } 667 635 668 - /** 669 - * uv_flush_send_and_wait 670 - * 636 + static void count_max_concurr(int stat, struct bau_control *bcp, 637 + struct bau_control *hmaster) 638 + { 639 + bcp->plugged_tries = 0; 640 + bcp->timeout_tries = 0; 641 + if (stat != FLUSH_COMPLETE) 642 + return; 643 + if (bcp->conseccompletes <= bcp->complete_threshold) 644 + return; 645 + if (hmaster->max_concurr >= hmaster->max_concurr_const) 646 + return; 647 + hmaster->max_concurr++; 648 + } 649 + 650 + 
static void record_send_stats(cycles_t time1, cycles_t time2, 651 + struct bau_control *bcp, struct ptc_stats *stat, 652 + int completion_status, int try) 653 + { 654 + cycles_t elapsed; 655 + 656 + if (time2 > time1) { 657 + elapsed = time2 - time1; 658 + stat->s_time += elapsed; 659 + 660 + if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { 661 + bcp->period_requests++; 662 + bcp->period_time += elapsed; 663 + if ((elapsed > congested_cycles) && 664 + (bcp->period_requests > bcp->cong_reps)) 665 + disable_for_congestion(bcp, stat); 666 + } 667 + } else 668 + stat->s_requestor--; 669 + 670 + if (completion_status == FLUSH_COMPLETE && try > 1) 671 + stat->s_retriesok++; 672 + else if (completion_status == FLUSH_GIVEUP) 673 + stat->s_giveup++; 674 + } 675 + 676 + /* 677 + * Because of a uv1 hardware bug only a limited number of concurrent 678 + * requests can be made. 679 + */ 680 + static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) 681 + { 682 + spinlock_t *lock = &hmaster->uvhub_lock; 683 + atomic_t *v; 684 + 685 + v = &hmaster->active_descriptor_count; 686 + if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) { 687 + stat->s_throttles++; 688 + do { 689 + cpu_relax(); 690 + } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)); 691 + } 692 + } 693 + 694 + /* 695 + * Handle the completion status of a message send. 696 + */ 697 + static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, 698 + struct bau_control *bcp, struct bau_control *hmaster, 699 + struct ptc_stats *stat) 700 + { 701 + if (completion_status == FLUSH_RETRY_PLUGGED) 702 + destination_plugged(bau_desc, bcp, hmaster, stat); 703 + else if (completion_status == FLUSH_RETRY_TIMEOUT) 704 + destination_timeout(bau_desc, bcp, hmaster, stat); 705 + } 706 + 707 + /* 671 708 * Send a broadcast and wait for it to complete. 
672 709 * 673 710 * The flush_mask contains the cpus the broadcast is to be sent to including ··· 748 645 * returned to the kernel. 749 646 */ 750 647 int uv_flush_send_and_wait(struct bau_desc *bau_desc, 751 - struct cpumask *flush_mask, struct bau_control *bcp) 648 + struct cpumask *flush_mask, struct bau_control *bcp) 752 649 { 753 - int right_shift; 754 - int completion_status = 0; 755 650 int seq_number = 0; 651 + int completion_stat = 0; 756 652 long try = 0; 757 - int cpu = bcp->uvhub_cpu; 758 - int this_cpu = bcp->cpu; 759 - unsigned long mmr_offset; 760 653 unsigned long index; 761 654 cycles_t time1; 762 655 cycles_t time2; 763 - cycles_t elapsed; 764 656 struct ptc_stats *stat = bcp->statp; 765 - struct bau_control *smaster = bcp->socket_master; 766 657 struct bau_control *hmaster = bcp->uvhub_master; 767 658 768 - if (is_uv1_hub() && 769 - !atomic_inc_unless_ge(&hmaster->uvhub_lock, 770 - &hmaster->active_descriptor_count, 771 - hmaster->max_bau_concurrent)) { 772 - stat->s_throttles++; 773 - do { 774 - cpu_relax(); 775 - } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, 776 - &hmaster->active_descriptor_count, 777 - hmaster->max_bau_concurrent)); 778 - } 659 + if (is_uv1_hub()) 660 + uv1_throttle(hmaster, stat); 661 + 779 662 while (hmaster->uvhub_quiesce) 780 663 cpu_relax(); 781 664 782 - if (cpu < UV_CPUS_PER_ACT_STATUS) { 783 - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; 784 - right_shift = cpu * UV_ACT_STATUS_SIZE; 785 - } else { 786 - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; 787 - right_shift = 788 - ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); 789 - } 790 665 time1 = get_cycles(); 791 666 do { 792 667 if (try == 0) { ··· 774 693 bau_desc->header.msg_type = MSG_RETRY; 775 694 stat->s_retry_messages++; 776 695 } 777 - bau_desc->header.sequence = seq_number; 778 - index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | 779 - bcp->uvhub_cpu; 780 - bcp->send_message = get_cycles(); 781 - 
uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); 782 - try++; 783 - completion_status = uv_wait_completion(bau_desc, mmr_offset, 784 - right_shift, this_cpu, bcp, smaster, try); 785 696 786 - if (completion_status == FLUSH_RETRY_PLUGGED) { 787 - destination_plugged(bau_desc, bcp, hmaster, stat); 788 - } else if (completion_status == FLUSH_RETRY_TIMEOUT) { 789 - destination_timeout(bau_desc, bcp, hmaster, stat); 790 - } 697 + bau_desc->header.sequence = seq_number; 698 + index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; 699 + bcp->send_message = get_cycles(); 700 + 701 + write_mmr_activation(index); 702 + 703 + try++; 704 + completion_stat = wait_completion(bau_desc, bcp, try); 705 + 706 + handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); 707 + 791 708 if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { 792 709 bcp->ipi_attempts = 0; 793 - completion_status = FLUSH_GIVEUP; 710 + completion_stat = FLUSH_GIVEUP; 794 711 break; 795 712 } 796 713 cpu_relax(); 797 - } while ((completion_status == FLUSH_RETRY_PLUGGED) || 798 - (completion_status == FLUSH_RETRY_TIMEOUT)); 714 + } while ((completion_stat == FLUSH_RETRY_PLUGGED) || 715 + (completion_stat == FLUSH_RETRY_TIMEOUT)); 716 + 799 717 time2 = get_cycles(); 800 - bcp->plugged_tries = 0; 801 - bcp->timeout_tries = 0; 802 - if ((completion_status == FLUSH_COMPLETE) && 803 - (bcp->conseccompletes > bcp->complete_threshold) && 804 - (hmaster->max_bau_concurrent < 805 - hmaster->max_bau_concurrent_constant)) 806 - hmaster->max_bau_concurrent++; 718 + 719 + count_max_concurr(completion_stat, bcp, hmaster); 720 + 807 721 while (hmaster->uvhub_quiesce) 808 722 cpu_relax(); 723 + 809 724 atomic_dec(&hmaster->active_descriptor_count); 810 - if (time2 > time1) { 811 - elapsed = time2 - time1; 812 - stat->s_time += elapsed; 813 - if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { 814 - bcp->period_requests++; 815 - bcp->period_time += elapsed; 816 - if ((elapsed > congested_cycles) && 817 - 
(bcp->period_requests > bcp->congested_reps)) { 818 - disable_for_congestion(bcp, stat); 819 - } 820 - } 821 - } else 822 - stat->s_requestor--; 823 - if (completion_status == FLUSH_COMPLETE && try > 1) 824 - stat->s_retriesok++; 825 - else if (completion_status == FLUSH_GIVEUP) { 826 - stat->s_giveup++; 725 + 726 + record_send_stats(time1, time2, bcp, stat, completion_stat, try); 727 + 728 + if (completion_stat == FLUSH_GIVEUP) 827 729 return 1; 828 - } 829 730 return 0; 830 731 } 831 732 832 - /** 833 - * uv_flush_tlb_others - globally purge translation cache of a virtual 834 - * address or all TLB's 733 + /* 734 + * The BAU is disabled. When the disabled time period has expired, the cpu 735 + * that disabled it must re-enable it. 736 + * Return 0 if it is re-enabled for all cpus. 737 + */ 738 + static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) 739 + { 740 + int tcpu; 741 + struct bau_control *tbcp; 742 + 743 + if (bcp->set_bau_off) { 744 + if (get_cycles() >= bcp->set_bau_on_time) { 745 + stat->s_bau_reenabled++; 746 + baudisabled = 0; 747 + for_each_present_cpu(tcpu) { 748 + tbcp = &per_cpu(bau_control, tcpu); 749 + tbcp->baudisabled = 0; 750 + tbcp->period_requests = 0; 751 + tbcp->period_time = 0; 752 + } 753 + return 0; 754 + } 755 + } 756 + return -1; 757 + } 758 + 759 + static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs, 760 + int remotes, struct bau_desc *bau_desc) 761 + { 762 + stat->s_requestor++; 763 + stat->s_ntargcpu += remotes + locals; 764 + stat->s_ntargremotes += remotes; 765 + stat->s_ntarglocals += locals; 766 + 767 + /* uvhub statistics */ 768 + hubs = bau_uvhub_weight(&bau_desc->distribution); 769 + if (locals) { 770 + stat->s_ntarglocaluvhub++; 771 + stat->s_ntargremoteuvhub += (hubs - 1); 772 + } else 773 + stat->s_ntargremoteuvhub += hubs; 774 + 775 + stat->s_ntarguvhub += hubs; 776 + 777 + if (hubs >= 16) 778 + stat->s_ntarguvhub16++; 779 + else if (hubs >= 8) 780 + 
stat->s_ntarguvhub8++; 781 + else if (hubs >= 4) 782 + stat->s_ntarguvhub4++; 783 + else if (hubs >= 2) 784 + stat->s_ntarguvhub2++; 785 + else 786 + stat->s_ntarguvhub1++; 787 + } 788 + 789 + /* 790 + * Translate a cpu mask to the uvhub distribution mask in the BAU 791 + * activation descriptor. 792 + */ 793 + static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, 794 + struct bau_desc *bau_desc, int *localsp, int *remotesp) 795 + { 796 + int cpu; 797 + int pnode; 798 + int cnt = 0; 799 + struct hub_and_pnode *hpp; 800 + 801 + for_each_cpu(cpu, flush_mask) { 802 + /* 803 + * The distribution vector is a bit map of pnodes, relative 804 + * to the partition base pnode (and the partition base nasid 805 + * in the header). 806 + * Translate cpu to pnode and hub using a local memory array. 807 + */ 808 + hpp = &bcp->socket_master->thp[cpu]; 809 + pnode = hpp->pnode - bcp->partition_base_pnode; 810 + bau_uvhub_set(pnode, &bau_desc->distribution); 811 + cnt++; 812 + if (hpp->uvhub == bcp->uvhub) 813 + (*localsp)++; 814 + else 815 + (*remotesp)++; 816 + } 817 + if (!cnt) 818 + return 1; 819 + return 0; 820 + } 821 + 822 + /* 823 + * globally purge translation cache of a virtual address or all TLB's 835 824 * @cpumask: mask of all cpu's in which the address is to be removed 836 825 * @mm: mm_struct containing virtual address range 837 826 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) ··· 925 774 * done. The returned pointer is valid till preemption is re-enabled. 
926 775 */ 927 776 const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 928 - struct mm_struct *mm, 929 - unsigned long va, unsigned int cpu) 777 + struct mm_struct *mm, unsigned long va, 778 + unsigned int cpu) 930 779 { 931 780 int locals = 0; 932 781 int remotes = 0; 933 782 int hubs = 0; 934 - int tcpu; 935 - int tpnode; 936 783 struct bau_desc *bau_desc; 937 784 struct cpumask *flush_mask; 938 785 struct ptc_stats *stat; 939 786 struct bau_control *bcp; 940 - struct bau_control *tbcp; 941 - struct hub_and_pnode *hpp; 942 787 943 788 /* kernel was booted 'nobau' */ 944 789 if (nobau) ··· 945 798 946 799 /* bau was disabled due to slow response */ 947 800 if (bcp->baudisabled) { 948 - /* the cpu that disabled it must re-enable it */ 949 - if (bcp->set_bau_off) { 950 - if (get_cycles() >= bcp->set_bau_on_time) { 951 - stat->s_bau_reenabled++; 952 - baudisabled = 0; 953 - for_each_present_cpu(tcpu) { 954 - tbcp = &per_cpu(bau_control, tcpu); 955 - tbcp->baudisabled = 0; 956 - tbcp->period_requests = 0; 957 - tbcp->period_time = 0; 958 - } 959 - } 960 - } 961 - return cpumask; 801 + if (check_enable(bcp, stat)) 802 + return cpumask; 962 803 } 963 804 964 805 /* ··· 957 822 flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); 958 823 /* don't actually do a shootdown of the local cpu */ 959 824 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); 825 + 960 826 if (cpu_isset(cpu, *cpumask)) 961 827 stat->s_ntargself++; 962 828 963 829 bau_desc = bcp->descriptor_base; 964 - bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; 830 + bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu; 965 831 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 966 - 967 - for_each_cpu(tcpu, flush_mask) { 968 - /* 969 - * The distribution vector is a bit map of pnodes, relative 970 - * to the partition base pnode (and the partition base nasid 971 - * in the header). 972 - * Translate cpu to pnode and hub using an array stored 973 - * in local memory. 
974 - */ 975 - hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; 976 - tpnode = hpp->pnode - bcp->partition_base_pnode; 977 - bau_uvhub_set(tpnode, &bau_desc->distribution); 978 - if (hpp->uvhub == bcp->uvhub) 979 - locals++; 980 - else 981 - remotes++; 982 - } 983 - if ((locals + remotes) == 0) 832 + if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) 984 833 return NULL; 985 - stat->s_requestor++; 986 - stat->s_ntargcpu += remotes + locals; 987 - stat->s_ntargremotes += remotes; 988 - stat->s_ntarglocals += locals; 989 - remotes = bau_uvhub_weight(&bau_desc->distribution); 990 834 991 - /* uvhub statistics */ 992 - hubs = bau_uvhub_weight(&bau_desc->distribution); 993 - if (locals) { 994 - stat->s_ntarglocaluvhub++; 995 - stat->s_ntargremoteuvhub += (hubs - 1); 996 - } else 997 - stat->s_ntargremoteuvhub += hubs; 998 - stat->s_ntarguvhub += hubs; 999 - if (hubs >= 16) 1000 - stat->s_ntarguvhub16++; 1001 - else if (hubs >= 8) 1002 - stat->s_ntarguvhub8++; 1003 - else if (hubs >= 4) 1004 - stat->s_ntarguvhub4++; 1005 - else if (hubs >= 2) 1006 - stat->s_ntarguvhub2++; 1007 - else 1008 - stat->s_ntarguvhub1++; 835 + record_send_statistics(stat, locals, hubs, remotes, bau_desc); 1009 836 1010 837 bau_desc->payload.address = va; 1011 838 bau_desc->payload.sending_cpu = cpu; 1012 - 1013 839 /* 1014 840 * uv_flush_send_and_wait returns 0 if all cpu's were messaged, 1015 841 * or 1 if it gave up and the original cpumask should be returned. 
··· 999 903 { 1000 904 int count = 0; 1001 905 cycles_t time_start; 1002 - struct bau_payload_queue_entry *msg; 906 + struct bau_pq_entry *msg; 1003 907 struct bau_control *bcp; 1004 908 struct ptc_stats *stat; 1005 909 struct msg_desc msgdesc; 1006 910 1007 911 time_start = get_cycles(); 912 + 1008 913 bcp = &per_cpu(bau_control, smp_processor_id()); 1009 914 stat = bcp->statp; 1010 - msgdesc.va_queue_first = bcp->va_queue_first; 1011 - msgdesc.va_queue_last = bcp->va_queue_last; 915 + 916 + msgdesc.queue_first = bcp->queue_first; 917 + msgdesc.queue_last = bcp->queue_last; 918 + 1012 919 msg = bcp->bau_msg_head; 1013 - while (msg->sw_ack_vector) { 920 + while (msg->swack_vec) { 1014 921 count++; 1015 - msgdesc.msg_slot = msg - msgdesc.va_queue_first; 1016 - msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; 922 + 923 + msgdesc.msg_slot = msg - msgdesc.queue_first; 924 + msgdesc.swack_slot = ffs(msg->swack_vec) - 1; 1017 925 msgdesc.msg = msg; 1018 - uv_bau_process_message(&msgdesc, bcp); 926 + bau_process_message(&msgdesc, bcp); 927 + 1019 928 msg++; 1020 - if (msg > msgdesc.va_queue_last) 1021 - msg = msgdesc.va_queue_first; 929 + if (msg > msgdesc.queue_last) 930 + msg = msgdesc.queue_first; 1022 931 bcp->bau_msg_head = msg; 1023 932 } 1024 933 stat->d_time += (get_cycles() - time_start); ··· 1031 930 stat->d_nomsg++; 1032 931 else if (count > 1) 1033 932 stat->d_multmsg++; 933 + 1034 934 ack_APIC_irq(); 1035 935 } 1036 936 1037 937 /* 1038 - * uv_enable_timeouts 1039 - * 1040 - * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have 938 + * Each target uvhub (i.e. a uvhub that has cpu's) needs to have 1041 939 * shootdown message timeouts enabled. The timeout does not cause 1042 940 * an interrupt, but causes an error message to be returned to 1043 941 * the sender. 
1044 942 */ 1045 - static void __init uv_enable_timeouts(void) 943 + static void __init enable_timeouts(void) 1046 944 { 1047 945 int uvhub; 1048 946 int nuvhubs; ··· 1055 955 continue; 1056 956 1057 957 pnode = uv_blade_to_pnode(uvhub); 1058 - mmr_image = 1059 - uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); 958 + mmr_image = read_mmr_misc_control(pnode); 1060 959 /* 1061 960 * Set the timeout period and then lock it in, in three 1062 961 * steps; captures and locks in the period. 1063 962 * 1064 963 * To program the period, the SOFT_ACK_MODE must be off. 1065 964 */ 1066 - mmr_image &= ~((unsigned long)1 << 1067 - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); 1068 - uv_write_global_mmr64 1069 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); 965 + mmr_image &= ~(1L << SOFTACK_MSHIFT); 966 + write_mmr_misc_control(pnode, mmr_image); 1070 967 /* 1071 968 * Set the 4-bit period. 1072 969 */ 1073 - mmr_image &= ~((unsigned long)0xf << 1074 - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); 1075 - mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << 1076 - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); 1077 - uv_write_global_mmr64 1078 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); 970 + mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT); 971 + mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT); 972 + write_mmr_misc_control(pnode, mmr_image); 1079 973 /* 1080 974 * UV1: 1081 975 * Subsequent reversals of the timebase bit (3) cause an 1082 976 * immediate timeout of one or all INTD resources as 1083 977 * indicated in bits 2:0 (7 causes all of them to timeout). 
1084 978 */ 1085 - mmr_image |= ((unsigned long)1 << 1086 - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); 979 + mmr_image |= (1L << SOFTACK_MSHIFT); 1087 980 if (is_uv2_hub()) { 1088 - mmr_image |= ((unsigned long)1 << UV2_LEG_SHFT); 1089 - mmr_image |= ((unsigned long)1 << UV2_EXT_SHFT); 981 + mmr_image |= (1L << UV2_LEG_SHFT); 982 + mmr_image |= (1L << UV2_EXT_SHFT); 1090 983 } 1091 - uv_write_global_mmr64 1092 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); 984 + write_mmr_misc_control(pnode, mmr_image); 1093 985 } 1094 986 } 1095 987 1096 - static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) 988 + static void *ptc_seq_start(struct seq_file *file, loff_t *offset) 1097 989 { 1098 990 if (*offset < num_possible_cpus()) 1099 991 return offset; 1100 992 return NULL; 1101 993 } 1102 994 1103 - static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) 995 + static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) 1104 996 { 1105 997 (*offset)++; 1106 998 if (*offset < num_possible_cpus()) ··· 1100 1008 return NULL; 1101 1009 } 1102 1010 1103 - static void uv_ptc_seq_stop(struct seq_file *file, void *data) 1011 + static void ptc_seq_stop(struct seq_file *file, void *data) 1104 1012 { 1105 1013 } 1106 1014 1107 - static inline unsigned long long 1108 - microsec_2_cycles(unsigned long microsec) 1015 + static inline unsigned long long usec_2_cycles(unsigned long microsec) 1109 1016 { 1110 1017 unsigned long ns; 1111 1018 unsigned long long cyc; ··· 1115 1024 } 1116 1025 1117 1026 /* 1118 - * Display the statistics thru /proc. 1027 + * Display the statistics thru /proc/sgi_uv/ptc_statistics 1119 1028 * 'data' points to the cpu number 1029 + * Note: see the descriptions in stat_description[]. 
1120 1030 */ 1121 - static int uv_ptc_seq_show(struct seq_file *file, void *data) 1031 + static int ptc_seq_show(struct seq_file *file, void *data) 1122 1032 { 1123 1033 struct ptc_stats *stat; 1124 1034 int cpu; 1125 1035 1126 1036 cpu = *(loff_t *)data; 1127 - 1128 1037 if (!cpu) { 1129 1038 seq_printf(file, 1130 1039 "# cpu sent stime self locals remotes ncpus localhub "); 1131 1040 seq_printf(file, 1132 1041 "remotehub numuvhubs numuvhubs16 numuvhubs8 "); 1133 1042 seq_printf(file, 1134 - "numuvhubs4 numuvhubs2 numuvhubs1 dto "); 1043 + "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok "); 1135 1044 seq_printf(file, 1136 - "retries rok resetp resett giveup sto bz throt "); 1045 + "resetp resett giveup sto bz throt swack recv rtime "); 1137 1046 seq_printf(file, 1138 - "sw_ack recv rtime all "); 1139 - seq_printf(file, 1140 - "one mult none retry canc nocan reset rcan "); 1047 + "all one mult none retry canc nocan reset rcan "); 1141 1048 seq_printf(file, 1142 1049 "disable enable\n"); 1143 1050 } ··· 1162 1073 /* destination side statistics */ 1163 1074 seq_printf(file, 1164 1075 "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", 1165 - uv_read_global_mmr64(uv_cpu_to_pnode(cpu), 1166 - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), 1076 + read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), 1167 1077 stat->d_requestee, cycles_2_us(stat->d_time), 1168 1078 stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, 1169 1079 stat->d_nomsg, stat->d_retries, stat->d_canceled, ··· 1171 1083 seq_printf(file, "%ld %ld\n", 1172 1084 stat->s_bau_disabled, stat->s_bau_reenabled); 1173 1085 } 1174 - 1175 1086 return 0; 1176 1087 } 1177 1088 ··· 1178 1091 * Display the tunables thru debugfs 1179 1092 */ 1180 1093 static ssize_t tunables_read(struct file *file, char __user *userbuf, 1181 - size_t count, loff_t *ppos) 1094 + size_t count, loff_t *ppos) 1182 1095 { 1183 1096 char *buf; 1184 1097 int ret; 1185 1098 1186 1099 buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", 1187 - 
"max_bau_concurrent plugged_delay plugsb4reset", 1100 + "max_concur plugged_delay plugsb4reset", 1188 1101 "timeoutsb4reset ipi_reset_limit complete_threshold", 1189 1102 "congested_response_us congested_reps congested_period", 1190 - max_bau_concurrent, plugged_delay, plugsb4reset, 1103 + max_concurr, plugged_delay, plugsb4reset, 1191 1104 timeoutsb4reset, ipi_reset_limit, complete_threshold, 1192 - congested_response_us, congested_reps, congested_period); 1105 + congested_respns_us, congested_reps, congested_period); 1193 1106 1194 1107 if (!buf) 1195 1108 return -ENOMEM; ··· 1200 1113 } 1201 1114 1202 1115 /* 1203 - * -1: resetf the statistics 1116 + * handle a write to /proc/sgi_uv/ptc_statistics 1117 + * -1: reset the statistics 1204 1118 * 0: display meaning of the statistics 1205 1119 */ 1206 - static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, 1207 - size_t count, loff_t *data) 1120 + static ssize_t ptc_proc_write(struct file *file, const char __user *user, 1121 + size_t count, loff_t *data) 1208 1122 { 1209 1123 int cpu; 1124 + int i; 1125 + int elements; 1210 1126 long input_arg; 1211 1127 char optstr[64]; 1212 1128 struct ptc_stats *stat; ··· 1219 1129 if (copy_from_user(optstr, user, count)) 1220 1130 return -EFAULT; 1221 1131 optstr[count - 1] = '\0'; 1132 + 1222 1133 if (strict_strtol(optstr, 10, &input_arg) < 0) { 1223 1134 printk(KERN_DEBUG "%s is invalid\n", optstr); 1224 1135 return -EINVAL; 1225 1136 } 1226 1137 1227 1138 if (input_arg == 0) { 1139 + elements = sizeof(stat_description)/sizeof(*stat_description); 1228 1140 printk(KERN_DEBUG "# cpu: cpu number\n"); 1229 1141 printk(KERN_DEBUG "Sender statistics:\n"); 1230 - printk(KERN_DEBUG 1231 - "sent: number of shootdown messages sent\n"); 1232 - printk(KERN_DEBUG 1233 - "stime: time spent sending messages\n"); 1234 - printk(KERN_DEBUG 1235 - "numuvhubs: number of hubs targeted with shootdown\n"); 1236 - printk(KERN_DEBUG 1237 - "numuvhubs16: number times 16 or more 
hubs targeted\n"); 1238 - printk(KERN_DEBUG 1239 - "numuvhubs8: number times 8 or more hubs targeted\n"); 1240 - printk(KERN_DEBUG 1241 - "numuvhubs4: number times 4 or more hubs targeted\n"); 1242 - printk(KERN_DEBUG 1243 - "numuvhubs2: number times 2 or more hubs targeted\n"); 1244 - printk(KERN_DEBUG 1245 - "numuvhubs1: number times 1 hub targeted\n"); 1246 - printk(KERN_DEBUG 1247 - "numcpus: number of cpus targeted with shootdown\n"); 1248 - printk(KERN_DEBUG 1249 - "dto: number of destination timeouts\n"); 1250 - printk(KERN_DEBUG 1251 - "retries: destination timeout retries sent\n"); 1252 - printk(KERN_DEBUG 1253 - "rok: : destination timeouts successfully retried\n"); 1254 - printk(KERN_DEBUG 1255 - "resetp: ipi-style resource resets for plugs\n"); 1256 - printk(KERN_DEBUG 1257 - "resett: ipi-style resource resets for timeouts\n"); 1258 - printk(KERN_DEBUG 1259 - "giveup: fall-backs to ipi-style shootdowns\n"); 1260 - printk(KERN_DEBUG 1261 - "sto: number of source timeouts\n"); 1262 - printk(KERN_DEBUG 1263 - "bz: number of stay-busy's\n"); 1264 - printk(KERN_DEBUG 1265 - "throt: number times spun in throttle\n"); 1266 - printk(KERN_DEBUG "Destination side statistics:\n"); 1267 - printk(KERN_DEBUG 1268 - "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); 1269 - printk(KERN_DEBUG 1270 - "recv: shootdown messages received\n"); 1271 - printk(KERN_DEBUG 1272 - "rtime: time spent processing messages\n"); 1273 - printk(KERN_DEBUG 1274 - "all: shootdown all-tlb messages\n"); 1275 - printk(KERN_DEBUG 1276 - "one: shootdown one-tlb messages\n"); 1277 - printk(KERN_DEBUG 1278 - "mult: interrupts that found multiple messages\n"); 1279 - printk(KERN_DEBUG 1280 - "none: interrupts that found no messages\n"); 1281 - printk(KERN_DEBUG 1282 - "retry: number of retry messages processed\n"); 1283 - printk(KERN_DEBUG 1284 - "canc: number messages canceled by retries\n"); 1285 - printk(KERN_DEBUG 1286 - "nocan: number retries that found nothing to cancel\n"); 1287 - 
printk(KERN_DEBUG 1288 - "reset: number of ipi-style reset requests processed\n"); 1289 - printk(KERN_DEBUG 1290 - "rcan: number messages canceled by reset requests\n"); 1291 - printk(KERN_DEBUG 1292 - "disable: number times use of the BAU was disabled\n"); 1293 - printk(KERN_DEBUG 1294 - "enable: number times use of the BAU was re-enabled\n"); 1142 + for (i = 0; i < elements; i++) 1143 + printk(KERN_DEBUG "%s\n", stat_description[i]); 1295 1144 } else if (input_arg == -1) { 1296 1145 for_each_present_cpu(cpu) { 1297 1146 stat = &per_cpu(ptcstats, cpu); ··· 1257 1228 } 1258 1229 1259 1230 /* 1260 - * set the tunables 1261 - * 0 values reset them to defaults 1231 + * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables. 1232 + * Zero values reset them to defaults. 1262 1233 */ 1263 - static ssize_t tunables_write(struct file *file, const char __user *user, 1264 - size_t count, loff_t *data) 1234 + static int parse_tunables_write(struct bau_control *bcp, char *instr, 1235 + int count) 1265 1236 { 1266 - int cpu; 1267 - int cnt = 0; 1268 - int val; 1269 1237 char *p; 1270 1238 char *q; 1271 - char instr[64]; 1272 - struct bau_control *bcp; 1239 + int cnt = 0; 1240 + int val; 1241 + int e = sizeof(tunables) / sizeof(*tunables); 1273 1242 1274 - if (count == 0 || count > sizeof(instr)-1) 1275 - return -EINVAL; 1276 - if (copy_from_user(instr, user, count)) 1277 - return -EFAULT; 1278 - 1279 - instr[count] = '\0'; 1280 - /* count the fields */ 1281 1243 p = instr + strspn(instr, WHITESPACE); 1282 1244 q = p; 1283 1245 for (; *p; p = q + strspn(q, WHITESPACE)) { ··· 1277 1257 if (q == p) 1278 1258 break; 1279 1259 } 1280 - if (cnt != 9) { 1281 - printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); 1260 + if (cnt != e) { 1261 + printk(KERN_INFO "bau tunable error: should be %d values\n", e); 1282 1262 return -EINVAL; 1283 1263 } 1284 1264 ··· 1290 1270 switch (cnt) { 1291 1271 case 0: 1292 1272 if (val == 0) { 1293 - max_bau_concurrent = 
MAX_BAU_CONCURRENT; 1294 - max_bau_concurrent_constant = 1295 - MAX_BAU_CONCURRENT; 1273 + max_concurr = MAX_BAU_CONCURRENT; 1274 + max_concurr_const = MAX_BAU_CONCURRENT; 1296 1275 continue; 1297 1276 } 1298 - bcp = &per_cpu(bau_control, smp_processor_id()); 1299 1277 if (val < 1 || val > bcp->cpus_in_uvhub) { 1300 1278 printk(KERN_DEBUG 1301 1279 "Error: BAU max concurrent %d is invalid\n", 1302 1280 val); 1303 1281 return -EINVAL; 1304 1282 } 1305 - max_bau_concurrent = val; 1306 - max_bau_concurrent_constant = val; 1283 + max_concurr = val; 1284 + max_concurr_const = val; 1307 1285 continue; 1308 - case 1: 1286 + default: 1309 1287 if (val == 0) 1310 - plugged_delay = PLUGGED_DELAY; 1288 + *tunables[cnt].tunp = tunables[cnt].deflt; 1311 1289 else 1312 - plugged_delay = val; 1313 - continue; 1314 - case 2: 1315 - if (val == 0) 1316 - plugsb4reset = PLUGSB4RESET; 1317 - else 1318 - plugsb4reset = val; 1319 - continue; 1320 - case 3: 1321 - if (val == 0) 1322 - timeoutsb4reset = TIMEOUTSB4RESET; 1323 - else 1324 - timeoutsb4reset = val; 1325 - continue; 1326 - case 4: 1327 - if (val == 0) 1328 - ipi_reset_limit = IPI_RESET_LIMIT; 1329 - else 1330 - ipi_reset_limit = val; 1331 - continue; 1332 - case 5: 1333 - if (val == 0) 1334 - complete_threshold = COMPLETE_THRESHOLD; 1335 - else 1336 - complete_threshold = val; 1337 - continue; 1338 - case 6: 1339 - if (val == 0) 1340 - congested_response_us = CONGESTED_RESPONSE_US; 1341 - else 1342 - congested_response_us = val; 1343 - continue; 1344 - case 7: 1345 - if (val == 0) 1346 - congested_reps = CONGESTED_REPS; 1347 - else 1348 - congested_reps = val; 1349 - continue; 1350 - case 8: 1351 - if (val == 0) 1352 - congested_period = CONGESTED_PERIOD; 1353 - else 1354 - congested_period = val; 1290 + *tunables[cnt].tunp = val; 1355 1291 continue; 1356 1292 } 1357 1293 if (q == p) 1358 1294 break; 1359 1295 } 1296 + return 0; 1297 + } 1298 + 1299 + /* 1300 + * Handle a write to debugfs. 
(/sys/kernel/debug/sgi_uv/bau_tunables) 1301 + */ 1302 + static ssize_t tunables_write(struct file *file, const char __user *user, 1303 + size_t count, loff_t *data) 1304 + { 1305 + int cpu; 1306 + int ret; 1307 + char instr[100]; 1308 + struct bau_control *bcp; 1309 + 1310 + if (count == 0 || count > sizeof(instr)-1) 1311 + return -EINVAL; 1312 + if (copy_from_user(instr, user, count)) 1313 + return -EFAULT; 1314 + 1315 + instr[count] = '\0'; 1316 + 1317 + bcp = &per_cpu(bau_control, smp_processor_id()); 1318 + 1319 + ret = parse_tunables_write(bcp, instr, count); 1320 + if (ret) 1321 + return ret; 1322 + 1360 1323 for_each_present_cpu(cpu) { 1361 1324 bcp = &per_cpu(bau_control, cpu); 1362 - bcp->max_bau_concurrent = max_bau_concurrent; 1363 - bcp->max_bau_concurrent_constant = max_bau_concurrent; 1364 - bcp->plugged_delay = plugged_delay; 1365 - bcp->plugsb4reset = plugsb4reset; 1366 - bcp->timeoutsb4reset = timeoutsb4reset; 1367 - bcp->ipi_reset_limit = ipi_reset_limit; 1368 - bcp->complete_threshold = complete_threshold; 1369 - bcp->congested_response_us = congested_response_us; 1370 - bcp->congested_reps = congested_reps; 1371 - bcp->congested_period = congested_period; 1325 + bcp->max_concurr = max_concurr; 1326 + bcp->max_concurr_const = max_concurr; 1327 + bcp->plugged_delay = plugged_delay; 1328 + bcp->plugsb4reset = plugsb4reset; 1329 + bcp->timeoutsb4reset = timeoutsb4reset; 1330 + bcp->ipi_reset_limit = ipi_reset_limit; 1331 + bcp->complete_threshold = complete_threshold; 1332 + bcp->cong_response_us = congested_respns_us; 1333 + bcp->cong_reps = congested_reps; 1334 + bcp->cong_period = congested_period; 1372 1335 } 1373 1336 return count; 1374 1337 } 1375 1338 1376 1339 static const struct seq_operations uv_ptc_seq_ops = { 1377 - .start = uv_ptc_seq_start, 1378 - .next = uv_ptc_seq_next, 1379 - .stop = uv_ptc_seq_stop, 1380 - .show = uv_ptc_seq_show 1340 + .start = ptc_seq_start, 1341 + .next = ptc_seq_next, 1342 + .stop = ptc_seq_stop, 1343 + .show 
= ptc_seq_show 1381 1344 }; 1382 1345 1383 - static int uv_ptc_proc_open(struct inode *inode, struct file *file) 1346 + static int ptc_proc_open(struct inode *inode, struct file *file) 1384 1347 { 1385 1348 return seq_open(file, &uv_ptc_seq_ops); 1386 1349 } ··· 1374 1371 } 1375 1372 1376 1373 static const struct file_operations proc_uv_ptc_operations = { 1377 - .open = uv_ptc_proc_open, 1374 + .open = ptc_proc_open, 1378 1375 .read = seq_read, 1379 - .write = uv_ptc_proc_write, 1376 + .write = ptc_proc_write, 1380 1377 .llseek = seq_lseek, 1381 1378 .release = seq_release, 1382 1379 }; ··· 1410 1407 return -EINVAL; 1411 1408 } 1412 1409 tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, 1413 - tunables_dir, NULL, &tunables_fops); 1410 + tunables_dir, NULL, &tunables_fops); 1414 1411 if (!tunables_file) { 1415 1412 printk(KERN_ERR "unable to create debugfs file %s\n", 1416 1413 UV_BAU_TUNABLES_FILE); ··· 1422 1419 /* 1423 1420 * Initialize the sending side's sending buffers. 
1424 1421 */ 1425 - static void 1426 - uv_activation_descriptor_init(int node, int pnode, int base_pnode) 1422 + static void activation_descriptor_init(int node, int pnode, int base_pnode) 1427 1423 { 1428 1424 int i; 1429 1425 int cpu; 1430 1426 unsigned long pa; 1431 1427 unsigned long m; 1432 1428 unsigned long n; 1429 + size_t dsize; 1433 1430 struct bau_desc *bau_desc; 1434 1431 struct bau_desc *bd2; 1435 1432 struct bau_control *bcp; 1436 1433 1437 1434 /* 1438 - * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) 1439 - * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE) 1435 + * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC) 1436 + * per cpu; and one per cpu on the uvhub (ADP_SZ) 1440 1437 */ 1441 - bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE 1442 - * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); 1438 + dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC; 1439 + bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); 1443 1440 BUG_ON(!bau_desc); 1444 1441 1445 1442 pa = uv_gpa(bau_desc); /* need the real nasid*/ ··· 1447 1444 m = pa & uv_mmask; 1448 1445 1449 1446 /* the 14-bit pnode */ 1450 - uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, 1451 - (n << UV_DESC_BASE_PNODE_SHIFT | m)); 1447 + write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); 1452 1448 /* 1453 - * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each 1449 + * Initializing all 8 (ITEMS_PER_DESC) descriptors for each 1454 1450 * cpu even though we only use the first one; one descriptor can 1455 1451 * describe a broadcast to 256 uv hubs. 
1456 1452 */ 1457 - for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); 1458 - i++, bd2++) { 1453 + for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { 1459 1454 memset(bd2, 0, sizeof(struct bau_desc)); 1460 - bd2->header.sw_ack_flag = 1; 1455 + bd2->header.swack_flag = 1; 1461 1456 /* 1462 1457 * The base_dest_nasid set in the message header is the nasid 1463 1458 * of the first uvhub in the partition. The bit map will 1464 1459 * indicate destination pnode numbers relative to that base. 1465 1460 * They may not be consecutive if nasid striding is being used. 1466 1461 */ 1467 - bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); 1468 - bd2->header.dest_subnodeid = UV_LB_SUBNODEID; 1469 - bd2->header.command = UV_NET_ENDPOINT_INTD; 1470 - bd2->header.int_both = 1; 1462 + bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); 1463 + bd2->header.dest_subnodeid = UV_LB_SUBNODEID; 1464 + bd2->header.command = UV_NET_ENDPOINT_INTD; 1465 + bd2->header.int_both = 1; 1471 1466 /* 1472 1467 * all others need to be set to zero: 1473 1468 * fairness chaining multilevel count replied_to ··· 1485 1484 * - node is first node (kernel memory notion) on the uvhub 1486 1485 * - pnode is the uvhub's physical identifier 1487 1486 */ 1488 - static void 1489 - uv_payload_queue_init(int node, int pnode) 1487 + static void pq_init(int node, int pnode) 1490 1488 { 1491 - int pn; 1492 1489 int cpu; 1490 + size_t plsize; 1493 1491 char *cp; 1494 - unsigned long pa; 1495 - struct bau_payload_queue_entry *pqp; 1496 - struct bau_payload_queue_entry *pqp_malloc; 1492 + void *vp; 1493 + unsigned long pn; 1494 + unsigned long first; 1495 + unsigned long pn_first; 1496 + unsigned long last; 1497 + struct bau_pq_entry *pqp; 1497 1498 struct bau_control *bcp; 1498 1499 1499 - pqp = kmalloc_node((DEST_Q_SIZE + 1) 1500 - * sizeof(struct bau_payload_queue_entry), 1501 - GFP_KERNEL, node); 1500 + plsize = (DEST_Q_SIZE + 1) * sizeof(struct 
bau_pq_entry); 1501 + vp = kmalloc_node(plsize, GFP_KERNEL, node); 1502 + pqp = (struct bau_pq_entry *)vp; 1502 1503 BUG_ON(!pqp); 1503 - pqp_malloc = pqp; 1504 1504 1505 1505 cp = (char *)pqp + 31; 1506 - pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); 1506 + pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); 1507 1507 1508 1508 for_each_present_cpu(cpu) { 1509 1509 if (pnode != uv_cpu_to_pnode(cpu)) 1510 1510 continue; 1511 1511 /* for every cpu on this pnode: */ 1512 1512 bcp = &per_cpu(bau_control, cpu); 1513 - bcp->va_queue_first = pqp; 1514 - bcp->bau_msg_head = pqp; 1515 - bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); 1513 + bcp->queue_first = pqp; 1514 + bcp->bau_msg_head = pqp; 1515 + bcp->queue_last = pqp + (DEST_Q_SIZE - 1); 1516 1516 } 1517 1517 /* 1518 1518 * need the pnode of where the memory was really allocated 1519 1519 */ 1520 - pa = uv_gpa(pqp); 1521 - pn = pa >> uv_nshift; 1522 - uv_write_global_mmr64(pnode, 1523 - UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, 1524 - ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | 1525 - uv_physnodeaddr(pqp)); 1526 - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, 1527 - uv_physnodeaddr(pqp)); 1528 - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, 1529 - (unsigned long) 1530 - uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); 1520 + pn = uv_gpa(pqp) >> uv_nshift; 1521 + first = uv_physnodeaddr(pqp); 1522 + pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; 1523 + last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); 1524 + write_mmr_payload_first(pnode, pn_first); 1525 + write_mmr_payload_tail(pnode, first); 1526 + write_mmr_payload_last(pnode, last); 1527 + 1531 1528 /* in effect, all msg_type's are set to MSG_NOOP */ 1532 - memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); 1529 + memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); 1533 1530 } 1534 1531 1535 1532 /* 1536 1533 * Initialization of each UV hub's structures 
1537 1534 */ 1538 - static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) 1535 + static void __init init_uvhub(int uvhub, int vector, int base_pnode) 1539 1536 { 1540 1537 int node; 1541 1538 int pnode; ··· 1541 1542 1542 1543 node = uvhub_to_first_node(uvhub); 1543 1544 pnode = uv_blade_to_pnode(uvhub); 1544 - uv_activation_descriptor_init(node, pnode, base_pnode); 1545 - uv_payload_queue_init(node, pnode); 1545 + 1546 + activation_descriptor_init(node, pnode, base_pnode); 1547 + 1548 + pq_init(node, pnode); 1546 1549 /* 1547 1550 * The below initialization can't be in firmware because the 1548 1551 * messaging IRQ will be determined by the OS. 1549 1552 */ 1550 1553 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; 1551 - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1552 - ((apicid << 32) | vector)); 1554 + write_mmr_data_config(pnode, ((apicid << 32) | vector)); 1553 1555 } 1554 1556 1555 1557 /* 1556 1558 * We will set BAU_MISC_CONTROL with a timeout period. 1557 1559 * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. 1558 - * So the destination timeout period has be be calculated from them. 1560 + * So the destination timeout period has to be calculated from them. 
1559 1561 */ 1560 - static int 1561 - calculate_destination_timeout(void) 1562 + static int calculate_destination_timeout(void) 1562 1563 { 1563 1564 unsigned long mmr_image; 1564 1565 int mult1; ··· 1569 1570 unsigned long ts_ns; 1570 1571 1571 1572 if (is_uv1_hub()) { 1572 - mult1 = UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD & 1573 - BAU_MISC_CONTROL_MULT_MASK; 1573 + mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; 1574 1574 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); 1575 1575 index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; 1576 1576 mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); ··· 1581 1583 /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ 1582 1584 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); 1583 1585 mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; 1584 - if (mmr_image & ((unsigned long)1 << UV2_ACK_UNITS_SHFT)) 1586 + if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) 1585 1587 mult1 = 80; 1586 1588 else 1587 1589 mult1 = 10; ··· 1591 1593 return ret; 1592 1594 } 1593 1595 1594 - /* 1595 - * initialize the bau_control structure for each cpu 1596 - */ 1597 - static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) 1596 + static void __init init_per_cpu_tunables(void) 1598 1597 { 1599 - int i; 1600 1598 int cpu; 1601 - int tcpu; 1599 + struct bau_control *bcp; 1600 + 1601 + for_each_present_cpu(cpu) { 1602 + bcp = &per_cpu(bau_control, cpu); 1603 + bcp->baudisabled = 0; 1604 + bcp->statp = &per_cpu(ptcstats, cpu); 1605 + /* time interval to catch a hardware stay-busy bug */ 1606 + bcp->timeout_interval = usec_2_cycles(2*timeout_us); 1607 + bcp->max_concurr = max_concurr; 1608 + bcp->max_concurr_const = max_concurr; 1609 + bcp->plugged_delay = plugged_delay; 1610 + bcp->plugsb4reset = plugsb4reset; 1611 + bcp->timeoutsb4reset = timeoutsb4reset; 1612 + bcp->ipi_reset_limit = ipi_reset_limit; 1613 + bcp->complete_threshold = complete_threshold; 1614 + bcp->cong_response_us = congested_respns_us; 1615 + 
bcp->cong_reps = congested_reps; 1616 + bcp->cong_period = congested_period; 1617 + } 1618 + } 1619 + 1620 + /* 1621 + * Scan all cpus to collect blade and socket summaries. 1622 + */ 1623 + static int __init get_cpu_topology(int base_pnode, 1624 + struct uvhub_desc *uvhub_descs, 1625 + unsigned char *uvhub_mask) 1626 + { 1627 + int cpu; 1602 1628 int pnode; 1603 1629 int uvhub; 1604 - int have_hmaster; 1605 - short socket = 0; 1606 - unsigned short socket_mask; 1607 - unsigned char *uvhub_mask; 1630 + int socket; 1608 1631 struct bau_control *bcp; 1609 1632 struct uvhub_desc *bdp; 1610 1633 struct socket_desc *sdp; 1611 - struct bau_control *hmaster = NULL; 1612 - struct bau_control *smaster = NULL; 1613 - struct socket_desc { 1614 - short num_cpus; 1615 - short cpu_number[MAX_CPUS_PER_SOCKET]; 1616 - }; 1617 - struct uvhub_desc { 1618 - unsigned short socket_mask; 1619 - short num_cpus; 1620 - short uvhub; 1621 - short pnode; 1622 - struct socket_desc socket[2]; 1623 - }; 1624 - struct uvhub_desc *uvhub_descs; 1625 1634 1626 - timeout_us = calculate_destination_timeout(); 1627 - 1628 - uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); 1629 - memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); 1630 - uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); 1631 1635 for_each_present_cpu(cpu) { 1632 1636 bcp = &per_cpu(bau_control, cpu); 1637 + 1633 1638 memset(bcp, 0, sizeof(struct bau_control)); 1639 + 1634 1640 pnode = uv_cpu_hub_info(cpu)->pnode; 1635 - if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { 1641 + if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) { 1636 1642 printk(KERN_EMERG 1637 1643 "cpu %d pnode %d-%d beyond %d; BAU disabled\n", 1638 - cpu, pnode, base_part_pnode, 1639 - UV_DISTRIBUTION_SIZE); 1644 + cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); 1640 1645 return 1; 1641 1646 } 1647 + 1642 1648 bcp->osnode = cpu_to_node(cpu); 1643 - bcp->partition_base_pnode = uv_partition_base_pnode; 1649 + 
bcp->partition_base_pnode = base_pnode; 1650 + 1644 1651 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; 1645 1652 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); 1646 1653 bdp = &uvhub_descs[uvhub]; 1654 + 1647 1655 bdp->num_cpus++; 1648 1656 bdp->uvhub = uvhub; 1649 1657 bdp->pnode = pnode; 1658 + 1650 1659 /* kludge: 'assuming' one node per socket, and assuming that 1651 1660 disabling a socket just leaves a gap in node numbers */ 1652 1661 socket = bcp->osnode & 1; ··· 1662 1657 sdp->cpu_number[sdp->num_cpus] = cpu; 1663 1658 sdp->num_cpus++; 1664 1659 if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { 1665 - printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); 1660 + printk(KERN_EMERG "%d cpus per socket invalid\n", 1661 + sdp->num_cpus); 1666 1662 return 1; 1667 1663 } 1668 1664 } 1665 + return 0; 1666 + } 1667 + 1668 + /* 1669 + * Each socket is to get a local array of pnodes/hubs. 1670 + */ 1671 + static void make_per_cpu_thp(struct bau_control *smaster) 1672 + { 1673 + int cpu; 1674 + size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); 1675 + 1676 + smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); 1677 + memset(smaster->thp, 0, hpsz); 1678 + for_each_present_cpu(cpu) { 1679 + smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode; 1680 + smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; 1681 + } 1682 + } 1683 + 1684 + /* 1685 + * Initialize all the per_cpu information for the cpu's on a given socket, 1686 + * given what has been gathered into the socket_desc struct. 1687 + * And reports the chosen hub and socket masters back to the caller. 
1688 + */ 1689 + static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, 1690 + struct bau_control **smasterp, 1691 + struct bau_control **hmasterp) 1692 + { 1693 + int i; 1694 + int cpu; 1695 + struct bau_control *bcp; 1696 + 1697 + for (i = 0; i < sdp->num_cpus; i++) { 1698 + cpu = sdp->cpu_number[i]; 1699 + bcp = &per_cpu(bau_control, cpu); 1700 + bcp->cpu = cpu; 1701 + if (i == 0) { 1702 + *smasterp = bcp; 1703 + if (!(*hmasterp)) 1704 + *hmasterp = bcp; 1705 + } 1706 + bcp->cpus_in_uvhub = bdp->num_cpus; 1707 + bcp->cpus_in_socket = sdp->num_cpus; 1708 + bcp->socket_master = *smasterp; 1709 + bcp->uvhub = bdp->uvhub; 1710 + bcp->uvhub_master = *hmasterp; 1711 + bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; 1712 + if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { 1713 + printk(KERN_EMERG "%d cpus per uvhub invalid\n", 1714 + bcp->uvhub_cpu); 1715 + return 1; 1716 + } 1717 + } 1718 + return 0; 1719 + } 1720 + 1721 + /* 1722 + * Summarize the blade and socket topology into the per_cpu structures. 
1723 + */ 1724 + static int __init summarize_uvhub_sockets(int nuvhubs, 1725 + struct uvhub_desc *uvhub_descs, 1726 + unsigned char *uvhub_mask) 1727 + { 1728 + int socket; 1729 + int uvhub; 1730 + unsigned short socket_mask; 1731 + 1669 1732 for (uvhub = 0; uvhub < nuvhubs; uvhub++) { 1733 + struct uvhub_desc *bdp; 1734 + struct bau_control *smaster = NULL; 1735 + struct bau_control *hmaster = NULL; 1736 + 1670 1737 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) 1671 1738 continue; 1672 - have_hmaster = 0; 1739 + 1673 1740 bdp = &uvhub_descs[uvhub]; 1674 1741 socket_mask = bdp->socket_mask; 1675 1742 socket = 0; 1676 1743 while (socket_mask) { 1677 - if (!(socket_mask & 1)) 1678 - goto nextsocket; 1679 - sdp = &bdp->socket[socket]; 1680 - for (i = 0; i < sdp->num_cpus; i++) { 1681 - cpu = sdp->cpu_number[i]; 1682 - bcp = &per_cpu(bau_control, cpu); 1683 - bcp->cpu = cpu; 1684 - if (i == 0) { 1685 - smaster = bcp; 1686 - if (!have_hmaster) { 1687 - have_hmaster++; 1688 - hmaster = bcp; 1689 - } 1690 - } 1691 - bcp->cpus_in_uvhub = bdp->num_cpus; 1692 - bcp->cpus_in_socket = sdp->num_cpus; 1693 - bcp->socket_master = smaster; 1694 - bcp->uvhub = bdp->uvhub; 1695 - bcp->uvhub_master = hmaster; 1696 - bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> 1697 - blade_processor_id; 1698 - if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { 1699 - printk(KERN_EMERG 1700 - "%d cpus per uvhub invalid\n", 1701 - bcp->uvhub_cpu); 1744 + struct socket_desc *sdp; 1745 + if ((socket_mask & 1)) { 1746 + sdp = &bdp->socket[socket]; 1747 + if (scan_sock(sdp, bdp, &smaster, &hmaster)) 1702 1748 return 1; 1703 - } 1704 1749 } 1705 - nextsocket: 1706 1750 socket++; 1707 1751 socket_mask = (socket_mask >> 1); 1708 - /* each socket gets a local array of pnodes/hubs */ 1709 - bcp = smaster; 1710 - bcp->target_hub_and_pnode = kmalloc_node( 1711 - sizeof(struct hub_and_pnode) * 1712 - num_possible_cpus(), GFP_KERNEL, bcp->osnode); 1713 - memset(bcp->target_hub_and_pnode, 0, 1714 - sizeof(struct 
hub_and_pnode) * 1715 - num_possible_cpus()); 1716 - for_each_present_cpu(tcpu) { 1717 - bcp->target_hub_and_pnode[tcpu].pnode = 1718 - uv_cpu_hub_info(tcpu)->pnode; 1719 - bcp->target_hub_and_pnode[tcpu].uvhub = 1720 - uv_cpu_hub_info(tcpu)->numa_blade_id; 1721 - } 1752 + make_per_cpu_thp(smaster); 1722 1753 } 1723 1754 } 1755 + return 0; 1756 + } 1757 + 1758 + /* 1759 + * initialize the bau_control structure for each cpu 1760 + */ 1761 + static int __init init_per_cpu(int nuvhubs, int base_part_pnode) 1762 + { 1763 + unsigned char *uvhub_mask; 1764 + void *vp; 1765 + struct uvhub_desc *uvhub_descs; 1766 + 1767 + timeout_us = calculate_destination_timeout(); 1768 + 1769 + vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); 1770 + uvhub_descs = (struct uvhub_desc *)vp; 1771 + memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); 1772 + uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); 1773 + 1774 + if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) 1775 + return 1; 1776 + 1777 + if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) 1778 + return 1; 1779 + 1724 1780 kfree(uvhub_descs); 1725 1781 kfree(uvhub_mask); 1726 - for_each_present_cpu(cpu) { 1727 - bcp = &per_cpu(bau_control, cpu); 1728 - bcp->baudisabled = 0; 1729 - bcp->statp = &per_cpu(ptcstats, cpu); 1730 - /* time interval to catch a hardware stay-busy bug */ 1731 - bcp->timeout_interval = microsec_2_cycles(2*timeout_us); 1732 - bcp->max_bau_concurrent = max_bau_concurrent; 1733 - bcp->max_bau_concurrent_constant = max_bau_concurrent; 1734 - bcp->plugged_delay = plugged_delay; 1735 - bcp->plugsb4reset = plugsb4reset; 1736 - bcp->timeoutsb4reset = timeoutsb4reset; 1737 - bcp->ipi_reset_limit = ipi_reset_limit; 1738 - bcp->complete_threshold = complete_threshold; 1739 - bcp->congested_response_us = congested_response_us; 1740 - bcp->congested_reps = congested_reps; 1741 - bcp->congested_period = congested_period; 1742 - } 1782 + init_per_cpu_tunables(); 1743 1783 
return 0; 1744 1784 } 1745 1785 ··· 1797 1747 int pnode; 1798 1748 int nuvhubs; 1799 1749 int cur_cpu; 1750 + int cpus; 1800 1751 int vector; 1801 - unsigned long mmr; 1752 + cpumask_var_t *mask; 1802 1753 1803 1754 if (!is_uv_system()) 1804 1755 return 0; ··· 1807 1756 if (nobau) 1808 1757 return 0; 1809 1758 1810 - for_each_possible_cpu(cur_cpu) 1811 - zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), 1812 - GFP_KERNEL, cpu_to_node(cur_cpu)); 1759 + for_each_possible_cpu(cur_cpu) { 1760 + mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); 1761 + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); 1762 + } 1813 1763 1814 1764 uv_nshift = uv_hub_info->m_val; 1815 1765 uv_mmask = (1UL << uv_hub_info->m_val) - 1; 1816 1766 nuvhubs = uv_num_possible_blades(); 1817 1767 spin_lock_init(&disable_lock); 1818 - congested_cycles = microsec_2_cycles(congested_response_us); 1768 + congested_cycles = usec_2_cycles(congested_respns_us); 1819 1769 1820 - uv_partition_base_pnode = 0x7fffffff; 1770 + uv_base_pnode = 0x7fffffff; 1821 1771 for (uvhub = 0; uvhub < nuvhubs; uvhub++) { 1822 - if (uv_blade_nr_possible_cpus(uvhub) && 1823 - (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) 1824 - uv_partition_base_pnode = uv_blade_to_pnode(uvhub); 1772 + cpus = uv_blade_nr_possible_cpus(uvhub); 1773 + if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode)) 1774 + uv_base_pnode = uv_blade_to_pnode(uvhub); 1825 1775 } 1826 1776 1827 - if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { 1777 + if (init_per_cpu(nuvhubs, uv_base_pnode)) { 1828 1778 nobau = 1; 1829 1779 return 0; 1830 1780 } ··· 1833 1781 vector = UV_BAU_MESSAGE; 1834 1782 for_each_possible_blade(uvhub) 1835 1783 if (uv_blade_nr_possible_cpus(uvhub)) 1836 - uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); 1784 + init_uvhub(uvhub, vector, uv_base_pnode); 1837 1785 1838 - uv_enable_timeouts(); 1786 + enable_timeouts(); 1839 1787 alloc_intr_gate(vector, uv_bau_message_intr1); 1840 1788 1841 1789 
for_each_possible_blade(uvhub) { 1842 1790 if (uv_blade_nr_possible_cpus(uvhub)) { 1791 + unsigned long val; 1792 + unsigned long mmr; 1843 1793 pnode = uv_blade_to_pnode(uvhub); 1844 1794 /* INIT the bau */ 1845 - uv_write_global_mmr64(pnode, 1846 - UVH_LB_BAU_SB_ACTIVATION_CONTROL, 1847 - ((unsigned long)1 << 63)); 1795 + val = 1L << 63; 1796 + write_gmmr_activation(pnode, val); 1848 1797 mmr = 1; /* should be 1 to broadcast to both sockets */ 1849 - uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, 1850 - mmr); 1798 + write_mmr_data_broadcast(pnode, mmr); 1851 1799 } 1852 1800 } 1853 1801