taskstats: pad taskstats netlink response for alignment issues on ia64

The taskstats structure is internally aligned on 8 byte boundaries but the
layout of the aggregate reply, with two NLA headers and the pid (each 4
bytes), actually forces the entire structure to be unaligned. This causes
the kernel to issue unaligned access warnings on some architectures like
ia64. Unfortunately, some software out there doesn't properly unroll the
NLA packet and assumes that the start of the taskstats structure will
always be 20 bytes from the start of the netlink payload. Aligning the
start of the taskstats structure breaks this software, which we don't
want. So, for now the alignment only happens on architectures that
require it and those users will have to update to fixed versions of those
packages. Space is reserved in the packet only when needed. This ifdef
should be removed in several years (e.g. 2012), once we can be confident
that fixed versions are installed on most systems. We add the padding
before the aggregate since the aggregate is already a defined type.

Commit 85893120 ("delayacct: align to 8 byte boundary on 64-bit systems")
previously addressed the alignment issues by padding out the pid field.
This was supposed to be a compatible change but the circumstances
described above mean that it wasn't. This patch backs out that change,
since it was a hack, and introduces a new NULL attribute type to provide
the padding. Padding the response with 4 bytes avoids allocating an
aligned taskstats structure and copying it back. Since the structure
weighs in at 328 bytes, it's too big to do it on the stack.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reported-by: Brian Rogers <brian@xyzw.org>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Guillaume Chazarain <guichaz@gmail.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by Jeff Mahoney and committed by Linus Torvalds 4be2c95d 4e06fd14

+47 -14
+1
Documentation/accounting/getdelays.c
··· 516 default: 517 fprintf(stderr, "Unknown nla_type %d\n", 518 na->nla_type); 519 break; 520 } 521 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
··· 516 default: 517 fprintf(stderr, "Unknown nla_type %d\n", 518 na->nla_type); 519 + case TASKSTATS_TYPE_NULL: 520 break; 521 } 522 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
+2 -1
include/linux/taskstats.h
··· 33 */ 34 35 36 - #define TASKSTATS_VERSION 7 37 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN 38 * in linux/sched.h */ 39 ··· 188 TASKSTATS_TYPE_STATS, /* taskstats structure */ 189 TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ 190 TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ 191 __TASKSTATS_TYPE_MAX, 192 }; 193
··· 33 */ 34 35 36 + #define TASKSTATS_VERSION 8 37 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN 38 * in linux/sched.h */ 39 ··· 188 TASKSTATS_TYPE_STATS, /* taskstats structure */ 189 TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ 190 TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ 191 + TASKSTATS_TYPE_NULL, /* contains nothing */ 192 __TASKSTATS_TYPE_MAX, 193 }; 194
+44 -13
kernel/taskstats.c
··· 349 return ret; 350 } 351 352 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) 353 { 354 struct nlattr *na, *ret; 355 int aggr; 356 357 - /* If we don't pad, we end up with alignment on a 4 byte boundary. 358 - * This causes lots of runtime warnings on systems requiring 8 byte 359 - * alignment */ 360 - u32 pids[2] = { pid, 0 }; 361 - int pid_size = ALIGN(sizeof(pid), sizeof(long)); 362 - 363 aggr = (type == TASKSTATS_TYPE_PID) 364 ? TASKSTATS_TYPE_AGGR_PID 365 : TASKSTATS_TYPE_AGGR_TGID; 366 367 na = nla_nest_start(skb, aggr); 368 if (!na) 369 goto err; 370 - if (nla_put(skb, type, pid_size, pids) < 0) 371 goto err; 372 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 373 if (!ret) ··· 478 return rc; 479 } 480 481 static int cmd_attr_pid(struct genl_info *info) 482 { 483 struct taskstats *stats; ··· 498 u32 pid; 499 int rc; 500 501 - size = nla_total_size(sizeof(u32)) + 502 - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 503 504 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 505 if (rc < 0) ··· 527 u32 tgid; 528 int rc; 529 530 - size = nla_total_size(sizeof(u32)) + 531 - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 532 533 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 534 if (rc < 0) ··· 602 /* 603 * Size includes space for nested attributes 604 */ 605 - size = nla_total_size(sizeof(u32)) + 606 - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 607 608 is_thread_group = !!taskstats_tgid_alloc(tsk); 609 if (is_thread_group) {
··· 349 return ret; 350 } 351 352 + #ifdef CONFIG_IA64 353 + #define TASKSTATS_NEEDS_PADDING 1 354 + #endif 355 + 356 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) 357 { 358 struct nlattr *na, *ret; 359 int aggr; 360 361 aggr = (type == TASKSTATS_TYPE_PID) 362 ? TASKSTATS_TYPE_AGGR_PID 363 : TASKSTATS_TYPE_AGGR_TGID; 364 365 + /* 366 + * The taskstats structure is internally aligned on 8 byte 367 + * boundaries but the layout of the aggregrate reply, with 368 + * two NLA headers and the pid (each 4 bytes), actually 369 + * force the entire structure to be unaligned. This causes 370 + * the kernel to issue unaligned access warnings on some 371 + * architectures like ia64. Unfortunately, some software out there 372 + * doesn't properly unroll the NLA packet and assumes that the start 373 + * of the taskstats structure will always be 20 bytes from the start 374 + * of the netlink payload. Aligning the start of the taskstats 375 + * structure breaks this software, which we don't want. So, for now 376 + * the alignment only happens on architectures that require it 377 + * and those users will have to update to fixed versions of those 378 + * packages. Space is reserved in the packet only when needed. 379 + * This ifdef should be removed in several years e.g. 2012 once 380 + * we can be confident that fixed versions are installed on most 381 + * systems. We add the padding before the aggregate since the 382 + * aggregate is already a defined type. 
383 + */ 384 + #ifdef TASKSTATS_NEEDS_PADDING 385 + if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) 386 + goto err; 387 + #endif 388 na = nla_nest_start(skb, aggr); 389 if (!na) 390 goto err; 391 + 392 + if (nla_put(skb, type, sizeof(pid), &pid) < 0) 393 goto err; 394 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 395 if (!ret) ··· 456 return rc; 457 } 458 459 + static size_t taskstats_packet_size(void) 460 + { 461 + size_t size; 462 + 463 + size = nla_total_size(sizeof(u32)) + 464 + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 465 + #ifdef TASKSTATS_NEEDS_PADDING 466 + size += nla_total_size(0); /* Padding for alignment */ 467 + #endif 468 + return size; 469 + } 470 + 471 static int cmd_attr_pid(struct genl_info *info) 472 { 473 struct taskstats *stats; ··· 464 u32 pid; 465 int rc; 466 467 + size = taskstats_packet_size(); 468 469 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 470 if (rc < 0) ··· 494 u32 tgid; 495 int rc; 496 497 + size = taskstats_packet_size(); 498 499 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 500 if (rc < 0) ··· 570 /* 571 * Size includes space for nested attributes 572 */ 573 + size = taskstats_packet_size(); 574 575 is_thread_group = !!taskstats_tgid_alloc(tsk); 576 if (is_thread_group) {