Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'xfs-for-linus-3.16-rc5' of git://oss.sgi.com/xfs/xfs

Pull xfs fixes from Dave Chinner:
"Fixes for low memory performance regressions and a quota inode
handling regression.

These are regression fixes for issues recently introduced - the change
in the stack switch location is fairly important, so I've held off
sending this update until I was sure that it still addresses the stack
usage problem the original solved. So while the commits in the xfs
tree are recent, it has been under test for several weeks now"

* tag 'xfs-for-linus-3.16-rc5' of git://oss.sgi.com/xfs/xfs:
xfs: null unused quota inodes when quota is on
xfs: refine the allocation stack switch
Revert "xfs: block allocation work needs to be kswapd aware"

+106 -72
+2 -5
fs/xfs/xfs_bmap.c
··· 4298 4298 } 4299 4299 4300 4300 4301 - int 4302 - __xfs_bmapi_allocate( 4301 + static int 4302 + xfs_bmapi_allocate( 4303 4303 struct xfs_bmalloca *bma) 4304 4304 { 4305 4305 struct xfs_mount *mp = bma->ip->i_mount; ··· 4577 4577 bma.userdata = 0; 4578 4578 bma.flist = flist; 4579 4579 bma.firstblock = firstblock; 4580 - 4581 - if (flags & XFS_BMAPI_STACK_SWITCH) 4582 - bma.stack_switch = 1; 4583 4580 4584 4581 while (bno < end && n < *nmap) { 4585 4582 inhole = eof || bma.got.br_startoff > bno;
+1 -3
fs/xfs/xfs_bmap.h
··· 77 77 * from written to unwritten, otherwise convert from unwritten to written. 78 78 */ 79 79 #define XFS_BMAPI_CONVERT 0x040 80 - #define XFS_BMAPI_STACK_SWITCH 0x080 81 80 82 81 #define XFS_BMAPI_FLAGS \ 83 82 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ ··· 85 86 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 86 87 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 87 88 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 88 - { XFS_BMAPI_CONVERT, "CONVERT" }, \ 89 - { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" } 89 + { XFS_BMAPI_CONVERT, "CONVERT" } 90 90 91 91 92 92 static inline int xfs_bmapi_aflag(int w)
-53
fs/xfs/xfs_bmap_util.c
··· 249 249 } 250 250 251 251 /* 252 - * Stack switching interfaces for allocation 253 - */ 254 - static void 255 - xfs_bmapi_allocate_worker( 256 - struct work_struct *work) 257 - { 258 - struct xfs_bmalloca *args = container_of(work, 259 - struct xfs_bmalloca, work); 260 - unsigned long pflags; 261 - unsigned long new_pflags = PF_FSTRANS; 262 - 263 - /* 264 - * we are in a transaction context here, but may also be doing work 265 - * in kswapd context, and hence we may need to inherit that state 266 - * temporarily to ensure that we don't block waiting for memory reclaim 267 - * in any way. 268 - */ 269 - if (args->kswapd) 270 - new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 271 - 272 - current_set_flags_nested(&pflags, new_pflags); 273 - 274 - args->result = __xfs_bmapi_allocate(args); 275 - complete(args->done); 276 - 277 - current_restore_flags_nested(&pflags, new_pflags); 278 - } 279 - 280 - /* 281 - * Some allocation requests often come in with little stack to work on. Push 282 - * them off to a worker thread so there is lots of stack to use. Otherwise just 283 - * call directly to avoid the context switch overhead here. 284 - */ 285 - int 286 - xfs_bmapi_allocate( 287 - struct xfs_bmalloca *args) 288 - { 289 - DECLARE_COMPLETION_ONSTACK(done); 290 - 291 - if (!args->stack_switch) 292 - return __xfs_bmapi_allocate(args); 293 - 294 - 295 - args->done = &done; 296 - args->kswapd = current_is_kswapd(); 297 - INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); 298 - queue_work(xfs_alloc_wq, &args->work); 299 - wait_for_completion(&done); 300 - destroy_work_on_stack(&args->work); 301 - return args->result; 302 - } 303 - 304 - /* 305 252 * Check if the endoff is outside the last extent. If so the caller will grow 306 253 * the allocation to a stripe unit boundary. All offsets are considered outside 307 254 * the end of file for an empty fork, so 1 is returned in *eof in that case.
-4
fs/xfs/xfs_bmap_util.h
··· 55 55 bool userdata;/* set if is user data */ 56 56 bool aeof; /* allocated space at eof */ 57 57 bool conv; /* overwriting unwritten extents */ 58 - bool stack_switch; 59 - bool kswapd; /* allocation in kswapd context */ 60 58 int flags; 61 59 struct completion *done; 62 60 struct work_struct work; ··· 64 66 int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, 65 67 int *committed); 66 68 int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); 67 - int xfs_bmapi_allocate(struct xfs_bmalloca *args); 68 - int __xfs_bmapi_allocate(struct xfs_bmalloca *args); 69 69 int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, 70 70 int whichfork, int *eof); 71 71 int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+81 -1
fs/xfs/xfs_btree.c
··· 33 33 #include "xfs_error.h" 34 34 #include "xfs_trace.h" 35 35 #include "xfs_cksum.h" 36 + #include "xfs_alloc.h" 36 37 37 38 /* 38 39 * Cursor allocation zone. ··· 2324 2323 * record (to be inserted into parent). 2325 2324 */ 2326 2325 STATIC int /* error */ 2327 - xfs_btree_split( 2326 + __xfs_btree_split( 2328 2327 struct xfs_btree_cur *cur, 2329 2328 int level, 2330 2329 union xfs_btree_ptr *ptrp, ··· 2503 2502 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2504 2503 return error; 2505 2504 } 2505 + 2506 + struct xfs_btree_split_args { 2507 + struct xfs_btree_cur *cur; 2508 + int level; 2509 + union xfs_btree_ptr *ptrp; 2510 + union xfs_btree_key *key; 2511 + struct xfs_btree_cur **curp; 2512 + int *stat; /* success/failure */ 2513 + int result; 2514 + bool kswapd; /* allocation in kswapd context */ 2515 + struct completion *done; 2516 + struct work_struct work; 2517 + }; 2518 + 2519 + /* 2520 + * Stack switching interfaces for allocation 2521 + */ 2522 + static void 2523 + xfs_btree_split_worker( 2524 + struct work_struct *work) 2525 + { 2526 + struct xfs_btree_split_args *args = container_of(work, 2527 + struct xfs_btree_split_args, work); 2528 + unsigned long pflags; 2529 + unsigned long new_pflags = PF_FSTRANS; 2530 + 2531 + /* 2532 + * we are in a transaction context here, but may also be doing work 2533 + * in kswapd context, and hence we may need to inherit that state 2534 + * temporarily to ensure that we don't block waiting for memory reclaim 2535 + * in any way. 2536 + */ 2537 + if (args->kswapd) 2538 + new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 2539 + 2540 + current_set_flags_nested(&pflags, new_pflags); 2541 + 2542 + args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2543 + args->key, args->curp, args->stat); 2544 + complete(args->done); 2545 + 2546 + current_restore_flags_nested(&pflags, new_pflags); 2547 + } 2548 + 2549 + /* 2550 + * BMBT split requests often come in with little stack to work on. 
Push 2551 + * them off to a worker thread so there is lots of stack to use. For the other 2552 + * btree types, just call directly to avoid the context switch overhead here. 2553 + */ 2554 + STATIC int /* error */ 2555 + xfs_btree_split( 2556 + struct xfs_btree_cur *cur, 2557 + int level, 2558 + union xfs_btree_ptr *ptrp, 2559 + union xfs_btree_key *key, 2560 + struct xfs_btree_cur **curp, 2561 + int *stat) /* success/failure */ 2562 + { 2563 + struct xfs_btree_split_args args; 2564 + DECLARE_COMPLETION_ONSTACK(done); 2565 + 2566 + if (cur->bc_btnum != XFS_BTNUM_BMAP) 2567 + return __xfs_btree_split(cur, level, ptrp, key, curp, stat); 2568 + 2569 + args.cur = cur; 2570 + args.level = level; 2571 + args.ptrp = ptrp; 2572 + args.key = key; 2573 + args.curp = curp; 2574 + args.stat = stat; 2575 + args.done = &done; 2576 + args.kswapd = current_is_kswapd(); 2577 + INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); 2578 + queue_work(xfs_alloc_wq, &args.work); 2579 + wait_for_completion(&done); 2580 + destroy_work_on_stack(&args.work); 2581 + return args.result; 2582 + } 2583 + 2506 2584 2507 2585 /* 2508 2586 * Copy the old inode root contents into a real block and make the
+1 -2
fs/xfs/xfs_iomap.c
··· 749 749 * pointer that the caller gave to us. 750 750 */ 751 751 error = xfs_bmapi_write(tp, ip, map_start_fsb, 752 - count_fsb, 753 - XFS_BMAPI_STACK_SWITCH, 752 + count_fsb, 0, 754 753 &first_block, 1, 755 754 imap, &nimaps, &free_list); 756 755 if (error)
+21 -4
fs/xfs/xfs_sb.c
··· 483 483 } 484 484 485 485 /* 486 - * GQUOTINO and PQUOTINO cannot be used together in versions 487 - * of superblock that do not have pquotino. from->sb_flags 488 - * tells us which quota is active and should be copied to 489 - * disk. 486 + * GQUOTINO and PQUOTINO cannot be used together in versions of 487 + * superblock that do not have pquotino. from->sb_flags tells us which 488 + * quota is active and should be copied to disk. If neither are active, 489 + * make sure we write NULLFSINO to the sb_gquotino field as a quota 490 + * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature 491 + * bit is set. 492 + * 493 + * Note that we don't need to handle the sb_uquotino or sb_pquotino here 494 + * as they do not require any translation. Hence the main sb field loop 495 + * will write them appropriately from the in-core superblock. 490 496 */ 491 497 if ((*fields & XFS_SB_GQUOTINO) && 492 498 (from->sb_qflags & XFS_GQUOTA_ACCT)) ··· 500 494 else if ((*fields & XFS_SB_PQUOTINO) && 501 495 (from->sb_qflags & XFS_PQUOTA_ACCT)) 502 496 to->sb_gquotino = cpu_to_be64(from->sb_pquotino); 497 + else { 498 + /* 499 + * We can't rely on just the fields being logged to tell us 500 + * that it is safe to write NULLFSINO - we should only do that 501 + * if quotas are not actually enabled. Hence only write 502 + * NULLFSINO if both in-core quota inodes are NULL. 503 + */ 504 + if (from->sb_gquotino == NULLFSINO && 505 + from->sb_pquotino == NULLFSINO) 506 + to->sb_gquotino = cpu_to_be64(NULLFSINO); 507 + } 503 508 504 509 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); 505 510 }