Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] O(1) sb list traversing on syncs

This patch removes O(n^2) super block loops in sync_inodes(),
sync_filesystems() etc. in favour of using __put_super_and_need_restart()
which I introduced earlier. We faced noticeably long freezes on sb
syncing when there are thousands of super blocks in the system.

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Kirill Korotaev and committed by
Linus Torvalds
618f0636 4fea2838

+96 -111
+27 -37
fs/fs-writeback.c
··· 485 485 spin_unlock(&sb_lock); 486 486 } 487 487 488 - /* 489 - * Find a superblock with inodes that need to be synced 490 - */ 491 - static struct super_block *get_super_to_sync(void) 492 - { 493 - struct super_block *sb; 494 - restart: 495 - spin_lock(&sb_lock); 496 - sb = sb_entry(super_blocks.prev); 497 - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { 498 - if (sb->s_syncing) 499 - continue; 500 - sb->s_syncing = 1; 501 - sb->s_count++; 502 - spin_unlock(&sb_lock); 503 - down_read(&sb->s_umount); 504 - if (!sb->s_root) { 505 - drop_super(sb); 506 - goto restart; 507 - } 508 - return sb; 509 - } 510 - spin_unlock(&sb_lock); 511 - return NULL; 512 - } 513 - 514 488 /** 515 489 * sync_inodes - writes all inodes to disk 516 490 * @wait: wait for completion ··· 504 530 * outstanding dirty inodes, the writeback goes block-at-a-time within the 505 531 * filesystem's write_inode(). This is extremely slow. 506 532 */ 507 - void sync_inodes(int wait) 533 + static void __sync_inodes(int wait) 508 534 { 509 535 struct super_block *sb; 510 536 511 - set_sb_syncing(0); 512 - while ((sb = get_super_to_sync()) != NULL) { 513 - sync_inodes_sb(sb, 0); 514 - sync_blockdev(sb->s_bdev); 515 - drop_super(sb); 537 + spin_lock(&sb_lock); 538 + restart: 539 + list_for_each_entry(sb, &super_blocks, s_list) { 540 + if (sb->s_syncing) 541 + continue; 542 + sb->s_syncing = 1; 543 + sb->s_count++; 544 + spin_unlock(&sb_lock); 545 + down_read(&sb->s_umount); 546 + if (sb->s_root) { 547 + sync_inodes_sb(sb, wait); 548 + sync_blockdev(sb->s_bdev); 549 + } 550 + up_read(&sb->s_umount); 551 + spin_lock(&sb_lock); 552 + if (__put_super_and_need_restart(sb)) 553 + goto restart; 516 554 } 555 + spin_unlock(&sb_lock); 556 + } 557 + 558 + void sync_inodes(int wait) 559 + { 560 + set_sb_syncing(0); 561 + __sync_inodes(0); 562 + 517 563 if (wait) { 518 564 set_sb_syncing(0); 519 - while ((sb = get_super_to_sync()) != NULL) { 520 - sync_inodes_sb(sb, 1); 521 - 
sync_blockdev(sb->s_bdev); 522 - drop_super(sb); 523 - } 565 + __sync_inodes(1); 524 566 } 525 567 } 526 568
+24 -36
fs/quota.c
··· 149 149 return error; 150 150 } 151 151 152 - static struct super_block *get_super_to_sync(int type) 153 - { 154 - struct list_head *head; 155 - int cnt, dirty; 156 - 157 - restart: 158 - spin_lock(&sb_lock); 159 - list_for_each(head, &super_blocks) { 160 - struct super_block *sb = list_entry(head, struct super_block, s_list); 161 - 162 - /* This test just improves performance so it needn't be reliable... */ 163 - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) 164 - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) 165 - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) 166 - dirty = 1; 167 - if (!dirty) 168 - continue; 169 - sb->s_count++; 170 - spin_unlock(&sb_lock); 171 - down_read(&sb->s_umount); 172 - if (!sb->s_root) { 173 - drop_super(sb); 174 - goto restart; 175 - } 176 - return sb; 177 - } 178 - spin_unlock(&sb_lock); 179 - return NULL; 180 - } 181 - 182 152 static void quota_sync_sb(struct super_block *sb, int type) 183 153 { 184 154 int cnt; ··· 189 219 190 220 void sync_dquots(struct super_block *sb, int type) 191 221 { 222 + int cnt, dirty; 223 + 192 224 if (sb) { 193 225 if (sb->s_qcop->quota_sync) 194 226 quota_sync_sb(sb, type); 227 + return; 195 228 } 196 - else { 197 - while ((sb = get_super_to_sync(type)) != NULL) { 198 - if (sb->s_qcop->quota_sync) 199 - quota_sync_sb(sb, type); 200 - drop_super(sb); 201 - } 229 + 230 + spin_lock(&sb_lock); 231 + restart: 232 + list_for_each_entry(sb, &super_blocks, s_list) { 233 + /* This test just improves performance so it needn't be reliable... 
*/ 234 + for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) 235 + if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) 236 + && info_any_dirty(&sb_dqopt(sb)->info[cnt])) 237 + dirty = 1; 238 + if (!dirty) 239 + continue; 240 + sb->s_count++; 241 + spin_unlock(&sb_lock); 242 + down_read(&sb->s_umount); 243 + if (sb->s_root && sb->s_qcop->quota_sync) 244 + quota_sync_sb(sb, type); 245 + up_read(&sb->s_umount); 246 + spin_lock(&sb_lock); 247 + if (__put_super_and_need_restart(sb)) 248 + goto restart; 202 249 } 250 + spin_unlock(&sb_lock); 203 251 } 204 252 205 253 /* Copy parameters and call proper function */
+45 -38
fs/super.c
··· 341 341 */ 342 342 void sync_supers(void) 343 343 { 344 - struct super_block * sb; 345 - restart: 344 + struct super_block *sb; 345 + 346 346 spin_lock(&sb_lock); 347 - sb = sb_entry(super_blocks.next); 348 - while (sb != sb_entry(&super_blocks)) 347 + restart: 348 + list_for_each_entry(sb, &super_blocks, s_list) { 349 349 if (sb->s_dirt) { 350 350 sb->s_count++; 351 351 spin_unlock(&sb_lock); 352 352 down_read(&sb->s_umount); 353 353 write_super(sb); 354 - drop_super(sb); 355 - goto restart; 356 - } else 357 - sb = sb_entry(sb->s_list.next); 354 + up_read(&sb->s_umount); 355 + spin_lock(&sb_lock); 356 + if (__put_super_and_need_restart(sb)) 357 + goto restart; 358 + } 359 + } 358 360 spin_unlock(&sb_lock); 359 361 } 360 362 ··· 383 381 384 382 down(&mutex); /* Could be down_interruptible */ 385 383 spin_lock(&sb_lock); 386 - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); 387 - sb = sb_entry(sb->s_list.next)) { 384 + list_for_each_entry(sb, &super_blocks, s_list) { 388 385 if (!sb->s_op->sync_fs) 389 386 continue; 390 387 if (sb->s_flags & MS_RDONLY) 391 388 continue; 392 389 sb->s_need_sync_fs = 1; 393 390 } 394 - spin_unlock(&sb_lock); 395 391 396 392 restart: 397 - spin_lock(&sb_lock); 398 - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); 399 - sb = sb_entry(sb->s_list.next)) { 393 + list_for_each_entry(sb, &super_blocks, s_list) { 400 394 if (!sb->s_need_sync_fs) 401 395 continue; 402 396 sb->s_need_sync_fs = 0; ··· 403 405 down_read(&sb->s_umount); 404 406 if (sb->s_root && (wait || sb->s_dirt)) 405 407 sb->s_op->sync_fs(sb, wait); 406 - drop_super(sb); 407 - goto restart; 408 + up_read(&sb->s_umount); 409 + /* restart only when sb is no longer on the list */ 410 + spin_lock(&sb_lock); 411 + if (__put_super_and_need_restart(sb)) 412 + goto restart; 408 413 } 409 414 spin_unlock(&sb_lock); 410 415 up(&mutex); ··· 423 422 424 423 struct super_block * get_super(struct block_device *bdev) 425 424 { 426 - struct 
list_head *p; 425 + struct super_block *sb; 426 + 427 427 if (!bdev) 428 428 return NULL; 429 - rescan: 429 + 430 430 spin_lock(&sb_lock); 431 - list_for_each(p, &super_blocks) { 432 - struct super_block *s = sb_entry(p); 433 - if (s->s_bdev == bdev) { 434 - s->s_count++; 431 + rescan: 432 + list_for_each_entry(sb, &super_blocks, s_list) { 433 + if (sb->s_bdev == bdev) { 434 + sb->s_count++; 435 435 spin_unlock(&sb_lock); 436 - down_read(&s->s_umount); 437 - if (s->s_root) 438 - return s; 439 - drop_super(s); 440 - goto rescan; 436 + down_read(&sb->s_umount); 437 + if (sb->s_root) 438 + return sb; 439 + up_read(&sb->s_umount); 440 + /* restart only when sb is no longer on the list */ 441 + spin_lock(&sb_lock); 442 + if (__put_super_and_need_restart(sb)) 443 + goto rescan; 441 444 } 442 445 } 443 446 spin_unlock(&sb_lock); ··· 452 447 453 448 struct super_block * user_get_super(dev_t dev) 454 449 { 455 - struct list_head *p; 450 + struct super_block *sb; 456 451 457 - rescan: 458 452 spin_lock(&sb_lock); 459 - list_for_each(p, &super_blocks) { 460 - struct super_block *s = sb_entry(p); 461 - if (s->s_dev == dev) { 462 - s->s_count++; 453 + rescan: 454 + list_for_each_entry(sb, &super_blocks, s_list) { 455 + if (sb->s_dev == dev) { 456 + sb->s_count++; 463 457 spin_unlock(&sb_lock); 464 - down_read(&s->s_umount); 465 - if (s->s_root) 466 - return s; 467 - drop_super(s); 468 - goto rescan; 458 + down_read(&sb->s_umount); 459 + if (sb->s_root) 460 + return sb; 461 + up_read(&sb->s_umount); 462 + /* restart only when sb is no longer on the list */ 463 + spin_lock(&sb_lock); 464 + if (__put_super_and_need_restart(sb)) 465 + goto rescan; 469 466 } 470 467 } 471 468 spin_unlock(&sb_lock);