Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Btrfs: use rcu to protect device->name

Al pointed out that we can just toss out the old name on a device and add a
new one arbitrarily, so anybody who uses device->name in printk could
possibly use freed memory. Instead of adding locking around all of this, he
suggested doing it with RCU, so I've introduced a struct rcu_string that
does just that and have gone through and protected all accesses to
device->name that aren't under the uuid_mutex with rcu_read_lock(). This
protects us and I will use it for dealing with removing the device that we
used to mount the file system in a later patch. Thanks,

Reviewed-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <josef@redhat.com>

authored by

Josef Bacik and committed by
Chris Mason
606686ee 17ca04af

+162 -64
+9 -7
fs/btrfs/check-integrity.c
··· 93 93 #include "print-tree.h" 94 94 #include "locking.h" 95 95 #include "check-integrity.h" 96 + #include "rcu-string.h" 96 97 97 98 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 98 99 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 ··· 844 843 superblock_tmp->never_written = 0; 845 844 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 846 845 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 847 - printk(KERN_INFO "New initial S-block (bdev %p, %s)" 848 - " @%llu (%s/%llu/%d)\n", 849 - superblock_bdev, device->name, 850 - (unsigned long long)dev_bytenr, 851 - dev_state->name, 852 - (unsigned long long)dev_bytenr, 853 - superblock_mirror_num); 846 + printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" 847 + " @%llu (%s/%llu/%d)\n", 848 + superblock_bdev, 849 + rcu_str_deref(device->name), 850 + (unsigned long long)dev_bytenr, 851 + dev_state->name, 852 + (unsigned long long)dev_bytenr, 853 + superblock_mirror_num); 854 854 list_add(&superblock_tmp->all_blocks_node, 855 855 &state->all_blocks_list); 856 856 btrfsic_block_hashtable_add(superblock_tmp,
+6 -4
fs/btrfs/disk-io.c
··· 44 44 #include "free-space-cache.h" 45 45 #include "inode-map.h" 46 46 #include "check-integrity.h" 47 + #include "rcu-string.h" 47 48 48 49 static struct extent_io_ops btree_extent_io_ops; 49 50 static void end_workqueue_fn(struct btrfs_work *work); ··· 2576 2575 struct btrfs_device *device = (struct btrfs_device *) 2577 2576 bh->b_private; 2578 2577 2579 - printk_ratelimited(KERN_WARNING "lost page write due to " 2580 - "I/O error on %s\n", device->name); 2578 + printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " 2579 + "I/O error on %s\n", 2580 + rcu_str_deref(device->name)); 2581 2581 /* note, we dont' set_buffer_write_io_error because we have 2582 2582 * our own ways of dealing with the IO errors 2583 2583 */ ··· 2751 2749 wait_for_completion(&device->flush_wait); 2752 2750 2753 2751 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 2754 - printk("btrfs: disabling barriers on dev %s\n", 2755 - device->name); 2752 + printk_in_rcu("btrfs: disabling barriers on dev %s\n", 2753 + rcu_str_deref(device->name)); 2756 2754 device->nobarriers = 1; 2757 2755 } 2758 2756 if (!bio_flagged(bio, BIO_UPTODATE)) {
+4 -3
fs/btrfs/extent_io.c
··· 20 20 #include "volumes.h" 21 21 #include "check-integrity.h" 22 22 #include "locking.h" 23 + #include "rcu-string.h" 23 24 24 25 static struct kmem_cache *extent_state_cache; 25 26 static struct kmem_cache *extent_buffer_cache; ··· 1918 1917 return -EIO; 1919 1918 } 1920 1919 1921 - printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " 1922 - "sector %llu)\n", page->mapping->host->i_ino, start, 1923 - dev->name, sector); 1920 + printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " 1921 + "(dev %s sector %llu)\n", page->mapping->host->i_ino, 1922 + start, rcu_str_deref(dev->name), sector); 1924 1923 1925 1924 bio_put(bio); 1926 1925 return 0;
+10 -3
fs/btrfs/ioctl.c
··· 52 52 #include "locking.h" 53 53 #include "inode-map.h" 54 54 #include "backref.h" 55 + #include "rcu-string.h" 55 56 56 57 /* Mask out flags that are inappropriate for the given type of inode. */ 57 58 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) ··· 1346 1345 do_div(new_size, root->sectorsize); 1347 1346 new_size *= root->sectorsize; 1348 1347 1349 - printk(KERN_INFO "btrfs: new size for %s is %llu\n", 1350 - device->name, (unsigned long long)new_size); 1348 + printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", 1349 + rcu_str_deref(device->name), 1350 + (unsigned long long)new_size); 1351 1351 1352 1352 if (new_size > old_size) { 1353 1353 trans = btrfs_start_transaction(root, 0); ··· 2266 2264 di_args->total_bytes = dev->total_bytes; 2267 2265 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2268 2266 if (dev->name) { 2269 - strncpy(di_args->path, dev->name, sizeof(di_args->path)); 2267 + struct rcu_string *name; 2268 + 2269 + rcu_read_lock(); 2270 + name = rcu_dereference(dev->name); 2271 + strncpy(di_args->path, name->str, sizeof(di_args->path)); 2272 + rcu_read_unlock(); 2270 2273 di_args->path[sizeof(di_args->path) - 1] = 0; 2271 2274 } else { 2272 2275 di_args->path[0] = '\0';
+56
fs/btrfs/rcu-string.h
··· 1 + /* 2 + * Copyright (C) 2012 Red Hat. All rights reserved. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public 6 + * License v2 as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 + * General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public 14 + * License along with this program; if not, write to the 15 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 + * Boston, MA 021110-1307, USA. 17 + */ 18 + 19 + struct rcu_string { 20 + struct rcu_head rcu; 21 + char str[0]; 22 + }; 23 + 24 + static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) 25 + { 26 + size_t len = strlen(src) + 1; 27 + struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) + 28 + (len * sizeof(char)), mask); 29 + if (!ret) 30 + return ret; 31 + strncpy(ret->str, src, len); 32 + return ret; 33 + } 34 + 35 + static inline void rcu_string_free(struct rcu_string *str) 36 + { 37 + if (str) 38 + kfree_rcu(str, rcu); 39 + } 40 + 41 + #define printk_in_rcu(fmt, ...) do { \ 42 + rcu_read_lock(); \ 43 + printk(fmt, __VA_ARGS__); \ 44 + rcu_read_unlock(); \ 45 + } while (0) 46 + 47 + #define printk_ratelimited_in_rcu(fmt, ...) do { \ 48 + rcu_read_lock(); \ 49 + printk_ratelimited(fmt, __VA_ARGS__); \ 50 + rcu_read_unlock(); \ 51 + } while (0) 52 + 53 + #define rcu_str_deref(rcu_str) ({ \ 54 + struct rcu_string *__str = rcu_dereference(rcu_str); \ 55 + __str->str; \ 56 + })
+18 -12
fs/btrfs/scrub.c
··· 26 26 #include "backref.h" 27 27 #include "extent_io.h" 28 28 #include "check-integrity.h" 29 + #include "rcu-string.h" 29 30 30 31 /* 31 32 * This is only the first step towards a full-features scrub. It reads all ··· 321 320 * hold all of the paths here 322 321 */ 323 322 for (i = 0; i < ipath->fspath->elem_cnt; ++i) 324 - printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 323 + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " 325 324 "%s, sector %llu, root %llu, inode %llu, offset %llu, " 326 325 "length %llu, links %u (path: %s)\n", swarn->errstr, 327 - swarn->logical, swarn->dev->name, 326 + swarn->logical, rcu_str_deref(swarn->dev->name), 328 327 (unsigned long long)swarn->sector, root, inum, offset, 329 328 min(isize - offset, (u64)PAGE_SIZE), nlink, 330 329 (char *)(unsigned long)ipath->fspath->val[i]); ··· 333 332 return 0; 334 333 335 334 err: 336 - printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 335 + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " 337 336 "%s, sector %llu, root %llu, inode %llu, offset %llu: path " 338 337 "resolving failed with ret=%d\n", swarn->errstr, 339 - swarn->logical, swarn->dev->name, 338 + swarn->logical, rcu_str_deref(swarn->dev->name), 340 339 (unsigned long long)swarn->sector, root, inum, offset, ret); 341 340 342 341 free_ipath(ipath); ··· 391 390 do { 392 391 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 393 392 &ref_root, &ref_level); 394 - printk(KERN_WARNING 393 + printk_in_rcu(KERN_WARNING 395 394 "btrfs: %s at logical %llu on dev %s, " 396 395 "sector %llu: metadata %s (level %d) in tree " 397 - "%llu\n", errstr, swarn.logical, dev->name, 396 + "%llu\n", errstr, swarn.logical, 397 + rcu_str_deref(dev->name), 398 398 (unsigned long long)swarn.sector, 399 399 ref_level ? "node" : "leaf", 400 400 ret < 0 ? 
-1 : ref_level, ··· 582 580 spin_lock(&sdev->stat_lock); 583 581 ++sdev->stat.uncorrectable_errors; 584 582 spin_unlock(&sdev->stat_lock); 585 - printk_ratelimited(KERN_ERR 583 + 584 + printk_ratelimited_in_rcu(KERN_ERR 586 585 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 587 - (unsigned long long)fixup->logical, sdev->dev->name); 586 + (unsigned long long)fixup->logical, 587 + rcu_str_deref(sdev->dev->name)); 588 588 } 589 589 590 590 btrfs_free_path(path); ··· 940 936 spin_lock(&sdev->stat_lock); 941 937 sdev->stat.corrected_errors++; 942 938 spin_unlock(&sdev->stat_lock); 943 - printk_ratelimited(KERN_ERR 939 + printk_ratelimited_in_rcu(KERN_ERR 944 940 "btrfs: fixed up error at logical %llu on dev %s\n", 945 - (unsigned long long)logical, sdev->dev->name); 941 + (unsigned long long)logical, 942 + rcu_str_deref(sdev->dev->name)); 946 943 } 947 944 } else { 948 945 did_not_correct_error: 949 946 spin_lock(&sdev->stat_lock); 950 947 sdev->stat.uncorrectable_errors++; 951 948 spin_unlock(&sdev->stat_lock); 952 - printk_ratelimited(KERN_ERR 949 + printk_ratelimited_in_rcu(KERN_ERR 953 950 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 954 - (unsigned long long)logical, sdev->dev->name); 951 + (unsigned long long)logical, 952 + rcu_str_deref(sdev->dev->name)); 955 953 } 956 954 957 955 out:
+58 -34
fs/btrfs/volumes.c
··· 35 35 #include "volumes.h" 36 36 #include "async-thread.h" 37 37 #include "check-integrity.h" 38 + #include "rcu-string.h" 38 39 39 40 static int init_first_rw_device(struct btrfs_trans_handle *trans, 40 41 struct btrfs_root *root, ··· 65 64 device = list_entry(fs_devices->devices.next, 66 65 struct btrfs_device, dev_list); 67 66 list_del(&device->dev_list); 68 - kfree(device->name); 67 + rcu_string_free(device->name); 69 68 kfree(device); 70 69 } 71 70 kfree(fs_devices); ··· 335 334 { 336 335 struct btrfs_device *device; 337 336 struct btrfs_fs_devices *fs_devices; 337 + struct rcu_string *name; 338 338 u64 found_transid = btrfs_super_generation(disk_super); 339 - char *name; 340 339 341 340 fs_devices = find_fsid(disk_super->fsid); 342 341 if (!fs_devices) { ··· 370 369 memcpy(device->uuid, disk_super->dev_item.uuid, 371 370 BTRFS_UUID_SIZE); 372 371 spin_lock_init(&device->io_lock); 373 - device->name = kstrdup(path, GFP_NOFS); 374 - if (!device->name) { 372 + 373 + name = rcu_string_strdup(path, GFP_NOFS); 374 + if (!name) { 375 375 kfree(device); 376 376 return -ENOMEM; 377 377 } 378 + rcu_assign_pointer(device->name, name); 378 379 INIT_LIST_HEAD(&device->dev_alloc_list); 379 380 380 381 /* init readahead state */ ··· 393 390 394 391 device->fs_devices = fs_devices; 395 392 fs_devices->num_devices++; 396 - } else if (!device->name || strcmp(device->name, path)) { 397 - name = kstrdup(path, GFP_NOFS); 393 + } else if (!device->name || strcmp(device->name->str, path)) { 394 + name = rcu_string_strdup(path, GFP_NOFS); 398 395 if (!name) 399 396 return -ENOMEM; 400 - kfree(device->name); 401 - device->name = name; 397 + rcu_string_free(device->name); 398 + rcu_assign_pointer(device->name, name); 402 399 if (device->missing) { 403 400 fs_devices->missing_devices--; 404 401 device->missing = 0; ··· 433 430 434 431 /* We have held the volume lock, it is safe to get the devices. 
*/ 435 432 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 433 + struct rcu_string *name; 434 + 436 435 device = kzalloc(sizeof(*device), GFP_NOFS); 437 436 if (!device) 438 437 goto error; 439 438 440 - device->name = kstrdup(orig_dev->name, GFP_NOFS); 441 - if (!device->name) { 439 + /* 440 + * This is ok to do without rcu read locked because we hold the 441 + * uuid mutex so nothing we touch in here is going to disappear. 442 + */ 443 + name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); 444 + if (!name) { 442 445 kfree(device); 443 446 goto error; 444 447 } 448 + rcu_assign_pointer(device->name, name); 445 449 446 450 device->devid = orig_dev->devid; 447 451 device->work.func = pending_bios_fn; ··· 501 491 } 502 492 list_del_init(&device->dev_list); 503 493 fs_devices->num_devices--; 504 - kfree(device->name); 494 + rcu_string_free(device->name); 505 495 kfree(device); 506 496 } 507 497 ··· 526 516 if (device->bdev) 527 517 blkdev_put(device->bdev, device->mode); 528 518 529 - kfree(device->name); 519 + rcu_string_free(device->name); 530 520 kfree(device); 531 521 } 532 522 ··· 550 540 mutex_lock(&fs_devices->device_list_mutex); 551 541 list_for_each_entry(device, &fs_devices->devices, dev_list) { 552 542 struct btrfs_device *new_device; 543 + struct rcu_string *name; 553 544 554 545 if (device->bdev) 555 546 fs_devices->open_devices--; ··· 566 555 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 567 556 BUG_ON(!new_device); /* -ENOMEM */ 568 557 memcpy(new_device, device, sizeof(*new_device)); 569 - new_device->name = kstrdup(device->name, GFP_NOFS); 570 - BUG_ON(device->name && !new_device->name); /* -ENOMEM */ 558 + 559 + /* Safe because we are under uuid_mutex */ 560 + name = rcu_string_strdup(device->name->str, GFP_NOFS); 561 + BUG_ON(device->name && !name); /* -ENOMEM */ 562 + rcu_assign_pointer(new_device->name, name); 571 563 new_device->bdev = NULL; 572 564 new_device->writeable = 0; 573 565 new_device->in_fs_metadata = 0; ··· 635 
621 if (!device->name) 636 622 continue; 637 623 638 - bdev = blkdev_get_by_path(device->name, flags, holder); 624 + bdev = blkdev_get_by_path(device->name->str, flags, holder); 639 625 if (IS_ERR(bdev)) { 640 - printk(KERN_INFO "open %s failed\n", device->name); 626 + printk(KERN_INFO "open %s failed\n", device->name->str); 641 627 goto error; 642 628 } 643 629 filemap_write_and_wait(bdev->bd_inode->i_mapping); ··· 1646 1632 struct block_device *bdev; 1647 1633 struct list_head *devices; 1648 1634 struct super_block *sb = root->fs_info->sb; 1635 + struct rcu_string *name; 1649 1636 u64 total_bytes; 1650 1637 int seeding_dev = 0; 1651 1638 int ret = 0; ··· 1686 1671 goto error; 1687 1672 } 1688 1673 1689 - device->name = kstrdup(device_path, GFP_NOFS); 1690 - if (!device->name) { 1674 + name = rcu_string_strdup(device_path, GFP_NOFS); 1675 + if (!name) { 1691 1676 kfree(device); 1692 1677 ret = -ENOMEM; 1693 1678 goto error; 1694 1679 } 1680 + rcu_assign_pointer(device->name, name); 1695 1681 1696 1682 ret = find_next_devid(root, &device->devid); 1697 1683 if (ret) { 1698 - kfree(device->name); 1684 + rcu_string_free(device->name); 1699 1685 kfree(device); 1700 1686 goto error; 1701 1687 } 1702 1688 1703 1689 trans = btrfs_start_transaction(root, 0); 1704 1690 if (IS_ERR(trans)) { 1705 - kfree(device->name); 1691 + rcu_string_free(device->name); 1706 1692 kfree(device); 1707 1693 ret = PTR_ERR(trans); 1708 1694 goto error; ··· 1812 1796 unlock_chunks(root); 1813 1797 btrfs_abort_transaction(trans, root, ret); 1814 1798 btrfs_end_transaction(trans, root); 1815 - kfree(device->name); 1799 + rcu_string_free(device->name); 1816 1800 kfree(device); 1817 1801 error: 1818 1802 blkdev_put(bdev, FMODE_EXCL); ··· 4220 4204 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; 4221 4205 dev = bbio->stripes[dev_nr].dev; 4222 4206 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { 4207 + #ifdef DEBUG 4208 + struct rcu_string *name; 4209 + 4210 + rcu_read_lock(); 4211 + 
name = rcu_dereference(dev->name); 4223 4212 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " 4224 4213 "(%s id %llu), size=%u\n", rw, 4225 4214 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, 4226 - dev->name, dev->devid, bio->bi_size); 4215 + name->str, dev->devid, bio->bi_size); 4216 + rcu_read_unlock(); 4217 + #endif 4227 4218 bio->bi_bdev = dev->bdev; 4228 4219 if (async_submit) 4229 4220 schedule_bio(root, dev, rw, bio); ··· 4717 4694 key.offset = device->devid; 4718 4695 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); 4719 4696 if (ret) { 4720 - printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", 4721 - device->name, (unsigned long long)device->devid); 4697 + printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", 4698 + rcu_str_deref(device->name), 4699 + (unsigned long long)device->devid); 4722 4700 __btrfs_reset_dev_stats(device); 4723 4701 device->dev_stats_valid = 1; 4724 4702 btrfs_release_path(path); ··· 4771 4747 BUG_ON(!path); 4772 4748 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 4773 4749 if (ret < 0) { 4774 - printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", 4775 - ret, device->name); 4750 + printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", 4751 + ret, rcu_str_deref(device->name)); 4776 4752 goto out; 4777 4753 } 4778 4754 ··· 4781 4757 /* need to delete old one and insert a new one */ 4782 4758 ret = btrfs_del_item(trans, dev_root, path); 4783 4759 if (ret != 0) { 4784 - printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", 4785 - device->name, ret); 4760 + printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", 4761 + rcu_str_deref(device->name), ret); 4786 4762 goto out; 4787 4763 } 4788 4764 ret = 1; ··· 4794 4770 ret = 
btrfs_insert_empty_item(trans, dev_root, path, 4795 4771 &key, sizeof(*ptr)); 4796 4772 if (ret < 0) { 4797 - printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", 4798 - device->name, ret); 4773 + printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", 4774 + rcu_str_deref(device->name), ret); 4799 4775 goto out; 4800 4776 } 4801 4777 } ··· 4847 4823 { 4848 4824 if (!dev->dev_stats_valid) 4849 4825 return; 4850 - printk_ratelimited(KERN_ERR 4826 + printk_ratelimited_in_rcu(KERN_ERR 4851 4827 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4852 - dev->name, 4828 + rcu_str_deref(dev->name), 4853 4829 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4854 4830 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4855 4831 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), ··· 4861 4837 4862 4838 static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) 4863 4839 { 4864 - printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4865 - dev->name, 4840 + printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4841 + rcu_str_deref(dev->name), 4866 4842 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4867 4843 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4868 4844 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
+1 -1
fs/btrfs/volumes.h
··· 58 58 /* the mode sent to blkdev_get */ 59 59 fmode_t mode; 60 60 61 - char *name; 61 + struct rcu_string *name; 62 62 63 63 /* the internal btrfs device id */ 64 64 u64 devid;