Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull more btrfs updates from Chris Mason:
"This has a few fixes since our last pull and a new ioctl for doing
btree searches from userland. It's very similar to the existing
ioctl, but lets us return larger items back down to the app."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
btrfs: fix error handling in create_pending_snapshot
btrfs: fix use of uninit "ret" in end_extent_writepage()
btrfs: free ulist in qgroup_shared_accounting() error path
Btrfs: fix qgroups sanity test crash or hang
btrfs: prevent RCU warning when dereferencing radix tree slot
Btrfs: fix unfinished readahead thread for raid5/6 degraded mounting
btrfs: new ioctl TREE_SEARCH_V2
btrfs: tree_search, search_ioctl: direct copy to userspace
btrfs: new function read_extent_buffer_to_user
btrfs: tree_search, copy_to_sk: return needed size on EOVERFLOW
btrfs: tree_search, copy_to_sk: return EOVERFLOW for too small buffer
btrfs: tree_search, search_ioctl: accept varying buffer
btrfs: tree_search: eliminate redundant nr_items check

Changed files
+193 -37
fs
include
uapi
linux
+38 -1
fs/btrfs/extent_io.c
··· 2354 2354 { 2355 2355 int uptodate = (err == 0); 2356 2356 struct extent_io_tree *tree; 2357 - int ret; 2357 + int ret = 0; 2358 2358 2359 2359 tree = &BTRFS_I(page->mapping->host)->io_tree; 2360 2360 ··· 5066 5066 offset = 0; 5067 5067 i++; 5068 5068 } 5069 + } 5070 + 5071 + int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, 5072 + unsigned long start, 5073 + unsigned long len) 5074 + { 5075 + size_t cur; 5076 + size_t offset; 5077 + struct page *page; 5078 + char *kaddr; 5079 + char __user *dst = (char __user *)dstv; 5080 + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 5081 + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 5082 + int ret = 0; 5083 + 5084 + WARN_ON(start > eb->len); 5085 + WARN_ON(start + len > eb->start + eb->len); 5086 + 5087 + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); 5088 + 5089 + while (len > 0) { 5090 + page = extent_buffer_page(eb, i); 5091 + 5092 + cur = min(len, (PAGE_CACHE_SIZE - offset)); 5093 + kaddr = page_address(page); 5094 + if (copy_to_user(dst, kaddr + offset, cur)) { 5095 + ret = -EFAULT; 5096 + break; 5097 + } 5098 + 5099 + dst += cur; 5100 + len -= cur; 5101 + offset = 0; 5102 + i++; 5103 + } 5104 + 5105 + return ret; 5069 5106 } 5070 5107 5071 5108 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
+3
fs/btrfs/extent_io.h
··· 304 304 void read_extent_buffer(struct extent_buffer *eb, void *dst, 305 305 unsigned long start, 306 306 unsigned long len); 307 + int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, 308 + unsigned long start, 309 + unsigned long len); 307 310 void write_extent_buffer(struct extent_buffer *eb, const void *src, 308 311 unsigned long start, unsigned long len); 309 312 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+122 -27
fs/btrfs/ioctl.c
··· 1957 1957 struct btrfs_path *path, 1958 1958 struct btrfs_key *key, 1959 1959 struct btrfs_ioctl_search_key *sk, 1960 - char *buf, 1960 + size_t *buf_size, 1961 + char __user *ubuf, 1961 1962 unsigned long *sk_offset, 1962 1963 int *num_found) 1963 1964 { ··· 1990 1989 if (!key_in_sk(key, sk)) 1991 1990 continue; 1992 1991 1993 - if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1994 - item_len = 0; 1992 + if (sizeof(sh) + item_len > *buf_size) { 1993 + if (*num_found) { 1994 + ret = 1; 1995 + goto out; 1996 + } 1995 1997 1996 - if (sizeof(sh) + item_len + *sk_offset > 1997 - BTRFS_SEARCH_ARGS_BUFSIZE) { 1998 + /* 1999 + * return one empty item back for v1, which does not 2000 + * handle -EOVERFLOW 2001 + */ 2002 + 2003 + *buf_size = sizeof(sh) + item_len; 2004 + item_len = 0; 2005 + ret = -EOVERFLOW; 2006 + } 2007 + 2008 + if (sizeof(sh) + item_len + *sk_offset > *buf_size) { 1998 2009 ret = 1; 1999 - goto overflow; 2010 + goto out; 2000 2011 } 2001 2012 2002 2013 sh.objectid = key->objectid; ··· 2018 2005 sh.transid = found_transid; 2019 2006 2020 2007 /* copy search result header */ 2021 - memcpy(buf + *sk_offset, &sh, sizeof(sh)); 2008 + if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { 2009 + ret = -EFAULT; 2010 + goto out; 2011 + } 2012 + 2022 2013 *sk_offset += sizeof(sh); 2023 2014 2024 2015 if (item_len) { 2025 - char *p = buf + *sk_offset; 2016 + char __user *up = ubuf + *sk_offset; 2026 2017 /* copy the item */ 2027 - read_extent_buffer(leaf, p, 2028 - item_off, item_len); 2018 + if (read_extent_buffer_to_user(leaf, up, 2019 + item_off, item_len)) { 2020 + ret = -EFAULT; 2021 + goto out; 2022 + } 2023 + 2029 2024 *sk_offset += item_len; 2030 2025 } 2031 2026 (*num_found)++; 2032 2027 2033 - if (*num_found >= sk->nr_items) 2034 - break; 2028 + if (ret) /* -EOVERFLOW from above */ 2029 + goto out; 2030 + 2031 + if (*num_found >= sk->nr_items) { 2032 + ret = 1; 2033 + goto out; 2034 + } 2035 2035 } 2036 2036 advance_key: 2037 2037 ret = 0; 
··· 2059 2033 key->objectid++; 2060 2034 } else 2061 2035 ret = 1; 2062 - overflow: 2036 + out: 2037 + /* 2038 + * 0: all items from this leaf copied, continue with next 2039 + * 1: * more items can be copied, but unused buffer is too small 2040 + * * all items were found 2041 + * Either way, it will stops the loop which iterates to the next 2042 + * leaf 2043 + * -EOVERFLOW: item was to large for buffer 2044 + * -EFAULT: could not copy extent buffer back to userspace 2045 + */ 2063 2046 return ret; 2064 2047 } 2065 2048 2066 2049 static noinline int search_ioctl(struct inode *inode, 2067 - struct btrfs_ioctl_search_args *args) 2050 + struct btrfs_ioctl_search_key *sk, 2051 + size_t *buf_size, 2052 + char __user *ubuf) 2068 2053 { 2069 2054 struct btrfs_root *root; 2070 2055 struct btrfs_key key; 2071 2056 struct btrfs_path *path; 2072 - struct btrfs_ioctl_search_key *sk = &args->key; 2073 2057 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; 2074 2058 int ret; 2075 2059 int num_found = 0; 2076 2060 unsigned long sk_offset = 0; 2061 + 2062 + if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { 2063 + *buf_size = sizeof(struct btrfs_ioctl_search_header); 2064 + return -EOVERFLOW; 2065 + } 2077 2066 2078 2067 path = btrfs_alloc_path(); 2079 2068 if (!path) ··· 2123 2082 ret = 0; 2124 2083 goto err; 2125 2084 } 2126 - ret = copy_to_sk(root, path, &key, sk, args->buf, 2085 + ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, 2127 2086 &sk_offset, &num_found); 2128 2087 btrfs_release_path(path); 2129 - if (ret || num_found >= sk->nr_items) 2088 + if (ret) 2130 2089 break; 2131 2090 2132 2091 } 2133 - ret = 0; 2092 + if (ret > 0) 2093 + ret = 0; 2134 2094 err: 2135 2095 sk->nr_items = num_found; 2136 2096 btrfs_free_path(path); ··· 2141 2099 static noinline int btrfs_ioctl_tree_search(struct file *file, 2142 2100 void __user *argp) 2143 2101 { 2144 - struct btrfs_ioctl_search_args *args; 2145 - struct inode *inode; 2146 - int ret; 2102 + struct 
btrfs_ioctl_search_args __user *uargs; 2103 + struct btrfs_ioctl_search_key sk; 2104 + struct inode *inode; 2105 + int ret; 2106 + size_t buf_size; 2147 2107 2148 2108 if (!capable(CAP_SYS_ADMIN)) 2149 2109 return -EPERM; 2150 2110 2151 - args = memdup_user(argp, sizeof(*args)); 2152 - if (IS_ERR(args)) 2153 - return PTR_ERR(args); 2111 + uargs = (struct btrfs_ioctl_search_args __user *)argp; 2112 + 2113 + if (copy_from_user(&sk, &uargs->key, sizeof(sk))) 2114 + return -EFAULT; 2115 + 2116 + buf_size = sizeof(uargs->buf); 2154 2117 2155 2118 inode = file_inode(file); 2156 - ret = search_ioctl(inode, args); 2157 - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2119 + ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); 2120 + 2121 + /* 2122 + * In the origin implementation an overflow is handled by returning a 2123 + * search header with a len of zero, so reset ret. 2124 + */ 2125 + if (ret == -EOVERFLOW) 2126 + ret = 0; 2127 + 2128 + if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) 2158 2129 ret = -EFAULT; 2159 - kfree(args); 2130 + return ret; 2131 + } 2132 + 2133 + static noinline int btrfs_ioctl_tree_search_v2(struct file *file, 2134 + void __user *argp) 2135 + { 2136 + struct btrfs_ioctl_search_args_v2 __user *uarg; 2137 + struct btrfs_ioctl_search_args_v2 args; 2138 + struct inode *inode; 2139 + int ret; 2140 + size_t buf_size; 2141 + const size_t buf_limit = 16 * 1024 * 1024; 2142 + 2143 + if (!capable(CAP_SYS_ADMIN)) 2144 + return -EPERM; 2145 + 2146 + /* copy search header and buffer size */ 2147 + uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; 2148 + if (copy_from_user(&args, uarg, sizeof(args))) 2149 + return -EFAULT; 2150 + 2151 + buf_size = args.buf_size; 2152 + 2153 + if (buf_size < sizeof(struct btrfs_ioctl_search_header)) 2154 + return -EOVERFLOW; 2155 + 2156 + /* limit result size to 16MB */ 2157 + if (buf_size > buf_limit) 2158 + buf_size = buf_limit; 2159 + 2160 + inode = file_inode(file); 2161 + ret = 
search_ioctl(inode, &args.key, &buf_size, 2162 + (char *)(&uarg->buf[0])); 2163 + if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) 2164 + ret = -EFAULT; 2165 + else if (ret == -EOVERFLOW && 2166 + copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) 2167 + ret = -EFAULT; 2168 + 2160 2169 return ret; 2161 2170 } 2162 2171 ··· 5291 5198 return btrfs_ioctl_trans_end(file); 5292 5199 case BTRFS_IOC_TREE_SEARCH: 5293 5200 return btrfs_ioctl_tree_search(file, argp); 5201 + case BTRFS_IOC_TREE_SEARCH_V2: 5202 + return btrfs_ioctl_tree_search_v2(file, argp); 5294 5203 case BTRFS_IOC_INO_LOOKUP: 5295 5204 return btrfs_ioctl_ino_lookup(file, argp); 5296 5205 case BTRFS_IOC_INO_PATHS:
+3 -1
fs/btrfs/qgroup.c
··· 1798 1798 return -ENOMEM; 1799 1799 1800 1800 tmp = ulist_alloc(GFP_NOFS); 1801 - if (!tmp) 1801 + if (!tmp) { 1802 + ulist_free(qgroups); 1802 1803 return -ENOMEM; 1804 + } 1803 1805 1804 1806 btrfs_get_tree_mod_seq(fs_info, &elem); 1805 1807 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
+7 -2
fs/btrfs/reada.c
··· 428 428 continue; 429 429 } 430 430 if (!dev->bdev) { 431 - /* cannot read ahead on missing device */ 432 - continue; 431 + /* 432 + * cannot read ahead on missing device, but for RAID5/6, 433 + * REQ_GET_READ_MIRRORS return 1. So don't skip missing 434 + * device for such case. 435 + */ 436 + if (nzones > 1) 437 + continue; 433 438 } 434 439 if (dev_replace_is_ongoing && 435 440 dev == fs_info->dev_replace.tgtdev) {
+1 -1
fs/btrfs/tests/btrfs-tests.c
··· 135 135 radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { 136 136 struct extent_buffer *eb; 137 137 138 - eb = radix_tree_deref_slot(slot); 138 + eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); 139 139 if (!eb) 140 140 continue; 141 141 /* Shouldn't happen but that kind of thinking creates CVE's */
+2
fs/btrfs/tests/qgroup-tests.c
··· 415 415 ret = -ENOMEM; 416 416 goto out; 417 417 } 418 + btrfs_set_header_level(root->node, 0); 419 + btrfs_set_header_nritems(root->node, 0); 418 420 root->alloc_bytenr += 8192; 419 421 420 422 tmp_root = btrfs_alloc_dummy_root();
+7 -5
fs/btrfs/transaction.c
··· 1284 1284 goto fail; 1285 1285 } 1286 1286 1287 - pending->error = btrfs_qgroup_inherit(trans, fs_info, 1288 - root->root_key.objectid, 1289 - objectid, pending->inherit); 1290 - if (pending->error) 1291 - goto no_free_objectid; 1287 + ret = btrfs_qgroup_inherit(trans, fs_info, 1288 + root->root_key.objectid, 1289 + objectid, pending->inherit); 1290 + if (ret) { 1291 + btrfs_abort_transaction(trans, root, ret); 1292 + goto fail; 1293 + } 1292 1294 1293 1295 /* see comments in should_cow_block() */ 1294 1296 set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+10
include/uapi/linux/btrfs.h
··· 306 306 char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; 307 307 }; 308 308 309 + struct btrfs_ioctl_search_args_v2 { 310 + struct btrfs_ioctl_search_key key; /* in/out - search parameters */ 311 + __u64 buf_size; /* in - size of buffer 312 + * out - on EOVERFLOW: needed size 313 + * to store item */ 314 + __u64 buf[0]; /* out - found items */ 315 + }; 316 + 309 317 struct btrfs_ioctl_clone_range_args { 310 318 __s64 src_fd; 311 319 __u64 src_offset, src_length; ··· 566 558 struct btrfs_ioctl_defrag_range_args) 567 559 #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ 568 560 struct btrfs_ioctl_search_args) 561 + #define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \ 562 + struct btrfs_ioctl_search_args_v2) 569 563 #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ 570 564 struct btrfs_ioctl_ino_lookup_args) 571 565 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)