Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

orangefs: copy Orangefs-sized blocks into the pagecache if possible.

->readpage looks in file->private_data to find out what "count" the
userspace program passed to read(2), or set with "dd bs=" and the like.

->readpage uses "count" and inode->i_size to calculate how much
data Orangefs should deposit in the Orangefs shared buffer, and
remembers which slot the data is in.

After copying data from the Orangefs shared buffer slot into the
requested page, ->readpage steps through the following pagecache
indexes and fills as many additional pages as it can from the extra
data in the shared buffer. Hopefully the vfs will need these extra
pages soon, and they will already be in the pagecache.

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
Signed-off-by: Martin Brandenburg <martin@omnibond.com>

+156 -15
+32 -5
fs/orangefs/file.c
··· 54 54 struct orangefs_kernel_op_s *new_op = NULL; 55 55 int buffer_index = -1; 56 56 ssize_t ret; 57 + size_t copy_amount; 57 58 58 59 new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); 59 60 if (!new_op) ··· 213 212 * can futher be kernel-space or user-space addresses. 214 213 * or it can pointers to struct page's 215 214 */ 215 + 216 + /* 217 + * When reading, readahead_size will only be zero when 218 + * we're doing O_DIRECT, otherwise we got here from 219 + * orangefs_readpage. 220 + * 221 + * If we got here from orangefs_readpage we want to 222 + * copy either a page or the whole file into the io 223 + * vector, whichever is smaller. 224 + */ 225 + if (readahead_size) 226 + copy_amount = 227 + min(new_op->downcall.resp.io.amt_complete, 228 + (__s64)PAGE_SIZE); 229 + else 230 + copy_amount = new_op->downcall.resp.io.amt_complete; 231 + 216 232 ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, 217 - new_op->downcall.resp.io.amt_complete); 233 + copy_amount); 218 234 if (ret < 0) { 219 235 gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", 220 236 __func__, (long)ret); ··· 249 231 250 232 out: 251 233 if (buffer_index >= 0) { 252 - orangefs_bufmap_put(buffer_index); 253 - gossip_debug(GOSSIP_FILE_DEBUG, 254 - "%s(%pU): PUT buffer_index %d\n", 255 - __func__, handle, buffer_index); 234 + if ((readahead_size) && (type == ORANGEFS_IO_READ)) { 235 + /* readpage */ 236 + *index_return = buffer_index; 237 + gossip_debug(GOSSIP_FILE_DEBUG, 238 + "%s: hold on to buffer_index :%d:\n", 239 + __func__, buffer_index); 240 + } else { 241 + /* O_DIRECT */ 242 + orangefs_bufmap_put(buffer_index); 243 + gossip_debug(GOSSIP_FILE_DEBUG, 244 + "%s(%pU): PUT buffer_index %d\n", 245 + __func__, handle, buffer_index); 246 + } 256 247 buffer_index = -1; 257 248 } 258 249 op_release(new_op);
+107 -8
fs/orangefs/inode.c
··· 247 247 return ret; 248 248 } 249 249 250 + static int orangefs_launder_page(struct page *); 251 + 250 252 static int orangefs_readpage(struct file *file, struct page *page) 251 253 { 252 254 struct inode *inode = page->mapping->host; 253 255 struct iov_iter iter; 254 256 struct bio_vec bv; 255 257 ssize_t ret; 256 - loff_t off; 258 + loff_t off; /* offset into this page */ 259 + pgoff_t index; /* which page */ 260 + struct page *next_page; 261 + char *kaddr; 262 + struct orangefs_read_options *ro = file->private_data; 263 + loff_t read_size; 264 + loff_t roundedup; 265 + int buffer_index = -1; /* orangefs shared memory slot */ 266 + int slot_index; /* index into slot */ 267 + int remaining; 268 + 269 + /* 270 + * If they set some miniscule size for "count" in read(2) 271 + * (for example) then let's try to read a page, or the whole file 272 + * if it is smaller than a page. Once "count" goes over a page 273 + * then lets round up to the highest page size multiple that is 274 + * less than or equal to "count" and do that much orangefs IO and 275 + * try to fill as many pages as we can from it. 276 + * 277 + * "count" should be represented in ro->blksiz. 278 + * 279 + * inode->i_size = file size. 280 + */ 281 + if (ro) { 282 + if (ro->blksiz < PAGE_SIZE) { 283 + if (inode->i_size < PAGE_SIZE) 284 + read_size = inode->i_size; 285 + else 286 + read_size = PAGE_SIZE; 287 + } else { 288 + roundedup = ((PAGE_SIZE - 1) & ro->blksiz) ? 
289 + ((ro->blksiz + PAGE_SIZE) & ~(PAGE_SIZE -1)) : 290 + ro->blksiz; 291 + if (roundedup > inode->i_size) 292 + read_size = inode->i_size; 293 + else 294 + read_size = roundedup; 295 + 296 + } 297 + } else { 298 + read_size = PAGE_SIZE; 299 + } 300 + if (!read_size) 301 + read_size = PAGE_SIZE; 302 + 303 + if (PageDirty(page)) 304 + orangefs_launder_page(page); 257 305 258 306 off = page_offset(page); 307 + index = off >> PAGE_SHIFT; 259 308 bv.bv_page = page; 260 309 bv.bv_len = PAGE_SIZE; 261 310 bv.bv_offset = 0; 262 311 iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE); 263 312 264 - if (PageDirty(page)) 265 - orangefs_launder_page(page); 266 - 267 313 ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter, 268 - PAGE_SIZE, inode->i_size, NULL, NULL); 314 + read_size, inode->i_size, NULL, &buffer_index); 315 + remaining = ret; 269 316 /* this will only zero remaining unread portions of the page data */ 270 317 iov_iter_zero(~0U, &iter); 271 318 /* takes care of potential aliasing */ 272 319 flush_dcache_page(page); 273 320 if (ret < 0) { 274 321 SetPageError(page); 322 + unlock_page(page); 323 + goto out; 275 324 } else { 276 325 SetPageUptodate(page); 277 326 if (PageError(page)) ··· 329 280 } 330 281 /* unlock the page after the ->readpage() routine completes */ 331 282 unlock_page(page); 283 + 284 + if (remaining > PAGE_SIZE) { 285 + slot_index = 0; 286 + while ((remaining - PAGE_SIZE) >= PAGE_SIZE) { 287 + remaining -= PAGE_SIZE; 288 + /* 289 + * It is an optimization to try and fill more than one 290 + * page... by now we've already gotten the single 291 + * page we were after, if stuff doesn't seem to 292 + * be going our way at this point just return 293 + * and hope for the best. 294 + * 295 + * If we look for pages and they're already there is 296 + * one reason to give up, and if they're not there 297 + * and we can't create them is another reason. 
298 + */ 299 + 300 + index++; 301 + slot_index++; 302 + next_page = find_get_page(inode->i_mapping, index); 303 + if (next_page) { 304 + gossip_debug(GOSSIP_FILE_DEBUG, 305 + "%s: found next page, quitting\n", 306 + __func__); 307 + put_page(next_page); 308 + goto out; 309 + } 310 + next_page = find_or_create_page(inode->i_mapping, 311 + index, 312 + GFP_KERNEL); 313 + /* 314 + * I've never hit this, leave it as a printk for 315 + * now so it will be obvious. 316 + */ 317 + if (!next_page) { 318 + printk("%s: can't create next page, quitting\n", 319 + __func__); 320 + goto out; 321 + } 322 + kaddr = kmap_atomic(next_page); 323 + orangefs_bufmap_page_fill(kaddr, 324 + buffer_index, 325 + slot_index); 326 + kunmap_atomic(kaddr); 327 + SetPageUptodate(next_page); 328 + unlock_page(next_page); 329 + put_page(next_page); 330 + } 331 + } 332 + 333 + out: 334 + if (buffer_index != -1) 335 + orangefs_bufmap_put(buffer_index); 332 336 return ret; 333 337 } 334 - 335 - static int orangefs_launder_page(struct page *); 336 338 337 339 static int orangefs_write_begin(struct file *file, 338 340 struct address_space *mapping, ··· 426 326 if (ret) 427 327 return ret; 428 328 } 429 - 430 329 } 431 330 432 331 wr = kmalloc(sizeof *wr, GFP_KERNEL);
+13
fs/orangefs/orangefs-bufmap.c
··· 538 538 } 539 539 return 0; 540 540 } 541 + 542 + void orangefs_bufmap_page_fill(void *page_to, 543 + int buffer_index, 544 + int slot_index) 545 + { 546 + struct orangefs_bufmap_desc *from; 547 + void *page_from; 548 + 549 + from = &__orangefs_bufmap->desc_array[buffer_index]; 550 + page_from = kmap_atomic(from->page_array[slot_index]); 551 + memcpy(page_to, page_from, PAGE_SIZE); 552 + kunmap_atomic(page_from); 553 + }
+2
fs/orangefs/orangefs-bufmap.h
··· 34 34 int buffer_index, 35 35 size_t size); 36 36 37 + void orangefs_bufmap_page_fill(void *kaddr, int buffer_index, int slot_index); 38 + 37 39 #endif /* __ORANGEFS_BUFMAP_H */
+2 -2
fs/orangefs/orangefs-debugfs.c
··· 963 963 return ret; 964 964 } 965 965 966 - int orangefs_debugfs_new_client_string(void __user *arg) 966 + int orangefs_debugfs_new_client_string(void __user *arg) 967 967 { 968 968 int ret; 969 969 ··· 1016 1016 return 0; 1017 1017 } 1018 1018 1019 - int orangefs_debugfs_new_debug(void __user *arg) 1019 + int orangefs_debugfs_new_debug(void __user *arg) 1020 1020 { 1021 1021 struct dev_mask_info_s mask_info = {0}; 1022 1022 int ret;