Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

Pull pnfs/ore fixes from Boaz Harrosh:
"These are catastrophic fixes to the pnfs objects-layout that were just
discovered. They are also destined for @stable.

I found these and worked on them at around RC1 time, but
unfortunately went to the hospital for kidney stones and had a very
slow recovery. I refrained from sending them as is, before proper
testing, and sure enough I found a bug just yesterday.

So now they are all well tested, and have my sign-off. Other than
fixing the problem at hand, and assuming there are no bugs in the new
code, there is low risk to any surrounding code. And in any case they
affect only the paths that are now broken, that is, RAID5 in the pnfs
objects-layout code. They do also affect exofs (which was not broken),
but I have tested exofs, and it is lower priority than objects-layout
because no one is using exofs, while objects-layout has lots of users."

* 'for-linus' of git://git.open-osd.org/linux-open-osd:
pnfs-obj: Fix __r4w_get_page when offset is beyond i_size
pnfs-obj: don't leak objio_state if ore_write/read fails
ore: Unlock r4w pages in exact reverse order of locking
ore: Remove support of partial IO request (NFS crash)
ore: Fix NFS crash by supporting any unaligned RAID IO

Changed files
+70 -56
fs
exofs
nfs
objlayout
+1 -7
fs/exofs/ore.c
··· 735 735 out: 736 736 ios->numdevs = devs_in_group; 737 737 ios->pages_consumed = cur_pg; 738 - if (unlikely(ret)) { 739 - if (length == ios->length) 740 - return ret; 741 - else 742 - ios->length -= length; 743 - } 744 - return 0; 738 + return ret; 745 739 } 746 740 747 741 int ore_create(struct ore_io_state *ios)
+49 -44
fs/exofs/ore_raid.c
··· 144 144 { 145 145 unsigned data_devs = sp2d->data_devs; 146 146 unsigned group_width = data_devs + sp2d->parity; 147 - unsigned p; 147 + int p, c; 148 148 149 149 if (!sp2d->needed) 150 150 return; 151 151 152 + for (c = data_devs - 1; c >= 0; --c) 153 + for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { 154 + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 155 + 156 + if (_1ps->page_is_read[c]) { 157 + struct page *page = _1ps->pages[c]; 158 + 159 + r4w->put_page(priv, page); 160 + _1ps->page_is_read[c] = false; 161 + } 162 + } 163 + 152 164 for (p = 0; p < sp2d->pages_in_unit; p++) { 153 165 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 154 - 155 - if (_1ps->write_count < group_width) { 156 - unsigned c; 157 - 158 - for (c = 0; c < data_devs; c++) 159 - if (_1ps->page_is_read[c]) { 160 - struct page *page = _1ps->pages[c]; 161 - 162 - r4w->put_page(priv, page); 163 - _1ps->page_is_read[c] = false; 164 - } 165 - } 166 166 167 167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); 168 168 _1ps->write_count = 0; ··· 461 461 * ios->sp2d[p][*], xor is calculated the same way. 
These pages are 462 462 * allocated/freed and don't go through cache 463 463 */ 464 - static int _read_4_write(struct ore_io_state *ios) 464 + static int _read_4_write_first_stripe(struct ore_io_state *ios) 465 465 { 466 - struct ore_io_state *ios_read; 467 466 struct ore_striping_info read_si; 468 467 struct __stripe_pages_2d *sp2d = ios->sp2d; 469 468 u64 offset = ios->si.first_stripe_start; 470 - u64 last_stripe_end; 471 - unsigned bytes_in_stripe = ios->si.bytes_in_stripe; 472 - unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; 473 - int ret; 469 + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; 474 470 475 471 if (offset == ios->offset) /* Go to start collect $200 */ 476 472 goto read_last_stripe; 477 473 478 474 min_p = _sp2d_min_pg(sp2d); 479 475 max_p = _sp2d_max_pg(sp2d); 476 + 477 + ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", 478 + offset, ios->offset, min_p, max_p); 480 479 481 480 for (c = 0; ; c++) { 482 481 ore_calc_stripe_info(ios->layout, offset, 0, &read_si); ··· 511 512 } 512 513 513 514 read_last_stripe: 515 + return 0; 516 + } 517 + 518 + static int _read_4_write_last_stripe(struct ore_io_state *ios) 519 + { 520 + struct ore_striping_info read_si; 521 + struct __stripe_pages_2d *sp2d = ios->sp2d; 522 + u64 offset; 523 + u64 last_stripe_end; 524 + unsigned bytes_in_stripe = ios->si.bytes_in_stripe; 525 + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; 526 + 514 527 offset = ios->offset + ios->length; 515 528 if (offset % PAGE_SIZE) 516 529 _add_to_r4w_last_page(ios, &offset); ··· 538 527 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 539 528 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); 540 529 541 - BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); 542 - /* unaligned IO must be within a single stripe */ 543 - 544 530 if (min_p == sp2d->pages_in_unit) { 545 531 /* Didn't do it yet */ 546 532 min_p = _sp2d_min_pg(sp2d); 547 533 max_p = 
_sp2d_max_pg(sp2d); 548 534 } 535 + 536 + ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", 537 + offset, last_stripe_end, min_p, max_p); 549 538 550 539 while (offset < last_stripe_end) { 551 540 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; ··· 579 568 } 580 569 581 570 read_it: 571 + return 0; 572 + } 573 + 574 + static int _read_4_write_execute(struct ore_io_state *ios) 575 + { 576 + struct ore_io_state *ios_read; 577 + unsigned i; 578 + int ret; 579 + 582 580 ios_read = ios->ios_read_4_write; 583 581 if (!ios_read) 584 582 return 0; ··· 611 591 } 612 592 613 593 _mark_read4write_pages_uptodate(ios_read, ret); 594 + ore_put_io_state(ios_read); 595 + ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */ 614 596 return 0; 615 597 } 616 598 ··· 648 626 /* If first stripe, Read in all read4write pages 649 627 * (if needed) before we calculate the first parity. 650 628 */ 651 - _read_4_write(ios); 629 + _read_4_write_first_stripe(ios); 652 630 } 631 + if (!cur_len) /* If last stripe r4w pages of last stripe */ 632 + _read_4_write_last_stripe(ios); 633 + _read_4_write_execute(ios); 653 634 654 635 for (i = 0; i < num_pages; i++) { 655 636 pages[i] = _raid_page_alloc(); ··· 679 654 680 655 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) 681 656 { 682 - struct ore_layout *layout = ios->layout; 683 - 684 657 if (ios->parity_pages) { 658 + struct ore_layout *layout = ios->layout; 685 659 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; 686 - unsigned stripe_size = ios->si.bytes_in_stripe; 687 - u64 last_stripe, first_stripe; 688 660 689 661 if (_sp2d_alloc(pages_in_unit, layout->group_width, 690 662 layout->parity, &ios->sp2d)) { 691 663 return -ENOMEM; 692 - } 693 - 694 - /* Round io down to last full strip */ 695 - first_stripe = div_u64(ios->offset, stripe_size); 696 - last_stripe = div_u64(ios->offset + ios->length, stripe_size); 697 - 698 - /* If an IO spans more then a single stripe it must end at 699 - * a 
stripe boundary. The reminder at the end is pushed into the 700 - * next IO. 701 - */ 702 - if (last_stripe != first_stripe) { 703 - ios->length = last_stripe * stripe_size - ios->offset; 704 - 705 - BUG_ON(!ios->length); 706 - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / 707 - PAGE_SIZE; 708 - ios->si.length = ios->length; /*make it consistent */ 709 664 } 710 665 } 711 666 return 0;
+20 -5
fs/nfs/objlayout/objio_osd.c
··· 454 454 objios->ios->done = _read_done; 455 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 456 456 rdata->args.offset, rdata->args.count); 457 - return ore_read(objios->ios); 457 + ret = ore_read(objios->ios); 458 + if (unlikely(ret)) 459 + objio_free_result(&objios->oir); 460 + return ret; 458 461 } 459 462 460 463 /* ··· 489 486 struct nfs_write_data *wdata = objios->oir.rpcdata; 490 487 struct address_space *mapping = wdata->header->inode->i_mapping; 491 488 pgoff_t index = offset / PAGE_SIZE; 492 - struct page *page = find_get_page(mapping, index); 489 + struct page *page; 490 + loff_t i_size = i_size_read(wdata->header->inode); 493 491 492 + if (offset >= i_size) { 493 + *uptodate = true; 494 + dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); 495 + return ZERO_PAGE(0); 496 + } 497 + 498 + page = find_get_page(mapping, index); 494 499 if (!page) { 495 500 page = find_or_create_page(mapping, index, GFP_NOFS); 496 501 if (unlikely(!page)) { ··· 518 507 519 508 static void __r4w_put_page(void *priv, struct page *page) 520 509 { 521 - dprintk("%s: index=0x%lx\n", __func__, page->index); 522 - page_cache_release(page); 510 + dprintk("%s: index=0x%lx\n", __func__, 511 + (page == ZERO_PAGE(0)) ? -1UL : page->index); 512 + if (ZERO_PAGE(0) != page) 513 + page_cache_release(page); 523 514 return; 524 515 } 525 516 ··· 552 539 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 553 540 wdata->args.offset, wdata->args.count); 554 541 ret = ore_write(objios->ios); 555 - if (unlikely(ret)) 542 + if (unlikely(ret)) { 543 + objio_free_result(&objios->oir); 556 544 return ret; 545 + } 557 546 558 547 if (objios->sync) 559 548 _write_done(objios->ios, objios);