Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

Pull pnfs/ore fixes from Boaz Harrosh:
"These are catastrophic fixes to the pnfs objects-layout that were just
discovered. They are also destined for @stable.

I have found these and worked on them at around RC1 time but
unfortunately went to the hospital for kidney stones and had a very
slow recovery. I refrained from sending them as is, before proper
testing, and surely enough I found a bug just yesterday.

So now they are all well tested, and have my sign-off. Other than
fixing the problem at hand, and assuming there are no bugs at the new
code, there is low risk to any surrounding code. And in any case they
affect only these paths that are now broken. That is RAID5 in pnfs
objects-layout code. It does also affect exofs (which was not broken)
but I have tested exofs and it is lower priority than objects-layout
because no one is using exofs, but objects-layout has lots of users."

* 'for-linus' of git://git.open-osd.org/linux-open-osd:
pnfs-obj: Fix __r4w_get_page when offset is beyond i_size
pnfs-obj: don't leak objio_state if ore_write/read fails
ore: Unlock r4w pages in exact reverse order of locking
ore: Remove support of partial IO request (NFS crash)
ore: Fix NFS crash by supporting any unaligned RAID IO

+70 -56
+1 -7
fs/exofs/ore.c
··· 735 out: 736 ios->numdevs = devs_in_group; 737 ios->pages_consumed = cur_pg; 738 - if (unlikely(ret)) { 739 - if (length == ios->length) 740 - return ret; 741 - else 742 - ios->length -= length; 743 - } 744 - return 0; 745 } 746 747 int ore_create(struct ore_io_state *ios)
··· 735 out: 736 ios->numdevs = devs_in_group; 737 ios->pages_consumed = cur_pg; 738 + return ret; 739 } 740 741 int ore_create(struct ore_io_state *ios)
+49 -44
fs/exofs/ore_raid.c
··· 144 { 145 unsigned data_devs = sp2d->data_devs; 146 unsigned group_width = data_devs + sp2d->parity; 147 - unsigned p; 148 149 if (!sp2d->needed) 150 return; 151 152 for (p = 0; p < sp2d->pages_in_unit; p++) { 153 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 154 - 155 - if (_1ps->write_count < group_width) { 156 - unsigned c; 157 - 158 - for (c = 0; c < data_devs; c++) 159 - if (_1ps->page_is_read[c]) { 160 - struct page *page = _1ps->pages[c]; 161 - 162 - r4w->put_page(priv, page); 163 - _1ps->page_is_read[c] = false; 164 - } 165 - } 166 167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); 168 _1ps->write_count = 0; ··· 461 * ios->sp2d[p][*], xor is calculated the same way. These pages are 462 * allocated/freed and don't go through cache 463 */ 464 - static int _read_4_write(struct ore_io_state *ios) 465 { 466 - struct ore_io_state *ios_read; 467 struct ore_striping_info read_si; 468 struct __stripe_pages_2d *sp2d = ios->sp2d; 469 u64 offset = ios->si.first_stripe_start; 470 - u64 last_stripe_end; 471 - unsigned bytes_in_stripe = ios->si.bytes_in_stripe; 472 - unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; 473 - int ret; 474 475 if (offset == ios->offset) /* Go to start collect $200 */ 476 goto read_last_stripe; 477 478 min_p = _sp2d_min_pg(sp2d); 479 max_p = _sp2d_max_pg(sp2d); 480 481 for (c = 0; ; c++) { 482 ore_calc_stripe_info(ios->layout, offset, 0, &read_si); ··· 511 } 512 513 read_last_stripe: 514 offset = ios->offset + ios->length; 515 if (offset % PAGE_SIZE) 516 _add_to_r4w_last_page(ios, &offset); ··· 538 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 539 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); 540 541 - BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); 542 - /* unaligned IO must be within a single stripe */ 543 - 544 if (min_p == sp2d->pages_in_unit) { 545 /* Didn't do it yet */ 546 min_p = _sp2d_min_pg(sp2d); 547 max_p = _sp2d_max_pg(sp2d); 548 } 549 550 
while (offset < last_stripe_end) { 551 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; ··· 579 } 580 581 read_it: 582 ios_read = ios->ios_read_4_write; 583 if (!ios_read) 584 return 0; ··· 611 } 612 613 _mark_read4write_pages_uptodate(ios_read, ret); 614 return 0; 615 } 616 ··· 648 /* If first stripe, Read in all read4write pages 649 * (if needed) before we calculate the first parity. 650 */ 651 - _read_4_write(ios); 652 } 653 654 for (i = 0; i < num_pages; i++) { 655 pages[i] = _raid_page_alloc(); ··· 679 680 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) 681 { 682 - struct ore_layout *layout = ios->layout; 683 - 684 if (ios->parity_pages) { 685 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; 686 - unsigned stripe_size = ios->si.bytes_in_stripe; 687 - u64 last_stripe, first_stripe; 688 689 if (_sp2d_alloc(pages_in_unit, layout->group_width, 690 layout->parity, &ios->sp2d)) { 691 return -ENOMEM; 692 - } 693 - 694 - /* Round io down to last full strip */ 695 - first_stripe = div_u64(ios->offset, stripe_size); 696 - last_stripe = div_u64(ios->offset + ios->length, stripe_size); 697 - 698 - /* If an IO spans more then a single stripe it must end at 699 - * a stripe boundary. The reminder at the end is pushed into the 700 - * next IO. 701 - */ 702 - if (last_stripe != first_stripe) { 703 - ios->length = last_stripe * stripe_size - ios->offset; 704 - 705 - BUG_ON(!ios->length); 706 - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / 707 - PAGE_SIZE; 708 - ios->si.length = ios->length; /*make it consistent */ 709 } 710 } 711 return 0;
··· 144 { 145 unsigned data_devs = sp2d->data_devs; 146 unsigned group_width = data_devs + sp2d->parity; 147 + int p, c; 148 149 if (!sp2d->needed) 150 return; 151 152 + for (c = data_devs - 1; c >= 0; --c) 153 + for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { 154 + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 155 + 156 + if (_1ps->page_is_read[c]) { 157 + struct page *page = _1ps->pages[c]; 158 + 159 + r4w->put_page(priv, page); 160 + _1ps->page_is_read[c] = false; 161 + } 162 + } 163 + 164 for (p = 0; p < sp2d->pages_in_unit; p++) { 165 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 166 167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); 168 _1ps->write_count = 0; ··· 461 * ios->sp2d[p][*], xor is calculated the same way. These pages are 462 * allocated/freed and don't go through cache 463 */ 464 + static int _read_4_write_first_stripe(struct ore_io_state *ios) 465 { 466 struct ore_striping_info read_si; 467 struct __stripe_pages_2d *sp2d = ios->sp2d; 468 u64 offset = ios->si.first_stripe_start; 469 + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; 470 471 if (offset == ios->offset) /* Go to start collect $200 */ 472 goto read_last_stripe; 473 474 min_p = _sp2d_min_pg(sp2d); 475 max_p = _sp2d_max_pg(sp2d); 476 + 477 + ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", 478 + offset, ios->offset, min_p, max_p); 479 480 for (c = 0; ; c++) { 481 ore_calc_stripe_info(ios->layout, offset, 0, &read_si); ··· 512 } 513 514 read_last_stripe: 515 + return 0; 516 + } 517 + 518 + static int _read_4_write_last_stripe(struct ore_io_state *ios) 519 + { 520 + struct ore_striping_info read_si; 521 + struct __stripe_pages_2d *sp2d = ios->sp2d; 522 + u64 offset; 523 + u64 last_stripe_end; 524 + unsigned bytes_in_stripe = ios->si.bytes_in_stripe; 525 + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; 526 + 527 offset = ios->offset + ios->length; 528 if (offset % PAGE_SIZE) 529 _add_to_r4w_last_page(ios, &offset); ··· 
527 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 528 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); 529 530 if (min_p == sp2d->pages_in_unit) { 531 /* Didn't do it yet */ 532 min_p = _sp2d_min_pg(sp2d); 533 max_p = _sp2d_max_pg(sp2d); 534 } 535 + 536 + ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", 537 + offset, last_stripe_end, min_p, max_p); 538 539 while (offset < last_stripe_end) { 540 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; ··· 568 } 569 570 read_it: 571 + return 0; 572 + } 573 + 574 + static int _read_4_write_execute(struct ore_io_state *ios) 575 + { 576 + struct ore_io_state *ios_read; 577 + unsigned i; 578 + int ret; 579 + 580 ios_read = ios->ios_read_4_write; 581 if (!ios_read) 582 return 0; ··· 591 } 592 593 _mark_read4write_pages_uptodate(ios_read, ret); 594 + ore_put_io_state(ios_read); 595 + ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */ 596 return 0; 597 } 598 ··· 626 /* If first stripe, Read in all read4write pages 627 * (if needed) before we calculate the first parity. 628 */ 629 + _read_4_write_first_stripe(ios); 630 } 631 + if (!cur_len) /* If last stripe r4w pages of last stripe */ 632 + _read_4_write_last_stripe(ios); 633 + _read_4_write_execute(ios); 634 635 for (i = 0; i < num_pages; i++) { 636 pages[i] = _raid_page_alloc(); ··· 654 655 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) 656 { 657 if (ios->parity_pages) { 658 + struct ore_layout *layout = ios->layout; 659 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; 660 661 if (_sp2d_alloc(pages_in_unit, layout->group_width, 662 layout->parity, &ios->sp2d)) { 663 return -ENOMEM; 664 } 665 } 666 return 0;
+20 -5
fs/nfs/objlayout/objio_osd.c
··· 454 objios->ios->done = _read_done; 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 456 rdata->args.offset, rdata->args.count); 457 - return ore_read(objios->ios); 458 } 459 460 /* ··· 489 struct nfs_write_data *wdata = objios->oir.rpcdata; 490 struct address_space *mapping = wdata->header->inode->i_mapping; 491 pgoff_t index = offset / PAGE_SIZE; 492 - struct page *page = find_get_page(mapping, index); 493 494 if (!page) { 495 page = find_or_create_page(mapping, index, GFP_NOFS); 496 if (unlikely(!page)) { ··· 518 519 static void __r4w_put_page(void *priv, struct page *page) 520 { 521 - dprintk("%s: index=0x%lx\n", __func__, page->index); 522 - page_cache_release(page); 523 return; 524 } 525 ··· 552 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 553 wdata->args.offset, wdata->args.count); 554 ret = ore_write(objios->ios); 555 - if (unlikely(ret)) 556 return ret; 557 558 if (objios->sync) 559 _write_done(objios->ios, objios);
··· 454 objios->ios->done = _read_done; 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 456 rdata->args.offset, rdata->args.count); 457 + ret = ore_read(objios->ios); 458 + if (unlikely(ret)) 459 + objio_free_result(&objios->oir); 460 + return ret; 461 } 462 463 /* ··· 486 struct nfs_write_data *wdata = objios->oir.rpcdata; 487 struct address_space *mapping = wdata->header->inode->i_mapping; 488 pgoff_t index = offset / PAGE_SIZE; 489 + struct page *page; 490 + loff_t i_size = i_size_read(wdata->header->inode); 491 492 + if (offset >= i_size) { 493 + *uptodate = true; 494 + dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); 495 + return ZERO_PAGE(0); 496 + } 497 + 498 + page = find_get_page(mapping, index); 499 if (!page) { 500 page = find_or_create_page(mapping, index, GFP_NOFS); 501 if (unlikely(!page)) { ··· 507 508 static void __r4w_put_page(void *priv, struct page *page) 509 { 510 + dprintk("%s: index=0x%lx\n", __func__, 511 + (page == ZERO_PAGE(0)) ? -1UL : page->index); 512 + if (ZERO_PAGE(0) != page) 513 + page_cache_release(page); 514 return; 515 } 516 ··· 539 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 540 wdata->args.offset, wdata->args.count); 541 ret = ore_write(objios->ios); 542 + if (unlikely(ret)) { 543 + objio_free_result(&objios->oir); 544 return ret; 545 + } 546 547 if (objios->sync) 548 _write_done(objios->ios, objios);