fs/exofs/inode.c at 4dfd459b738cf1f65b3eac4e0a9b19bc93cc91c6

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / fs / exofs / inode.c
at 4dfd459b738cf1f65b3eac4e0a9b19bc93cc91c6 1314 lines 32 kB view raw
wrap content
   1/*
   2 * Copyright (C) 2005, 2006
   3 * Avishay Traeger (avishay@gmail.com)
   4 * Copyright (C) 2008, 2009
   5 * Boaz Harrosh <bharrosh@panasas.com>
   6 *
   7 * Copyrights for code taken from ext2:
   8 *     Copyright (C) 1992, 1993, 1994, 1995
   9 *     Remy Card (card@masi.ibp.fr)
  10 *     Laboratoire MASI - Institut Blaise Pascal
  11 *     Universite Pierre et Marie Curie (Paris VI)
  12 *     from
  13 *     linux/fs/minix/inode.c
  14 *     Copyright (C) 1991, 1992  Linus Torvalds
  15 *
  16 * This file is part of exofs.
  17 *
  18 * exofs is free software; you can redistribute it and/or modify
  19 * it under the terms of the GNU General Public License as published by
  20 * the Free Software Foundation.  Since it is based on ext2, and the only
  21 * valid version of GPL for the Linux kernel is version 2, the only valid
  22 * version of GPL for exofs is version 2.
  23 *
  24 * exofs is distributed in the hope that it will be useful,
  25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  27 * GNU General Public License for more details.
  28 *
  29 * You should have received a copy of the GNU General Public License
  30 * along with exofs; if not, write to the Free Software
  31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  32 */
  33
  34#include <linux/writeback.h>
  35#include <linux/buffer_head.h>
  36#include <scsi/scsi_device.h>
  37
  38#include "exofs.h"
  39
  40#define EXOFS_DBGMSG2(M...) do {} while (0)
  41
  42enum { BIO_MAX_PAGES_KMALLOC =
  43		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
  44};
  45
  46struct page_collect {
  47	struct exofs_sb_info *sbi;
  48	struct request_queue *req_q;
  49	struct inode *inode;
  50	unsigned expected_pages;
  51	struct exofs_io_state *ios;
  52
  53	struct bio *bio;
  54	unsigned nr_pages;
  55	unsigned long length;
  56	loff_t pg_first; /* keep 64bit also in 32-arches */
  57};
  58
  59static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
  60		       struct inode *inode)
  61{
  62	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
  63
  64	pcol->sbi = sbi;
  65	/* Create master bios on first Q, later on cloning, each clone will be
  66	 * allocated on it's destination Q
  67	 */
  68	pcol->req_q = osd_request_queue(sbi->s_ods[0]);
  69	pcol->inode = inode;
  70	pcol->expected_pages = expected_pages;
  71
  72	pcol->ios = NULL;
  73	pcol->bio = NULL;
  74	pcol->nr_pages = 0;
  75	pcol->length = 0;
  76	pcol->pg_first = -1;
  77}
  78
  79static void _pcol_reset(struct page_collect *pcol)
  80{
  81	pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
  82
  83	pcol->bio = NULL;
  84	pcol->nr_pages = 0;
  85	pcol->length = 0;
  86	pcol->pg_first = -1;
  87	pcol->ios = NULL;
  88
  89	/* this is probably the end of the loop but in writes
  90	 * it might not end here. don't be left with nothing
  91	 */
  92	if (!pcol->expected_pages)
  93		pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
  94}
  95
  96static int pcol_try_alloc(struct page_collect *pcol)
  97{
  98	int pages = min_t(unsigned, pcol->expected_pages,
  99			  BIO_MAX_PAGES_KMALLOC);
 100
 101	if (!pcol->ios) { /* First time allocate io_state */
 102		int ret = exofs_get_io_state(pcol->sbi, &pcol->ios);
 103
 104		if (ret)
 105			return ret;
 106	}
 107
 108	for (; pages; pages >>= 1) {
 109		pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
 110		if (likely(pcol->bio))
 111			return 0;
 112	}
 113
 114	EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
 115		  pcol->expected_pages);
 116	return -ENOMEM;
 117}
 118
 119static void pcol_free(struct page_collect *pcol)
 120{
 121	if (pcol->bio) {
 122		bio_put(pcol->bio);
 123		pcol->bio = NULL;
 124	}
 125
 126	if (pcol->ios) {
 127		exofs_put_io_state(pcol->ios);
 128		pcol->ios = NULL;
 129	}
 130}
 131
 132static int pcol_add_page(struct page_collect *pcol, struct page *page,
 133			 unsigned len)
 134{
 135	int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
 136	if (unlikely(len != added_len))
 137		return -ENOMEM;
 138
 139	++pcol->nr_pages;
 140	pcol->length += len;
 141	return 0;
 142}
 143
 144static int update_read_page(struct page *page, int ret)
 145{
 146	if (ret == 0) {
 147		/* Everything is OK */
 148		SetPageUptodate(page);
 149		if (PageError(page))
 150			ClearPageError(page);
 151	} else if (ret == -EFAULT) {
 152		/* In this case we were trying to read something that wasn't on
 153		 * disk yet - return a page full of zeroes.  This should be OK,
 154		 * because the object should be empty (if there was a write
 155		 * before this read, the read would be waiting with the page
 156		 * locked */
 157		clear_highpage(page);
 158
 159		SetPageUptodate(page);
 160		if (PageError(page))
 161			ClearPageError(page);
 162		ret = 0; /* recovered error */
 163		EXOFS_DBGMSG("recovered read error\n");
 164	} else /* Error */
 165		SetPageError(page);
 166
 167	return ret;
 168}
 169
 170static void update_write_page(struct page *page, int ret)
 171{
 172	if (ret) {
 173		mapping_set_error(page->mapping, ret);
 174		SetPageError(page);
 175	}
 176	end_page_writeback(page);
 177}
 178
 179/* Called at the end of reads, to optionally unlock pages and update their
 180 * status.
 181 */
 182static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 183{
 184	struct bio_vec *bvec;
 185	int i;
 186	u64 resid;
 187	u64 good_bytes;
 188	u64 length = 0;
 189	int ret = exofs_check_io(pcol->ios, &resid);
 190
 191	if (likely(!ret))
 192		good_bytes = pcol->length;
 193	else
 194		good_bytes = pcol->length - resid;
 195
 196	EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx"
 197		     " length=0x%lx nr_pages=%u\n",
 198		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 199		     pcol->nr_pages);
 200
 201	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
 202		struct page *page = bvec->bv_page;
 203		struct inode *inode = page->mapping->host;
 204		int page_stat;
 205
 206		if (inode != pcol->inode)
 207			continue; /* osd might add more pages at end */
 208
 209		if (likely(length < good_bytes))
 210			page_stat = 0;
 211		else
 212			page_stat = ret;
 213
 214		EXOFS_DBGMSG2("    readpages_done(0x%lx, 0x%lx) %s\n",
 215			  inode->i_ino, page->index,
 216			  page_stat ? "bad_bytes" : "good_bytes");
 217
 218		ret = update_read_page(page, page_stat);
 219		if (do_unlock)
 220			unlock_page(page);
 221		length += bvec->bv_len;
 222	}
 223
 224	pcol_free(pcol);
 225	EXOFS_DBGMSG("readpages_done END\n");
 226	return ret;
 227}
 228
 229/* callback of async reads */
 230static void readpages_done(struct exofs_io_state *ios, void *p)
 231{
 232	struct page_collect *pcol = p;
 233
 234	__readpages_done(pcol, true);
 235	atomic_dec(&pcol->sbi->s_curr_pending);
 236	kfree(pcol);
 237}
 238
 239static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 240{
 241	struct bio_vec *bvec;
 242	int i;
 243
 244	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
 245		struct page *page = bvec->bv_page;
 246
 247		if (rw == READ)
 248			update_read_page(page, ret);
 249		else
 250			update_write_page(page, ret);
 251
 252		unlock_page(page);
 253	}
 254}
 255
 256static int read_exec(struct page_collect *pcol, bool is_sync)
 257{
 258	struct exofs_i_info *oi = exofs_i(pcol->inode);
 259	struct exofs_io_state *ios = pcol->ios;
 260	struct page_collect *pcol_copy = NULL;
 261	int ret;
 262
 263	if (!pcol->bio)
 264		return 0;
 265
 266	/* see comment in _readpage() about sync reads */
 267	WARN_ON(is_sync && (pcol->nr_pages != 1));
 268
 269	ios->bio = pcol->bio;
 270	ios->length = pcol->length;
 271	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
 272
 273	if (is_sync) {
 274		exofs_oi_read(oi, pcol->ios);
 275		return __readpages_done(pcol, false);
 276	}
 277
 278	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
 279	if (!pcol_copy) {
 280		ret = -ENOMEM;
 281		goto err;
 282	}
 283
 284	*pcol_copy = *pcol;
 285	ios->done = readpages_done;
 286	ios->private = pcol_copy;
 287	ret = exofs_oi_read(oi, ios);
 288	if (unlikely(ret))
 289		goto err;
 290
 291	atomic_inc(&pcol->sbi->s_curr_pending);
 292
 293	EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
 294		  ios->obj.id, _LLU(ios->offset), pcol->length);
 295
 296	/* pages ownership was passed to pcol_copy */
 297	_pcol_reset(pcol);
 298	return 0;
 299
 300err:
 301	if (!is_sync)
 302		_unlock_pcol_pages(pcol, ret, READ);
 303
 304	pcol_free(pcol);
 305
 306	kfree(pcol_copy);
 307	return ret;
 308}
 309
 310/* readpage_strip is called either directly from readpage() or by the VFS from
 311 * within read_cache_pages(), to add one more page to be read. It will try to
 312 * collect as many contiguous pages as posible. If a discontinuity is
 313 * encountered, or it runs out of resources, it will submit the previous segment
 314 * and will start a new collection. Eventually caller must submit the last
 315 * segment if present.
 316 */
 317static int readpage_strip(void *data, struct page *page)
 318{
 319	struct page_collect *pcol = data;
 320	struct inode *inode = pcol->inode;
 321	struct exofs_i_info *oi = exofs_i(inode);
 322	loff_t i_size = i_size_read(inode);
 323	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
 324	size_t len;
 325	int ret;
 326
 327	/* FIXME: Just for debugging, will be removed */
 328	if (PageUptodate(page))
 329		EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
 330			  page->index);
 331
 332	if (page->index < end_index)
 333		len = PAGE_CACHE_SIZE;
 334	else if (page->index == end_index)
 335		len = i_size & ~PAGE_CACHE_MASK;
 336	else
 337		len = 0;
 338
 339	if (!len || !obj_created(oi)) {
 340		/* this will be out of bounds, or doesn't exist yet.
 341		 * Current page is cleared and the request is split
 342		 */
 343		clear_highpage(page);
 344
 345		SetPageUptodate(page);
 346		if (PageError(page))
 347			ClearPageError(page);
 348
 349		unlock_page(page);
 350		EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
 351			     " splitting\n", inode->i_ino, page->index);
 352
 353		return read_exec(pcol, false);
 354	}
 355
 356try_again:
 357
 358	if (unlikely(pcol->pg_first == -1)) {
 359		pcol->pg_first = page->index;
 360	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
 361		   page->index)) {
 362		/* Discontinuity detected, split the request */
 363		ret = read_exec(pcol, false);
 364		if (unlikely(ret))
 365			goto fail;
 366		goto try_again;
 367	}
 368
 369	if (!pcol->bio) {
 370		ret = pcol_try_alloc(pcol);
 371		if (unlikely(ret))
 372			goto fail;
 373	}
 374
 375	if (len != PAGE_CACHE_SIZE)
 376		zero_user(page, len, PAGE_CACHE_SIZE - len);
 377
 378	EXOFS_DBGMSG2("    readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
 379		     inode->i_ino, page->index, len);
 380
 381	ret = pcol_add_page(pcol, page, len);
 382	if (ret) {
 383		EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p "
 384			  "this_len=0x%zx nr_pages=%u length=0x%lx\n",
 385			  page, len, pcol->nr_pages, pcol->length);
 386
 387		/* split the request, and start again with current page */
 388		ret = read_exec(pcol, false);
 389		if (unlikely(ret))
 390			goto fail;
 391
 392		goto try_again;
 393	}
 394
 395	return 0;
 396
 397fail:
 398	/* SetPageError(page); ??? */
 399	unlock_page(page);
 400	return ret;
 401}
 402
 403static int exofs_readpages(struct file *file, struct address_space *mapping,
 404			   struct list_head *pages, unsigned nr_pages)
 405{
 406	struct page_collect pcol;
 407	int ret;
 408
 409	_pcol_init(&pcol, nr_pages, mapping->host);
 410
 411	ret = read_cache_pages(mapping, pages, readpage_strip, &pcol);
 412	if (ret) {
 413		EXOFS_ERR("read_cache_pages => %d\n", ret);
 414		return ret;
 415	}
 416
 417	return read_exec(&pcol, false);
 418}
 419
 420static int _readpage(struct page *page, bool is_sync)
 421{
 422	struct page_collect pcol;
 423	int ret;
 424
 425	_pcol_init(&pcol, 1, page->mapping->host);
 426
 427	/* readpage_strip might call read_exec(,is_sync==false) at several
 428	 * places but not if we have a single page.
 429	 */
 430	ret = readpage_strip(&pcol, page);
 431	if (ret) {
 432		EXOFS_ERR("_readpage => %d\n", ret);
 433		return ret;
 434	}
 435
 436	return read_exec(&pcol, is_sync);
 437}
 438
 439/*
 440 * We don't need the file
 441 */
 442static int exofs_readpage(struct file *file, struct page *page)
 443{
 444	return _readpage(page, false);
 445}
 446
 447/* Callback for osd_write. All writes are asynchronous */
 448static void writepages_done(struct exofs_io_state *ios, void *p)
 449{
 450	struct page_collect *pcol = p;
 451	struct bio_vec *bvec;
 452	int i;
 453	u64 resid;
 454	u64  good_bytes;
 455	u64  length = 0;
 456	int ret = exofs_check_io(ios, &resid);
 457
 458	atomic_dec(&pcol->sbi->s_curr_pending);
 459
 460	if (likely(!ret))
 461		good_bytes = pcol->length;
 462	else
 463		good_bytes = pcol->length - resid;
 464
 465	EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx"
 466		     " length=0x%lx nr_pages=%u\n",
 467		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 468		     pcol->nr_pages);
 469
 470	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
 471		struct page *page = bvec->bv_page;
 472		struct inode *inode = page->mapping->host;
 473		int page_stat;
 474
 475		if (inode != pcol->inode)
 476			continue; /* osd might add more pages to a bio */
 477
 478		if (likely(length < good_bytes))
 479			page_stat = 0;
 480		else
 481			page_stat = ret;
 482
 483		update_write_page(page, page_stat);
 484		unlock_page(page);
 485		EXOFS_DBGMSG2("    writepages_done(0x%lx, 0x%lx) status=%d\n",
 486			     inode->i_ino, page->index, page_stat);
 487
 488		length += bvec->bv_len;
 489	}
 490
 491	pcol_free(pcol);
 492	kfree(pcol);
 493	EXOFS_DBGMSG("writepages_done END\n");
 494}
 495
 496static int write_exec(struct page_collect *pcol)
 497{
 498	struct exofs_i_info *oi = exofs_i(pcol->inode);
 499	struct exofs_io_state *ios = pcol->ios;
 500	struct page_collect *pcol_copy = NULL;
 501	int ret;
 502
 503	if (!pcol->bio)
 504		return 0;
 505
 506	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
 507	if (!pcol_copy) {
 508		EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
 509		ret = -ENOMEM;
 510		goto err;
 511	}
 512
 513	*pcol_copy = *pcol;
 514
 515	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
 516
 517	ios->bio = pcol_copy->bio;
 518	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
 519	ios->length = pcol_copy->length;
 520	ios->done = writepages_done;
 521	ios->private = pcol_copy;
 522
 523	ret = exofs_oi_write(oi, ios);
 524	if (unlikely(ret)) {
 525		EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
 526		goto err;
 527	}
 528
 529	atomic_inc(&pcol->sbi->s_curr_pending);
 530	EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
 531		  pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
 532		  pcol->length);
 533	/* pages ownership was passed to pcol_copy */
 534	_pcol_reset(pcol);
 535	return 0;
 536
 537err:
 538	_unlock_pcol_pages(pcol, ret, WRITE);
 539	pcol_free(pcol);
 540	kfree(pcol_copy);
 541
 542	return ret;
 543}
 544
 545/* writepage_strip is called either directly from writepage() or by the VFS from
 546 * within write_cache_pages(), to add one more page to be written to storage.
 547 * It will try to collect as many contiguous pages as possible. If a
 548 * discontinuity is encountered or it runs out of resources it will submit the
 549 * previous segment and will start a new collection.
 550 * Eventually caller must submit the last segment if present.
 551 */
 552static int writepage_strip(struct page *page,
 553			   struct writeback_control *wbc_unused, void *data)
 554{
 555	struct page_collect *pcol = data;
 556	struct inode *inode = pcol->inode;
 557	struct exofs_i_info *oi = exofs_i(inode);
 558	loff_t i_size = i_size_read(inode);
 559	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
 560	size_t len;
 561	int ret;
 562
 563	BUG_ON(!PageLocked(page));
 564
 565	ret = wait_obj_created(oi);
 566	if (unlikely(ret))
 567		goto fail;
 568
 569	if (page->index < end_index)
 570		/* in this case, the page is within the limits of the file */
 571		len = PAGE_CACHE_SIZE;
 572	else {
 573		len = i_size & ~PAGE_CACHE_MASK;
 574
 575		if (page->index > end_index || !len) {
 576			/* in this case, the page is outside the limits
 577			 * (truncate in progress)
 578			 */
 579			ret = write_exec(pcol);
 580			if (unlikely(ret))
 581				goto fail;
 582			if (PageError(page))
 583				ClearPageError(page);
 584			unlock_page(page);
 585			EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
 586				     "outside the limits\n",
 587				     inode->i_ino, page->index);
 588			return 0;
 589		}
 590	}
 591
 592try_again:
 593
 594	if (unlikely(pcol->pg_first == -1)) {
 595		pcol->pg_first = page->index;
 596	} else if (unlikely((pcol->pg_first + pcol->nr_pages) !=
 597		   page->index)) {
 598		/* Discontinuity detected, split the request */
 599		ret = write_exec(pcol);
 600		if (unlikely(ret))
 601			goto fail;
 602
 603		EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
 604			     inode->i_ino, page->index);
 605		goto try_again;
 606	}
 607
 608	if (!pcol->bio) {
 609		ret = pcol_try_alloc(pcol);
 610		if (unlikely(ret))
 611			goto fail;
 612	}
 613
 614	EXOFS_DBGMSG2("    writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
 615		     inode->i_ino, page->index, len);
 616
 617	ret = pcol_add_page(pcol, page, len);
 618	if (unlikely(ret)) {
 619		EXOFS_DBGMSG("Failed pcol_add_page "
 620			     "nr_pages=%u total_length=0x%lx\n",
 621			     pcol->nr_pages, pcol->length);
 622
 623		/* split the request, next loop will start again */
 624		ret = write_exec(pcol);
 625		if (unlikely(ret)) {
 626			EXOFS_DBGMSG("write_exec faild => %d", ret);
 627			goto fail;
 628		}
 629
 630		goto try_again;
 631	}
 632
 633	BUG_ON(PageWriteback(page));
 634	set_page_writeback(page);
 635
 636	return 0;
 637
 638fail:
 639	EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
 640		     inode->i_ino, page->index, ret);
 641	set_bit(AS_EIO, &page->mapping->flags);
 642	unlock_page(page);
 643	return ret;
 644}
 645
 646static int exofs_writepages(struct address_space *mapping,
 647		       struct writeback_control *wbc)
 648{
 649	struct page_collect pcol;
 650	long start, end, expected_pages;
 651	int ret;
 652
 653	start = wbc->range_start >> PAGE_CACHE_SHIFT;
 654	end = (wbc->range_end == LLONG_MAX) ?
 655			start + mapping->nrpages :
 656			wbc->range_end >> PAGE_CACHE_SHIFT;
 657
 658	if (start || end)
 659		expected_pages = end - start + 1;
 660	else
 661		expected_pages = mapping->nrpages;
 662
 663	if (expected_pages < 32L)
 664		expected_pages = 32L;
 665
 666	EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
 667		     "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
 668		     mapping->host->i_ino, wbc->range_start, wbc->range_end,
 669		     mapping->nrpages, start, end, expected_pages);
 670
 671	_pcol_init(&pcol, expected_pages, mapping->host);
 672
 673	ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
 674	if (ret) {
 675		EXOFS_ERR("write_cache_pages => %d\n", ret);
 676		return ret;
 677	}
 678
 679	return write_exec(&pcol);
 680}
 681
 682static int exofs_writepage(struct page *page, struct writeback_control *wbc)
 683{
 684	struct page_collect pcol;
 685	int ret;
 686
 687	_pcol_init(&pcol, 1, page->mapping->host);
 688
 689	ret = writepage_strip(page, NULL, &pcol);
 690	if (ret) {
 691		EXOFS_ERR("exofs_writepage => %d\n", ret);
 692		return ret;
 693	}
 694
 695	return write_exec(&pcol);
 696}
 697
 698int exofs_write_begin(struct file *file, struct address_space *mapping,
 699		loff_t pos, unsigned len, unsigned flags,
 700		struct page **pagep, void **fsdata)
 701{
 702	int ret = 0;
 703	struct page *page;
 704
 705	page = *pagep;
 706	if (page == NULL) {
 707		ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
 708					 fsdata);
 709		if (ret) {
 710			EXOFS_DBGMSG("simple_write_begin faild\n");
 711			return ret;
 712		}
 713
 714		page = *pagep;
 715	}
 716
 717	 /* read modify write */
 718	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
 719		ret = _readpage(page, true);
 720		if (ret) {
 721			/*SetPageError was done by _readpage. Is it ok?*/
 722			unlock_page(page);
 723			EXOFS_DBGMSG("__readpage_filler faild\n");
 724		}
 725	}
 726
 727	return ret;
 728}
 729
 730static int exofs_write_begin_export(struct file *file,
 731		struct address_space *mapping,
 732		loff_t pos, unsigned len, unsigned flags,
 733		struct page **pagep, void **fsdata)
 734{
 735	*pagep = NULL;
 736
 737	return exofs_write_begin(file, mapping, pos, len, flags, pagep,
 738					fsdata);
 739}
 740
 741static int exofs_write_end(struct file *file, struct address_space *mapping,
 742			loff_t pos, unsigned len, unsigned copied,
 743			struct page *page, void *fsdata)
 744{
 745	struct inode *inode = mapping->host;
 746	/* According to comment in simple_write_end i_mutex is held */
 747	loff_t i_size = inode->i_size;
 748	int ret;
 749
 750	ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
 751	if (i_size != inode->i_size)
 752		mark_inode_dirty(inode);
 753	return ret;
 754}
 755
 756const struct address_space_operations exofs_aops = {
 757	.readpage	= exofs_readpage,
 758	.readpages	= exofs_readpages,
 759	.writepage	= exofs_writepage,
 760	.writepages	= exofs_writepages,
 761	.write_begin	= exofs_write_begin_export,
 762	.write_end	= exofs_write_end,
 763};
 764
 765/******************************************************************************
 766 * INODE OPERATIONS
 767 *****************************************************************************/
 768
 769/*
 770 * Test whether an inode is a fast symlink.
 771 */
 772static inline int exofs_inode_is_fast_symlink(struct inode *inode)
 773{
 774	struct exofs_i_info *oi = exofs_i(inode);
 775
 776	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
 777}
 778
 779/*
 780 * get_block_t - Fill in a buffer_head
 781 * An OSD takes care of block allocation so we just fake an allocation by
 782 * putting in the inode's sector_t in the buffer_head.
 783 * TODO: What about the case of create==0 and @iblock does not exist in the
 784 * object?
 785 */
 786static int exofs_get_block(struct inode *inode, sector_t iblock,
 787		    struct buffer_head *bh_result, int create)
 788{
 789	map_bh(bh_result, inode->i_sb, iblock);
 790	return 0;
 791}
 792
 793const struct osd_attr g_attr_logical_length = ATTR_DEF(
 794	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
 795
 796static int _do_truncate(struct inode *inode)
 797{
 798	struct exofs_i_info *oi = exofs_i(inode);
 799	loff_t isize = i_size_read(inode);
 800	int ret;
 801
 802	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 803
 804	nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
 805
 806	ret = exofs_oi_truncate(oi, (u64)isize);
 807	EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
 808	return ret;
 809}
 810
 811/*
 812 * Truncate a file to the specified size - all we have to do is set the size
 813 * attribute.  We make sure the object exists first.
 814 */
 815void exofs_truncate(struct inode *inode)
 816{
 817	struct exofs_i_info *oi = exofs_i(inode);
 818	int ret;
 819
 820	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
 821	     || S_ISLNK(inode->i_mode)))
 822		return;
 823	if (exofs_inode_is_fast_symlink(inode))
 824		return;
 825	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 826		return;
 827
 828	/* if we are about to truncate an object, and it hasn't been
 829	 * created yet, wait
 830	 */
 831	if (unlikely(wait_obj_created(oi)))
 832		goto fail;
 833
 834	ret = _do_truncate(inode);
 835	if (ret)
 836		goto fail;
 837
 838out:
 839	mark_inode_dirty(inode);
 840	return;
 841fail:
 842	make_bad_inode(inode);
 843	goto out;
 844}
 845
 846/*
 847 * Set inode attributes - just call generic functions.
 848 */
 849int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
 850{
 851	struct inode *inode = dentry->d_inode;
 852	int error;
 853
 854	error = inode_change_ok(inode, iattr);
 855	if (error)
 856		return error;
 857
 858	error = inode_setattr(inode, iattr);
 859	return error;
 860}
 861
 862/*
 863 * Read an inode from the OSD, and return it as is.  We also return the size
 864 * attribute in the 'obj_size' argument.
 865 */
 866static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
 867		    struct exofs_fcb *inode, uint64_t *obj_size)
 868{
 869	struct exofs_sb_info *sbi = sb->s_fs_info;
 870	struct osd_attr attrs[2];
 871	struct exofs_io_state *ios;
 872	int ret;
 873
 874	*obj_size = ~0;
 875	ret = exofs_get_io_state(sbi, &ios);
 876	if (unlikely(ret)) {
 877		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
 878		return ret;
 879	}
 880
 881	ios->obj.id = exofs_oi_objno(oi);
 882	exofs_make_credential(oi->i_cred, &ios->obj);
 883	ios->cred = oi->i_cred;
 884
 885	attrs[0] = g_attr_inode_data;
 886	attrs[1] = g_attr_logical_length;
 887	ios->in_attr = attrs;
 888	ios->in_attr_len = ARRAY_SIZE(attrs);
 889
 890	ret = exofs_sbi_read(ios);
 891	if (ret)
 892		goto out;
 893
 894	ret = extract_attr_from_ios(ios, &attrs[0]);
 895	if (ret) {
 896		EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
 897		goto out;
 898	}
 899	WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
 900	memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);
 901
 902	ret = extract_attr_from_ios(ios, &attrs[1]);
 903	if (ret) {
 904		EXOFS_ERR("%s: extract_attr of logical_length failed\n",
 905			  __func__);
 906		goto out;
 907	}
 908	*obj_size = get_unaligned_be64(attrs[1].val_ptr);
 909
 910out:
 911	exofs_put_io_state(ios);
 912	return ret;
 913}
 914
 915static void __oi_init(struct exofs_i_info *oi)
 916{
 917	init_waitqueue_head(&oi->i_wq);
 918	oi->i_flags = 0;
 919}
 920/*
 921 * Fill in an inode read from the OSD and set it up for use
 922 */
 923struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 924{
 925	struct exofs_i_info *oi;
 926	struct exofs_fcb fcb;
 927	struct inode *inode;
 928	uint64_t obj_size;
 929	int ret;
 930
 931	inode = iget_locked(sb, ino);
 932	if (!inode)
 933		return ERR_PTR(-ENOMEM);
 934	if (!(inode->i_state & I_NEW))
 935		return inode;
 936	oi = exofs_i(inode);
 937	__oi_init(oi);
 938
 939	/* read the inode from the osd */
 940	ret = exofs_get_inode(sb, oi, &fcb, &obj_size);
 941	if (ret)
 942		goto bad_inode;
 943
 944	set_obj_created(oi);
 945
 946	/* copy stuff from on-disk struct to in-memory struct */
 947	inode->i_mode = le16_to_cpu(fcb.i_mode);
 948	inode->i_uid = le32_to_cpu(fcb.i_uid);
 949	inode->i_gid = le32_to_cpu(fcb.i_gid);
 950	inode->i_nlink = le16_to_cpu(fcb.i_links_count);
 951	inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
 952	inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
 953	inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
 954	inode->i_ctime.tv_nsec =
 955		inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 956	oi->i_commit_size = le64_to_cpu(fcb.i_size);
 957	i_size_write(inode, oi->i_commit_size);
 958	inode->i_blkbits = EXOFS_BLKSHIFT;
 959	inode->i_generation = le32_to_cpu(fcb.i_generation);
 960
 961	if ((inode->i_size != obj_size) &&
 962		(!exofs_inode_is_fast_symlink(inode))) {
 963		EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
 964			  inode->i_size, _LLU(obj_size));
 965		/* FIXME: call exofs_inode_recovery() */
 966	}
 967
 968	oi->i_dir_start_lookup = 0;
 969
 970	if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
 971		ret = -ESTALE;
 972		goto bad_inode;
 973	}
 974
 975	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 976		if (fcb.i_data[0])
 977			inode->i_rdev =
 978				old_decode_dev(le32_to_cpu(fcb.i_data[0]));
 979		else
 980			inode->i_rdev =
 981				new_decode_dev(le32_to_cpu(fcb.i_data[1]));
 982	} else {
 983		memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
 984	}
 985
 986	if (S_ISREG(inode->i_mode)) {
 987		inode->i_op = &exofs_file_inode_operations;
 988		inode->i_fop = &exofs_file_operations;
 989		inode->i_mapping->a_ops = &exofs_aops;
 990	} else if (S_ISDIR(inode->i_mode)) {
 991		inode->i_op = &exofs_dir_inode_operations;
 992		inode->i_fop = &exofs_dir_operations;
 993		inode->i_mapping->a_ops = &exofs_aops;
 994	} else if (S_ISLNK(inode->i_mode)) {
 995		if (exofs_inode_is_fast_symlink(inode))
 996			inode->i_op = &exofs_fast_symlink_inode_operations;
 997		else {
 998			inode->i_op = &exofs_symlink_inode_operations;
 999			inode->i_mapping->a_ops = &exofs_aops;
1000		}
1001	} else {
1002		inode->i_op = &exofs_special_inode_operations;
1003		if (fcb.i_data[0])
1004			init_special_inode(inode, inode->i_mode,
1005			   old_decode_dev(le32_to_cpu(fcb.i_data[0])));
1006		else
1007			init_special_inode(inode, inode->i_mode,
1008			   new_decode_dev(le32_to_cpu(fcb.i_data[1])));
1009	}
1010
1011	unlock_new_inode(inode);
1012	return inode;
1013
1014bad_inode:
1015	iget_failed(inode);
1016	return ERR_PTR(ret);
1017}
1018
1019int __exofs_wait_obj_created(struct exofs_i_info *oi)
1020{
1021	if (!obj_created(oi)) {
1022		BUG_ON(!obj_2bcreated(oi));
1023		wait_event(oi->i_wq, obj_created(oi));
1024	}
1025	return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1026}
1027/*
1028 * Callback function from exofs_new_inode().  The important thing is that we
1029 * set the obj_created flag so that other methods know that the object exists on
1030 * the OSD.
1031 */
1032static void create_done(struct exofs_io_state *ios, void *p)
1033{
1034	struct inode *inode = p;
1035	struct exofs_i_info *oi = exofs_i(inode);
1036	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
1037	int ret;
1038
1039	ret = exofs_check_io(ios, NULL);
1040	exofs_put_io_state(ios);
1041
1042	atomic_dec(&sbi->s_curr_pending);
1043
1044	if (unlikely(ret)) {
1045		EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
1046			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
1047		/*TODO: When FS is corrupted creation can fail, object already
1048		 * exist. Get rid of this asynchronous creation, if exist
1049		 * increment the obj counter and try the next object. Until we
1050		 * succeed. All these dangling objects will be made into lost
1051		 * files by chkfs.exofs
1052		 */
1053	}
1054
1055	set_obj_created(oi);
1056
1057	atomic_dec(&inode->i_count);
1058	wake_up(&oi->i_wq);
1059}
1060
1061/*
1062 * Set up a new inode and create an object for it on the OSD
1063 */
1064struct inode *exofs_new_inode(struct inode *dir, int mode)
1065{
1066	struct super_block *sb;
1067	struct inode *inode;
1068	struct exofs_i_info *oi;
1069	struct exofs_sb_info *sbi;
1070	struct exofs_io_state *ios;
1071	int ret;
1072
1073	sb = dir->i_sb;
1074	inode = new_inode(sb);
1075	if (!inode)
1076		return ERR_PTR(-ENOMEM);
1077
1078	oi = exofs_i(inode);
1079	__oi_init(oi);
1080
1081	set_obj_2bcreated(oi);
1082
1083	sbi = sb->s_fs_info;
1084
1085	sb->s_dirt = 1;
1086	inode->i_uid = current->cred->fsuid;
1087	if (dir->i_mode & S_ISGID) {
1088		inode->i_gid = dir->i_gid;
1089		if (S_ISDIR(mode))
1090			mode |= S_ISGID;
1091	} else {
1092		inode->i_gid = current->cred->fsgid;
1093	}
1094	inode->i_mode = mode;
1095
1096	inode->i_ino = sbi->s_nextid++;
1097	inode->i_blkbits = EXOFS_BLKSHIFT;
1098	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1099	oi->i_commit_size = inode->i_size = 0;
1100	spin_lock(&sbi->s_next_gen_lock);
1101	inode->i_generation = sbi->s_next_generation++;
1102	spin_unlock(&sbi->s_next_gen_lock);
1103	insert_inode_hash(inode);
1104
1105	mark_inode_dirty(inode);
1106
1107	ret = exofs_get_io_state(sbi, &ios);
1108	if (unlikely(ret)) {
1109		EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
1110		return ERR_PTR(ret);
1111	}
1112
1113	ios->obj.id = exofs_oi_objno(oi);
1114	exofs_make_credential(oi->i_cred, &ios->obj);
1115
1116	/* increment the refcount so that the inode will still be around when we
1117	 * reach the callback
1118	 */
1119	atomic_inc(&inode->i_count);
1120
1121	ios->done = create_done;
1122	ios->private = inode;
1123	ios->cred = oi->i_cred;
1124	ret = exofs_sbi_create(ios);
1125	if (ret) {
1126		atomic_dec(&inode->i_count);
1127		exofs_put_io_state(ios);
1128		return ERR_PTR(ret);
1129	}
1130	atomic_inc(&sbi->s_curr_pending);
1131
1132	return inode;
1133}
1134
1135/*
1136 * struct to pass two arguments to update_inode's callback
1137 */
1138struct updatei_args {
1139	struct exofs_sb_info	*sbi;
1140	struct exofs_fcb	fcb;
1141};
1142
1143/*
1144 * Callback function from exofs_update_inode().
1145 */
1146static void updatei_done(struct exofs_io_state *ios, void *p)
1147{
1148	struct updatei_args *args = p;
1149
1150	exofs_put_io_state(ios);
1151
1152	atomic_dec(&args->sbi->s_curr_pending);
1153
1154	kfree(args);
1155}
1156
1157/*
1158 * Write the inode to the OSD.  Just fill up the struct, and set the attribute
1159 * synchronously or asynchronously depending on the do_sync flag.
1160 */
1161static int exofs_update_inode(struct inode *inode, int do_sync)
1162{
1163	struct exofs_i_info *oi = exofs_i(inode);
1164	struct super_block *sb = inode->i_sb;
1165	struct exofs_sb_info *sbi = sb->s_fs_info;
1166	struct exofs_io_state *ios;
1167	struct osd_attr attr;
1168	struct exofs_fcb *fcb;
1169	struct updatei_args *args;
1170	int ret;
1171
1172	args = kzalloc(sizeof(*args), GFP_KERNEL);
1173	if (!args)
1174		return -ENOMEM;
1175
1176	fcb = &args->fcb;
1177
1178	fcb->i_mode = cpu_to_le16(inode->i_mode);
1179	fcb->i_uid = cpu_to_le32(inode->i_uid);
1180	fcb->i_gid = cpu_to_le32(inode->i_gid);
1181	fcb->i_links_count = cpu_to_le16(inode->i_nlink);
1182	fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
1183	fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
1184	fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
1185	oi->i_commit_size = i_size_read(inode);
1186	fcb->i_size = cpu_to_le64(oi->i_commit_size);
1187	fcb->i_generation = cpu_to_le32(inode->i_generation);
1188
1189	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
1190		if (old_valid_dev(inode->i_rdev)) {
1191			fcb->i_data[0] =
1192				cpu_to_le32(old_encode_dev(inode->i_rdev));
1193			fcb->i_data[1] = 0;
1194		} else {
1195			fcb->i_data[0] = 0;
1196			fcb->i_data[1] =
1197				cpu_to_le32(new_encode_dev(inode->i_rdev));
1198			fcb->i_data[2] = 0;
1199		}
1200	} else
1201		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1202
1203	ret = exofs_get_io_state(sbi, &ios);
1204	if (unlikely(ret)) {
1205		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
1206		goto free_args;
1207	}
1208
1209	attr = g_attr_inode_data;
1210	attr.val_ptr = fcb;
1211	ios->out_attr_len = 1;
1212	ios->out_attr = &attr;
1213
1214	if (!obj_created(oi)) {
1215		EXOFS_DBGMSG("!obj_created\n");
1216		BUG_ON(!obj_2bcreated(oi));
1217		wait_event(oi->i_wq, obj_created(oi));
1218		EXOFS_DBGMSG("wait_event done\n");
1219	}
1220
1221	if (!do_sync) {
1222		args->sbi = sbi;
1223		ios->done = updatei_done;
1224		ios->private = args;
1225	}
1226
1227	ret = exofs_oi_write(oi, ios);
1228	if (!do_sync && !ret) {
1229		atomic_inc(&sbi->s_curr_pending);
1230		goto out; /* deallocation in updatei_done */
1231	}
1232
1233	exofs_put_io_state(ios);
1234free_args:
1235	kfree(args);
1236out:
1237	EXOFS_DBGMSG("ret=>%d\n", ret);
1238	return ret;
1239}
1240
/*
 * Write @inode out to the OSD.  Thin wrapper around exofs_update_inode();
 * @wait is passed through unchanged as its do_sync flag.
 */
int exofs_write_inode(struct inode *inode, int wait)
{
	int do_sync = wait;

	return exofs_update_inode(inode, do_sync);
}
1245
1246/*
1247 * Callback function from exofs_delete_inode() - don't have much cleaning up to
1248 * do.
1249 */
1250static void delete_done(struct exofs_io_state *ios, void *p)
1251{
1252	struct exofs_sb_info *sbi = p;
1253
1254	exofs_put_io_state(ios);
1255
1256	atomic_dec(&sbi->s_curr_pending);
1257}
1258
1259/*
1260 * Called when the refcount of an inode reaches zero.  We remove the object
1261 * from the OSD here.  We make sure the object was created before we try and
1262 * delete it.
1263 */
1264void exofs_delete_inode(struct inode *inode)
1265{
1266	struct exofs_i_info *oi = exofs_i(inode);
1267	struct super_block *sb = inode->i_sb;
1268	struct exofs_sb_info *sbi = sb->s_fs_info;
1269	struct exofs_io_state *ios;
1270	int ret;
1271
1272	truncate_inode_pages(&inode->i_data, 0);
1273
1274	if (is_bad_inode(inode))
1275		goto no_delete;
1276
1277	mark_inode_dirty(inode);
1278	exofs_update_inode(inode, inode_needs_sync(inode));
1279
1280	inode->i_size = 0;
1281	if (inode->i_blocks)
1282		exofs_truncate(inode);
1283
1284	clear_inode(inode);
1285
1286	ret = exofs_get_io_state(sbi, &ios);
1287	if (unlikely(ret)) {
1288		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1289		return;
1290	}
1291
1292	/* if we are deleting an obj that hasn't been created yet, wait */
1293	if (!obj_created(oi)) {
1294		BUG_ON(!obj_2bcreated(oi));
1295		wait_event(oi->i_wq, obj_created(oi));
1296	}
1297
1298	ios->obj.id = exofs_oi_objno(oi);
1299	ios->done = delete_done;
1300	ios->private = sbi;
1301	ios->cred = oi->i_cred;
1302	ret = exofs_sbi_remove(ios);
1303	if (ret) {
1304		EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
1305		exofs_put_io_state(ios);
1306		return;
1307	}
1308	atomic_inc(&sbi->s_curr_pending);
1309
1310	return;
1311
1312no_delete:
1313	clear_inode(inode);
1314}
Configure Feed

Configure Feed